Diffstat (limited to 'src')
254 files changed, 7556 insertions, 5626 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index d1d177b519..a0ae07752e 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -15,6 +15,8 @@ add_library(audio_core STATIC command_generator.cpp command_generator.h common.h + delay_line.cpp + delay_line.h effect_context.cpp effect_context.h info_updater.cpp diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp index 5b10655205..437cc5ccd0 100644 --- a/src/audio_core/command_generator.cpp +++ b/src/audio_core/command_generator.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cmath> +#include <numbers> #include "audio_core/algorithm/interpolate.h" #include "audio_core/command_generator.h" #include "audio_core/effect_context.h" @@ -13,6 +15,20 @@ namespace AudioCore { namespace { constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00; constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL; +using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>; + +constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f}; +constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f}; +constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f}; +constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f}; +constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{ + 0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f, + 0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f, + 0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f}; +constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{ + 0.67096f, 0.61027f, 1.0f, 0.35680f, 0.68361f, 0.65978f, 0.51939f, + 0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f, + 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; template <std::size_t N> void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { @@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { } } +float Pow10(float x) { + if (x >= 0.0f) { + return 1.0f; + } else if (x <= -5.3f) { + return 0.0f; + } + return std::pow(10.0f, x); +} + +float SinD(float degrees) { + return std::sin(degrees * std::numbers::pi_v<float> / 180.0f); +} + +float CosD(float degrees) { + return std::cos(degrees * std::numbers::pi_v<float> / 180.0f); +} + +float ToFloat(s32 sample) { + return static_cast<float>(sample) / 65536.f; +} + +s32 ToS32(float sample) { + constexpr auto min = -8388608.0f; + constexpr auto max = 8388607.f; + float rescaled_sample = sample * 65536.0f; + if (rescaled_sample < min) { + rescaled_sample = min; + } + if (rescaled_sample > max) { + rescaled_sample = max; + } + return static_cast<s32>(rescaled_sample); +} + +constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, + 1, 1, 1, 0, 0, 0, 0, 1, 1, 1}; + +constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2, + 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; + +constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2, + 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; + +template <std::size_t CHANNEL_COUNT> +void ApplyReverbGeneric(I3dl2ReverbState& state, + const std::array<const 
s32*, AudioCommon::MAX_CHANNEL_COUNT>& input, + const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output, + s32 sample_count) { + + auto GetTapLookup = []() { + if constexpr (CHANNEL_COUNT == 1) { + return REVERB_TAP_INDEX_1CH; + } else if constexpr (CHANNEL_COUNT == 2) { + return REVERB_TAP_INDEX_2CH; + } else if constexpr (CHANNEL_COUNT == 4) { + return REVERB_TAP_INDEX_4CH; + } else if constexpr (CHANNEL_COUNT == 6) { + return REVERB_TAP_INDEX_6CH; + } + }; + + const auto& tap_index_lut = GetTapLookup(); + for (s32 sample = 0; sample < sample_count; sample++) { + std::array<f32, CHANNEL_COUNT> out_samples{}; + std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{}; + std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{}; + std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{}; + + // Mix everything into a single sample + s32 temp_mixed_sample = 0; + for (std::size_t i = 0; i < CHANNEL_COUNT; i++) { + temp_mixed_sample += input[i][sample]; + } + const auto current_sample = ToFloat(temp_mixed_sample); + const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps); + + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) { + const auto tapped_samp = + state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i]; + out_samples[tap_index_lut[i]] += tapped_samp; + + if constexpr (CHANNEL_COUNT == 6) { + // Handle the LFE channel + out_samples[5] += tapped_samp; + } + } + + state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1; + state.early_delay_line.Tick(state.lowpass_0); + + for (std::size_t i = 0; i < CHANNEL_COUNT; i++) { + out_samples[i] *= state.early_gain; + } + + // The two-channel case seems to apply a late gain below, so we need to save this filter value + f32 filter{}; + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + filter = state.fdn_delay_line[i].GetOutputSample(); + const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i]; + state.shelf_filter[i] = + filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i]; + fsamp[i] = computed; + } + + // Mixing matrix + mixed[0] = fsamp[1] + fsamp[2]; + mixed[1] = -fsamp[0] - fsamp[3]; + mixed[2] = fsamp[0] - fsamp[3]; + mixed[3] = fsamp[1] - fsamp[2]; + + if constexpr (CHANNEL_COUNT == 2) { + for (auto& mix : mixed) { + mix *= (filter * state.late_gain); + } + } + + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + const auto late = early_tap * state.late_gain; + osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]); + osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]); + state.fdn_delay_line[i].Tick(osamp[i]); + } + + if constexpr (CHANNEL_COUNT == 1) { + output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) + + (out_samples[0] + osamp[0] + osamp[1])); + } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) { + for (std::size_t i = 0; i < CHANNEL_COUNT; i++) { + output[i][sample] = + ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i])); + } + } else if constexpr (CHANNEL_COUNT == 6) { + const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3])); + for (std::size_t i = 0; i < 4; i++) { + output[i][sample] = + ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i])); + } + output[4][sample] = + ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center)); + output[5][sample] = + ToS32(state.dry_gain * ToFloat(input[5][sample])
+ (out_samples[5] + osamp[3])); + } + } +} + } // namespace CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_, @@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic } // Generate biquad filter - // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter, - // dsp_state.biquad_filter_state, - // mix_buffer_count + channel, mix_buffer_count + - // channel, worker_params.sample_count, - // voice_info.GetInParams().node_id); + // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter, + // dsp_state.biquad_filter_state, + // mix_buffer_count + channel, mix_buffer_count + channel, + // worker_params.sample_count, voice_info.GetInParams().node_id); } } @@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) { void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled) { - if (!enabled) { + auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info); + const auto& params = reverb->GetParams(); + auto& state = reverb->GetState(); + const auto channel_count = params.channel_count; + + if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) { return; } - const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams(); - const auto channel_count = params.channel_count; + + std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{}; + std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{}; + + const auto status = params.status; for (s32 i = 0; i < channel_count; i++) { - // TODO(ogniK): Actually implement reverb - /* - if (params.input[i] != params.output[i]) { - const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); - auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); - ApplyMix<1>(output, input, 32768, worker_params.sample_count); - }*/ - auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); - std::memset(output, 0, worker_params.sample_count * sizeof(s32)); + input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]); + output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]); + } + + if (enabled) { + if (status == ParameterStatus::Initialized) { + InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer()); + } else if (status == ParameterStatus::Updating) { + UpdateI3dl2Reverb(reverb->GetParams(), state, false); + } + } + + if (enabled) { + switch (channel_count) { + case 1: + ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count); + break; + case 2: + ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count); + break; + case 4: + ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count); + break; + case 6: + ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count); + break; + } + } else { + for (s32 i = 0; i < channel_count; i++) { + // Only copy if the buffer input and output do not match! 
+ if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { + std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); + } + } } } @@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3 return sample_count; } +void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, + std::vector<u8>& work_buffer) { + // Reset state + state.lowpass_0 = 0.0f; + state.lowpass_1 = 0.0f; + state.lowpass_2 = 0.0f; + + state.early_delay_line.Reset(); + state.early_tap_steps.fill(0); + state.early_gain = 0.0f; + state.late_gain = 0.0f; + state.early_to_late_taps = 0; + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + state.fdn_delay_line[i].Reset(); + state.decay_delay_line0[i].Reset(); + state.decay_delay_line1[i].Reset(); + } + state.last_reverb_echo = 0.0f; + state.center_delay_line.Reset(); + for (auto& coef : state.lpf_coefficients) { + coef.fill(0.0f); + } + state.shelf_filter.fill(0.0f); + state.dry_gain = 0.0f; + + const auto sample_rate = info.sample_rate / 1000; + f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data()); + + s32 delay_samples{}; + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + delay_samples = + AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]); + state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr); + work_buffer_ptr += delay_samples + 1; + + delay_samples = + AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]); + state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr); + work_buffer_ptr += delay_samples + 1; + + delay_samples = + AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]); + state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr); + work_buffer_ptr += delay_samples + 1; + } + delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f); + state.center_delay_line.Initialize(delay_samples, work_buffer_ptr); + work_buffer_ptr += delay_samples + 1; + + delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f); + state.early_delay_line.Initialize(delay_samples, work_buffer_ptr); + + UpdateI3dl2Reverb(info, state, true); +} + +void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, + bool should_clear) { + + state.dry_gain = info.dry_gain; + state.shelf_filter.fill(0.0f); + state.lowpass_0 = 0.0f; + state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f); + state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f); + + const auto sample_rate = info.sample_rate / 1000; + const f32 hf_gain = Pow10(info.room_hf / 2000.0f); + if (hf_gain >= 1.0f) { + state.lowpass_2 = 1.0f; + state.lowpass_1 = 0.0f; + } else { + const auto a = 1.0f - hf_gain; + const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference / + static_cast<f32>(info.sample_rate))); + const auto c = std::sqrt(b * b - 4.0f * a * a); + + state.lowpass_1 = (b - c) / (2.0f * a); + state.lowpass_2 = 1.0f - state.lowpass_1; + } + state.early_to_late_taps = AudioCommon::CalculateDelaySamples( + sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay)); + + state.last_reverb_echo = 0.6f * info.diffusion * 0.01f; + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + const auto length = + FDN_MIN_DELAY_LINE_TIMES[i] + + (info.density / 100.0f) * 
(FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]); + state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length)); + + const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() + + state.decay_delay_line0[i].GetDelay() + + state.decay_delay_line1[i].GetDelay(); + + float a = (-60.0f * static_cast<f32>(delay_sample_counts)) / + (info.decay_time * static_cast<f32>(info.sample_rate)); + float b = a / info.hf_decay_ratio; + float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) / + SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)); + float d = Pow10((b - a) / 40.0f); + float e = Pow10((b + a) / 40.0f) * 0.7071f; + + state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d); + state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d); + state.lpf_coefficients[2][i] = (c - d) / (c + d); + + state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo); + state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo); + } + + if (should_clear) { + for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) { + state.fdn_delay_line[i].Clear(); + state.decay_delay_line0[i].Clear(); + state.decay_delay_line1[i].Clear(); + } + state.early_delay_line.Clear(); + state.center_delay_line.Clear(); + } + + const auto max_early_delay = state.early_delay_line.GetMaxDelay(); + const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f); + for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) { + const auto length = AudioCommon::CalculateDelaySamples( + sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]); + state.early_tap_steps[tap] = std::min(length, max_early_delay); + } +} + void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume, s32 channel, s32 node_id) { const auto last = static_cast<s32>(last_volume * 32768.0f); diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h index b937350b17..2ebb755b04 100644 --- a/src/audio_core/command_generator.h +++ b/src/audio_core/command_generator.h @@ -21,6 +21,8 @@ class ServerMixInfo; class EffectContext; class EffectBase; struct AuxInfoDSP; +struct I3dl2ReverbParams; +struct I3dl2ReverbState; using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>; class CommandGenerator { @@ -80,6 +82,9 @@ private: s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, u32 sample_count, u32 read_offset, u32 read_count); + void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, + std::vector<u8>& work_buffer); + void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear); // DSP Code s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, s32 channel, std::size_t mix_offset); diff --git a/src/audio_core/common.h b/src/audio_core/common.h index ec59a3ba9e..fe546c55d0 100644 --- a/src/audio_core/common.h +++ b/src/audio_core/common.h @@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this // and our const ends up being 0x3f04, the 4 bytes are most // likely the sample history constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY; +constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f; +constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f; +constexpr std::size_t I3DL2REVERB_TAPS = 20; +constexpr 
std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4; +using Fractional = s32; + +template <typename T> +constexpr Fractional ToFractional(T x) { + return static_cast<Fractional>(x * static_cast<T>(0x4000)); +} + +constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) { + return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14); +} + +constexpr s32 FractionalToFixed(Fractional x) { + const auto s = x & (1 << 13); + return static_cast<s32>(x >> 14) + s; +} + +constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) { + return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time))); +} static constexpr u32 VersionFromRevision(u32_le rev) { // "REV7" -> 7 diff --git a/src/audio_core/delay_line.cpp b/src/audio_core/delay_line.cpp new file mode 100644 index 0000000000..f4e4dd8d29 --- /dev/null +++ b/src/audio_core/delay_line.cpp @@ -0,0 +1,104 @@ +#include <cstring> +#include "audio_core/delay_line.h" + +namespace AudioCore { +DelayLineBase::DelayLineBase() = default; +DelayLineBase::~DelayLineBase() = default; + +void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) { + buffer = src_buffer; + buffer_end = buffer + max_delay_; + max_delay = max_delay_; + output = buffer; + SetDelay(max_delay_); + Clear(); +} + +void DelayLineBase::SetDelay(s32 new_delay) { + if (max_delay < new_delay) { + return; + } + delay = new_delay; + input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1)); +} + +s32 DelayLineBase::GetDelay() const { + return delay; +} + +s32 DelayLineBase::GetMaxDelay() const { + return max_delay; +} + +f32 DelayLineBase::TapOut(s32 last_sample) { + const float* ptr = input - (last_sample + 1); + if (ptr < buffer) { + ptr += (max_delay + 1); + } + + return *ptr; +} + +f32 DelayLineBase::Tick(f32 sample) { + *(input++) = sample; + const auto out_sample = *(output++); + + if (buffer_end < input) { + input = buffer; + } + + if (buffer_end < output) { + output = buffer; + } + + return out_sample; +} + +float* DelayLineBase::GetInput() { + return input; +} + +const float* DelayLineBase::GetInput() const { + return input; +} + +f32 DelayLineBase::GetOutputSample() const { + return *output; +} + +void DelayLineBase::Clear() { + std::memset(buffer, 0, sizeof(float) * max_delay); +} + +void DelayLineBase::Reset() { + buffer = nullptr; + buffer_end = nullptr; + max_delay = 0; + input = nullptr; + output = nullptr; + delay = 0; +} + +DelayLineAllPass::DelayLineAllPass() = default; +DelayLineAllPass::~DelayLineAllPass() = default; + +void DelayLineAllPass::Initialize(u32 delay_, float coeffcient_, f32* src_buffer) { + DelayLineBase::Initialize(delay_, src_buffer); + SetCoefficient(coeffcient_); +} + +void DelayLineAllPass::SetCoefficient(float coeffcient_) { + coefficient = coeffcient_; +} + +f32 DelayLineAllPass::Tick(f32 sample) { + const auto temp = sample - coefficient * *output; + return coefficient * temp + DelayLineBase::Tick(temp); +} + +void DelayLineAllPass::Reset() { + coefficient = 0.0f; + DelayLineBase::Reset(); +} + +} // namespace AudioCore diff --git a/src/audio_core/delay_line.h b/src/audio_core/delay_line.h new file mode 100644 index 0000000000..cafddd4328 --- /dev/null +++ b/src/audio_core/delay_line.h @@ -0,0 +1,46 @@ +#pragma once + +#include "common/common_types.h" + +namespace AudioCore { + +class DelayLineBase { +public: + DelayLineBase(); + ~DelayLineBase(); + + void Initialize(s32 max_delay_, float* src_buffer); + void SetDelay(s32 new_delay); + s32 GetDelay() const; + s32 GetMaxDelay() 
const; + f32 TapOut(s32 last_sample); + f32 Tick(f32 sample); + float* GetInput(); + const float* GetInput() const; + f32 GetOutputSample() const; + void Clear(); + void Reset(); + +protected: + float* buffer{nullptr}; + float* buffer_end{nullptr}; + s32 max_delay{}; + float* input{nullptr}; + float* output{nullptr}; + s32 delay{}; +}; + +class DelayLineAllPass final : public DelayLineBase { +public: + DelayLineAllPass(); + ~DelayLineAllPass(); + + void Initialize(u32 delay, float coeffcient_, f32* src_buffer); + void SetCoefficient(float coeffcient_); + f32 Tick(f32 sample); + void Reset(); + +private: + float coefficient{}; +}; +} // namespace AudioCore diff --git a/src/audio_core/effect_context.cpp b/src/audio_core/effect_context.cpp index f770b96082..89e4573c79 100644 --- a/src/audio_core/effect_context.cpp +++ b/src/audio_core/effect_context.cpp @@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const { return processing_order; } +std::vector<u8>& EffectBase::GetWorkBuffer() { + return work_buffer; +} + +const std::vector<u8>& EffectBase::GetWorkBuffer() const { + return work_buffer; +} + EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {} EffectI3dl2Reverb::~EffectI3dl2Reverb() = default; @@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) { usage = UsageState::Initialized; params.status = ParameterStatus::Initialized; skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0; + if (!skipped) { + auto& cur_work_buffer = GetWorkBuffer(); + // Has two buffers internally + cur_work_buffer.resize(in_params.buffer_size * 2); + std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0); + } } } @@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() { GetParams().status = ParameterStatus::Updated; } +I3dl2ReverbState& EffectI3dl2Reverb::GetState() { + return state; +} + +const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const { + return state; +} + EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {} EffectBiquadFilter::~EffectBiquadFilter() = default; diff --git a/src/audio_core/effect_context.h b/src/audio_core/effect_context.h index c5e0b398ce..5e0655dd7e 100644 --- a/src/audio_core/effect_context.h +++ b/src/audio_core/effect_context.h @@ -8,6 +8,7 @@ #include <memory> #include <vector> #include "audio_core/common.h" +#include "audio_core/delay_line.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/swap.h" @@ -194,6 +195,8 @@ public: [[nodiscard]] bool IsEnabled() const; [[nodiscard]] s32 GetMixID() const; [[nodiscard]] s32 GetProcessingOrder() const; + [[nodiscard]] std::vector<u8>& GetWorkBuffer(); + [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const; protected: UsageState usage{UsageState::Invalid}; @@ -201,6 +204,7 @@ protected: s32 mix_id{}; s32 processing_order{}; bool enabled = false; + std::vector<u8> work_buffer{}; }; template <typename T> @@ -212,7 +216,7 @@ public: return internal_params; } - const I3dl2ReverbParams& GetParams() const { + const T& GetParams() const { return internal_params; } @@ -229,6 +233,27 @@ public: void UpdateForCommandGeneration() override; }; +struct I3dl2ReverbState { + f32 lowpass_0{}; + f32 lowpass_1{}; + f32 lowpass_2{}; + + DelayLineBase early_delay_line{}; + std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{}; + f32 early_gain{}; + f32 late_gain{}; + + u32 early_to_late_taps{}; + std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> 
fdn_delay_line{}; + std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{}; + std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{}; + f32 last_reverb_echo{}; + DelayLineBase center_delay_line{}; + std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{}; + std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{}; + f32 dry_gain{}; +}; + class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> { public: explicit EffectI3dl2Reverb(); @@ -237,8 +262,12 @@ public: void Update(EffectInfo::InParams& in_params) override; void UpdateForCommandGeneration() override; + I3dl2ReverbState& GetState(); + const I3dl2ReverbState& GetState() const; + private: bool skipped = false; + I3dl2ReverbState state{}; }; class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> { diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 5b0b285cdd..b0f6f0c346 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -111,7 +111,14 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) { sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); - core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer) - ns_late, release_event, {}); + const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer); + + // If ns_late is higher than the update rate ignore the delay + if (ns_late > buffer_release_ns) { + ns_late = {}; + } + + core_timing.ScheduleEvent(buffer_release_ns - ns_late, release_event, {}); } void Stream::ReleaseActiveBuffer(std::chrono::nanoseconds ns_late) { diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index bfd11e76da..788516dedc 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -167,8 +167,8 @@ add_library(common STATIC threadsafe_queue.h time_zone.cpp time_zone.h + tiny_mt.h tree.h - uint128.cpp uint128.h uuid.cpp uuid.h @@ -206,6 +206,8 @@ if (MSVC) else() target_compile_options(common PRIVATE -Werror + + $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation> ) endif() diff --git a/src/common/alignment.h b/src/common/alignment.h index fb81f10d8b..32d796ffa6 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -42,6 +42,11 @@ requires std::is_integral_v<T>[[nodiscard]] constexpr bool IsAligned(T value, si return (value & mask) == 0; } +template <typename T, typename U> +requires std::is_integral_v<T>[[nodiscard]] constexpr T DivideUp(T x, U y) { + return (x + (y - 1)) / y; +} + template <typename T, size_t Align = 16> class AlignmentAllocator { public: diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp index 4e1d874b53..66218fc21b 100644 --- a/src/common/cityhash.cpp +++ b/src/common/cityhash.cpp @@ -28,8 +28,10 @@ // compromising on hash quality. 
#include <algorithm> -#include <string.h> // for memcpy and memset -#include "cityhash.h" +#include <cstring> +#include <utility> + +#include "common/cityhash.h" #include "common/swap.h" // #include "config.h" @@ -42,21 +44,17 @@ using namespace std; -typedef uint8_t uint8; -typedef uint32_t uint32; -typedef uint64_t uint64; - namespace Common { -static uint64 UNALIGNED_LOAD64(const char* p) { - uint64 result; - memcpy(&result, p, sizeof(result)); +static u64 unaligned_load64(const char* p) { + u64 result; + std::memcpy(&result, p, sizeof(result)); return result; } -static uint32 UNALIGNED_LOAD32(const char* p) { - uint32 result; - memcpy(&result, p, sizeof(result)); +static u32 unaligned_load32(const char* p) { + u32 result; + std::memcpy(&result, p, sizeof(result)); return result; } @@ -76,64 +74,64 @@ static uint32 UNALIGNED_LOAD32(const char* p) { #endif #endif -static uint64 Fetch64(const char* p) { - return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +static u64 Fetch64(const char* p) { + return uint64_in_expected_order(unaligned_load64(p)); } -static uint32 Fetch32(const char* p) { - return uint32_in_expected_order(UNALIGNED_LOAD32(p)); +static u32 Fetch32(const char* p) { + return uint32_in_expected_order(unaligned_load32(p)); } // Some primes between 2^63 and 2^64 for various uses. -static const uint64 k0 = 0xc3a5c85c97cb3127ULL; -static const uint64 k1 = 0xb492b66fbe98f273ULL; -static const uint64 k2 = 0x9ae16a3b2f90404fULL; +static constexpr u64 k0 = 0xc3a5c85c97cb3127ULL; +static constexpr u64 k1 = 0xb492b66fbe98f273ULL; +static constexpr u64 k2 = 0x9ae16a3b2f90404fULL; // Bitwise right rotate. Normally this will compile to a single // instruction, especially if the shift is a manifest constant. -static uint64 Rotate(uint64 val, int shift) { +static u64 Rotate(u64 val, int shift) { // Avoid shifting by 64: doing so yields an undefined result. return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); } -static uint64 ShiftMix(uint64 val) { +static u64 ShiftMix(u64 val) { return val ^ (val >> 47); } -static uint64 HashLen16(uint64 u, uint64 v) { - return Hash128to64(uint128(u, v)); +static u64 HashLen16(u64 u, u64 v) { + return Hash128to64(u128{u, v}); } -static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { +static u64 HashLen16(u64 u, u64 v, u64 mul) { // Murmur-inspired hashing. 
- uint64 a = (u ^ v) * mul; + u64 a = (u ^ v) * mul; a ^= (a >> 47); - uint64 b = (v ^ a) * mul; + u64 b = (v ^ a) * mul; b ^= (b >> 47); b *= mul; return b; } -static uint64 HashLen0to16(const char* s, std::size_t len) { +static u64 HashLen0to16(const char* s, size_t len) { if (len >= 8) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) + k2; - uint64 b = Fetch64(s + len - 8); - uint64 c = Rotate(b, 37) * mul + a; - uint64 d = (Rotate(a, 25) + b) * mul; + u64 mul = k2 + len * 2; + u64 a = Fetch64(s) + k2; + u64 b = Fetch64(s + len - 8); + u64 c = Rotate(b, 37) * mul + a; + u64 d = (Rotate(a, 25) + b) * mul; return HashLen16(c, d, mul); } if (len >= 4) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch32(s); + u64 mul = k2 + len * 2; + u64 a = Fetch32(s); return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); } if (len > 0) { - uint8 a = s[0]; - uint8 b = s[len >> 1]; - uint8 c = s[len - 1]; - uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); - uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2); + u8 a = s[0]; + u8 b = s[len >> 1]; + u8 c = s[len - 1]; + u32 y = static_cast<u32>(a) + (static_cast<u32>(b) << 8); + u32 z = static_cast<u32>(len) + (static_cast<u32>(c) << 2); return ShiftMix(y * k2 ^ z * k0) * k2; } return k2; @@ -141,22 +139,21 @@ static uint64 HashLen0to16(const char* s, std::size_t len) { // This probably works well for 16-byte strings as well, but it may be overkill // in that case. -static uint64 HashLen17to32(const char* s, std::size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k1; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 8) * mul; - uint64 d = Fetch64(s + len - 16) * k2; +static u64 HashLen17to32(const char* s, size_t len) { + u64 mul = k2 + len * 2; + u64 a = Fetch64(s) * k1; + u64 b = Fetch64(s + 8); + u64 c = Fetch64(s + len - 8) * mul; + u64 d = Fetch64(s + len - 16) * k2; return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); } // Return a 16-byte hash for 48 bytes. Quick and dirty. // Callers do best to use "random-looking" values for a and b. -static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, - uint64 b) { +static pair<u64, u64> WeakHashLen32WithSeeds(u64 w, u64 x, u64 y, u64 z, u64 a, u64 b) { a += w; b = Rotate(b + a + z, 21); - uint64 c = a; + u64 c = a; a += x; a += y; b += Rotate(a, 44); @@ -164,34 +161,34 @@ static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, } // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. -static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { +static pair<u64, u64> WeakHashLen32WithSeeds(const char* s, u64 a, u64 b) { return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, b); } // Return an 8-byte hash for 33 to 64 bytes. 
-static uint64 HashLen33to64(const char* s, std::size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k2; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 24); - uint64 d = Fetch64(s + len - 32); - uint64 e = Fetch64(s + 16) * k2; - uint64 f = Fetch64(s + 24) * 9; - uint64 g = Fetch64(s + len - 8); - uint64 h = Fetch64(s + len - 16) * mul; - uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; - uint64 v = ((a + g) ^ d) + f + 1; - uint64 w = swap64((u + v) * mul) + h; - uint64 x = Rotate(e + f, 42) + c; - uint64 y = (swap64((v + w) * mul) + g) * mul; - uint64 z = e + f + c; +static u64 HashLen33to64(const char* s, size_t len) { + u64 mul = k2 + len * 2; + u64 a = Fetch64(s) * k2; + u64 b = Fetch64(s + 8); + u64 c = Fetch64(s + len - 24); + u64 d = Fetch64(s + len - 32); + u64 e = Fetch64(s + 16) * k2; + u64 f = Fetch64(s + 24) * 9; + u64 g = Fetch64(s + len - 8); + u64 h = Fetch64(s + len - 16) * mul; + u64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; + u64 v = ((a + g) ^ d) + f + 1; + u64 w = swap64((u + v) * mul) + h; + u64 x = Rotate(e + f, 42) + c; + u64 y = (swap64((v + w) * mul) + g) * mul; + u64 z = e + f + c; a = swap64((x + z) * mul + y) + b; b = ShiftMix((z + a) * mul + d + h) * mul; return b + x; } -uint64 CityHash64(const char* s, std::size_t len) { +u64 CityHash64(const char* s, size_t len) { if (len <= 32) { if (len <= 16) { return HashLen0to16(s, len); @@ -204,15 +201,15 @@ uint64 CityHash64(const char* s, std::size_t len) { // For strings over 64 bytes we hash the end first, and then as we // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64 x = Fetch64(s + len - 40); - uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); - uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); - pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); - pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + u64 x = Fetch64(s + len - 40); + u64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + u64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair<u64, u64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair<u64, u64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); x = x * k1 + Fetch64(s); // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. - len = (len - 1) & ~static_cast<std::size_t>(63); + len = (len - 1) & ~static_cast<size_t>(63); do { x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; @@ -229,21 +226,21 @@ uint64 CityHash64(const char* s, std::size_t len) { HashLen16(v.second, w.second) + x); } -uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) { +u64 CityHash64WithSeed(const char* s, size_t len, u64 seed) { return CityHash64WithSeeds(s, len, k2, seed); } -uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) { +u64 CityHash64WithSeeds(const char* s, size_t len, u64 seed0, u64 seed1) { return HashLen16(CityHash64(s, len) - seed0, seed1); } // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings // of any length representable in signed long. Based on City and Murmur. 
-static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) { - uint64 a = Uint128Low64(seed); - uint64 b = Uint128High64(seed); - uint64 c = 0; - uint64 d = 0; +static u128 CityMurmur(const char* s, size_t len, u128 seed) { + u64 a = seed[0]; + u64 b = seed[1]; + u64 c = 0; + u64 d = 0; signed long l = static_cast<long>(len) - 16; if (l <= 0) { // len <= 16 a = ShiftMix(a * k1) * k1; @@ -266,20 +263,20 @@ static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) { } a = HashLen16(a, c); b = HashLen16(d, b); - return uint128(a ^ b, HashLen16(b, a)); + return u128{a ^ b, HashLen16(b, a)}; } -uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) { +u128 CityHash128WithSeed(const char* s, size_t len, u128 seed) { if (len < 128) { return CityMurmur(s, len, seed); } // We expect len >= 128 to be the common case. Keep 56 bytes of state: // v, w, x, y, and z. - pair<uint64, uint64> v, w; - uint64 x = Uint128Low64(seed); - uint64 y = Uint128High64(seed); - uint64 z = len * k1; + pair<u64, u64> v, w; + u64 x = seed[0]; + u64 y = seed[1]; + u64 z = len * k1; v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); w.first = Rotate(y + z, 35) * k1 + x; @@ -313,7 +310,7 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) { w.first *= 9; v.first *= k0; // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (std::size_t tail_done = 0; tail_done < len;) { + for (size_t tail_done = 0; tail_done < len;) { tail_done += 32; y = Rotate(x + y, 42) * k0 + v.second; w.first += Fetch64(s + len - tail_done + 16); @@ -328,13 +325,12 @@ uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) { // different 56-byte-to-8-byte hashes to get a 16-byte final result. x = HashLen16(x, v.first); y = HashLen16(y + z, w.first); - return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); + return u128{HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)}; } -uint128 CityHash128(const char* s, std::size_t len) { - return len >= 16 - ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) - : CityHash128WithSeed(s, len, uint128(k0, k1)); +u128 CityHash128(const char* s, size_t len) { + return len >= 16 ? CityHash128WithSeed(s + 16, len - 16, u128{Fetch64(s), Fetch64(s + 8) + k0}) + : CityHash128WithSeed(s, len, u128{k0, k1}); } } // namespace Common diff --git a/src/common/cityhash.h b/src/common/cityhash.h index a00804e01e..d74fc76393 100644 --- a/src/common/cityhash.h +++ b/src/common/cityhash.h @@ -62,49 +62,38 @@ #pragma once #include <cstddef> -#include <cstdint> -#include <utility> +#include "common/common_types.h" namespace Common { -using uint128 = std::pair<uint64_t, uint64_t>; - -[[nodiscard]] inline uint64_t Uint128Low64(const uint128& x) { - return x.first; -} -[[nodiscard]] inline uint64_t Uint128High64(const uint128& x) { - return x.second; -} - // Hash function for a byte array. -[[nodiscard]] uint64_t CityHash64(const char* buf, std::size_t len); +[[nodiscard]] u64 CityHash64(const char* buf, size_t len); // Hash function for a byte array. For convenience, a 64-bit seed is also // hashed into the result. -[[nodiscard]] uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed); +[[nodiscard]] u64 CityHash64WithSeed(const char* buf, size_t len, u64 seed); // Hash function for a byte array. For convenience, two seeds are also // hashed into the result. 
-[[nodiscard]] uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0, - uint64_t seed1); +[[nodiscard]] u64 CityHash64WithSeeds(const char* buf, size_t len, u64 seed0, u64 seed1); // Hash function for a byte array. -[[nodiscard]] uint128 CityHash128(const char* s, std::size_t len); +[[nodiscard]] u128 CityHash128(const char* s, size_t len); // Hash function for a byte array. For convenience, a 128-bit seed is also // hashed into the result. -[[nodiscard]] uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed); +[[nodiscard]] u128 CityHash128WithSeed(const char* s, size_t len, u128 seed); // Hash 128 input bits down to 64 bits of output. // This is intended to be a reasonably good hash function. -[[nodiscard]] inline uint64_t Hash128to64(const uint128& x) { +[[nodiscard]] inline u64 Hash128to64(const u128& x) { // Murmur-inspired hashing. - const uint64_t kMul = 0x9ddfea08eb382d69ULL; - uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + const u64 mul = 0x9ddfea08eb382d69ULL; + u64 a = (x[0] ^ x[1]) * mul; a ^= (a >> 47); - uint64_t b = (Uint128High64(x) ^ a) * kMul; + u64 b = (x[1] ^ a) * mul; b ^= (b >> 47); - b *= kMul; + b *= mul; return b; } diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 4cba2aaa4e..7b614ad89f 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -141,27 +141,13 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st } std::string UTF16ToUTF8(const std::u16string& input) { -#ifdef _MSC_VER - // Workaround for missing char16_t/char32_t instantiations in MSVC2017 - std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; - std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend()); - return convert.to_bytes(tmp_buffer); -#else std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; return convert.to_bytes(input); -#endif } std::u16string UTF8ToUTF16(const std::string& input) { -#ifdef _MSC_VER - // Workaround for missing char16_t/char32_t instantiations in MSVC2017 - std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; - auto tmp_buffer = convert.from_bytes(input); - return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend()); -#else std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; return convert.from_bytes(input); -#endif } #ifdef _WIN32 diff --git a/src/common/tiny_mt.h b/src/common/tiny_mt.h new file mode 100644 index 0000000000..19ae5b7d6f --- /dev/null +++ b/src/common/tiny_mt.h @@ -0,0 +1,250 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "common/alignment.h" +#include "common/common_types.h" + +namespace Common { + +// Implementation of TinyMT (mersenne twister RNG). +// Like Nintendo, we will use the sample parameters. 
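// TinyMT (Tiny Mersenne Twister) keeps a 127-bit state in four u32 words (note the
// TopBitmask below); ParamMat1/ParamMat2/ParamTmat are the reference implementation's
// published sample parameter set. A minimal usage sketch (hypothetical caller, not part
// of this patch):
//
//     Common::TinyMT rng;
//     rng.Initialize(0x12345678u);              // or Initialize(seed_words, count)
//     const u32 word = rng.GenerateRandomU32(); // 32 bits per call
//     std::array<u8, 16> buf{};
//     rng.GenerateRandomBytes(buf.data(), buf.size());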
+class TinyMT { +public: + static constexpr std::size_t NumStateWords = 4; + + struct State { + std::array<u32, NumStateWords> data{}; + }; + +private: + static constexpr u32 ParamMat1 = 0x8F7011EE; + static constexpr u32 ParamMat2 = 0xFC78FF1F; + static constexpr u32 ParamTmat = 0x3793FDFF; + + static constexpr u32 ParamMult = 0x6C078965; + static constexpr u32 ParamPlus = 0x0019660D; + static constexpr u32 ParamXor = 0x5D588B65; + + static constexpr u32 TopBitmask = 0x7FFFFFFF; + + static constexpr int MinimumInitIterations = 8; + static constexpr int NumDiscardedInitOutputs = 8; + + static constexpr u32 XorByShifted27(u32 value) { + return value ^ (value >> 27); + } + + static constexpr u32 XorByShifted30(u32 value) { + return value ^ (value >> 30); + } + +private: + State state{}; + +private: + // Internal API. + void FinalizeInitialization() { + const u32 state0 = this->state.data[0] & TopBitmask; + const u32 state1 = this->state.data[1]; + const u32 state2 = this->state.data[2]; + const u32 state3 = this->state.data[3]; + + if (state0 == 0 && state1 == 0 && state2 == 0 && state3 == 0) { + this->state.data[0] = 'T'; + this->state.data[1] = 'I'; + this->state.data[2] = 'N'; + this->state.data[3] = 'Y'; + } + + for (int i = 0; i < NumDiscardedInitOutputs; i++) { + this->GenerateRandomU32(); + } + } + + u32 GenerateRandomU24() { + return (this->GenerateRandomU32() >> 8); + } + + static void GenerateInitialValuePlus(TinyMT::State* state, int index, u32 value) { + u32& state0 = state->data[(index + 0) % NumStateWords]; + u32& state1 = state->data[(index + 1) % NumStateWords]; + u32& state2 = state->data[(index + 2) % NumStateWords]; + u32& state3 = state->data[(index + 3) % NumStateWords]; + + const u32 x = XorByShifted27(state0 ^ state1 ^ state3) * ParamPlus; + const u32 y = x + index + value; + + state0 = y; + state1 += x; + state2 += y; + } + + static void GenerateInitialValueXor(TinyMT::State* state, int index) { + u32& state0 = state->data[(index + 0) % NumStateWords]; + u32& state1 = state->data[(index + 1) % NumStateWords]; + u32& state2 = state->data[(index + 2) % NumStateWords]; + u32& state3 = state->data[(index + 3) % NumStateWords]; + + const u32 x = XorByShifted27(state0 + state1 + state3) * ParamXor; + const u32 y = x - index; + + state0 = y; + state1 ^= x; + state2 ^= y; + } + +public: + constexpr TinyMT() = default; + + // Public API. + + // Initialization. + void Initialize(u32 seed) { + this->state.data[0] = seed; + this->state.data[1] = ParamMat1; + this->state.data[2] = ParamMat2; + this->state.data[3] = ParamTmat; + + for (int i = 1; i < MinimumInitIterations; i++) { + const u32 mixed = XorByShifted30(this->state.data[(i - 1) % NumStateWords]); + this->state.data[i % NumStateWords] ^= mixed * ParamMult + i; + } + + this->FinalizeInitialization(); + } + + void Initialize(const u32* seed, int seed_count) { + this->state.data[0] = 0; + this->state.data[1] = ParamMat1; + this->state.data[2] = ParamMat2; + this->state.data[3] = ParamTmat; + + { + const int num_init_iterations = std::max(seed_count + 1, MinimumInitIterations) - 1; + + GenerateInitialValuePlus(&this->state, 0, seed_count); + + for (int i = 0; i < num_init_iterations; i++) { + GenerateInitialValuePlus(&this->state, (i + 1) % NumStateWords, + (i < seed_count) ? seed[i] : 0); + } + + for (int i = 0; i < static_cast<int>(NumStateWords); i++) { + GenerateInitialValueXor(&this->state, + (i + 1 + num_init_iterations) % NumStateWords); + } + } + + this->FinalizeInitialization(); + } + + // State management. 
+ void GetState(TinyMT::State& out) const { + out.data = this->state.data; + } + + void SetState(const TinyMT::State& state_) { + this->state.data = state_.data; + } + + // Random generation. + void GenerateRandomBytes(void* dst, std::size_t size) { + const uintptr_t start = reinterpret_cast<uintptr_t>(dst); + const uintptr_t end = start + size; + const uintptr_t aligned_start = Common::AlignUp(start, 4); + const uintptr_t aligned_end = Common::AlignDown(end, 4); + + // Make sure we're aligned. + if (start < aligned_start) { + const u32 rnd = this->GenerateRandomU32(); + std::memcpy(dst, &rnd, aligned_start - start); + } + + // Write as many aligned u32s as we can. + { + u32* cur_dst = reinterpret_cast<u32*>(aligned_start); + u32* const end_dst = reinterpret_cast<u32*>(aligned_end); + + while (cur_dst < end_dst) { + *(cur_dst++) = this->GenerateRandomU32(); + } + } + + // Handle any leftover unaligned data. + if (aligned_end < end) { + const u32 rnd = this->GenerateRandomU32(); + std::memcpy(reinterpret_cast<void*>(aligned_end), &rnd, end - aligned_end); + } + } + + u32 GenerateRandomU32() { + // Advance state. + const u32 x0 = + (this->state.data[0] & TopBitmask) ^ this->state.data[1] ^ this->state.data[2]; + const u32 y0 = this->state.data[3]; + const u32 x1 = x0 ^ (x0 << 1); + const u32 y1 = y0 ^ (y0 >> 1) ^ x1; + + const u32 state0 = this->state.data[1]; + u32 state1 = this->state.data[2]; + u32 state2 = x1 ^ (y1 << 10); + const u32 state3 = y1; + + if ((y1 & 1) != 0) { + state1 ^= ParamMat1; + state2 ^= ParamMat2; + } + + this->state.data[0] = state0; + this->state.data[1] = state1; + this->state.data[2] = state2; + this->state.data[3] = state3; + + // Temper. + const u32 t1 = state0 + (state2 >> 8); + u32 t0 = state3 ^ t1; + + if ((t1 & 1) != 0) { + t0 ^= ParamTmat; + } + + return t0; + } + + u64 GenerateRandomU64() { + const u32 lo = this->GenerateRandomU32(); + const u32 hi = this->GenerateRandomU32(); + return (u64{hi} << 32) | u64{lo}; + } + + float GenerateRandomF32() { + // Floats have 24 bits of mantissa. + constexpr u32 MantissaBits = 24; + return static_cast<float>(GenerateRandomU24()) * (1.0f / (1U << MantissaBits)); + } + + double GenerateRandomF64() { + // Doubles have 53 bits of mantissa. + // The smart way to generate 53 bits of random would be to use 32 bits + // from the first rnd32() call, and then 21 from the second. + // Nintendo does not. They use (32 - 5) = 27 bits from the first rnd32() + // call, and (32 - 6) bits from the second. We'll do what they do, but + // There's not a clear reason why. + constexpr u32 MantissaBits = 53; + constexpr u32 Shift1st = (64 - MantissaBits) / 2; + constexpr u32 Shift2nd = (64 - MantissaBits) - Shift1st; + + const u32 first = (this->GenerateRandomU32() >> Shift1st); + const u32 second = (this->GenerateRandomU32() >> Shift2nd); + + return (1.0 * first * (u64{1} << (32 - Shift2nd)) + second) * + (1.0 / (u64{1} << MantissaBits)); + } +}; + +} // namespace Common diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp deleted file mode 100644 index 16bf7c8283..0000000000 --- a/src/common/uint128.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#ifdef _MSC_VER -#include <intrin.h> - -#pragma intrinsic(_umul128) -#pragma intrinsic(_udiv128) -#endif -#include <cstring> -#include "common/uint128.h" - -namespace Common { - -#ifdef _MSC_VER - -u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { - u128 r{}; - r[0] = _umul128(a, b, &r[1]); - u64 remainder; -#if _MSC_VER < 1923 - return udiv128(r[1], r[0], d, &remainder); -#else - return _udiv128(r[1], r[0], d, &remainder); -#endif -} - -#else - -u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { - const u64 diva = a / d; - const u64 moda = a % d; - const u64 divb = b / d; - const u64 modb = b % d; - return diva * b + moda * divb + moda * modb / d; -} - -#endif - -u128 Multiply64Into128(u64 a, u64 b) { - u128 result; -#ifdef _MSC_VER - result[0] = _umul128(a, b, &result[1]); -#else - unsigned __int128 tmp = a; - tmp *= b; - std::memcpy(&result, &tmp, sizeof(u128)); -#endif - return result; -} - -std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { - u64 remainder = dividend[0] % divisor; - u64 accum = dividend[0] / divisor; - if (dividend[1] == 0) - return {accum, remainder}; - // We ignore dividend[1] / divisor as that overflows - const u64 first_segment = (dividend[1] % divisor) << 32; - accum += (first_segment / divisor) << 32; - const u64 second_segment = (first_segment % divisor) << 32; - accum += (second_segment / divisor); - remainder += second_segment % divisor; - if (remainder >= divisor) { - accum++; - remainder -= divisor; - } - return {accum, remainder}; -} - -} // namespace Common diff --git a/src/common/uint128.h b/src/common/uint128.h index 969259ab6c..4780b2f9d3 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -4,19 +4,118 @@ #pragma once +#include <cstring> #include <utility> + +#ifdef _MSC_VER +#include <intrin.h> +#pragma intrinsic(__umulh) +#pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128) +#else +#include <x86intrin.h> +#endif + #include "common/common_types.h" namespace Common { // This function multiplies 2 u64 values and divides it by a u64 value. -[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); +[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { +#ifdef _MSC_VER + u128 r{}; + r[0] = _umul128(a, b, &r[1]); + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], d, &remainder); +#else + return _udiv128(r[1], r[0], d, &remainder); +#endif +#else + const u64 diva = a / d; + const u64 moda = a % d; + const u64 divb = b / d; + const u64 modb = b % d; + return diva * b + moda * divb + moda * modb / d; +#endif +} // This function multiplies 2 u64 values and produces a u128 value; -[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b); +[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) { + u128 result; +#ifdef _MSC_VER + result[0] = _umul128(a, b, &result[1]); +#else + unsigned __int128 tmp = a; + tmp *= b; + std::memcpy(&result, &tmp, sizeof(u128)); +#endif + return result; +} + +[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { +#ifdef __SIZEOF_INT128__ + const auto base = static_cast<unsigned __int128>(numerator) << 64ULL; + return static_cast<u64>(base / divisor); +#elif defined(_M_X64) || defined(_M_ARM64) + std::array<u64, 2> r = {0, numerator}; + u64 remainder; +#if _MSC_VER < 1923 + return udiv128(r[1], r[0], divisor, &remainder); +#else + return _udiv128(r[1], r[0], divisor, &remainder); +#endif +#else + // This one is bit more inaccurate. 
+ return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor); +#endif +} + +[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) { +#ifdef __SIZEOF_INT128__ + return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64; +#elif defined(_M_X64) || defined(_M_ARM64) + return __umulh(a, b); // MSVC +#else + // Generic fallback + const u64 a_lo = u32(a); + const u64 a_hi = a >> 32; + const u64 b_lo = u32(b); + const u64 b_hi = b >> 32; + + const u64 a_x_b_hi = a_hi * b_hi; + const u64 a_x_b_mid = a_hi * b_lo; + const u64 b_x_a_mid = b_hi * a_lo; + const u64 a_x_b_lo = a_lo * b_lo; + + const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) + + static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >> + 32; + + const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +#endif +} // This function divides a u128 by a u32 value and produces two u64 values: // the result of division and the remainder -[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor); +[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { + u64 remainder = dividend[0] % divisor; + u64 accum = dividend[0] / divisor; + if (dividend[1] == 0) + return {accum, remainder}; + // We ignore dividend[1] / divisor as that overflows + const u64 first_segment = (dividend[1] % divisor) << 32; + accum += (first_segment / divisor) << 32; + const u64 second_segment = (first_segment % divisor) << 32; + accum += (second_segment / divisor); + remainder += second_segment % divisor; + if (remainder >= divisor) { + accum++; + remainder -= divisor; + } + return {accum, remainder}; +} } // namespace Common diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index a8c143f853..49830b8ab6 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstdint> + #include "common/uint128.h" #include "common/wall_clock.h" diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index a65f6b832c..87de406240 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -8,68 +8,10 @@ #include <mutex> #include <thread> -#ifdef _MSC_VER -#include <intrin.h> - -#pragma intrinsic(__umulh) -#pragma intrinsic(_udiv128) -#else -#include <x86intrin.h> -#endif - #include "common/atomic_ops.h" #include "common/uint128.h" #include "common/x64/native_clock.h" -namespace { - -[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { -#ifdef __SIZEOF_INT128__ - const auto base = static_cast<unsigned __int128>(numerator) << 64ULL; - return static_cast<u64>(base / divisor); -#elif defined(_M_X64) || defined(_M_ARM64) - std::array<u64, 2> r = {0, numerator}; - u64 remainder; -#if _MSC_VER < 1923 - return udiv128(r[1], r[0], divisor, &remainder); -#else - return _udiv128(r[1], r[0], divisor, &remainder); -#endif -#else - // This one is bit more inaccurate. 
- return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor); -#endif -} - -[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { -#ifdef __SIZEOF_INT128__ - return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64; -#elif defined(_M_X64) || defined(_M_ARM64) - return __umulh(a, b); // MSVC -#else - // Generic fallback - const u64 a_lo = u32(a); - const u64 a_hi = a >> 32; - const u64 b_lo = u32(b); - const u64 b_hi = b >> 32; - - const u64 a_x_b_hi = a_hi * b_hi; - const u64 a_x_b_mid = a_hi * b_lo; - const u64 b_x_a_mid = b_hi * a_lo; - const u64 a_x_b_lo = a_lo * b_lo; - - const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) + - static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >> - 32; - - const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; - - return multhi; -#endif -} - -} // namespace - namespace Common { u64 EstimateRDTSCFrequency() { diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 386d7bddf4..17f251c37b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -19,7 +19,6 @@ add_library(core STATIC core.h core_timing.cpp core_timing.h - core_timing_util.cpp core_timing_util.h cpu_manager.cpp cpu_manager.h @@ -148,7 +147,7 @@ add_library(core STATIC hle/kernel/client_session.h hle/kernel/code_set.cpp hle/kernel/code_set.h - hle/kernel/errors.h + hle/kernel/svc_results.h hle/kernel/global_scheduler_context.cpp hle/kernel/global_scheduler_context.h hle/kernel/handle_table.cpp @@ -157,6 +156,8 @@ add_library(core STATIC hle/kernel/hle_ipc.h hle/kernel/k_address_arbiter.cpp hle/kernel/k_address_arbiter.h + hle/kernel/k_address_space_info.cpp + hle/kernel/k_address_space_info.h hle/kernel/k_affinity_mask.h hle/kernel/k_condition_variable.cpp hle/kernel/k_condition_variable.h @@ -165,6 +166,18 @@ add_library(core STATIC hle/kernel/k_light_condition_variable.h hle/kernel/k_light_lock.cpp hle/kernel/k_light_lock.h + hle/kernel/k_memory_block.h + hle/kernel/k_memory_block_manager.cpp + hle/kernel/k_memory_block_manager.h + hle/kernel/k_memory_layout.h + hle/kernel/k_memory_manager.cpp + hle/kernel/k_memory_manager.h + hle/kernel/k_page_bitmap.h + hle/kernel/k_page_heap.cpp + hle/kernel/k_page_heap.h + hle/kernel/k_page_linked_list.h + hle/kernel/k_page_table.cpp + hle/kernel/k_page_table.h hle/kernel/k_priority_queue.h hle/kernel/k_readable_event.cpp hle/kernel/k_readable_event.h @@ -174,9 +187,17 @@ add_library(core STATIC hle/kernel/k_scheduler.h hle/kernel/k_scheduler_lock.h hle/kernel/k_scoped_lock.h + hle/kernel/k_scoped_resource_reservation.h hle/kernel/k_scoped_scheduler_lock_and_sleep.h + hle/kernel/k_shared_memory.cpp + hle/kernel/k_shared_memory.h + hle/kernel/k_slab_heap.h + hle/kernel/k_spin_lock.cpp + hle/kernel/k_spin_lock.h hle/kernel/k_synchronization_object.cpp hle/kernel/k_synchronization_object.h + hle/kernel/k_system_control.cpp + hle/kernel/k_system_control.h hle/kernel/k_thread.cpp hle/kernel/k_thread.h hle/kernel/k_thread_queue.h @@ -184,23 +205,7 @@ add_library(core STATIC hle/kernel/k_writable_event.h hle/kernel/kernel.cpp hle/kernel/kernel.h - hle/kernel/memory/address_space_info.cpp - hle/kernel/memory/address_space_info.h - hle/kernel/memory/memory_block.h - hle/kernel/memory/memory_block_manager.cpp - hle/kernel/memory/memory_block_manager.h - hle/kernel/memory/memory_layout.h - hle/kernel/memory/memory_manager.cpp - hle/kernel/memory/memory_manager.h - hle/kernel/memory/memory_types.h - hle/kernel/memory/page_linked_list.h - 
hle/kernel/memory/page_heap.cpp - hle/kernel/memory/page_heap.h - hle/kernel/memory/page_table.cpp - hle/kernel/memory/page_table.h - hle/kernel/memory/slab_heap.h - hle/kernel/memory/system_control.cpp - hle/kernel/memory/system_control.h + hle/kernel/memory_types.h hle/kernel/object.cpp hle/kernel/object.h hle/kernel/physical_core.cpp @@ -218,12 +223,9 @@ add_library(core STATIC hle/kernel/service_thread.h hle/kernel/session.cpp hle/kernel/session.h - hle/kernel/shared_memory.cpp - hle/kernel/shared_memory.h hle/kernel/svc.cpp hle/kernel/svc.h hle/kernel/svc_common.h - hle/kernel/svc_results.h hle/kernel/svc_types.h hle/kernel/svc_wrap.h hle/kernel/time_manager.cpp @@ -266,6 +268,7 @@ add_library(core STATIC hle/service/am/applets/software_keyboard.h hle/service/am/applets/web_browser.cpp hle/service/am/applets/web_browser.h + hle/service/am/applets/web_types.h hle/service/am/idle.cpp hle/service/am/idle.h hle/service/am/omm.cpp @@ -400,6 +403,7 @@ add_library(core STATIC hle/service/hid/controllers/xpad.h hle/service/lbl/lbl.cpp hle/service/lbl/lbl.h + hle/service/ldn/errors.h hle/service/ldn/ldn.cpp hle/service/ldn/ldn.h hle/service/ldr/ldr.cpp @@ -653,6 +657,8 @@ else() $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> + $<$<CXX_COMPILER_ID:Clang>:-fsized-deallocation> + -Wno-sign-conversion ) endif() diff --git a/src/core/core.cpp b/src/core/core.cpp index 30f5e11280..de6305e2a4 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -308,6 +308,9 @@ struct System::Impl { // Close all CPU/threading state cpu_manager.Shutdown(); + // Release the Time Manager's resources + time_manager.Shutdown(); + // Shutdown kernel and core timing core_timing.Shutdown(); kernel.Shutdown(); diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp deleted file mode 100644 index 8ce8e602e5..0000000000 --- a/src/core/core_timing_util.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "core/core_timing_util.h" - -#include <cinttypes> -#include <limits> -#include "common/logging/log.h" -#include "common/uint128.h" -#include "core/hardware_properties.h" - -namespace Core::Timing { - -constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE; - -s64 msToCycles(std::chrono::milliseconds ms) { - if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) { - LOG_ERROR(Core_Timing, "Integer overflow, use max value"); - return std::numeric_limits<s64>::max(); - } - if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) { - LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000); - } - return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000; -} - -s64 usToCycles(std::chrono::microseconds us) { - if (static_cast<u64>(us.count() / 1000000) > MAX_VALUE_TO_MULTIPLY) { - LOG_ERROR(Core_Timing, "Integer overflow, use max value"); - return std::numeric_limits<s64>::max(); - } - if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) { - LOG_DEBUG(Core_Timing, "Time very big, do rounding"); - return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000); - } - return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000; -} - -s64 nsToCycles(std::chrono::nanoseconds ns) { - const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE); - return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first; -} - -u64 msToClockCycles(std::chrono::milliseconds ns) { - const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); - return Common::Divide128On32(temp, 1000).first; -} - -u64 usToClockCycles(std::chrono::microseconds ns) { - const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); - return Common::Divide128On32(temp, 1000000).first; -} - -u64 nsToClockCycles(std::chrono::nanoseconds ns) { - const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); - return Common::Divide128On32(temp, 1000000000).first; -} - -u64 CpuCyclesToClockCycles(u64 ticks) { - const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ); - return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; -} - -std::chrono::milliseconds CyclesToMs(s64 cycles) { - const u128 temporal = Common::Multiply64Into128(cycles, 1000); - u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; - return std::chrono::milliseconds(ms); -} - -std::chrono::nanoseconds CyclesToNs(s64 cycles) { - const u128 temporal = Common::Multiply64Into128(cycles, 1000000000); - u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; - return std::chrono::nanoseconds(ns); -} - -std::chrono::microseconds CyclesToUs(s64 cycles) { - const u128 temporal = Common::Multiply64Into128(cycles, 1000000); - u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; - return std::chrono::microseconds(us); -} - -} // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index e4a046bf93..14c36a4854 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -1,24 +1,59 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
#pragma once #include <chrono> + #include "common/common_types.h" +#include "core/hardware_properties.h" namespace Core::Timing { -s64 msToCycles(std::chrono::milliseconds ms); -s64 usToCycles(std::chrono::microseconds us); -s64 nsToCycles(std::chrono::nanoseconds ns); -u64 msToClockCycles(std::chrono::milliseconds ns); -u64 usToClockCycles(std::chrono::microseconds ns); -u64 nsToClockCycles(std::chrono::nanoseconds ns); -std::chrono::milliseconds CyclesToMs(s64 cycles); -std::chrono::nanoseconds CyclesToNs(s64 cycles); -std::chrono::microseconds CyclesToUs(s64 cycles); - -u64 CpuCyclesToClockCycles(u64 ticks); +namespace detail { +constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000; +constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000; +} // namespace detail + +[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) { + return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED; +} + +[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) { + return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000; +} + +[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) { + return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000; +} + +[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) { + return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED; +} + +[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) { + return us.count() * detail::CNTFREQ_ADJUSTED / 1000; +} + +[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) { + return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000; +} + +[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) { + return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED; +} + +[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) { + return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED); +} + +[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) { + return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED); +} + +[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) { + return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED); +} } // namespace Core::Timing diff --git a/src/core/frontend/applets/controller.h b/src/core/frontend/applets/controller.h index dff71d8d98..b0626a0f9c 100644 --- a/src/core/frontend/applets/controller.h +++ b/src/core/frontend/applets/controller.h @@ -31,6 +31,7 @@ struct ControllerParameters { bool allow_dual_joycons{}; bool allow_left_joycon{}; bool allow_right_joycon{}; + bool allow_gamecube_controller{}; }; class ControllerApplet { diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index f8f005f155..0b6957e317 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp @@ -4,11 +4,11 @@ #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/server_port.h" #include "core/hle/kernel/session.h" +#include "core/hle/kernel/svc_results.h" namespace Kernel { @@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const { ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() { if (active_sessions >= max_sessions) { - return ERR_MAX_CONNECTIONS_REACHED; + return ResultMaxConnectionsReached; } 
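(A quick sanity check on the constexpr conversions introduced in core_timing_util.h above: because the rates are now pre-divided by 1000, exactness depends on the clock constants. This sketch assumes the stock Hardware::CNTFREQ of 19'200'000 Hz, in which case CNTFREQ_ADJUSTED is exactly 19'200 and the clock-cycle conversions lose nothing:)

#include <chrono>

#include "core/core_timing_util.h"

// Hedged sketch: these hold only under the assumed 19.2 MHz CNTFREQ.
using namespace std::chrono_literals;
static_assert(Core::Timing::msToClockCycles(1ms) == 19'200);
static_assert(Core::Timing::usToClockCycles(1'000us) == 19'200);
static_assert(Core::Timing::nsToClockCycles(1'000'000ns) == 19'200);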
active_sessions++; diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index a2be1a8f6a..e230f365ad 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp @@ -3,11 +3,11 @@ // Refer to the license.txt file included. #include "core/hle/kernel/client_session.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/session.h" +#include "core/hle/kernel/svc_results.h" #include "core/hle/result.h" namespace Kernel { @@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread, Core::Timing::CoreTiming& core_timing) { // Keep ServerSession alive until we're done working with it. if (!parent->Server()) { - return ERR_SESSION_CLOSED_BY_REMOTE; + return ResultSessionClosedByRemote; } // Signal the server session that new data is available diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h deleted file mode 100644 index 7d32a39f06..0000000000 --- a/src/core/hle/kernel/errors.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "core/hle/result.h" - -namespace Kernel { - -// Confirmed Switch kernel error codes - -constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; -constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; -constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59}; -constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59}; -constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; -constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; -constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; -constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104}; -constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; -constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; -constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106}; -constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; -constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110}; -constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113}; -constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112}; -constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114}; -constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115}; -constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116}; -constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117}; -constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118}; -constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118}; -constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119}; -constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120}; -constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121}; -constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122}; -constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123}; -constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125}; -constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126}; -constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132}; - -} // namespace Kernel diff --git 
a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp index 1a2fa9cd83..f96d340788 100644 --- a/src/core/hle/kernel/handle_table.cpp +++ b/src/core/hle/kernel/handle_table.cpp @@ -6,12 +6,12 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/svc_results.h" namespace Kernel { namespace { @@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default; ResultCode HandleTable::SetSize(s32 handle_table_size) { if (static_cast<u32>(handle_table_size) > MAX_COUNT) { LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT); - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } // Values less than or equal to zero indicate to use the maximum allowable @@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) { const u16 slot = next_free_slot; if (slot >= table_size) { LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); - return ERR_HANDLE_TABLE_FULL; + return ResultHandleTableFull; } next_free_slot = generations[slot]; @@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) { std::shared_ptr<Object> object = GetGeneric(handle); if (object == nullptr) { LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } return Create(std::move(object)); } @@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) { ResultCode HandleTable::Close(Handle handle) { if (!IsValid(handle)) { LOG_ERROR(Kernel, "Handle is not valid! 
handle={:08X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } const u16 slot = GetSlot(handle); diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 7ec62cf184..161d9f7826 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -14,7 +14,6 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/hle_ipc.h" #include "core/hle/kernel/k_readable_event.h" @@ -26,6 +25,7 @@ #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/server_session.h" +#include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/time_manager.h" #include "core/memory.h" diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index d0e90fd60e..7018f56da4 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32 s32 user_value{}; if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) { LOG_ERROR(Kernel, "Invalid current memory!"); - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } if (user_value != value) { - return Svc::ResultInvalidState; + return ResultInvalidState; } auto it = thread_tree.nfind_light({addr, -1}); @@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32 if (!succeeded) { LOG_ERROR(Kernel, "Invalid current memory!"); - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } if (user_value != value) { - return Svc::ResultInvalidState; + return ResultInvalidState; } while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && @@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement // Check that the thread isn't terminating. if (cur_thread->IsTerminationRequested()) { slp.CancelSleep(); - return Svc::ResultTerminationRequested; + return ResultTerminationRequested; } // Set the synced object. - cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + cur_thread->SetSyncedObject(nullptr, ResultTimedOut); // Read the value from userspace. s32 user_value{}; @@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement if (!succeeded) { slp.CancelSleep(); - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } // Check that the value is less than the specified one. if (user_value >= value) { slp.CancelSleep(); - return Svc::ResultInvalidState; + return ResultInvalidState; } // Check that the timeout is non-zero. if (timeout == 0) { slp.CancelSleep(); - return Svc::ResultTimedOut; + return ResultTimedOut; } // Set the arbiter. @@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) { // Check that the thread isn't terminating. if (cur_thread->IsTerminationRequested()) { slp.CancelSleep(); - return Svc::ResultTerminationRequested; + return ResultTerminationRequested; } // Set the synced object. - cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + cur_thread->SetSyncedObject(nullptr, ResultTimedOut); // Read the value from userspace. 
s32 user_value{}; if (!ReadFromUser(system, &user_value, addr)) { slp.CancelSleep(); - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } // Check that the value is equal. if (value != user_value) { slp.CancelSleep(); - return Svc::ResultInvalidState; + return ResultInvalidState; } // Check that the timeout is non-zero. if (timeout == 0) { slp.CancelSleep(); - return Svc::ResultTimedOut; + return ResultTimedOut; } // Set the arbiter. diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp index 6cf43ba247..24944d15b6 100644 --- a/src/core/hle/kernel/memory/address_space_info.cpp +++ b/src/core/hle/kernel/k_address_space_info.cpp @@ -2,15 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. - #include <array> #include "common/assert.h" -#include "core/hle/kernel/memory/address_space_info.h" +#include "core/hle/kernel/k_address_space_info.h" -namespace Kernel::Memory { +namespace Kernel { namespace { @@ -28,20 +25,20 @@ enum : u64 { }; // clang-format off -constexpr std::array<AddressSpaceInfo, 13> AddressSpaceInfos{{ - { .bit_width = 32, .address = Size_2_MB , .size = Size_1_GB - Size_2_MB , .type = AddressSpaceInfo::Type::Is32Bit, }, - { .bit_width = 32, .address = Size_1_GB , .size = Size_4_GB - Size_1_GB , .type = AddressSpaceInfo::Type::Small64Bit, }, - { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = AddressSpaceInfo::Type::Heap, }, - { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = AddressSpaceInfo::Type::Alias, }, - { .bit_width = 36, .address = Size_128_MB, .size = Size_2_GB - Size_128_MB, .type = AddressSpaceInfo::Type::Is32Bit, }, - { .bit_width = 36, .address = Size_2_GB , .size = Size_64_GB - Size_2_GB , .type = AddressSpaceInfo::Type::Small64Bit, }, - { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Heap, }, - { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Alias, }, - { .bit_width = 39, .address = Size_128_MB, .size = Size_512_GB - Size_128_MB, .type = AddressSpaceInfo::Type::Large64Bit, }, - { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = AddressSpaceInfo::Type::Is32Bit }, - { .bit_width = 39, .address = Invalid , .size = Size_6_GB , .type = AddressSpaceInfo::Type::Heap, }, - { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = AddressSpaceInfo::Type::Alias, }, - { .bit_width = 39, .address = Invalid , .size = Size_2_GB , .type = AddressSpaceInfo::Type::Stack, }, +constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{ + { .bit_width = 32, .address = Size_2_MB , .size = Size_1_GB - Size_2_MB , .type = KAddressSpaceInfo::Type::MapSmall, }, + { .bit_width = 32, .address = Size_1_GB , .size = Size_4_GB - Size_1_GB , .type = KAddressSpaceInfo::Type::MapLarge, }, + { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = KAddressSpaceInfo::Type::Heap, }, + { .bit_width = 32, .address = Invalid , .size = Size_1_GB , .type = KAddressSpaceInfo::Type::Alias, }, + { .bit_width = 36, .address = Size_128_MB, .size = Size_2_GB - Size_128_MB, .type = KAddressSpaceInfo::Type::MapSmall, }, + { .bit_width = 36, .address = Size_2_GB , .size = Size_64_GB - Size_2_GB , .type = KAddressSpaceInfo::Type::MapLarge, }, + { .bit_width = 36, 
.address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, }, + { .bit_width = 36, .address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Alias, }, + { .bit_width = 39, .address = Size_128_MB, .size = Size_512_GB - Size_128_MB, .type = KAddressSpaceInfo::Type::Map39Bit, }, + { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = KAddressSpaceInfo::Type::MapSmall }, + { .bit_width = 39, .address = Invalid , .size = Size_6_GB , .type = KAddressSpaceInfo::Type::Heap, }, + { .bit_width = 39, .address = Invalid , .size = Size_64_GB , .type = KAddressSpaceInfo::Type::Alias, }, + { .bit_width = 39, .address = Invalid , .size = Size_2_GB , .type = KAddressSpaceInfo::Type::Stack, }, }}; // clang-format on @@ -49,7 +46,8 @@ constexpr bool IsAllowedIndexForAddress(std::size_t index) { return index < AddressSpaceInfos.size() && AddressSpaceInfos[index].address != Invalid; } -using IndexArray = std::array<std::size_t, static_cast<std::size_t>(AddressSpaceInfo::Type::Count)>; +using IndexArray = + std::array<std::size_t, static_cast<std::size_t>(KAddressSpaceInfo::Type::Count)>; constexpr IndexArray AddressSpaceIndices32Bit{ 0, 1, 0, 2, 0, 3, @@ -63,23 +61,23 @@ constexpr IndexArray AddressSpaceIndices39Bit{ 9, 8, 8, 10, 12, 11, }; -constexpr bool IsAllowed32BitType(AddressSpaceInfo::Type type) { - return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit && - type != AddressSpaceInfo::Type::Stack; +constexpr bool IsAllowed32BitType(KAddressSpaceInfo::Type type) { + return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::Map39Bit && + type != KAddressSpaceInfo::Type::Stack; } -constexpr bool IsAllowed36BitType(AddressSpaceInfo::Type type) { - return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Large64Bit && - type != AddressSpaceInfo::Type::Stack; +constexpr bool IsAllowed36BitType(KAddressSpaceInfo::Type type) { + return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::Map39Bit && + type != KAddressSpaceInfo::Type::Stack; } -constexpr bool IsAllowed39BitType(AddressSpaceInfo::Type type) { - return type < AddressSpaceInfo::Type::Count && type != AddressSpaceInfo::Type::Small64Bit; +constexpr bool IsAllowed39BitType(KAddressSpaceInfo::Type type) { + return type < KAddressSpaceInfo::Type::Count && type != KAddressSpaceInfo::Type::MapLarge; } } // namespace -u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) { +u64 KAddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) { const std::size_t index{static_cast<std::size_t>(type)}; switch (width) { case 32: @@ -99,7 +97,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) { return 0; } -std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { +std::size_t KAddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { const std::size_t index{static_cast<std::size_t>(type)}; switch (width) { case 32: @@ -116,4 +114,4 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) return 0; } -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/address_space_info.h b/src/core/hle/kernel/k_address_space_info.h index a4e6e91e57..06f31c6d58 100644 --- a/src/core/hle/kernel/memory/address_space_info.h +++ b/src/core/hle/kernel/k_address_space_info.h @@ -2,20 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. - #pragma once #include "common/common_types.h" -namespace Kernel::Memory { +namespace Kernel { -struct AddressSpaceInfo final { +struct KAddressSpaceInfo final { enum class Type : u32 { - Is32Bit = 0, - Small64Bit = 1, - Large64Bit = 2, + MapSmall = 0, + MapLarge = 1, + Map39Bit = 2, Heap = 3, Stack = 4, Alias = 5, @@ -31,4 +28,4 @@ struct AddressSpaceInfo final { const Type type{}; }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index f0ad8b3901..170d8fa0df 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) { // Write the value to userspace. if (!WriteToUser(system, addr, std::addressof(next_value))) { if (next_owner_thread) { - next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); + next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory); } - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } } @@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS); // Check if the thread should terminate. - R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested); + R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested); { // Read the tag from userspace. u32 test_tag{}; R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr), - Svc::ResultInvalidCurrentMemory); + ResultInvalidCurrentMemory); // If the tag isn't the handle (with wait mask), we're done. R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS); // Get the lock owner thread. owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle); - R_UNLESS(owner_thread, Svc::ResultInvalidHandle); + R_UNLESS(owner_thread, ResultInvalidHandle); // Update the lock. cur_thread->SetAddressKey(addr, value); @@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) { thread_to_close = owner_thread.get(); } else { // The lock was tagged with a thread that doesn't exist. - thread->SetSyncedObject(nullptr, Svc::ResultInvalidState); + thread->SetSyncedObject(nullptr, ResultInvalidState); thread->Wakeup(); } } } else { // If the address wasn't accessible, note so. - thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory); + thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory); thread->Wakeup(); } @@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout) KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout}; // Set the synced object. - cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + cur_thread->SetSyncedObject(nullptr, ResultTimedOut); // Check that the thread isn't terminating. if (cur_thread->IsTerminationRequested()) { slp.CancelSleep(); - return Svc::ResultTerminationRequested; + return ResultTerminationRequested; } // Update the value and process for the next owner. @@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout) // Write the value to userspace. 
if (!WriteToUser(system, addr, std::addressof(next_value))) { slp.CancelSleep(); - return Svc::ResultInvalidCurrentMemory; + return ResultInvalidCurrentMemory; } } diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/k_memory_block.h index 83acece1e3..c5b9c5e85c 100644 --- a/src/core/hle/kernel/memory/memory_block.h +++ b/src/core/hle/kernel/k_memory_block.h @@ -2,20 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. - #pragma once #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "core/hle/kernel/memory/memory_types.h" +#include "core/hle/kernel/memory_types.h" #include "core/hle/kernel/svc_types.h" -namespace Kernel::Memory { +namespace Kernel { -enum class MemoryState : u32 { +enum class KMemoryState : u32 { None = 0, Mask = 0xFF, All = ~None, @@ -97,31 +94,31 @@ enum class MemoryState : u32 { FlagReferenceCounted | FlagCanDebug, CodeOut = static_cast<u32>(Svc::MemoryState::CodeOut) | FlagMapped | FlagReferenceCounted, }; -DECLARE_ENUM_FLAG_OPERATORS(MemoryState); - -static_assert(static_cast<u32>(MemoryState::Free) == 0x00000000); -static_assert(static_cast<u32>(MemoryState::Io) == 0x00002001); -static_assert(static_cast<u32>(MemoryState::Static) == 0x00042002); -static_assert(static_cast<u32>(MemoryState::Code) == 0x00DC7E03); -static_assert(static_cast<u32>(MemoryState::CodeData) == 0x03FEBD04); -static_assert(static_cast<u32>(MemoryState::Normal) == 0x037EBD05); -static_assert(static_cast<u32>(MemoryState::Shared) == 0x00402006); -static_assert(static_cast<u32>(MemoryState::AliasCode) == 0x00DD7E08); -static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09); -static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); -static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); -static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); -static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D); -static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E); -static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); -static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); -static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); -static_assert(static_cast<u32>(MemoryState::NonDeviceIpc) == 0x004C2812); -static_assert(static_cast<u32>(MemoryState::Kernel) == 0x00002013); -static_assert(static_cast<u32>(MemoryState::GeneratedCode) == 0x00402214); -static_assert(static_cast<u32>(MemoryState::CodeOut) == 0x00402015); - -enum class MemoryPermission : u8 { +DECLARE_ENUM_FLAG_OPERATORS(KMemoryState); + +static_assert(static_cast<u32>(KMemoryState::Free) == 0x00000000); +static_assert(static_cast<u32>(KMemoryState::Io) == 0x00002001); +static_assert(static_cast<u32>(KMemoryState::Static) == 0x00042002); +static_assert(static_cast<u32>(KMemoryState::Code) == 0x00DC7E03); +static_assert(static_cast<u32>(KMemoryState::CodeData) == 0x03FEBD04); +static_assert(static_cast<u32>(KMemoryState::Normal) == 0x037EBD05); +static_assert(static_cast<u32>(KMemoryState::Shared) == 0x00402006); +static_assert(static_cast<u32>(KMemoryState::AliasCode) == 0x00DD7E08); +static_assert(static_cast<u32>(KMemoryState::AliasCodeData) == 0x03FFBD09); +static_assert(static_cast<u32>(KMemoryState::Ipc) == 0x005C3C0A); 
+static_assert(static_cast<u32>(KMemoryState::Stack) == 0x005C3C0B); +static_assert(static_cast<u32>(KMemoryState::ThreadLocal) == 0x0040200C); +static_assert(static_cast<u32>(KMemoryState::Transferred) == 0x015C3C0D); +static_assert(static_cast<u32>(KMemoryState::SharedTransferred) == 0x005C380E); +static_assert(static_cast<u32>(KMemoryState::SharedCode) == 0x0040380F); +static_assert(static_cast<u32>(KMemoryState::Inaccessible) == 0x00000010); +static_assert(static_cast<u32>(KMemoryState::NonSecureIpc) == 0x005C3811); +static_assert(static_cast<u32>(KMemoryState::NonDeviceIpc) == 0x004C2812); +static_assert(static_cast<u32>(KMemoryState::Kernel) == 0x00002013); +static_assert(static_cast<u32>(KMemoryState::GeneratedCode) == 0x00402214); +static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x00402015); + +enum class KMemoryPermission : u8 { None = 0, Mask = static_cast<u8>(~None), @@ -135,9 +132,9 @@ enum class MemoryPermission : u8 { UserMask = static_cast<u8>(Svc::MemoryPermission::Read | Svc::MemoryPermission::Write | Svc::MemoryPermission::Execute), }; -DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission); +DECLARE_ENUM_FLAG_OPERATORS(KMemoryPermission); -enum class MemoryAttribute : u8 { +enum class KMemoryAttribute : u8 { None = 0x00, Mask = 0x7F, All = Mask, @@ -152,18 +149,18 @@ enum class MemoryAttribute : u8 { LockedAndIpcLocked = Locked | IpcLocked, DeviceSharedAndUncached = DeviceShared | Uncached }; -DECLARE_ENUM_FLAG_OPERATORS(MemoryAttribute); +DECLARE_ENUM_FLAG_OPERATORS(KMemoryAttribute); -static_assert((static_cast<u8>(MemoryAttribute::Mask) & - static_cast<u8>(MemoryAttribute::DontCareMask)) == 0); +static_assert((static_cast<u8>(KMemoryAttribute::Mask) & + static_cast<u8>(KMemoryAttribute::DontCareMask)) == 0); -struct MemoryInfo { +struct KMemoryInfo { VAddr addr{}; std::size_t size{}; - MemoryState state{}; - MemoryPermission perm{}; - MemoryAttribute attribute{}; - MemoryPermission original_perm{}; + KMemoryState state{}; + KMemoryPermission perm{}; + KMemoryAttribute attribute{}; + KMemoryPermission original_perm{}; u16 ipc_lock_count{}; u16 device_use_count{}; @@ -171,9 +168,9 @@ struct MemoryInfo { return { addr, size, - static_cast<Svc::MemoryState>(state & MemoryState::Mask), - static_cast<Svc::MemoryAttribute>(attribute & MemoryAttribute::Mask), - static_cast<Svc::MemoryPermission>(perm & MemoryPermission::UserMask), + static_cast<Svc::MemoryState>(state & KMemoryState::Mask), + static_cast<Svc::MemoryAttribute>(attribute & KMemoryAttribute::Mask), + static_cast<Svc::MemoryPermission>(perm & KMemoryPermission::UserMask), ipc_lock_count, device_use_count, }; @@ -196,21 +193,21 @@ struct MemoryInfo { } }; -class MemoryBlock final { - friend class MemoryBlockManager; +class KMemoryBlock final { + friend class KMemoryBlockManager; private: VAddr addr{}; std::size_t num_pages{}; - MemoryState state{MemoryState::None}; + KMemoryState state{KMemoryState::None}; u16 ipc_lock_count{}; u16 device_use_count{}; - MemoryPermission perm{MemoryPermission::None}; - MemoryPermission original_perm{MemoryPermission::None}; - MemoryAttribute attribute{MemoryAttribute::None}; + KMemoryPermission perm{KMemoryPermission::None}; + KMemoryPermission original_perm{KMemoryPermission::None}; + KMemoryAttribute attribute{KMemoryAttribute::None}; public: - static constexpr int Compare(const MemoryBlock& lhs, const MemoryBlock& rhs) { + static constexpr int Compare(const KMemoryBlock& lhs, const KMemoryBlock& rhs) { if (lhs.GetAddress() < rhs.GetAddress()) { return -1; } else if 
(lhs.GetAddress() <= rhs.GetLastAddress()) { @@ -221,9 +218,9 @@ public: } public: - constexpr MemoryBlock() = default; - constexpr MemoryBlock(VAddr addr_, std::size_t num_pages_, MemoryState state_, - MemoryPermission perm_, MemoryAttribute attribute_) + constexpr KMemoryBlock() = default; + constexpr KMemoryBlock(VAddr addr_, std::size_t num_pages_, KMemoryState state_, + KMemoryPermission perm_, KMemoryAttribute attribute_) : addr{addr_}, num_pages(num_pages_), state{state_}, perm{perm_}, attribute{attribute_} {} constexpr VAddr GetAddress() const { @@ -246,40 +243,40 @@ public: return GetEndAddress() - 1; } - constexpr MemoryInfo GetMemoryInfo() const { + constexpr KMemoryInfo GetMemoryInfo() const { return { GetAddress(), GetSize(), state, perm, attribute, original_perm, ipc_lock_count, device_use_count, }; } - void ShareToDevice(MemoryPermission /*new_perm*/) { - ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared || + void ShareToDevice(KMemoryPermission /*new_perm*/) { + ASSERT((attribute & KMemoryAttribute::DeviceShared) == KMemoryAttribute::DeviceShared || device_use_count == 0); - attribute |= MemoryAttribute::DeviceShared; + attribute |= KMemoryAttribute::DeviceShared; const u16 new_use_count{++device_use_count}; ASSERT(new_use_count > 0); } - void UnshareToDevice(MemoryPermission /*new_perm*/) { - ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared); + void UnshareToDevice(KMemoryPermission /*new_perm*/) { + ASSERT((attribute & KMemoryAttribute::DeviceShared) == KMemoryAttribute::DeviceShared); const u16 prev_use_count{device_use_count--}; ASSERT(prev_use_count > 0); if (prev_use_count == 1) { - attribute &= ~MemoryAttribute::DeviceShared; + attribute &= ~KMemoryAttribute::DeviceShared; } } private: - constexpr bool HasProperties(MemoryState s, MemoryPermission p, MemoryAttribute a) const { - constexpr MemoryAttribute AttributeIgnoreMask{MemoryAttribute::DontCareMask | - MemoryAttribute::IpcLocked | - MemoryAttribute::DeviceShared}; + constexpr bool HasProperties(KMemoryState s, KMemoryPermission p, KMemoryAttribute a) const { + constexpr KMemoryAttribute AttributeIgnoreMask{KMemoryAttribute::DontCareMask | + KMemoryAttribute::IpcLocked | + KMemoryAttribute::DeviceShared}; return state == s && perm == p && (attribute | AttributeIgnoreMask) == (a | AttributeIgnoreMask); } - constexpr bool HasSameProperties(const MemoryBlock& rhs) const { + constexpr bool HasSameProperties(const KMemoryBlock& rhs) const { return state == rhs.state && perm == rhs.perm && original_perm == rhs.original_perm && attribute == rhs.attribute && ipc_lock_count == rhs.ipc_lock_count && device_use_count == rhs.device_use_count; @@ -296,25 +293,25 @@ private: num_pages += count; } - constexpr void Update(MemoryState new_state, MemoryPermission new_perm, - MemoryAttribute new_attribute) { - ASSERT(original_perm == MemoryPermission::None); - ASSERT((attribute & MemoryAttribute::IpcLocked) == MemoryAttribute::None); + constexpr void Update(KMemoryState new_state, KMemoryPermission new_perm, + KMemoryAttribute new_attribute) { + ASSERT(original_perm == KMemoryPermission::None); + ASSERT((attribute & KMemoryAttribute::IpcLocked) == KMemoryAttribute::None); state = new_state; perm = new_perm; - attribute = static_cast<MemoryAttribute>( + attribute = static_cast<KMemoryAttribute>( new_attribute | - (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared))); + (attribute & (KMemoryAttribute::IpcLocked | 
KMemoryAttribute::DeviceShared))); } - constexpr MemoryBlock Split(VAddr split_addr) { + constexpr KMemoryBlock Split(VAddr split_addr) { ASSERT(GetAddress() < split_addr); ASSERT(Contains(split_addr)); ASSERT(Common::IsAligned(split_addr, PageSize)); - MemoryBlock block; + KMemoryBlock block; block.addr = addr; block.num_pages = (split_addr - GetAddress()) / PageSize; block.state = state; @@ -330,6 +327,6 @@ private: return block; } }; -static_assert(std::is_trivially_destructible<MemoryBlock>::value); +static_assert(std::is_trivially_destructible<KMemoryBlock>::value); -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_block_manager.cpp b/src/core/hle/kernel/k_memory_block_manager.cpp index 0732fa5a1c..4a2d880084 100644 --- a/src/core/hle/kernel/memory/memory_block_manager.cpp +++ b/src/core/hle/kernel/k_memory_block_manager.cpp @@ -2,19 +2,19 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/hle/kernel/memory/memory_block_manager.h" -#include "core/hle/kernel/memory/memory_types.h" +#include "core/hle/kernel/k_memory_block_manager.h" +#include "core/hle/kernel/memory_types.h" -namespace Kernel::Memory { +namespace Kernel { -MemoryBlockManager::MemoryBlockManager(VAddr start_addr, VAddr end_addr) +KMemoryBlockManager::KMemoryBlockManager(VAddr start_addr, VAddr end_addr) : start_addr{start_addr}, end_addr{end_addr} { const u64 num_pages{(end_addr - start_addr) / PageSize}; - memory_block_tree.emplace_back(start_addr, num_pages, MemoryState::Free, MemoryPermission::None, - MemoryAttribute::None); + memory_block_tree.emplace_back(start_addr, num_pages, KMemoryState::Free, + KMemoryPermission::None, KMemoryAttribute::None); } -MemoryBlockManager::iterator MemoryBlockManager::FindIterator(VAddr addr) { +KMemoryBlockManager::iterator KMemoryBlockManager::FindIterator(VAddr addr) { auto node{memory_block_tree.begin()}; while (node != end()) { const VAddr end_addr{node->GetNumPages() * PageSize + node->GetAddress()}; @@ -26,9 +26,9 @@ MemoryBlockManager::iterator MemoryBlockManager::FindIterator(VAddr addr) { return end(); } -VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_num_pages, - std::size_t num_pages, std::size_t align, std::size_t offset, - std::size_t guard_pages) { +VAddr KMemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_num_pages, + std::size_t num_pages, std::size_t align, + std::size_t offset, std::size_t guard_pages) { if (num_pages == 0) { return {}; } @@ -41,7 +41,7 @@ VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_nu break; } - if (info.state != MemoryState::Free) { + if (info.state != KMemoryState::Free) { continue; } @@ -63,17 +63,17 @@ VAddr MemoryBlockManager::FindFreeArea(VAddr region_start, std::size_t region_nu return {}; } -void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState prev_state, - MemoryPermission prev_perm, MemoryAttribute prev_attribute, - MemoryState state, MemoryPermission perm, - MemoryAttribute attribute) { +void KMemoryBlockManager::Update(VAddr addr, std::size_t num_pages, KMemoryState prev_state, + KMemoryPermission prev_perm, KMemoryAttribute prev_attribute, + KMemoryState state, KMemoryPermission perm, + KMemoryAttribute attribute) { const VAddr end_addr{addr + num_pages * PageSize}; iterator node{memory_block_tree.begin()}; - prev_attribute |= MemoryAttribute::IpcAndDeviceMapped; + prev_attribute |= KMemoryAttribute::IpcAndDeviceMapped; 
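(Stepping back to the renamed state enum for a moment: the low byte of every KMemoryState value is the Svc-visible state, which is what lets GetSvcMemoryInfo recover it with a plain mask. A hedged illustration; the numeric values come straight from the static_asserts above:)

// Hedged sketch: the Svc-facing state is the low byte (KMemoryState::Mask == 0xFF).
static_assert(static_cast<Svc::MemoryState>(KMemoryState::Io & KMemoryState::Mask) ==
              static_cast<Svc::MemoryState>(1)); // 0x00002001 & 0xFF == 0x01
static_assert(static_cast<Svc::MemoryState>(KMemoryState::Normal & KMemoryState::Mask) ==
              static_cast<Svc::MemoryState>(5)); // 0x037EBD05 & 0xFF == 0x05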
while (node != memory_block_tree.end()) { - MemoryBlock* block{&(*node)}; + KMemoryBlock* block{&(*node)}; iterator next_node{std::next(node)}; const VAddr cur_addr{block->GetAddress()}; const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr}; @@ -106,13 +106,13 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState p } } -void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState state, - MemoryPermission perm, MemoryAttribute attribute) { +void KMemoryBlockManager::Update(VAddr addr, std::size_t num_pages, KMemoryState state, + KMemoryPermission perm, KMemoryAttribute attribute) { const VAddr end_addr{addr + num_pages * PageSize}; iterator node{memory_block_tree.begin()}; while (node != memory_block_tree.end()) { - MemoryBlock* block{&(*node)}; + KMemoryBlock* block{&(*node)}; iterator next_node{std::next(node)}; const VAddr cur_addr{block->GetAddress()}; const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr}; @@ -141,13 +141,13 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState s } } -void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, - MemoryPermission perm) { +void KMemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, + KMemoryPermission perm) { const VAddr end_addr{addr + num_pages * PageSize}; iterator node{memory_block_tree.begin()}; while (node != memory_block_tree.end()) { - MemoryBlock* block{&(*node)}; + KMemoryBlock* block{&(*node)}; iterator next_node{std::next(node)}; const VAddr cur_addr{block->GetAddress()}; const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr}; @@ -176,9 +176,9 @@ void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc& } } -void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) { +void KMemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) { const_iterator it{FindIterator(start)}; - MemoryInfo info{}; + KMemoryInfo info{}; do { info = it->GetMemoryInfo(); func(info); @@ -186,8 +186,8 @@ void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& f } while (info.addr + info.size - 1 < end - 1 && it != cend()); } -void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { - MemoryBlock* block{&(*it)}; +void KMemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { + KMemoryBlock* block{&(*it)}; auto EraseIt = [&](const iterator it_to_erase) { if (next_it == it_to_erase) { @@ -197,7 +197,7 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { }; if (it != memory_block_tree.begin()) { - MemoryBlock* prev{&(*std::prev(it))}; + KMemoryBlock* prev{&(*std::prev(it))}; if (block->HasSameProperties(*prev)) { const iterator prev_it{std::prev(it)}; @@ -211,7 +211,7 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { } if (it != cend()) { - const MemoryBlock* const next{&(*std::next(it))}; + const KMemoryBlock* const next{&(*std::next(it))}; if (block->HasSameProperties(*next)) { block->Add(next->GetNumPages()); @@ -220,4 +220,4 @@ void MemoryBlockManager::MergeAdjacent(iterator it, iterator& next_it) { } } -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_block_manager.h b/src/core/hle/kernel/k_memory_block_manager.h index f57d1bbcce..e11cc70c8b 100644 --- a/src/core/hle/kernel/memory/memory_block_manager.h +++ 
b/src/core/hle/kernel/k_memory_block_manager.h @@ -8,18 +8,18 @@ #include <list> #include "common/common_types.h" -#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/k_memory_block.h" -namespace Kernel::Memory { +namespace Kernel { -class MemoryBlockManager final { +class KMemoryBlockManager final { public: - using MemoryBlockTree = std::list<MemoryBlock>; + using MemoryBlockTree = std::list<KMemoryBlock>; using iterator = MemoryBlockTree::iterator; using const_iterator = MemoryBlockTree::const_iterator; public: - MemoryBlockManager(VAddr start_addr, VAddr end_addr); + KMemoryBlockManager(VAddr start_addr, VAddr end_addr); iterator end() { return memory_block_tree.end(); @@ -36,21 +36,22 @@ public: VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages, std::size_t align, std::size_t offset, std::size_t guard_pages); - void Update(VAddr addr, std::size_t num_pages, MemoryState prev_state, - MemoryPermission prev_perm, MemoryAttribute prev_attribute, MemoryState state, - MemoryPermission perm, MemoryAttribute attribute); + void Update(VAddr addr, std::size_t num_pages, KMemoryState prev_state, + KMemoryPermission prev_perm, KMemoryAttribute prev_attribute, KMemoryState state, + KMemoryPermission perm, KMemoryAttribute attribute); - void Update(VAddr addr, std::size_t num_pages, MemoryState state, - MemoryPermission perm = MemoryPermission::None, - MemoryAttribute attribute = MemoryAttribute::None); + void Update(VAddr addr, std::size_t num_pages, KMemoryState state, + KMemoryPermission perm = KMemoryPermission::None, + KMemoryAttribute attribute = KMemoryAttribute::None); - using LockFunc = std::function<void(iterator, MemoryPermission)>; - void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, MemoryPermission perm); + using LockFunc = std::function<void(iterator, KMemoryPermission)>; + void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, + KMemoryPermission perm); - using IterateFunc = std::function<void(const MemoryInfo&)>; + using IterateFunc = std::function<void(const KMemoryInfo&)>; void IterateForRange(VAddr start, VAddr end, IterateFunc&& func); - MemoryBlock& FindBlock(VAddr addr) { + KMemoryBlock& FindBlock(VAddr addr) { return *FindIterator(addr); } @@ -63,4 +64,4 @@ private: MemoryBlockTree memory_block_tree; }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_layout.h b/src/core/hle/kernel/k_memory_layout.h index c7c0b2f491..0821d2d8c3 100644 --- a/src/core/hle/kernel/memory/memory_layout.h +++ b/src/core/hle/kernel/k_memory_layout.h @@ -7,7 +7,7 @@ #include "common/common_types.h" #include "core/device_memory.h" -namespace Kernel::Memory { +namespace Kernel { constexpr std::size_t KernelAslrAlignment = 2 * 1024 * 1024; constexpr std::size_t KernelVirtualAddressSpaceWidth = 1ULL << 39; @@ -27,8 +27,8 @@ constexpr bool IsKernelAddress(VAddr address) { return KernelVirtualAddressSpaceBase <= address && address < KernelVirtualAddressSpaceEnd; } -class MemoryRegion final { - friend class MemoryLayout; +class KMemoryRegion final { + friend class KMemoryLayout; public: constexpr PAddr StartAddress() const { @@ -40,29 +40,29 @@ public: } private: - constexpr MemoryRegion() = default; - constexpr MemoryRegion(PAddr start_address, PAddr end_address) + constexpr KMemoryRegion() = default; + constexpr KMemoryRegion(PAddr start_address, PAddr end_address) : start_address{start_address}, end_address{end_address} {} const PAddr 
start_address{}; const PAddr end_address{}; }; -class MemoryLayout final { +class KMemoryLayout final { public: - constexpr const MemoryRegion& Application() const { + constexpr const KMemoryRegion& Application() const { return application; } - constexpr const MemoryRegion& Applet() const { + constexpr const KMemoryRegion& Applet() const { return applet; } - constexpr const MemoryRegion& System() const { + constexpr const KMemoryRegion& System() const { return system; } - static constexpr MemoryLayout GetDefaultLayout() { + static constexpr KMemoryLayout GetDefaultLayout() { constexpr std::size_t application_size{0xcd500000}; constexpr std::size_t applet_size{0x1fb00000}; constexpr PAddr application_start_address{Core::DramMemoryMap::End - application_size}; @@ -76,15 +76,15 @@ public: } private: - constexpr MemoryLayout(PAddr application_start_address, std::size_t application_size, - PAddr applet_start_address, std::size_t applet_size, - PAddr system_start_address, std::size_t system_size) + constexpr KMemoryLayout(PAddr application_start_address, std::size_t application_size, + PAddr applet_start_address, std::size_t applet_size, + PAddr system_start_address, std::size_t system_size) : application{application_start_address, application_size}, applet{applet_start_address, applet_size}, system{system_start_address, system_size} {} - const MemoryRegion application; - const MemoryRegion applet; - const MemoryRegion system; + const KMemoryRegion application; + const KMemoryRegion applet; + const KMemoryRegion system; }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index acf13585c5..9027602bf1 100644 --- a/src/core/hle/kernel/memory/memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -8,20 +8,20 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/scope_exit.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/memory/memory_manager.h" -#include "core/hle/kernel/memory/page_linked_list.h" +#include "core/hle/kernel/k_memory_manager.h" +#include "core/hle/kernel/k_page_linked_list.h" +#include "core/hle/kernel/svc_results.h" -namespace Kernel::Memory { +namespace Kernel { -std::size_t MemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) { +std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) { const auto size{end_address - start_address}; // Calculate metadata sizes const auto ref_count_size{(size / PageSize) * sizeof(u16)}; const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)}; const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)}; - const auto page_heap_size{PageHeap::CalculateMetadataOverheadSize(size)}; + const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)}; const auto total_metadata_size{manager_size + page_heap_size}; ASSERT(manager_size <= total_metadata_size); ASSERT(Common::IsAligned(total_metadata_size, PageSize)); @@ -41,29 +41,30 @@ std::size_t MemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u6 return total_metadata_size; } -void MemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) { +void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) { ASSERT(pool < Pool::Count); managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address); } -VAddr 
MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool, - Direction dir) { +VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages, + u32 option) { // Early return if we're allocating no pages if (num_pages == 0) { return {}; } // Lock the pool that we're allocating from + const auto [pool, dir] = DecodeOption(option); const auto pool_index{static_cast<std::size_t>(pool)}; std::lock_guard lock{pool_locks[pool_index]}; // Choose a heap based on our page size request - const s32 heap_index{PageHeap::GetAlignedBlockIndex(num_pages, align_pages)}; + const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)}; // Loop, trying to iterate from each block // TODO (bunnei): Support multiple managers Impl& chosen_manager{managers[pool_index]}; - VAddr allocated_block{chosen_manager.AllocateBlock(heap_index)}; + VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)}; // If we failed to allocate, quit now if (!allocated_block) { @@ -71,7 +72,7 @@ VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align } // If we allocated more than we need, free some - const auto allocated_pages{PageHeap::GetBlockNumPages(heap_index)}; + const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)}; if (allocated_pages > num_pages) { chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages); } @@ -79,8 +80,8 @@ VAddr MemoryManager::AllocateContinuous(std::size_t num_pages, std::size_t align return allocated_block; } -ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool, - Direction dir) { +ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir) { ASSERT(page_list.GetNumPages() == 0); // Early return if we're allocating no pages @@ -93,9 +94,9 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa std::lock_guard lock{pool_locks[pool_index]}; // Choose a heap based on our page size request - const s32 heap_index{PageHeap::GetBlockIndex(num_pages)}; + const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)}; if (heap_index < 0) { - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } // TODO (bunnei): Support multiple managers @@ -112,11 +113,11 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa // Keep allocating until we've allocated all our pages for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) { - const auto pages_per_alloc{PageHeap::GetBlockNumPages(index)}; + const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)}; while (num_pages >= pages_per_alloc) { // Allocate a block - VAddr allocated_block{chosen_manager.AllocateBlock(index)}; + VAddr allocated_block{chosen_manager.AllocateBlock(index, false)}; if (!allocated_block) { break; } @@ -140,7 +141,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa // Only succeed if we allocated as many pages as we wanted if (num_pages) { - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } // We succeeded! 
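(To make the allocation loop above concrete: Allocate is greedy, starting from the largest block size that fits the request and walking down, returning any over-allocation to the heap. With hypothetical KPageHeap block sizes of 64, 16, 4, and 1 pages, a request for 70 pages would take one 64-page block, skip the 16-page size since only 6 pages remain, take one 4-page block, then two 1-page blocks, leaving num_pages == 0 so the call succeeds.)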
@@ -148,8 +149,8 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa return RESULT_SUCCESS; } -ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool, - Direction dir) { +ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, + Direction dir) { // Early return if we're freeing no pages if (!num_pages) { return RESULT_SUCCESS; @@ -172,4 +173,4 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages, return RESULT_SUCCESS; } -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index 3cf4448570..ae9f683b82 100644 --- a/src/core/hle/kernel/memory/memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h @@ -6,16 +6,18 @@ #include <array> #include <mutex> +#include <tuple> +#include "common/common_funcs.h" #include "common/common_types.h" -#include "core/hle/kernel/memory/page_heap.h" +#include "core/hle/kernel/k_page_heap.h" #include "core/hle/result.h" -namespace Kernel::Memory { +namespace Kernel { -class PageLinkedList; +class KPageLinkedList; -class MemoryManager final : NonCopyable { +class KMemoryManager final : NonCopyable { public: enum class Pool : u32 { Application = 0, @@ -37,29 +39,50 @@ public: Mask = (0xF << Shift), }; - MemoryManager() = default; + KMemoryManager() = default; constexpr std::size_t GetSize(Pool pool) const { return managers[static_cast<std::size_t>(pool)].GetSize(); } void InitializeManager(Pool pool, u64 start_address, u64 end_address); - VAddr AllocateContinuous(std::size_t num_pages, std::size_t align_pages, Pool pool, - Direction dir = Direction::FromFront); - ResultCode Allocate(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + + VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option); + ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir = Direction::FromFront); - ResultCode Free(PageLinkedList& page_list, std::size_t num_pages, Pool pool, + ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir = Direction::FromFront); static constexpr std::size_t MaxManagerCount = 10; +public: + static constexpr u32 EncodeOption(Pool pool, Direction dir) { + return (static_cast<u32>(pool) << static_cast<u32>(Pool::Shift)) | + (static_cast<u32>(dir) << static_cast<u32>(Direction::Shift)); + } + + static constexpr Pool GetPool(u32 option) { + return static_cast<Pool>((static_cast<u32>(option) & static_cast<u32>(Pool::Mask)) >> + static_cast<u32>(Pool::Shift)); + } + + static constexpr Direction GetDirection(u32 option) { + return static_cast<Direction>( + (static_cast<u32>(option) & static_cast<u32>(Direction::Mask)) >> + static_cast<u32>(Direction::Shift)); + } + + static constexpr std::tuple<Pool, Direction> DecodeOption(u32 option) { + return std::make_tuple(GetPool(option), GetDirection(option)); + } + private: class Impl final : NonCopyable { private: using RefCount = u16; private: - PageHeap heap; + KPageHeap heap; Pool pool{}; public: @@ -67,8 +90,8 @@ private: std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address); - VAddr AllocateBlock(s32 index) { - return heap.AllocateBlock(index); + VAddr AllocateBlock(s32 index, bool random) { + return heap.AllocateBlock(index, random); } void Free(VAddr addr, std::size_t num_pages) { @@ -93,4 +116,4 @@ private: std::array<Impl, MaxManagerCount> managers; }; 
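(The option word taken by AllocateAndOpenContinuous is just a pool and a direction packed into one u32; a hedged round-trip check using only the constexpr helpers declared in the class above:)

// Hedged sketch: EncodeOption/DecodeOption round-trip (names from the header above).
constexpr u32 option = KMemoryManager::EncodeOption(KMemoryManager::Pool::Application,
                                                    KMemoryManager::Direction::FromFront);
static_assert(KMemoryManager::GetPool(option) == KMemoryManager::Pool::Application);
static_assert(KMemoryManager::GetDirection(option) == KMemoryManager::Direction::FromFront);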
-} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/k_page_bitmap.h b/src/core/hle/kernel/k_page_bitmap.h new file mode 100644 index 0000000000..c75d667c93 --- /dev/null +++ b/src/core/hle/kernel/k_page_bitmap.h @@ -0,0 +1,279 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <bit> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "common/tiny_mt.h" +#include "core/hle/kernel/k_system_control.h" + +namespace Kernel { + +class KPageBitmap { +private: + class RandomBitGenerator { + private: + Common::TinyMT rng{}; + u32 entropy{}; + u32 bits_available{}; + + private: + void RefreshEntropy() { + entropy = rng.GenerateRandomU32(); + bits_available = static_cast<u32>(Common::BitSize<decltype(entropy)>()); + } + + bool GenerateRandomBit() { + if (bits_available == 0) { + this->RefreshEntropy(); + } + + const bool rnd_bit = (entropy & 1) != 0; + entropy >>= 1; + --bits_available; + return rnd_bit; + } + + public: + RandomBitGenerator() { + rng.Initialize(static_cast<u32>(KSystemControl::GenerateRandomU64())); + } + + std::size_t SelectRandomBit(u64 bitmap) { + u64 selected = 0; + + u64 cur_num_bits = Common::BitSize<decltype(bitmap)>() / 2; + u64 cur_mask = (1ULL << cur_num_bits) - 1; + + while (cur_num_bits) { + const u64 low = (bitmap >> 0) & cur_mask; + const u64 high = (bitmap >> cur_num_bits) & cur_mask; + + bool choose_low; + if (high == 0) { + // If only low val is set, choose low. + choose_low = true; + } else if (low == 0) { + // If only high val is set, choose high. + choose_low = false; + } else { + // If both are set, choose random. + choose_low = this->GenerateRandomBit(); + } + + // If we chose low, proceed with low. + if (choose_low) { + bitmap = low; + selected += 0; + } else { + bitmap = high; + selected += cur_num_bits; + } + + // Proceed. + cur_num_bits /= 2; + cur_mask >>= cur_num_bits; + } + + return selected; + } + }; + +public: + static constexpr std::size_t MaxDepth = 4; + +private: + std::array<u64*, MaxDepth> bit_storages{}; + RandomBitGenerator rng{}; + std::size_t num_bits{}; + std::size_t used_depths{}; + +public: + KPageBitmap() = default; + + constexpr std::size_t GetNumBits() const { + return num_bits; + } + constexpr s32 GetHighestDepthIndex() const { + return static_cast<s32>(used_depths) - 1; + } + + u64* Initialize(u64* storage, std::size_t size) { + // Initially, everything is un-set. + num_bits = 0; + + // Calculate the needed bitmap depth. + used_depths = static_cast<std::size_t>(GetRequiredDepth(size)); + ASSERT(used_depths <= MaxDepth); + + // Set the bitmap pointers. + for (s32 depth = this->GetHighestDepthIndex(); depth >= 0; depth--) { + bit_storages[depth] = storage; + size = Common::AlignUp(size, Common::BitSize<u64>()) / Common::BitSize<u64>(); + storage += size; + } + + return storage; + } + + s64 FindFreeBlock(bool random) { + uintptr_t offset = 0; + s32 depth = 0; + + if (random) { + do { + const u64 v = bit_storages[depth][offset]; + if (v == 0) { + // If depth is bigger than zero, then a previous level indicated a block was + // free. 
+ ASSERT(depth == 0); + return -1; + } + offset = offset * Common::BitSize<u64>() + rng.SelectRandomBit(v); + ++depth; + } while (depth < static_cast<s32>(used_depths)); + } else { + do { + const u64 v = bit_storages[depth][offset]; + if (v == 0) { + // If depth is bigger than zero, then a previous level indicated a block was + // free. + ASSERT(depth == 0); + return -1; + } + offset = offset * Common::BitSize<u64>() + std::countr_zero(v); + ++depth; + } while (depth < static_cast<s32>(used_depths)); + } + + return static_cast<s64>(offset); + } + + void SetBit(std::size_t offset) { + this->SetBit(this->GetHighestDepthIndex(), offset); + num_bits++; + } + + void ClearBit(std::size_t offset) { + this->ClearBit(this->GetHighestDepthIndex(), offset); + num_bits--; + } + + bool ClearRange(std::size_t offset, std::size_t count) { + s32 depth = this->GetHighestDepthIndex(); + u64* bits = bit_storages[depth]; + std::size_t bit_ind = offset / Common::BitSize<u64>(); + if (count < Common::BitSize<u64>()) { + const std::size_t shift = offset % Common::BitSize<u64>(); + ASSERT(shift + count <= Common::BitSize<u64>()); + // Check that all the bits are set. + const u64 mask = ((u64(1) << count) - 1) << shift; + u64 v = bits[bit_ind]; + if ((v & mask) != mask) { + return false; + } + + // Clear the bits. + v &= ~mask; + bits[bit_ind] = v; + if (v == 0) { + this->ClearBit(depth - 1, bit_ind); + } + } else { + ASSERT(offset % Common::BitSize<u64>() == 0); + ASSERT(count % Common::BitSize<u64>() == 0); + // Check that all the bits are set. + std::size_t remaining = count; + std::size_t i = 0; + do { + if (bits[bit_ind + i++] != ~u64(0)) { + return false; + } + remaining -= Common::BitSize<u64>(); + } while (remaining > 0); + + // Clear the bits. + remaining = count; + i = 0; + do { + bits[bit_ind + i] = 0; + this->ClearBit(depth - 1, bit_ind + i); + i++; + remaining -= Common::BitSize<u64>(); + } while (remaining > 0); + } + + num_bits -= count; + return true; + } + +private: + void SetBit(s32 depth, std::size_t offset) { + while (depth >= 0) { + std::size_t ind = offset / Common::BitSize<u64>(); + std::size_t which = offset % Common::BitSize<u64>(); + const u64 mask = u64(1) << which; + + u64* bit = std::addressof(bit_storages[depth][ind]); + u64 v = *bit; + ASSERT((v & mask) == 0); + *bit = v | mask; + if (v) { + break; + } + offset = ind; + depth--; + } + } + + void ClearBit(s32 depth, std::size_t offset) { + while (depth >= 0) { + std::size_t ind = offset / Common::BitSize<u64>(); + std::size_t which = offset % Common::BitSize<u64>(); + const u64 mask = u64(1) << which; + + u64* bit = std::addressof(bit_storages[depth][ind]); + u64 v = *bit; + ASSERT((v & mask) != 0); + v &= ~mask; + *bit = v; + if (v) { + break; + } + offset = ind; + depth--; + } + } + +private: + static constexpr s32 GetRequiredDepth(std::size_t region_size) { + s32 depth = 0; + while (true) { + region_size /= Common::BitSize<u64>(); + depth++; + if (region_size == 0) { + return depth; + } + } + } + +public: + static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size) { + std::size_t overhead_bits = 0; + for (s32 depth = GetRequiredDepth(region_size) - 1; depth >= 0; depth--) { + region_size = + Common::AlignUp(region_size, Common::BitSize<u64>()) / Common::BitSize<u64>(); + overhead_bits += region_size; + } + return overhead_bits * sizeof(u64); + } +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp index 0ab1f7205e..07e0629222 100644 --- 
a/src/core/hle/kernel/memory/page_heap.cpp +++ b/src/core/hle/kernel/k_page_heap.cpp @@ -2,16 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. - #include "core/core.h" -#include "core/hle/kernel/memory/page_heap.h" +#include "core/hle/kernel/k_page_heap.h" #include "core/memory.h" -namespace Kernel::Memory { +namespace Kernel { -void PageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) { +void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) { // Check our assumptions ASSERT(Common::IsAligned((address), PageSize)); ASSERT(Common::IsAligned(size, PageSize)); @@ -32,11 +29,11 @@ void PageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_ } } -VAddr PageHeap::AllocateBlock(s32 index) { +VAddr KPageHeap::AllocateBlock(s32 index, bool random) { const std::size_t needed_size{blocks[index].GetSize()}; for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) { - if (const VAddr addr{blocks[i].PopBlock()}; addr) { + if (const VAddr addr{blocks[i].PopBlock(random)}; addr) { if (const std::size_t allocated_size{blocks[i].GetSize()}; allocated_size > needed_size) { Free(addr + needed_size, (allocated_size - needed_size) / PageSize); @@ -48,13 +45,13 @@ VAddr PageHeap::AllocateBlock(s32 index) { return 0; } -void PageHeap::FreeBlock(VAddr block, s32 index) { +void KPageHeap::FreeBlock(VAddr block, s32 index) { do { block = blocks[index++].PushBlock(block); } while (block != 0); } -void PageHeap::Free(VAddr addr, std::size_t num_pages) { +void KPageHeap::Free(VAddr addr, std::size_t num_pages) { // Freeing no pages is a no-op if (num_pages == 0) { return; @@ -104,16 +101,16 @@ void PageHeap::Free(VAddr addr, std::size_t num_pages) { } } -std::size_t PageHeap::CalculateMetadataOverheadSize(std::size_t region_size) { +std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) { std::size_t overhead_size = 0; for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; const std::size_t next_block_shift{ (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; - overhead_size += PageHeap::Block::CalculateMetadataOverheadSize( + overhead_size += KPageHeap::Block::CalculateManagementOverheadSize( region_size, cur_block_shift, next_block_shift); } return Common::AlignUp(overhead_size, PageSize); } -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h new file mode 100644 index 0000000000..de5d6a1892 --- /dev/null +++ b/src/core/hle/kernel/k_page_heap.h @@ -0,0 +1,193 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
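[editor's note] Before the new page-heap header continues: the randomized lookup that k_page_bitmap.h adds above rests on RandomBitGenerator::SelectRandomBit, which walks a 64-bit word by repeated halving, flipping a coin whenever both halves contain set bits. A self-contained sketch of that halving search follows; std::mt19937_64 stands in for Common::TinyMT, and a raw low bit of the engine's output stands in for GenerateRandomBit().

// Sketch: pick a set bit of a 64-bit word by binary descent.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <random>

std::size_t SelectRandomBit(std::uint64_t bitmap, std::mt19937_64& rng) {
    assert(bitmap != 0);
    std::size_t selected = 0;
    std::uint64_t cur_num_bits = 32; // half of the 64-bit word
    std::uint64_t cur_mask = (1ULL << cur_num_bits) - 1;
    while (cur_num_bits != 0) {
        const std::uint64_t low = bitmap & cur_mask;
        const std::uint64_t high = (bitmap >> cur_num_bits) & cur_mask;
        // Descend into whichever half has set bits; flip a coin if both do.
        bool choose_low;
        if (high == 0) {
            choose_low = true;
        } else if (low == 0) {
            choose_low = false;
        } else {
            choose_low = (rng() & 1) != 0;
        }
        if (choose_low) {
            bitmap = low;
        } else {
            bitmap = high;
            selected += cur_num_bits;
        }
        cur_num_bits /= 2;
        cur_mask >>= cur_num_bits;
    }
    return selected; // index of a set bit in the original word
}

int main() {
    std::mt19937_64 rng{42};
    const std::size_t bit = SelectRandomBit(0b10100, rng);
    assert(bit == 2 || bit == 4); // only set bits can be returned
}

FindFreeBlock then repeats this once per level of the hierarchical bitmap, so the randomized allocation used for ASLR costs the same O(depth) descent as the deterministic std::countr_zero path.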
+ +#pragma once + +#include <array> +#include <bit> +#include <vector> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "core/hle/kernel/k_page_bitmap.h" +#include "core/hle/kernel/memory_types.h" + +namespace Kernel { + +class KPageHeap final : NonCopyable { +public: + static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) { + const auto target_pages{std::max(num_pages, align_pages)}; + for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) { + if (target_pages <= + (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { + return static_cast<s32>(i); + } + } + return -1; + } + + static constexpr s32 GetBlockIndex(std::size_t num_pages) { + for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) { + if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { + return i; + } + } + return -1; + } + + static constexpr std::size_t GetBlockSize(std::size_t index) { + return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index]; + } + + static constexpr std::size_t GetBlockNumPages(std::size_t index) { + return GetBlockSize(index) / PageSize; + } + +private: + static constexpr std::size_t NumMemoryBlockPageShifts{7}; + static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ + 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, + }; + + class Block final : NonCopyable { + private: + KPageBitmap bitmap; + VAddr heap_address{}; + uintptr_t end_offset{}; + std::size_t block_shift{}; + std::size_t next_block_shift{}; + + public: + Block() = default; + + constexpr std::size_t GetShift() const { + return block_shift; + } + constexpr std::size_t GetNextShift() const { + return next_block_shift; + } + constexpr std::size_t GetSize() const { + return static_cast<std::size_t>(1) << GetShift(); + } + constexpr std::size_t GetNumPages() const { + return GetSize() / PageSize; + } + constexpr std::size_t GetNumFreeBlocks() const { + return bitmap.GetNumBits(); + } + constexpr std::size_t GetNumFreePages() const { + return GetNumFreeBlocks() * GetNumPages(); + } + + u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs, + u64* bit_storage) { + // Set shifts + block_shift = bs; + next_block_shift = nbs; + + // Align up the address + VAddr end{addr + size}; + const auto align{(next_block_shift != 0) ? 
(1ULL << next_block_shift) + : (1ULL << block_shift)}; + addr = Common::AlignDown((addr), align); + end = Common::AlignUp((end), align); + + heap_address = addr; + end_offset = (end - addr) / (1ULL << block_shift); + return bitmap.Initialize(bit_storage, end_offset); + } + + VAddr PushBlock(VAddr address) { + // Set the bit for the free block + std::size_t offset{(address - heap_address) >> GetShift()}; + bitmap.SetBit(offset); + + // If we have a next shift, try to clear the blocks below and return the address + if (GetNextShift()) { + const auto diff{1ULL << (GetNextShift() - GetShift())}; + offset = Common::AlignDown(offset, diff); + if (bitmap.ClearRange(offset, diff)) { + return heap_address + (offset << GetShift()); + } + } + + // We couldn't coalesce, or we're already as big as possible + return 0; + } + + VAddr PopBlock(bool random) { + // Find a free block + const s64 soffset{bitmap.FindFreeBlock(random)}; + if (soffset < 0) { + return 0; + } + const auto offset{static_cast<std::size_t>(soffset)}; + + // Update our tracking and return it + bitmap.ClearBit(offset); + return heap_address + (offset << GetShift()); + } + + public: + static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size, + std::size_t cur_block_shift, + std::size_t next_block_shift) { + const auto cur_block_size{(1ULL << cur_block_shift)}; + const auto next_block_size{(1ULL << next_block_shift)}; + const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size}; + return KPageBitmap::CalculateManagementOverheadSize( + (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); + } + }; + +public: + KPageHeap() = default; + + constexpr VAddr GetAddress() const { + return heap_address; + } + constexpr std::size_t GetSize() const { + return heap_size; + } + constexpr VAddr GetEndAddress() const { + return GetAddress() + GetSize(); + } + constexpr std::size_t GetPageOffset(VAddr block) const { + return (block - GetAddress()) / PageSize; + } + + void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); + VAddr AllocateBlock(s32 index, bool random); + void Free(VAddr addr, std::size_t num_pages); + + void UpdateUsedSize() { + used_size = heap_size - (GetNumFreePages() * PageSize); + } + + static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + +private: + constexpr std::size_t GetNumFreePages() const { + std::size_t num_free{}; + + for (const auto& block : blocks) { + num_free += block.GetNumFreePages(); + } + + return num_free; + } + + void FreeBlock(VAddr block, s32 index); + + VAddr heap_address{}; + std::size_t heap_size{}; + std::size_t used_size{}; + std::array<Block, NumMemoryBlockPageShifts> blocks{}; + std::vector<u64> metadata; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/page_linked_list.h b/src/core/hle/kernel/k_page_linked_list.h index 45dc13eaf6..64024d01f3 100644 --- a/src/core/hle/kernel/memory/page_linked_list.h +++ b/src/core/hle/kernel/k_page_linked_list.h @@ -8,12 +8,12 @@ #include "common/assert.h" #include "common/common_types.h" -#include "core/hle/kernel/memory/memory_types.h" +#include "core/hle/kernel/memory_types.h" #include "core/hle/result.h" -namespace Kernel::Memory { +namespace Kernel { -class PageLinkedList final { +class KPageLinkedList final { public: class Node final { public: @@ -33,8 +33,8 @@ public: }; public: - PageLinkedList() = default; - PageLinkedList(u64 address, u64 num_pages) { + KPageLinkedList() = default; + KPageLinkedList(u64 address, u64 
num_pages) { ASSERT(AddBlock(address, num_pages).IsSuccess()); } @@ -54,7 +54,7 @@ public: return num_pages; } - bool IsEqual(PageLinkedList& other) const { + bool IsEqual(KPageLinkedList& other) const { auto this_node = nodes.begin(); auto other_node = other.nodes.begin(); while (this_node != nodes.end() && other_node != other.nodes.end()) { @@ -89,4 +89,4 @@ private: std::list<Node> nodes; }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 7de91c7689..d09d5ce480 100644 --- a/src/core/hle/kernel/memory/page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -6,19 +6,20 @@ #include "common/assert.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/k_address_space_info.h" +#include "core/hle/kernel/k_memory_block.h" +#include "core/hle/kernel/k_memory_block_manager.h" +#include "core/hle/kernel/k_page_linked_list.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_resource_limit.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" +#include "core/hle/kernel/k_system_control.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/address_space_info.h" -#include "core/hle/kernel/memory/memory_block.h" -#include "core/hle/kernel/memory/memory_block_manager.h" -#include "core/hle/kernel/memory/page_linked_list.h" -#include "core/hle/kernel/memory/page_table.h" -#include "core/hle/kernel/memory/system_control.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/svc_results.h" #include "core/memory.h" -namespace Kernel::Memory { +namespace Kernel { namespace { @@ -37,14 +38,14 @@ constexpr std::size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceT } } -constexpr u64 GetAddressInRange(const MemoryInfo& info, VAddr addr) { +constexpr u64 GetAddressInRange(const KMemoryInfo& info, VAddr addr) { if (info.GetAddress() < addr) { return addr; } return info.GetAddress(); } -constexpr std::size_t GetSizeInRange(const MemoryInfo& info, VAddr start, VAddr end) { +constexpr std::size_t GetSizeInRange(const KMemoryInfo& info, VAddr start, VAddr end) { std::size_t size{info.GetSize()}; if (info.GetAddress() < start) { size -= start - info.GetAddress(); @@ -57,25 +58,25 @@ constexpr std::size_t GetSizeInRange(const MemoryInfo& info, VAddr start, VAddr } // namespace -PageTable::PageTable(Core::System& system) : system{system} {} +KPageTable::KPageTable(Core::System& system) : system{system} {} -ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, - bool enable_aslr, VAddr code_addr, std::size_t code_size, - Memory::MemoryManager::Pool pool) { +ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, + bool enable_aslr, VAddr code_addr, + std::size_t code_size, KMemoryManager::Pool pool) { - const auto GetSpaceStart = [this](AddressSpaceInfo::Type type) { - return AddressSpaceInfo::GetAddressSpaceStart(address_space_width, type); + const auto GetSpaceStart = [this](KAddressSpaceInfo::Type type) { + return KAddressSpaceInfo::GetAddressSpaceStart(address_space_width, type); }; - const auto GetSpaceSize = [this](AddressSpaceInfo::Type type) { - return AddressSpaceInfo::GetAddressSpaceSize(address_space_width, type); + const auto GetSpaceSize = [this](KAddressSpaceInfo::Type type) { + return KAddressSpaceInfo::GetAddressSpaceSize(address_space_width, type); }; // Set our width and heap/alias sizes 
address_space_width = GetAddressSpaceWidthFromType(as_type); const VAddr start = 0; const VAddr end{1ULL << address_space_width}; - std::size_t alias_region_size{GetSpaceSize(AddressSpaceInfo::Type::Alias)}; - std::size_t heap_region_size{GetSpaceSize(AddressSpaceInfo::Type::Heap)}; + std::size_t alias_region_size{GetSpaceSize(KAddressSpaceInfo::Type::Alias)}; + std::size_t heap_region_size{GetSpaceSize(KAddressSpaceInfo::Type::Heap)}; ASSERT(start <= code_addr); ASSERT(code_addr < code_addr + code_size); @@ -95,12 +96,12 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t std::size_t kernel_map_region_size{}; if (address_space_width == 39) { - alias_region_size = GetSpaceSize(AddressSpaceInfo::Type::Alias); - heap_region_size = GetSpaceSize(AddressSpaceInfo::Type::Heap); - stack_region_size = GetSpaceSize(AddressSpaceInfo::Type::Stack); - kernel_map_region_size = GetSpaceSize(AddressSpaceInfo::Type::Is32Bit); - code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Large64Bit); - code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Large64Bit); + alias_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Alias); + heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap); + stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack); + kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall); + code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit); + code_region_end = code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit); alias_code_region_start = code_region_start; alias_code_region_end = code_region_end; process_code_start = Common::AlignDown(code_addr, RegionAlignment); @@ -108,12 +109,12 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t } else { stack_region_size = 0; kernel_map_region_size = 0; - code_region_start = GetSpaceStart(AddressSpaceInfo::Type::Is32Bit); - code_region_end = code_region_start + GetSpaceSize(AddressSpaceInfo::Type::Is32Bit); + code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::MapSmall); + code_region_end = code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::MapSmall); stack_region_start = code_region_start; alias_code_region_start = code_region_start; - alias_code_region_end = GetSpaceStart(AddressSpaceInfo::Type::Small64Bit) + - GetSpaceSize(AddressSpaceInfo::Type::Small64Bit); + alias_code_region_end = GetSpaceStart(KAddressSpaceInfo::Type::MapLarge) + + GetSpaceSize(KAddressSpaceInfo::Type::MapLarge); stack_region_end = code_region_end; kernel_map_region_start = code_region_start; kernel_map_region_end = code_region_end; @@ -141,7 +142,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)}; if (alloc_size < needed_size) { UNREACHABLE(); - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } const std::size_t remaining_size{alloc_size - needed_size}; @@ -149,13 +150,13 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t // Determine random placements for each region std::size_t alias_rnd{}, heap_rnd{}, stack_rnd{}, kmap_rnd{}; if (enable_aslr) { - alias_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + alias_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * RegionAlignment; - heap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + heap_rnd = 
KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * RegionAlignment; - stack_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + stack_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * RegionAlignment; - kmap_rnd = SystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * + kmap_rnd = KSystemControl::GenerateRandomRange(0, remaining_size / RegionAlignment) * RegionAlignment; } @@ -270,21 +271,21 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t return InitializeMemoryLayout(start, end); } -ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemoryState state, - MemoryPermission perm) { +ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state, + KMemoryPermission perm) { std::lock_guard lock{page_table_lock}; const u64 size{num_pages * PageSize}; if (!CanContain(addr, size, state)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (IsRegionMapped(addr, size)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - PageLinkedList page_linked_list; + KPageLinkedList page_linked_list; CASCADE_CODE( system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); @@ -294,44 +295,44 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt return RESULT_SUCCESS; } -ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; const std::size_t num_pages{size / PageSize}; - MemoryState state{}; - MemoryPermission perm{}; - CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, MemoryState::All, - MemoryState::Normal, MemoryPermission::Mask, - MemoryPermission::ReadAndWrite, MemoryAttribute::Mask, - MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + KMemoryState state{}; + KMemoryPermission perm{}; + CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, KMemoryState::All, + KMemoryState::Normal, KMemoryPermission::Mask, + KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, + KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); if (IsRegionMapped(dst_addr, size)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - PageLinkedList page_linked_list; + KPageLinkedList page_linked_list; AddRegionToPages(src_addr, num_pages, page_linked_list); { auto block_guard = detail::ScopeExit( [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); }); - CASCADE_CODE( - Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions)); - CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::None)); + CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None, + OperationType::ChangePermissions)); + CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None)); block_guard.Cancel(); } - block_manager->Update(src_addr, num_pages, state, MemoryPermission::None, - MemoryAttribute::Locked); - block_manager->Update(dst_addr, num_pages, MemoryState::AliasCode); + block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None, + KMemoryAttribute::Locked); + block_manager->Update(dst_addr, num_pages, 
KMemoryState::AliasCode); return RESULT_SUCCESS; } -ResultCode PageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; if (!size) { @@ -340,34 +341,35 @@ ResultCode PageTable::UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std const std::size_t num_pages{size / PageSize}; - CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, src_addr, size, MemoryState::All, - MemoryState::Normal, MemoryPermission::None, - MemoryPermission::None, MemoryAttribute::Mask, - MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped)); + CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, src_addr, size, KMemoryState::All, + KMemoryState::Normal, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); - MemoryState state{}; + KMemoryState state{}; CASCADE_CODE(CheckMemoryState( - &state, nullptr, nullptr, dst_addr, PageSize, MemoryState::FlagCanCodeAlias, - MemoryState::FlagCanCodeAlias, MemoryPermission::None, MemoryPermission::None, - MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); - CASCADE_CODE(CheckMemoryState(dst_addr, size, MemoryState::All, state, MemoryPermission::None, - MemoryPermission::None, MemoryAttribute::Mask, - MemoryAttribute::None)); - CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap)); - - block_manager->Update(dst_addr, num_pages, MemoryState::Free); - block_manager->Update(src_addr, num_pages, MemoryState::Normal, MemoryPermission::ReadAndWrite); + &state, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias, + KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); + CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::None)); + CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap)); + + block_manager->Update(dst_addr, num_pages, KMemoryState::Free); + block_manager->Update(src_addr, num_pages, KMemoryState::Normal, + KMemoryPermission::ReadAndWrite); return RESULT_SUCCESS; } -void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end) { +void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) { auto node{page_linked_list.Nodes().begin()}; PAddr map_addr{node->GetAddress()}; std::size_t src_num_pages{node->GetNumPages()}; - block_manager->IterateForRange(start, end, [&](const MemoryInfo& info) { - if (info.state != MemoryState::Free) { + block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) { + if (info.state != KMemoryState::Free) { return; } @@ -382,7 +384,7 @@ void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, } const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)}; - Operate(dst_addr, num_pages, MemoryPermission::ReadAndWrite, OperationType::Map, + Operate(dst_addr, num_pages, KMemoryPermission::ReadAndWrite, OperationType::Map, map_addr); dst_addr += num_pages * PageSize; @@ -393,14 +395,14 @@ void PageTable::MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, }); } -ResultCode PageTable::MapPhysicalMemory(VAddr addr, 
std::size_t size) { +ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; std::size_t mapped_size{}; const VAddr end_addr{addr + size}; - block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { - if (info.state != MemoryState::Free) { + block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { + if (info.state != KMemoryState::Free) { mapped_size += GetSizeInRange(info, addr, end_addr); } }); @@ -409,41 +411,39 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { return RESULT_SUCCESS; } - auto process{system.Kernel().CurrentProcess()}; const std::size_t remaining_size{size - mapped_size}; const std::size_t remaining_pages{remaining_size / PageSize}; - if (process->GetResourceLimit() && - !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) { - return ERR_RESOURCE_LIMIT_EXCEEDED; + // Reserve the memory from the process resource limit. + KScopedResourceReservation memory_reservation( + system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory, + remaining_size); + if (!memory_reservation.Succeeded()) { + LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size); + return ResultResourceLimitedExceeded; } - PageLinkedList page_linked_list; - { - auto block_guard = detail::ScopeExit([&] { - system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool); - process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size); - }); + KPageLinkedList page_linked_list; - CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, - memory_pool)); + CASCADE_CODE( + system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool)); - block_guard.Cancel(); - } + // We succeeded, so commit the memory reservation. 
+ memory_reservation.Commit(); MapPhysicalMemory(page_linked_list, addr, end_addr); physical_memory_usage += remaining_size; const std::size_t num_pages{size / PageSize}; - block_manager->Update(addr, num_pages, MemoryState::Free, MemoryPermission::None, - MemoryAttribute::None, MemoryState::Normal, - MemoryPermission::ReadAndWrite, MemoryAttribute::None); + block_manager->Update(addr, num_pages, KMemoryState::Free, KMemoryPermission::None, + KMemoryAttribute::None, KMemoryState::Normal, + KMemoryPermission::ReadAndWrite, KMemoryAttribute::None); return RESULT_SUCCESS; } -ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { +ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; const VAddr end_addr{addr + size}; @@ -451,15 +451,15 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { std::size_t mapped_size{}; // Verify that the region can be unmapped - block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { - if (info.state == MemoryState::Normal) { - if (info.attribute != MemoryAttribute::None) { - result = ERR_INVALID_ADDRESS_STATE; + block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { + if (info.state == KMemoryState::Normal) { + if (info.attribute != KMemoryAttribute::None) { + result = ResultInvalidCurrentMemory; return; } mapped_size += GetSizeInRange(info, addr, end_addr); - } else if (info.state != MemoryState::Free) { - result = ERR_INVALID_ADDRESS_STATE; + } else if (info.state != KMemoryState::Free) { + result = ResultInvalidCurrentMemory; } }); @@ -480,23 +480,23 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { return RESULT_SUCCESS; } -ResultCode PageTable::UnmapMemory(VAddr addr, std::size_t size) { +ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; const VAddr end_addr{addr + size}; ResultCode result{RESULT_SUCCESS}; - PageLinkedList page_linked_list; + KPageLinkedList page_linked_list; // Unmap each region within the range - block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { - if (info.state == MemoryState::Normal) { + block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { + if (info.state == KMemoryState::Normal) { const std::size_t block_size{GetSizeInRange(info, addr, end_addr)}; const std::size_t block_num_pages{block_size / PageSize}; const VAddr block_addr{GetAddressInRange(info, addr)}; AddRegionToPages(block_addr, block_size / PageSize, page_linked_list); - if (result = Operate(block_addr, block_num_pages, MemoryPermission::None, + if (result = Operate(block_addr, block_num_pages, KMemoryPermission::None, OperationType::Unmap); result.IsError()) { return; @@ -511,93 +511,94 @@ ResultCode PageTable::UnmapMemory(VAddr addr, std::size_t size) { const std::size_t num_pages{size / PageSize}; system.Kernel().MemoryManager().Free(page_linked_list, num_pages, memory_pool); - block_manager->Update(addr, num_pages, MemoryState::Free); + block_manager->Update(addr, num_pages, KMemoryState::Free); return RESULT_SUCCESS; } -ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; - MemoryState src_state{}; + KMemoryState src_state{}; CASCADE_CODE(CheckMemoryState( - &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias, - 
MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::ReadAndWrite, - MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias, + KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::ReadAndWrite, + KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); if (IsRegionMapped(dst_addr, size)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - PageLinkedList page_linked_list; + KPageLinkedList page_linked_list; const std::size_t num_pages{size / PageSize}; AddRegionToPages(src_addr, num_pages, page_linked_list); { auto block_guard = detail::ScopeExit([&] { - Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite, + Operate(src_addr, num_pages, KMemoryPermission::ReadAndWrite, OperationType::ChangePermissions); }); - CASCADE_CODE( - Operate(src_addr, num_pages, MemoryPermission::None, OperationType::ChangePermissions)); - CASCADE_CODE(MapPages(dst_addr, page_linked_list, MemoryPermission::ReadAndWrite)); + CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None, + OperationType::ChangePermissions)); + CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::ReadAndWrite)); block_guard.Cancel(); } - block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::None, - MemoryAttribute::Locked); - block_manager->Update(dst_addr, num_pages, MemoryState::Stack, MemoryPermission::ReadAndWrite); + block_manager->Update(src_addr, num_pages, src_state, KMemoryPermission::None, + KMemoryAttribute::Locked); + block_manager->Update(dst_addr, num_pages, KMemoryState::Stack, + KMemoryPermission::ReadAndWrite); return RESULT_SUCCESS; } -ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { +ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { std::lock_guard lock{page_table_lock}; - MemoryState src_state{}; + KMemoryState src_state{}; CASCADE_CODE(CheckMemoryState( - &src_state, nullptr, nullptr, src_addr, size, MemoryState::FlagCanAlias, - MemoryState::FlagCanAlias, MemoryPermission::Mask, MemoryPermission::None, - MemoryAttribute::Mask, MemoryAttribute::Locked, MemoryAttribute::IpcAndDeviceMapped)); - - MemoryPermission dst_perm{}; - CASCADE_CODE(CheckMemoryState(nullptr, &dst_perm, nullptr, dst_addr, size, MemoryState::All, - MemoryState::Stack, MemoryPermission::None, - MemoryPermission::None, MemoryAttribute::Mask, - MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); - - PageLinkedList src_pages; - PageLinkedList dst_pages; + &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias, + KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::None, + KMemoryAttribute::Mask, KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); + + KMemoryPermission dst_perm{}; + CASCADE_CODE(CheckMemoryState(nullptr, &dst_perm, nullptr, dst_addr, size, KMemoryState::All, + KMemoryState::Stack, KMemoryPermission::None, + KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); + + KPageLinkedList src_pages; + KPageLinkedList dst_pages; const std::size_t num_pages{size / PageSize}; AddRegionToPages(src_addr, num_pages, src_pages); AddRegionToPages(dst_addr, num_pages, dst_pages); if (!dst_pages.IsEqual(src_pages)) { - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } { auto block_guard = detail::ScopeExit([&] { 
MapPages(dst_addr, dst_pages, dst_perm); }); - CASCADE_CODE(Operate(dst_addr, num_pages, MemoryPermission::None, OperationType::Unmap)); - CASCADE_CODE(Operate(src_addr, num_pages, MemoryPermission::ReadAndWrite, + CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap)); + CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::ReadAndWrite, OperationType::ChangePermissions)); block_guard.Cancel(); } - block_manager->Update(src_addr, num_pages, src_state, MemoryPermission::ReadAndWrite); - block_manager->Update(dst_addr, num_pages, MemoryState::Free); + block_manager->Update(src_addr, num_pages, src_state, KMemoryPermission::ReadAndWrite); + block_manager->Update(dst_addr, num_pages, KMemoryState::Free); return RESULT_SUCCESS; } -ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_list, - MemoryPermission perm) { +ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_list, + KMemoryPermission perm) { VAddr cur_addr{addr}; for (const auto& node : page_linked_list.Nodes()) { @@ -606,8 +607,8 @@ ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_lis result.IsError()) { const std::size_t num_pages{(addr - cur_addr) / PageSize}; - ASSERT( - Operate(addr, num_pages, MemoryPermission::None, OperationType::Unmap).IsSuccess()); + ASSERT(Operate(addr, num_pages, KMemoryPermission::None, OperationType::Unmap) + .IsSuccess()); return result; } @@ -618,19 +619,19 @@ ResultCode PageTable::MapPages(VAddr addr, const PageLinkedList& page_linked_lis return RESULT_SUCCESS; } -ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state, - MemoryPermission perm) { +ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, + KMemoryPermission perm) { std::lock_guard lock{page_table_lock}; const std::size_t num_pages{page_linked_list.GetNumPages()}; const std::size_t size{num_pages * PageSize}; if (!CanContain(addr, size, state)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (IsRegionMapped(addr, num_pages * PageSize)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } CASCADE_CODE(MapPages(addr, page_linked_list, perm)); @@ -640,26 +641,27 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem return RESULT_SUCCESS; } -ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, MemoryPermission perm) { +ResultCode KPageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, + KMemoryPermission perm) { std::lock_guard lock{page_table_lock}; - MemoryState prev_state{}; - MemoryPermission prev_perm{}; + KMemoryState prev_state{}; + KMemoryPermission prev_perm{}; CASCADE_CODE(CheckMemoryState( - &prev_state, &prev_perm, nullptr, addr, size, MemoryState::FlagCode, MemoryState::FlagCode, - MemoryPermission::None, MemoryPermission::None, MemoryAttribute::Mask, - MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); + &prev_state, &prev_perm, nullptr, addr, size, KMemoryState::FlagCode, + KMemoryState::FlagCode, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); - MemoryState state{prev_state}; + KMemoryState state{prev_state}; // Ensure state is mutable if permission allows write - if ((perm & MemoryPermission::Write) != MemoryPermission::None) { - if (prev_state == MemoryState::Code) { - state = MemoryState::CodeData; - } 
else if (prev_state == MemoryState::AliasCode) { - state = MemoryState::AliasCodeData; + if ((perm & KMemoryPermission::Write) != KMemoryPermission::None) { + if (prev_state == KMemoryState::Code) { + state = KMemoryState::CodeData; + } else if (prev_state == KMemoryState::AliasCode) { + state = KMemoryState::AliasCodeData; } else { UNREACHABLE(); } @@ -670,13 +672,13 @@ ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, Memo return RESULT_SUCCESS; } - if ((prev_perm & MemoryPermission::Execute) != (perm & MemoryPermission::Execute)) { + if ((prev_perm & KMemoryPermission::Execute) != (perm & KMemoryPermission::Execute)) { // Memory execution state is changing, invalidate CPU cache range system.InvalidateCpuInstructionCacheRange(addr, size); } const std::size_t num_pages{size / PageSize}; - const OperationType operation{(perm & MemoryPermission::Execute) != MemoryPermission::None + const OperationType operation{(perm & KMemoryPermission::Execute) != KMemoryPermission::None ? OperationType::ChangePermissionsAndRefresh : OperationType::ChangePermissions}; @@ -687,69 +689,69 @@ ResultCode PageTable::SetCodeMemoryPermission(VAddr addr, std::size_t size, Memo return RESULT_SUCCESS; } -MemoryInfo PageTable::QueryInfoImpl(VAddr addr) { +KMemoryInfo KPageTable::QueryInfoImpl(VAddr addr) { std::lock_guard lock{page_table_lock}; return block_manager->FindBlock(addr).GetMemoryInfo(); } -MemoryInfo PageTable::QueryInfo(VAddr addr) { +KMemoryInfo KPageTable::QueryInfo(VAddr addr) { if (!Contains(addr, 1)) { - return {address_space_end, 0 - address_space_end, MemoryState::Inaccessible, - MemoryPermission::None, MemoryAttribute::None, MemoryPermission::None}; + return {address_space_end, 0 - address_space_end, KMemoryState::Inaccessible, + KMemoryPermission::None, KMemoryAttribute::None, KMemoryPermission::None}; } return QueryInfoImpl(addr); } -ResultCode PageTable::ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm) { +ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm) { std::lock_guard lock{page_table_lock}; - MemoryState state{}; - MemoryAttribute attribute{}; + KMemoryState state{}; + KMemoryAttribute attribute{}; - CASCADE_CODE(CheckMemoryState(&state, nullptr, &attribute, addr, size, - MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, - MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, - MemoryPermission::Mask, MemoryPermission::ReadAndWrite, - MemoryAttribute::Mask, MemoryAttribute::None, - MemoryAttribute::IpcAndDeviceMapped)); + CASCADE_CODE(CheckMemoryState( + &state, nullptr, &attribute, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::Mask, + KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, KMemoryAttribute::None, + KMemoryAttribute::IpcAndDeviceMapped)); - block_manager->Update(addr, size / PageSize, state, perm, attribute | MemoryAttribute::Locked); + block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked); return RESULT_SUCCESS; } -ResultCode PageTable::ResetTransferMemory(VAddr addr, std::size_t size) { +ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; - MemoryState state{}; + KMemoryState state{}; - CASCADE_CODE(CheckMemoryState(&state, nullptr, nullptr, addr, size, - MemoryState::FlagCanTransfer | 
MemoryState::FlagReferenceCounted, - MemoryState::FlagCanTransfer | MemoryState::FlagReferenceCounted, - MemoryPermission::None, MemoryPermission::None, - MemoryAttribute::Mask, MemoryAttribute::Locked, - MemoryAttribute::IpcAndDeviceMapped)); + CASCADE_CODE( + CheckMemoryState(&state, nullptr, nullptr, addr, size, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, + KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, + KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); - block_manager->Update(addr, size / PageSize, state, MemoryPermission::ReadAndWrite); + block_manager->Update(addr, size / PageSize, state, KMemoryPermission::ReadAndWrite); return RESULT_SUCCESS; } -ResultCode PageTable::SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask, - MemoryAttribute value) { +ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask, + KMemoryAttribute value) { std::lock_guard lock{page_table_lock}; - MemoryState state{}; - MemoryPermission perm{}; - MemoryAttribute attribute{}; + KMemoryState state{}; + KMemoryPermission perm{}; + KMemoryAttribute attribute{}; - CASCADE_CODE(CheckMemoryState(&state, &perm, &attribute, addr, size, - MemoryState::FlagCanChangeAttribute, - MemoryState::FlagCanChangeAttribute, MemoryPermission::None, - MemoryPermission::None, MemoryAttribute::LockedAndIpcLocked, - MemoryAttribute::None, MemoryAttribute::DeviceSharedAndUncached)); + CASCADE_CODE(CheckMemoryState( + &state, &perm, &attribute, addr, size, KMemoryState::FlagCanChangeAttribute, + KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None, + KMemoryAttribute::DeviceSharedAndUncached)); attribute = attribute & ~mask; attribute = attribute | (mask & value); @@ -759,16 +761,16 @@ ResultCode PageTable::SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAtt return RESULT_SUCCESS; } -ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) { +ResultCode KPageTable::SetHeapCapacity(std::size_t new_heap_capacity) { std::lock_guard lock{page_table_lock}; heap_capacity = new_heap_capacity; return RESULT_SUCCESS; } -ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { +ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) { if (size > heap_region_end - heap_region_start) { - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } const u64 previous_heap_size{GetHeapSize()}; @@ -781,27 +783,34 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { const u64 delta{size - previous_heap_size}; - auto process{system.Kernel().CurrentProcess()}; - if (process->GetResourceLimit() && delta != 0 && - !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) { - return ERR_RESOURCE_LIMIT_EXCEEDED; + // Reserve memory for the heap extension. 
+ KScopedResourceReservation memory_reservation( + system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory, + delta); + + if (!memory_reservation.Succeeded()) { + LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta); + return ResultResourceLimitedExceeded; } - PageLinkedList page_linked_list; + KPageLinkedList page_linked_list; const std::size_t num_pages{delta / PageSize}; CASCADE_CODE( system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); if (IsRegionMapped(current_heap_addr, delta)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } CASCADE_CODE( Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup)); - block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal, - MemoryPermission::ReadAndWrite); + // Succeeded in allocation, commit the resource reservation + memory_reservation.Commit(); + + block_manager->Update(current_heap_addr, num_pages, KMemoryState::Normal, + KMemoryPermission::ReadAndWrite); current_heap_addr = heap_region_start + size; } @@ -809,30 +818,30 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { return MakeResult<VAddr>(heap_region_start); } -ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, - bool is_map_only, VAddr region_start, - std::size_t region_num_pages, MemoryState state, - MemoryPermission perm, PAddr map_addr) { +ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, + bool is_map_only, VAddr region_start, + std::size_t region_num_pages, KMemoryState state, + KMemoryPermission perm, PAddr map_addr) { std::lock_guard lock{page_table_lock}; if (!CanContain(region_start, region_num_pages * PageSize, state)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (region_num_pages <= needed_num_pages) { - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } const VAddr addr{ AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)}; if (!addr) { - return ERR_OUT_OF_MEMORY; + return ResultOutOfMemory; } if (is_map_only) { CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); } else { - PageLinkedList page_group; + KPageLinkedList page_group; CASCADE_CODE( system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool)); CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); @@ -843,22 +852,22 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s return MakeResult<VAddr>(addr); } -ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { +ResultCode KPageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; - MemoryPermission perm{}; + KMemoryPermission perm{}; if (const ResultCode result{CheckMemoryState( - nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute, - MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None, - MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None, - MemoryAttribute::DeviceSharedAndUncached)}; + nullptr, &perm, nullptr, addr, size, KMemoryState::FlagCanChangeAttribute, + KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None, + KMemoryAttribute::DeviceSharedAndUncached)}; result.IsError()) { return result; } 
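[editor's note] A pattern worth calling out mid-diff: MapPhysicalMemory and SetHeapSize above both replace the manual Reserve/Release dance (and its ScopeExit cleanup) with KScopedResourceReservation, an RAII guard that rolls the reservation back on any early return unless Commit() is reached. Here is a minimal sketch of that guard built on hypothetical stand-in types; ResourceLimit below is not the kernel's KResourceLimit.

// Sketch: reserve up front, auto-release on failure paths, keep on Commit().
#include <cstddef>
#include <iostream>

struct ResourceLimit {
    std::size_t used = 0;
    std::size_t cap = 0;
    bool Reserve(std::size_t n) {
        if (used + n > cap) {
            return false;
        }
        used += n;
        return true;
    }
    void Release(std::size_t n) { used -= n; }
};

class ScopedResourceReservation {
public:
    ScopedResourceReservation(ResourceLimit* l, std::size_t n)
        : limit{l}, amount{n}, success{l->Reserve(n)} {}
    ~ScopedResourceReservation() {
        // Roll back automatically unless the caller committed.
        if (success && !committed) {
            limit->Release(amount);
        }
    }
    bool Succeeded() const { return success; }
    void Commit() { committed = true; }

private:
    ResourceLimit* limit;
    std::size_t amount;
    bool success;
    bool committed = false;
};

int main() {
    ResourceLimit limit{0, 100};
    {
        ScopedResourceReservation reservation(&limit, 40);
        if (!reservation.Succeeded()) {
            return 1;
        }
        // ... allocation work; an error return here would release the 40 ...
        reservation.Commit(); // allocation succeeded, keep the reservation
    }
    std::cout << limit.used << '\n'; // 40: the committed reservation persists
}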
block_manager->UpdateLock( addr, size / PageSize, - [](MemoryBlockManager::iterator block, MemoryPermission perm) { + [](KMemoryBlockManager::iterator block, KMemoryPermission perm) { block->ShareToDevice(perm); }, perm); @@ -866,22 +875,22 @@ ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { return RESULT_SUCCESS; } -ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) { +ResultCode KPageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) { std::lock_guard lock{page_table_lock}; - MemoryPermission perm{}; + KMemoryPermission perm{}; if (const ResultCode result{CheckMemoryState( - nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute, - MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None, - MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None, - MemoryAttribute::DeviceSharedAndUncached)}; + nullptr, &perm, nullptr, addr, size, KMemoryState::FlagCanChangeAttribute, + KMemoryState::FlagCanChangeAttribute, KMemoryPermission::None, KMemoryPermission::None, + KMemoryAttribute::LockedAndIpcLocked, KMemoryAttribute::None, + KMemoryAttribute::DeviceSharedAndUncached)}; result.IsError()) { return result; } block_manager->UpdateLock( addr, size / PageSize, - [](MemoryBlockManager::iterator block, MemoryPermission perm) { + [](KMemoryBlockManager::iterator block, KMemoryPermission perm) { block->UnshareToDevice(perm); }, perm); @@ -889,20 +898,21 @@ ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) return RESULT_SUCCESS; } -ResultCode PageTable::InitializeMemoryLayout(VAddr start, VAddr end) { - block_manager = std::make_unique<MemoryBlockManager>(start, end); +ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) { + block_manager = std::make_unique<KMemoryBlockManager>(start, end); return RESULT_SUCCESS; } -bool PageTable::IsRegionMapped(VAddr address, u64 size) { - return CheckMemoryState(address, size, MemoryState::All, MemoryState::Free, - MemoryPermission::Mask, MemoryPermission::None, MemoryAttribute::Mask, - MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped) +bool KPageTable::IsRegionMapped(VAddr address, u64 size) { + return CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free, + KMemoryPermission::Mask, KMemoryPermission::None, + KMemoryAttribute::Mask, KMemoryAttribute::None, + KMemoryAttribute::IpcAndDeviceMapped) .IsError(); } -bool PageTable::IsRegionContiguous(VAddr addr, u64 size) const { +bool KPageTable::IsRegionContiguous(VAddr addr, u64 size) const { auto start_ptr = system.Memory().GetPointer(addr); for (u64 offset{}; offset < size; offset += PageSize) { if (start_ptr != system.Memory().GetPointer(addr + offset)) { @@ -913,8 +923,8 @@ bool PageTable::IsRegionContiguous(VAddr addr, u64 size) const { return true; } -void PageTable::AddRegionToPages(VAddr start, std::size_t num_pages, - PageLinkedList& page_linked_list) { +void KPageTable::AddRegionToPages(VAddr start, std::size_t num_pages, + KPageLinkedList& page_linked_list) { VAddr addr{start}; while (addr < start + (num_pages * PageSize)) { const PAddr paddr{GetPhysicalAddr(addr)}; @@ -926,8 +936,8 @@ void PageTable::AddRegionToPages(VAddr start, std::size_t num_pages, } } -VAddr PageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, - u64 needed_num_pages, std::size_t align) { +VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, + u64 needed_num_pages, std::size_t align) { if 
(is_aslr_enabled) { UNIMPLEMENTED(); } @@ -935,8 +945,8 @@ VAddr PageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_pages IsKernel() ? 1 : 4); } -ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group, - OperationType operation) { +ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group, + OperationType operation) { std::lock_guard lock{page_table_lock}; ASSERT(Common::IsAligned(addr, PageSize)); @@ -960,8 +970,8 @@ ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, const PageLinke return RESULT_SUCCESS; } -ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm, - OperationType operation, PAddr map_addr) { +ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, + OperationType operation, PAddr map_addr) { std::lock_guard lock{page_table_lock}; ASSERT(num_pages > 0); @@ -987,34 +997,34 @@ ResultCode PageTable::Operate(VAddr addr, std::size_t num_pages, MemoryPermissio return RESULT_SUCCESS; } -constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const { +constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const { switch (state) { - case MemoryState::Free: - case MemoryState::Kernel: + case KMemoryState::Free: + case KMemoryState::Kernel: return address_space_start; - case MemoryState::Normal: + case KMemoryState::Normal: return heap_region_start; - case MemoryState::Ipc: - case MemoryState::NonSecureIpc: - case MemoryState::NonDeviceIpc: + case KMemoryState::Ipc: + case KMemoryState::NonSecureIpc: + case KMemoryState::NonDeviceIpc: return alias_region_start; - case MemoryState::Stack: + case KMemoryState::Stack: return stack_region_start; - case MemoryState::Io: - case MemoryState::Static: - case MemoryState::ThreadLocal: + case KMemoryState::Io: + case KMemoryState::Static: + case KMemoryState::ThreadLocal: return kernel_map_region_start; - case MemoryState::Shared: - case MemoryState::AliasCode: - case MemoryState::AliasCodeData: - case MemoryState::Transferred: - case MemoryState::SharedTransferred: - case MemoryState::SharedCode: - case MemoryState::GeneratedCode: - case MemoryState::CodeOut: + case KMemoryState::Shared: + case KMemoryState::AliasCode: + case KMemoryState::AliasCodeData: + case KMemoryState::Transferred: + case KMemoryState::SharedTransferred: + case KMemoryState::SharedCode: + case KMemoryState::GeneratedCode: + case KMemoryState::CodeOut: return alias_code_region_start; - case MemoryState::Code: - case MemoryState::CodeData: + case KMemoryState::Code: + case KMemoryState::CodeData: return code_region_start; default: UNREACHABLE(); @@ -1022,34 +1032,34 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const { } } -constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const { +constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const { switch (state) { - case MemoryState::Free: - case MemoryState::Kernel: + case KMemoryState::Free: + case KMemoryState::Kernel: return address_space_end - address_space_start; - case MemoryState::Normal: + case KMemoryState::Normal: return heap_region_end - heap_region_start; - case MemoryState::Ipc: - case MemoryState::NonSecureIpc: - case MemoryState::NonDeviceIpc: + case KMemoryState::Ipc: + case KMemoryState::NonSecureIpc: + case KMemoryState::NonDeviceIpc: return alias_region_end - alias_region_start; - case MemoryState::Stack: + case KMemoryState::Stack: return stack_region_end - 
stack_region_start; - case MemoryState::Io: - case MemoryState::Static: - case MemoryState::ThreadLocal: + case KMemoryState::Io: + case KMemoryState::Static: + case KMemoryState::ThreadLocal: return kernel_map_region_end - kernel_map_region_start; - case MemoryState::Shared: - case MemoryState::AliasCode: - case MemoryState::AliasCodeData: - case MemoryState::Transferred: - case MemoryState::SharedTransferred: - case MemoryState::SharedCode: - case MemoryState::GeneratedCode: - case MemoryState::CodeOut: + case KMemoryState::Shared: + case KMemoryState::AliasCode: + case KMemoryState::AliasCodeData: + case KMemoryState::Transferred: + case KMemoryState::SharedTransferred: + case KMemoryState::SharedCode: + case KMemoryState::GeneratedCode: + case KMemoryState::CodeOut: return alias_code_region_end - alias_code_region_start; - case MemoryState::Code: - case MemoryState::CodeData: + case KMemoryState::Code: + case KMemoryState::CodeData: return code_region_end - code_region_start; default: UNREACHABLE(); @@ -1057,7 +1067,7 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const { } } -constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState state) const { +constexpr bool KPageTable::CanContain(VAddr addr, std::size_t size, KMemoryState state) const { const VAddr end{addr + size}; const VAddr last{end - 1}; const VAddr region_start{GetRegionAddress(state)}; @@ -1068,30 +1078,30 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s const bool is_in_alias{!(end <= alias_region_start || alias_region_end <= addr)}; switch (state) { - case MemoryState::Free: - case MemoryState::Kernel: + case KMemoryState::Free: + case KMemoryState::Kernel: return is_in_region; - case MemoryState::Io: - case MemoryState::Static: - case MemoryState::Code: - case MemoryState::CodeData: - case MemoryState::Shared: - case MemoryState::AliasCode: - case MemoryState::AliasCodeData: - case MemoryState::Stack: - case MemoryState::ThreadLocal: - case MemoryState::Transferred: - case MemoryState::SharedTransferred: - case MemoryState::SharedCode: - case MemoryState::GeneratedCode: - case MemoryState::CodeOut: + case KMemoryState::Io: + case KMemoryState::Static: + case KMemoryState::Code: + case KMemoryState::CodeData: + case KMemoryState::Shared: + case KMemoryState::AliasCode: + case KMemoryState::AliasCodeData: + case KMemoryState::Stack: + case KMemoryState::ThreadLocal: + case KMemoryState::Transferred: + case KMemoryState::SharedTransferred: + case KMemoryState::SharedCode: + case KMemoryState::GeneratedCode: + case KMemoryState::CodeOut: return is_in_region && !is_in_heap && !is_in_alias; - case MemoryState::Normal: + case KMemoryState::Normal: ASSERT(is_in_heap); return is_in_region && !is_in_alias; - case MemoryState::Ipc: - case MemoryState::NonSecureIpc: - case MemoryState::NonDeviceIpc: + case KMemoryState::Ipc: + case KMemoryState::NonSecureIpc: + case KMemoryState::NonDeviceIpc: ASSERT(is_in_alias); return is_in_region && !is_in_heap; default: @@ -1099,53 +1109,54 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s } } -constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryState state_mask, - MemoryState state, MemoryPermission perm_mask, - MemoryPermission perm, MemoryAttribute attr_mask, - MemoryAttribute attr) const { +constexpr ResultCode KPageTable::CheckMemoryState(const KMemoryInfo& info, KMemoryState state_mask, + KMemoryState state, KMemoryPermission perm_mask, + 
KMemoryPermission perm, + KMemoryAttribute attr_mask, + KMemoryAttribute attr) const { // Validate the states match expectation if ((info.state & state_mask) != state) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if ((info.perm & perm_mask) != perm) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if ((info.attribute & attr_mask) != attr) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } return RESULT_SUCCESS; } -ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm, - MemoryAttribute* out_attr, VAddr addr, std::size_t size, - MemoryState state_mask, MemoryState state, - MemoryPermission perm_mask, MemoryPermission perm, - MemoryAttribute attr_mask, MemoryAttribute attr, - MemoryAttribute ignore_attr) { +ResultCode KPageTable::CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, + KMemoryAttribute* out_attr, VAddr addr, std::size_t size, + KMemoryState state_mask, KMemoryState state, + KMemoryPermission perm_mask, KMemoryPermission perm, + KMemoryAttribute attr_mask, KMemoryAttribute attr, + KMemoryAttribute ignore_attr) { std::lock_guard lock{page_table_lock}; // Get information about the first block const VAddr last_addr{addr + size - 1}; - MemoryBlockManager::const_iterator it{block_manager->FindIterator(addr)}; - MemoryInfo info{it->GetMemoryInfo()}; + KMemoryBlockManager::const_iterator it{block_manager->FindIterator(addr)}; + KMemoryInfo info{it->GetMemoryInfo()}; // Validate all blocks in the range have correct state - const MemoryState first_state{info.state}; - const MemoryPermission first_perm{info.perm}; - const MemoryAttribute first_attr{info.attribute}; + const KMemoryState first_state{info.state}; + const KMemoryPermission first_perm{info.perm}; + const KMemoryAttribute first_attr{info.attribute}; while (true) { // Validate the current block if (!(info.state == first_state)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!(info.perm == first_perm)) { - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) == - (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) { - return ERR_INVALID_ADDRESS_STATE; + if (!((info.attribute | static_cast<KMemoryAttribute>(ignore_attr)) == + (first_attr | static_cast<KMemoryAttribute>(ignore_attr)))) { + return ResultInvalidCurrentMemory; } // Validate against the provided masks @@ -1170,10 +1181,10 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission* *out_perm = first_perm; } if (out_attr) { - *out_attr = first_attr & static_cast<MemoryAttribute>(~ignore_attr); + *out_attr = first_attr & static_cast<KMemoryAttribute>(~ignore_attr); } return RESULT_SUCCESS; } -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/memory/page_table.h b/src/core/hle/kernel/k_page_table.h index ce0d388495..49b8243797 100644 --- a/src/core/hle/kernel/memory/page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -10,27 +10,27 @@ #include "common/common_types.h" #include "common/page_table.h" #include "core/file_sys/program_metadata.h" -#include "core/hle/kernel/memory/memory_block.h" -#include "core/hle/kernel/memory/memory_manager.h" +#include "core/hle/kernel/k_memory_block.h" +#include "core/hle/kernel/k_memory_manager.h" #include "core/hle/result.h" namespace Core { class System; } -namespace Kernel::Memory { +namespace Kernel { 
-class MemoryBlockManager; +class KMemoryBlockManager; -class PageTable final : NonCopyable { +class KPageTable final : NonCopyable { public: - explicit PageTable(Core::System& system); + explicit KPageTable(Core::System& system); ResultCode InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, VAddr code_addr, std::size_t code_size, - Memory::MemoryManager::Pool pool); - ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, MemoryState state, - MemoryPermission perm); + KMemoryManager::Pool pool); + ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state, + KMemoryPermission perm); ResultCode MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode UnmapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); @@ -38,20 +38,20 @@ public: ResultCode UnmapMemory(VAddr addr, std::size_t size); ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size); ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size); - ResultCode MapPages(VAddr addr, PageLinkedList& page_linked_list, MemoryState state, - MemoryPermission perm); - ResultCode SetCodeMemoryPermission(VAddr addr, std::size_t size, MemoryPermission perm); - MemoryInfo QueryInfo(VAddr addr); - ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, MemoryPermission perm); + ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, + KMemoryPermission perm); + ResultCode SetCodeMemoryPermission(VAddr addr, std::size_t size, KMemoryPermission perm); + KMemoryInfo QueryInfo(VAddr addr); + ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm); ResultCode ResetTransferMemory(VAddr addr, std::size_t size); - ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, MemoryAttribute mask, - MemoryAttribute value); + ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask, + KMemoryAttribute value); ResultCode SetHeapCapacity(std::size_t new_heap_capacity); ResultVal<VAddr> SetHeapSize(std::size_t size); ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align, bool is_map_only, VAddr region_start, - std::size_t region_num_pages, MemoryState state, - MemoryPermission perm, PAddr map_addr = 0); + std::size_t region_num_pages, KMemoryState state, + KMemoryPermission perm, PAddr map_addr = 0); ResultCode LockForDeviceAddressSpace(VAddr addr, std::size_t size); ResultCode UnlockForDeviceAddressSpace(VAddr addr, std::size_t size); @@ -72,47 +72,49 @@ private: ChangePermissionsAndRefresh, }; - static constexpr MemoryAttribute DefaultMemoryIgnoreAttr = - MemoryAttribute::DontCareMask | MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared; + static constexpr KMemoryAttribute DefaultMemoryIgnoreAttr = KMemoryAttribute::DontCareMask | + KMemoryAttribute::IpcLocked | + KMemoryAttribute::DeviceShared; ResultCode InitializeMemoryLayout(VAddr start, VAddr end); - ResultCode MapPages(VAddr addr, const PageLinkedList& page_linked_list, MemoryPermission perm); - void MapPhysicalMemory(PageLinkedList& page_linked_list, VAddr start, VAddr end); + ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, + KMemoryPermission perm); + void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end); bool IsRegionMapped(VAddr address, u64 size); bool IsRegionContiguous(VAddr addr, u64 size) const; - void AddRegionToPages(VAddr start, 
std::size_t num_pages, PageLinkedList& page_linked_list); - MemoryInfo QueryInfoImpl(VAddr addr); + void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list); + KMemoryInfo QueryInfoImpl(VAddr addr); VAddr AllocateVirtualMemory(VAddr start, std::size_t region_num_pages, u64 needed_num_pages, std::size_t align); - ResultCode Operate(VAddr addr, std::size_t num_pages, const PageLinkedList& page_group, + ResultCode Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group, OperationType operation); - ResultCode Operate(VAddr addr, std::size_t num_pages, MemoryPermission perm, + ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, OperationType operation, PAddr map_addr = 0); - constexpr VAddr GetRegionAddress(MemoryState state) const; - constexpr std::size_t GetRegionSize(MemoryState state) const; - constexpr bool CanContain(VAddr addr, std::size_t size, MemoryState state) const; + constexpr VAddr GetRegionAddress(KMemoryState state) const; + constexpr std::size_t GetRegionSize(KMemoryState state) const; + constexpr bool CanContain(VAddr addr, std::size_t size, KMemoryState state) const; - constexpr ResultCode CheckMemoryState(const MemoryInfo& info, MemoryState state_mask, - MemoryState state, MemoryPermission perm_mask, - MemoryPermission perm, MemoryAttribute attr_mask, - MemoryAttribute attr) const; - ResultCode CheckMemoryState(MemoryState* out_state, MemoryPermission* out_perm, - MemoryAttribute* out_attr, VAddr addr, std::size_t size, - MemoryState state_mask, MemoryState state, - MemoryPermission perm_mask, MemoryPermission perm, - MemoryAttribute attr_mask, MemoryAttribute attr, - MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr); - ResultCode CheckMemoryState(VAddr addr, std::size_t size, MemoryState state_mask, - MemoryState state, MemoryPermission perm_mask, - MemoryPermission perm, MemoryAttribute attr_mask, - MemoryAttribute attr, - MemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) { + constexpr ResultCode CheckMemoryState(const KMemoryInfo& info, KMemoryState state_mask, + KMemoryState state, KMemoryPermission perm_mask, + KMemoryPermission perm, KMemoryAttribute attr_mask, + KMemoryAttribute attr) const; + ResultCode CheckMemoryState(KMemoryState* out_state, KMemoryPermission* out_perm, + KMemoryAttribute* out_attr, VAddr addr, std::size_t size, + KMemoryState state_mask, KMemoryState state, + KMemoryPermission perm_mask, KMemoryPermission perm, + KMemoryAttribute attr_mask, KMemoryAttribute attr, + KMemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr); + ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask, + KMemoryState state, KMemoryPermission perm_mask, + KMemoryPermission perm, KMemoryAttribute attr_mask, + KMemoryAttribute attr, + KMemoryAttribute ignore_attr = DefaultMemoryIgnoreAttr) { return CheckMemoryState(nullptr, nullptr, nullptr, addr, size, state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr); } std::recursive_mutex page_table_lock; - std::unique_ptr<MemoryBlockManager> block_manager; + std::unique_ptr<KMemoryBlockManager> block_manager; public: constexpr VAddr GetAddressSpaceStart() const { @@ -212,7 +214,7 @@ public: return !IsOutsideASLRRegion(address, size); } constexpr PAddr GetPhysicalAddr(VAddr addr) { - return page_table_impl.backing_addr[addr >> Memory::PageBits] + addr; + return page_table_impl.backing_addr[addr >> PageBits] + addr; } private: @@ -267,11 +269,11 @@ private: bool is_kernel{}; bool is_aslr_enabled{}; - 
MemoryManager::Pool memory_pool{MemoryManager::Pool::Application}; + KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application}; Common::PageTable page_table_impl; Core::System& system; }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp index d8a42dbaf9..4b4d34857b 100644 --- a/src/core/hle/kernel/k_readable_event.cpp +++ b/src/core/hle/kernel/k_readable_event.cpp @@ -6,7 +6,6 @@ #include "common/assert.h" #include "common/common_funcs.h" #include "common/logging/log.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_thread.h" @@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() { KScopedSchedulerLock lk{kernel}; if (!is_signaled) { - return Svc::ResultInvalidState; + return ResultInvalidState; } is_signaled = false; diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp index ab2ab683f7..d7a4a38e64 100644 --- a/src/core/hle/kernel/k_resource_limit.cpp +++ b/src/core/hle/kernel/k_resource_limit.cpp @@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const { ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) { const auto index = static_cast<std::size_t>(which); KScopedLightLock lk(lock); - R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState); + R_UNLESS(current_values[index] <= value, ResultInvalidState); limit_values[index] = value; diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h new file mode 100644 index 0000000000..c5deca00b3 --- /dev/null +++ b/src/core/hle/kernel/k_scoped_resource_reservation.h @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file references various implementation details from Atmosphere, an open-source firmware for +// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. + +#pragma once + +#include "common/common_types.h" +#include "core/hle/kernel/k_resource_limit.h" +#include "core/hle/kernel/process.h" + +namespace Kernel { + +class KScopedResourceReservation { +public: + explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r, + s64 v, s64 timeout) + : resource_limit(std::move(l)), value(v), resource(r) { + if (resource_limit && value) { + success = resource_limit->Reserve(resource, value, timeout); + } else { + success = true; + } + } + + explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r, + s64 v = 1) + : resource_limit(std::move(l)), value(v), resource(r) { + if (resource_limit && value) { + success = resource_limit->Reserve(resource, value); + } else { + success = true; + } + } + + explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t) + : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {} + + explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1) + : KScopedResourceReservation(p->GetResourceLimit(), r, v) {} + + ~KScopedResourceReservation() noexcept { + if (resource_limit && value && success) { + // resource was not committed, release the reservation. 
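+ // (Commit() clears resource_limit, so a committed reservation makes this release a no-op.)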
+ resource_limit->Release(resource, value); + } + } + + /// Commit the resource reservation, destruction of this object does not release the resource + void Commit() { + resource_limit = nullptr; + } + + [[nodiscard]] bool Succeeded() const { + return success; + } + +private: + std::shared_ptr<KResourceLimit> resource_limit; + s64 value; + LimitableResource resource; + bool success; +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_shared_memory.cpp b/src/core/hle/kernel/k_shared_memory.cpp new file mode 100644 index 0000000000..9b14f42b58 --- /dev/null +++ b/src/core/hle/kernel/k_shared_memory.cpp @@ -0,0 +1,65 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/core.h" +#include "core/hle/kernel/k_page_table.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" +#include "core/hle/kernel/k_shared_memory.h" +#include "core/hle/kernel/kernel.h" + +namespace Kernel { + +KSharedMemory::KSharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) + : Object{kernel}, device_memory{device_memory} {} + +KSharedMemory::~KSharedMemory() { + kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size); +} + +std::shared_ptr<KSharedMemory> KSharedMemory::Create( + KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, + KPageLinkedList&& page_list, KMemoryPermission owner_permission, + KMemoryPermission user_permission, PAddr physical_address, std::size_t size, std::string name) { + + const auto resource_limit = kernel.GetSystemResourceLimit(); + KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory, + size); + ASSERT(memory_reservation.Succeeded()); + + std::shared_ptr<KSharedMemory> shared_memory{ + std::make_shared<KSharedMemory>(kernel, device_memory)}; + + shared_memory->owner_process = owner_process; + shared_memory->page_list = std::move(page_list); + shared_memory->owner_permission = owner_permission; + shared_memory->user_permission = user_permission; + shared_memory->physical_address = physical_address; + shared_memory->size = size; + shared_memory->name = name; + + memory_reservation.Commit(); + return shared_memory; +} + +ResultCode KSharedMemory::Map(Process& target_process, VAddr address, std::size_t size, + KMemoryPermission permissions) { + const u64 page_count{(size + PageSize - 1) / PageSize}; + + if (page_list.GetNumPages() != page_count) { + UNIMPLEMENTED_MSG("Page count does not match"); + } + + const KMemoryPermission expected = + &target_process == owner_process ? 
owner_permission : user_permission; + + if (permissions != expected) { + UNIMPLEMENTED_MSG("Permission does not match"); + } + + return target_process.PageTable().MapPages(address, page_list, KMemoryState::Shared, + permissions); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/k_shared_memory.h index 623bd8b115..016e34be5c 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/k_shared_memory.h @@ -9,8 +9,8 @@ #include "common/common_types.h" #include "core/device_memory.h" -#include "core/hle/kernel/memory/memory_block.h" -#include "core/hle/kernel/memory/page_linked_list.h" +#include "core/hle/kernel/k_memory_block.h" +#include "core/hle/kernel/k_page_linked_list.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/result.h" @@ -19,15 +19,15 @@ namespace Kernel { class KernelCore; -class SharedMemory final : public Object { +class KSharedMemory final : public Object { public: - explicit SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory); - ~SharedMemory() override; + explicit KSharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory); + ~KSharedMemory() override; - static std::shared_ptr<SharedMemory> Create( + static std::shared_ptr<KSharedMemory> Create( KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, - Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission, - Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, + KPageLinkedList&& page_list, KMemoryPermission owner_permission, + KMemoryPermission user_permission, PAddr physical_address, std::size_t size, std::string name); std::string GetTypeName() const override { @@ -51,7 +51,7 @@ public: * @param permissions Memory block map permissions (specified by SVC field) */ ResultCode Map(Process& target_process, VAddr address, std::size_t size, - Memory::MemoryPermission permissions); + KMemoryPermission permissions); /** * Gets a pointer to the shared memory block @@ -76,9 +76,9 @@ public: private: Core::DeviceMemory& device_memory; Process* owner_process{}; - Memory::PageLinkedList page_list; - Memory::MemoryPermission owner_permission{}; - Memory::MemoryPermission user_permission{}; + KPageLinkedList page_list; + KMemoryPermission owner_permission{}; + KMemoryPermission user_permission{}; PAddr physical_address{}; std::size_t size{}; std::string name; diff --git a/src/core/hle/kernel/memory/slab_heap.h b/src/core/hle/kernel/k_slab_heap.h index 465eaddb32..aa4471d2f3 100644 --- a/src/core/hle/kernel/memory/slab_heap.h +++ b/src/core/hle/kernel/k_slab_heap.h @@ -2,9 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. 
- #pragma once #include <atomic> @@ -12,17 +9,17 @@ #include "common/assert.h" #include "common/common_types.h" -namespace Kernel::Memory { +namespace Kernel { namespace impl { -class SlabHeapImpl final : NonCopyable { +class KSlabHeapImpl final : NonCopyable { public: struct Node { Node* next{}; }; - constexpr SlabHeapImpl() = default; + constexpr KSlabHeapImpl() = default; void Initialize(std::size_t size) { ASSERT(head == nullptr); @@ -65,9 +62,9 @@ private: } // namespace impl -class SlabHeapBase : NonCopyable { +class KSlabHeapBase : NonCopyable { public: - constexpr SlabHeapBase() = default; + constexpr KSlabHeapBase() = default; constexpr bool Contains(uintptr_t addr) const { return start <= addr && addr < end; @@ -126,7 +123,7 @@ public: } private: - using Impl = impl::SlabHeapImpl; + using Impl = impl::KSlabHeapImpl; Impl impl; uintptr_t peak{}; @@ -135,9 +132,9 @@ private: }; template <typename T> -class SlabHeap final : public SlabHeapBase { +class KSlabHeap final : public KSlabHeapBase { public: - constexpr SlabHeap() : SlabHeapBase() {} + constexpr KSlabHeap() : KSlabHeapBase() {} void Initialize(void* memory, std::size_t memory_size) { InitializeImpl(sizeof(T), memory, memory_size); @@ -160,4 +157,4 @@ public: } }; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/k_spin_lock.cpp b/src/core/hle/kernel/k_spin_lock.cpp new file mode 100644 index 0000000000..4412aa4bb2 --- /dev/null +++ b/src/core/hle/kernel/k_spin_lock.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/hle/kernel/k_spin_lock.h" + +#if _MSC_VER +#include <intrin.h> +#if _M_AMD64 +#define __x86_64__ 1 +#endif +#if _M_ARM64 +#define __aarch64__ 1 +#endif +#else +#if __x86_64__ +#include <xmmintrin.h> +#endif +#endif + +namespace { + +void ThreadPause() { +#if __x86_64__ + _mm_pause(); +#elif __aarch64__ && _MSC_VER + __yield(); +#elif __aarch64__ + asm("yield"); +#endif +} + +} // namespace + +namespace Kernel { + +void KSpinLock::Lock() { + while (lck.test_and_set(std::memory_order_acquire)) { + ThreadPause(); + } +} + +void KSpinLock::Unlock() { + lck.clear(std::memory_order_release); +} + +bool KSpinLock::TryLock() { + if (lck.test_and_set(std::memory_order_acquire)) { + return false; + } + return true; +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_spin_lock.h b/src/core/hle/kernel/k_spin_lock.h new file mode 100644 index 0000000000..12c4b2e889 --- /dev/null +++ b/src/core/hle/kernel/k_spin_lock.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
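+// A minimal busy-wait lock: Lock() spins on atomic_flag::test_and_set (acquire ordering) with +// an architecture pause/yield hint (ThreadPause above), and Unlock() clears the flag with +// release ordering.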
+ +#pragma once + +#include <atomic> + +#include "core/hle/kernel/k_scoped_lock.h" + +namespace Kernel { + +class KSpinLock { +public: + KSpinLock() = default; + + KSpinLock(const KSpinLock&) = delete; + KSpinLock& operator=(const KSpinLock&) = delete; + + KSpinLock(KSpinLock&&) = delete; + KSpinLock& operator=(KSpinLock&&) = delete; + + void Lock(); + void Unlock(); + [[nodiscard]] bool TryLock(); + +private: + std::atomic_flag lck = ATOMIC_FLAG_INIT; +}; + +using KScopedSpinLock = KScopedLock<KSpinLock>; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp index 140cc46a72..82f72a0fe6 100644 --- a/src/core/hle/kernel/k_synchronization_object.cpp +++ b/src/core/hle/kernel/k_synchronization_object.cpp @@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index, // Check if the timeout is zero. if (timeout == 0) { slp.CancelSleep(); - return Svc::ResultTimedOut; + return ResultTimedOut; } // Check if the thread should terminate. if (thread->IsTerminationRequested()) { slp.CancelSleep(); - return Svc::ResultTerminationRequested; + return ResultTerminationRequested; } // Check if waiting was canceled. if (thread->IsWaitCancelled()) { slp.CancelSleep(); thread->ClearWaitCancelled(); - return Svc::ResultCancelled; + return ResultCancelled; } // Add the waiters. @@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index, // Mark the thread as waiting. thread->SetCancellable(); - thread->SetSyncedObject(nullptr, Svc::ResultTimedOut); + thread->SetSyncedObject(nullptr, ResultTimedOut); thread->SetState(ThreadState::Waiting); thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization); } diff --git a/src/core/hle/kernel/memory/system_control.cpp b/src/core/hle/kernel/k_system_control.cpp index 11d204bc29..aa1682f69c 100644 --- a/src/core/hle/kernel/memory/system_control.cpp +++ b/src/core/hle/kernel/k_system_control.cpp @@ -1,12 +1,13 @@ -// Copyright 2020 yuzu Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <random> -#include "core/hle/kernel/memory/system_control.h" +#include "core/hle/kernel/k_system_control.h" + +namespace Kernel { -namespace Kernel::Memory::SystemControl { namespace { template <typename F> u64 GenerateUniformRange(u64 min, u64 max, F f) { @@ -25,16 +26,17 @@ u64 GenerateUniformRange(u64 min, u64 max, F f) { } } -u64 GenerateRandomU64ForInit() { +} // Anonymous namespace + +u64 KSystemControl::GenerateRandomU64() { static std::random_device device; static std::mt19937 gen(device()); static std::uniform_int_distribution<u64> distribution(1, std::numeric_limits<u64>::max()); return distribution(gen); } -} // Anonymous namespace -u64 GenerateRandomRange(u64 min, u64 max) { - return GenerateUniformRange(min, max, GenerateRandomU64ForInit); +u64 KSystemControl::GenerateRandomRange(u64 min, u64 max) { + return GenerateUniformRange(min, max, GenerateRandomU64); } -} // namespace Kernel::Memory::SystemControl +} // namespace Kernel diff --git a/src/core/hle/kernel/k_system_control.h b/src/core/hle/kernel/k_system_control.h new file mode 100644 index 0000000000..1d5b64ffaa --- /dev/null +++ b/src/core/hle/kernel/k_system_control.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
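+// Random-number helpers: GenerateRandomU64() draws from a std::mt19937 seeded by +// std::random_device, and GenerateRandomRange(min, max) narrows it into the requested +// range via GenerateUniformRange (see k_system_control.cpp above).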
+ +#pragma once + +#include "common/common_types.h" + +namespace Kernel { + +class KSystemControl { +public: + KSystemControl() = default; + + static u64 GenerateRandomRange(u64 min, u64 max); + static u64 GenerateRandomU64(); +}; + +} // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index b59259c4f2..1661afbd97 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -18,16 +18,15 @@ #include "core/core.h" #include "core/cpu_manager.h" #include "core/hardware_properties.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/k_condition_variable.h" +#include "core/hle/kernel/k_memory_layout.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/k_thread_queue.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/memory_layout.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc_results.h" @@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s // Set core ID and wait result. core_id = phys_core; - wait_result = Svc::ResultNoSynchronizationObject; + wait_result = ResultNoSynchronizationObject; // Set priorities. priority = prio; @@ -238,7 +237,7 @@ void KThread::Finalize() { while (it != waiter_list.end()) { // The thread shouldn't be a kernel waiter. it->SetLockOwner(nullptr); - it->SetSyncedObject(nullptr, Svc::ResultInvalidState); + it->SetSyncedObject(nullptr, ResultInvalidState); it->Wakeup(); it = waiter_list.erase(it); } @@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) { // If the core id is no-update magic, preserve the ideal core id. if (core_id == Svc::IdealCoreNoUpdate) { core_id = virtual_ideal_core_id; - R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination); + R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination); } // Set the virtual core/affinity mask. @@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) { if (GetStackParameters().is_pinned) { // Verify that the current thread isn't terminating. R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), - Svc::ResultTerminationRequested); + ResultTerminationRequested); // Note that the thread was pinned. thread_is_pinned = true; @@ -604,7 +603,7 @@ void KThread::WaitCancel() { sleeping_queue->WakeupThread(this); wait_cancelled = true; } else { - SetSyncedObject(nullptr, Svc::ResultCancelled); + SetSyncedObject(nullptr, ResultCancelled); SetState(ThreadState::Runnable); wait_cancelled = false; } @@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) { // Verify our state. const auto cur_state = GetState(); R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable), - Svc::ResultInvalidState); + ResultInvalidState); // Either pause or resume. if (activity == Svc::ThreadActivity::Paused) { // Verify that we're not suspended. - R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); + R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState); // Suspend. 
RequestSuspend(SuspendType::Thread); @@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) { ASSERT(activity == Svc::ThreadActivity::Runnable); // Verify that we're suspended. - R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); + R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState); // Resume. Resume(SuspendType::Thread); @@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) { if (GetStackParameters().is_pinned) { // Verify that the current thread isn't terminating. R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), - Svc::ResultTerminationRequested); + ResultTerminationRequested); // Note that the thread was pinned and not current. thread_is_pinned = true; @@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) { KScopedSchedulerLock sl{kernel}; // Verify that we're suspended. - R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState); + R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState); // If we're not terminating, get the thread's user context. if (!IsTerminationRequested()) { @@ -783,7 +782,7 @@ void KThread::AddWaiterImpl(KThread* thread) { } // Keep track of how many kernel waiters we have. - if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + if (IsKernelAddressKey(thread->GetAddressKey())) { ASSERT((num_kernel_waiters++) >= 0); } @@ -796,7 +795,7 @@ void KThread::RemoveWaiterImpl(KThread* thread) { ASSERT(kernel.GlobalSchedulerContext().IsLocked()); // Keep track of how many kernel waiters we have. - if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + if (IsKernelAddressKey(thread->GetAddressKey())) { ASSERT((num_kernel_waiters--) > 0); } @@ -871,7 +870,7 @@ KThread* KThread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) { KThread* thread = std::addressof(*it); // Keep track of how many kernel waiters we have. - if (Memory::IsKernelAddressKey(thread->GetAddressKey())) { + if (IsKernelAddressKey(thread->GetAddressKey())) { ASSERT((num_kernel_waiters--) > 0); } it = waiter_list.erase(it); @@ -905,12 +904,11 @@ ResultCode KThread::Run() { KScopedSchedulerLock lk{kernel}; // If either this thread or the current thread are requesting termination, note it. - R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested); - R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), - Svc::ResultTerminationRequested); + R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested); + R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested); // Ensure our thread state is correct. - R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState); + R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState); // If the current thread has been asked to suspend, suspend it and retry. if (GetCurrentThread(kernel).IsSuspended()) { @@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) { // Check if the thread should terminate. if (IsTerminationRequested()) { slp.CancelSleep(); - return Svc::ResultTerminationRequested; + return ResultTerminationRequested; } // Mark the thread as waiting. 
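The KScopedResourceReservation guard introduced earlier is the pattern these call sites migrate to: construct the guard, fail fast if the reservation did not succeed, perform the fallible work, and call Commit() only at the point of no return, so every early-return path releases the reservation automatically. A minimal sketch of that call shape, using only types from this change (MapSomething and its parameters are hypothetical):

ResultCode MapSomething(Process* process, std::size_t size) {
    // Reserve physical memory against the process's resource limit up front.
    KScopedResourceReservation reservation(process, LimitableResource::PhysicalMemory,
                                           static_cast<s64>(size));
    R_UNLESS(reservation.Succeeded(), ResultResourceLimitedExceeded);

    // ... fallible mapping work; returning early here releases the reservation ...

    // Success: keep the resources; the guard's destructor becomes a no-op.
    reservation.Commit();
    return RESULT_SUCCESS;
}

KSharedMemory::Create and Process::LoadFromMetadata elsewhere in this change follow this shape (Create asserts that the reservation succeeded instead of returning an error).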
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index b20c2d13a0..331cf3a60f 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -26,19 +26,19 @@ #include "core/device_memory.h" #include "core/hardware_properties.h" #include "core/hle/kernel/client_port.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/k_memory_layout.h" +#include "core/hle/kernel/k_memory_manager.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_shared_memory.h" +#include "core/hle/kernel/k_slab_heap.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/memory_layout.h" -#include "core/hle/kernel/memory/memory_manager.h" -#include "core/hle/kernel/memory/slab_heap.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/service_thread.h" -#include "core/hle/kernel/shared_memory.h" +#include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/time_manager.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -101,8 +101,6 @@ struct KernelCore::Impl { current_process = nullptr; - system_resource_limit = nullptr; - global_handle_table.Clear(); preemption_event = nullptr; @@ -111,6 +109,13 @@ struct KernelCore::Impl { exclusive_monitor.reset(); + hid_shared_mem = nullptr; + font_shared_mem = nullptr; + irs_shared_mem = nullptr; + time_shared_mem = nullptr; + + system_resource_limit = nullptr; + // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others next_host_thread_id = Core::Hardware::NUM_CPU_CORES; } @@ -141,11 +146,17 @@ struct KernelCore::Impl { ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess()); ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200) .IsSuccess()); - ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess()); + ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess()); - if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) { + // Derived from recent software updates. 
The kernel reserves 27MB + constexpr u64 kernel_size{0x1b00000}; + if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) { UNREACHABLE(); } + // Reserve secure applet memory, introduced in firmware 5.0.0 + constexpr u64 secure_applet_memory_size{0x400000}; + ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory, + secure_applet_memory_size)); } void InitializePreemption(KernelCore& kernel) { @@ -260,7 +271,7 @@ struct KernelCore::Impl { void InitializeMemoryLayout() { // Initialize memory layout - constexpr Memory::MemoryLayout layout{Memory::MemoryLayout::GetDefaultLayout()}; + constexpr KMemoryLayout layout{KMemoryLayout::GetDefaultLayout()}; constexpr std::size_t hid_size{0x40000}; constexpr std::size_t font_size{0x1100000}; constexpr std::size_t irs_size{0x8000}; @@ -271,39 +282,42 @@ struct KernelCore::Impl { constexpr PAddr time_addr{layout.System().StartAddress() + hid_size + font_size + irs_size}; // Initialize memory manager - memory_manager = std::make_unique<Memory::MemoryManager>(); - memory_manager->InitializeManager(Memory::MemoryManager::Pool::Application, + memory_manager = std::make_unique<KMemoryManager>(); + memory_manager->InitializeManager(KMemoryManager::Pool::Application, layout.Application().StartAddress(), layout.Application().EndAddress()); - memory_manager->InitializeManager(Memory::MemoryManager::Pool::Applet, + memory_manager->InitializeManager(KMemoryManager::Pool::Applet, layout.Applet().StartAddress(), layout.Applet().EndAddress()); - memory_manager->InitializeManager(Memory::MemoryManager::Pool::System, + memory_manager->InitializeManager(KMemoryManager::Pool::System, layout.System().StartAddress(), layout.System().EndAddress()); - hid_shared_mem = Kernel::SharedMemory::Create( - system.Kernel(), system.DeviceMemory(), nullptr, - {hid_addr, hid_size / Memory::PageSize}, Memory::MemoryPermission::None, - Memory::MemoryPermission::Read, hid_addr, hid_size, "HID:SharedMemory"); - font_shared_mem = Kernel::SharedMemory::Create( - system.Kernel(), system.DeviceMemory(), nullptr, - {font_pa, font_size / Memory::PageSize}, Memory::MemoryPermission::None, - Memory::MemoryPermission::Read, font_pa, font_size, "Font:SharedMemory"); - irs_shared_mem = Kernel::SharedMemory::Create( - system.Kernel(), system.DeviceMemory(), nullptr, - {irs_addr, irs_size / Memory::PageSize}, Memory::MemoryPermission::None, - Memory::MemoryPermission::Read, irs_addr, irs_size, "IRS:SharedMemory"); - time_shared_mem = Kernel::SharedMemory::Create( - system.Kernel(), system.DeviceMemory(), nullptr, - {time_addr, time_size / Memory::PageSize}, Memory::MemoryPermission::None, - Memory::MemoryPermission::Read, time_addr, time_size, "Time:SharedMemory"); + hid_shared_mem = Kernel::KSharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, {hid_addr, hid_size / PageSize}, + KMemoryPermission::None, KMemoryPermission::Read, hid_addr, hid_size, + "HID:SharedMemory"); + font_shared_mem = Kernel::KSharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, {font_pa, font_size / PageSize}, + KMemoryPermission::None, KMemoryPermission::Read, font_pa, font_size, + "Font:SharedMemory"); + irs_shared_mem = Kernel::KSharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, {irs_addr, irs_size / PageSize}, + KMemoryPermission::None, KMemoryPermission::Read, irs_addr, irs_size, + "IRS:SharedMemory"); + time_shared_mem = Kernel::KSharedMemory::Create( + system.Kernel(), system.DeviceMemory(), nullptr, {time_addr, 
time_size / PageSize}, + KMemoryPermission::None, KMemoryPermission::Read, time_addr, time_size, + "Time:SharedMemory"); // Allocate slab heaps - user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>(); + user_slab_heap_pages = std::make_unique<KSlabHeap<Page>>(); + constexpr u64 user_slab_heap_size{0x1ef000}; + // Reserve slab heaps + ASSERT( + system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size)); // Initialize slab heaps - constexpr u64 user_slab_heap_size{0x3de000}; user_slab_heap_pages->Initialize( system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase), user_slab_heap_size); @@ -339,14 +353,14 @@ struct KernelCore::Impl { std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES}; // Kernel memory management - std::unique_ptr<Memory::MemoryManager> memory_manager; - std::unique_ptr<Memory::SlabHeap<Memory::Page>> user_slab_heap_pages; + std::unique_ptr<KMemoryManager> memory_manager; + std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages; // Shared memory for services - std::shared_ptr<Kernel::SharedMemory> hid_shared_mem; - std::shared_ptr<Kernel::SharedMemory> font_shared_mem; - std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; - std::shared_ptr<Kernel::SharedMemory> time_shared_mem; + std::shared_ptr<Kernel::KSharedMemory> hid_shared_mem; + std::shared_ptr<Kernel::KSharedMemory> font_shared_mem; + std::shared_ptr<Kernel::KSharedMemory> irs_shared_mem; + std::shared_ptr<Kernel::KSharedMemory> time_shared_mem; // Threads used for services std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; @@ -564,51 +578,51 @@ KThread* KernelCore::GetCurrentEmuThread() const { return impl->GetCurrentEmuThread(); } -Memory::MemoryManager& KernelCore::MemoryManager() { +KMemoryManager& KernelCore::MemoryManager() { return *impl->memory_manager; } -const Memory::MemoryManager& KernelCore::MemoryManager() const { +const KMemoryManager& KernelCore::MemoryManager() const { return *impl->memory_manager; } -Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() { +KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() { return *impl->user_slab_heap_pages; } -const Memory::SlabHeap<Memory::Page>& KernelCore::GetUserSlabHeapPages() const { +const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const { return *impl->user_slab_heap_pages; } -Kernel::SharedMemory& KernelCore::GetHidSharedMem() { +Kernel::KSharedMemory& KernelCore::GetHidSharedMem() { return *impl->hid_shared_mem; } -const Kernel::SharedMemory& KernelCore::GetHidSharedMem() const { +const Kernel::KSharedMemory& KernelCore::GetHidSharedMem() const { return *impl->hid_shared_mem; } -Kernel::SharedMemory& KernelCore::GetFontSharedMem() { +Kernel::KSharedMemory& KernelCore::GetFontSharedMem() { return *impl->font_shared_mem; } -const Kernel::SharedMemory& KernelCore::GetFontSharedMem() const { +const Kernel::KSharedMemory& KernelCore::GetFontSharedMem() const { return *impl->font_shared_mem; } -Kernel::SharedMemory& KernelCore::GetIrsSharedMem() { +Kernel::KSharedMemory& KernelCore::GetIrsSharedMem() { return *impl->irs_shared_mem; } -const Kernel::SharedMemory& KernelCore::GetIrsSharedMem() const { +const Kernel::KSharedMemory& KernelCore::GetIrsSharedMem() const { return *impl->irs_shared_mem; } -Kernel::SharedMemory& KernelCore::GetTimeSharedMem() { +Kernel::KSharedMemory& KernelCore::GetTimeSharedMem() { return *impl->time_shared_mem; } -const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const { +const Kernel::KSharedMemory& 
KernelCore::GetTimeSharedMem() const { return *impl->time_shared_mem; } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 806a0d9868..56906f2da2 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,7 +11,7 @@ #include <vector> #include "core/arm/cpu_interrupt_handler.h" #include "core/hardware_properties.h" -#include "core/hle/kernel/memory/memory_types.h" +#include "core/hle/kernel/memory_types.h" #include "core/hle/kernel/object.h" namespace Core { @@ -27,25 +27,23 @@ struct EventType; namespace Kernel { -namespace Memory { -class MemoryManager; -template <typename T> -class SlabHeap; -} // namespace Memory - class ClientPort; class GlobalSchedulerContext; class HandleTable; -class PhysicalCore; -class Process; +class KMemoryManager; class KResourceLimit; class KScheduler; -class SharedMemory; +class KSharedMemory; +class KThread; +class PhysicalCore; +class Process; class ServiceThread; class Synchronization; -class KThread; class TimeManager; +template <typename T> +class KSlabHeap; + using EmuThreadHandle = uintptr_t; constexpr EmuThreadHandle EmuThreadHandleInvalid{}; constexpr EmuThreadHandle EmuThreadHandleReserved{1ULL << 63}; @@ -178,40 +176,40 @@ public: void RegisterHostThread(); /// Gets the virtual memory manager for the kernel. - Memory::MemoryManager& MemoryManager(); + KMemoryManager& MemoryManager(); /// Gets the virtual memory manager for the kernel. - const Memory::MemoryManager& MemoryManager() const; + const KMemoryManager& MemoryManager() const; /// Gets the slab heap allocated for user space pages. - Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages(); + KSlabHeap<Page>& GetUserSlabHeapPages(); /// Gets the slab heap allocated for user space pages. - const Memory::SlabHeap<Memory::Page>& GetUserSlabHeapPages() const; + const KSlabHeap<Page>& GetUserSlabHeapPages() const; /// Gets the shared memory object for HID services. - Kernel::SharedMemory& GetHidSharedMem(); + Kernel::KSharedMemory& GetHidSharedMem(); /// Gets the shared memory object for HID services. - const Kernel::SharedMemory& GetHidSharedMem() const; + const Kernel::KSharedMemory& GetHidSharedMem() const; /// Gets the shared memory object for font services. - Kernel::SharedMemory& GetFontSharedMem(); + Kernel::KSharedMemory& GetFontSharedMem(); /// Gets the shared memory object for font services. - const Kernel::SharedMemory& GetFontSharedMem() const; + const Kernel::KSharedMemory& GetFontSharedMem() const; /// Gets the shared memory object for IRS services. - Kernel::SharedMemory& GetIrsSharedMem(); + Kernel::KSharedMemory& GetIrsSharedMem(); /// Gets the shared memory object for IRS services. - const Kernel::SharedMemory& GetIrsSharedMem() const; + const Kernel::KSharedMemory& GetIrsSharedMem() const; /// Gets the shared memory object for Time services. - Kernel::SharedMemory& GetTimeSharedMem(); + Kernel::KSharedMemory& GetTimeSharedMem(); /// Gets the shared memory object for Time services. - const Kernel::SharedMemory& GetTimeSharedMem() const; + const Kernel::KSharedMemory& GetTimeSharedMem() const; /// Suspend/unsuspend the OS. void Suspend(bool in_suspention); diff --git a/src/core/hle/kernel/memory/page_heap.h b/src/core/hle/kernel/memory/page_heap.h deleted file mode 100644 index 1310932841..0000000000 --- a/src/core/hle/kernel/memory/page_heap.h +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -// This file references various implementation details from Atmosphere, an open-source firmware for -// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. - -#pragma once - -#include <array> -#include <bit> -#include <vector> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "core/hle/kernel/memory/memory_types.h" - -namespace Kernel::Memory { - -class PageHeap final : NonCopyable { -public: - static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) { - const auto target_pages{std::max(num_pages, align_pages)}; - for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) { - if (target_pages <= - (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { - return static_cast<s32>(i); - } - } - return -1; - } - - static constexpr s32 GetBlockIndex(std::size_t num_pages) { - for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) { - if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { - return i; - } - } - return -1; - } - - static constexpr std::size_t GetBlockSize(std::size_t index) { - return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index]; - } - - static constexpr std::size_t GetBlockNumPages(std::size_t index) { - return GetBlockSize(index) / PageSize; - } - -private: - static constexpr std::size_t NumMemoryBlockPageShifts{7}; - static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ - 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, - }; - - class Block final : NonCopyable { - private: - class Bitmap final : NonCopyable { - public: - static constexpr std::size_t MaxDepth{4}; - - private: - std::array<u64*, MaxDepth> bit_storages{}; - std::size_t num_bits{}; - std::size_t used_depths{}; - - public: - constexpr Bitmap() = default; - - constexpr std::size_t GetNumBits() const { - return num_bits; - } - constexpr s32 GetHighestDepthIndex() const { - return static_cast<s32>(used_depths) - 1; - } - - constexpr u64* Initialize(u64* storage, std::size_t size) { - //* Initially, everything is un-set - num_bits = 0; - - // Calculate the needed bitmap depth - used_depths = static_cast<std::size_t>(GetRequiredDepth(size)); - ASSERT(used_depths <= MaxDepth); - - // Set the bitmap pointers - for (s32 depth{GetHighestDepthIndex()}; depth >= 0; depth--) { - bit_storages[depth] = storage; - size = Common::AlignUp(size, 64) / 64; - storage += size; - } - - return storage; - } - - s64 FindFreeBlock() const { - uintptr_t offset{}; - s32 depth{}; - - do { - const u64 v{bit_storages[depth][offset]}; - if (v == 0) { - // Non-zero depth indicates that a previous level had a free block - ASSERT(depth == 0); - return -1; - } - offset = offset * 64 + static_cast<u32>(std::countr_zero(v)); - ++depth; - } while (depth < static_cast<s32>(used_depths)); - - return static_cast<s64>(offset); - } - - constexpr void SetBit(std::size_t offset) { - SetBit(GetHighestDepthIndex(), offset); - num_bits++; - } - - constexpr void ClearBit(std::size_t offset) { - ClearBit(GetHighestDepthIndex(), offset); - num_bits--; - } - - constexpr bool ClearRange(std::size_t offset, std::size_t count) { - const s32 depth{GetHighestDepthIndex()}; - const auto bit_ind{offset / 64}; - u64* bits{bit_storages[depth]}; - if (count < 64) { - const auto shift{offset % 64}; - ASSERT(shift + count <= 64); - // Check that all the bits are set - const u64 mask{((1ULL << count) - 1) << shift}; - u64 v{bits[bit_ind]}; - if 
((v & mask) != mask) { - return false; - } - - // Clear the bits - v &= ~mask; - bits[bit_ind] = v; - if (v == 0) { - ClearBit(depth - 1, bit_ind); - } - } else { - ASSERT(offset % 64 == 0); - ASSERT(count % 64 == 0); - // Check that all the bits are set - std::size_t remaining{count}; - std::size_t i = 0; - do { - if (bits[bit_ind + i++] != ~u64(0)) { - return false; - } - remaining -= 64; - } while (remaining > 0); - - // Clear the bits - remaining = count; - i = 0; - do { - bits[bit_ind + i] = 0; - ClearBit(depth - 1, bit_ind + i); - i++; - remaining -= 64; - } while (remaining > 0); - } - - num_bits -= count; - return true; - } - - private: - constexpr void SetBit(s32 depth, std::size_t offset) { - while (depth >= 0) { - const auto ind{offset / 64}; - const auto which{offset % 64}; - const u64 mask{1ULL << which}; - - u64* bit{std::addressof(bit_storages[depth][ind])}; - const u64 v{*bit}; - ASSERT((v & mask) == 0); - *bit = v | mask; - if (v) { - break; - } - offset = ind; - depth--; - } - } - - constexpr void ClearBit(s32 depth, std::size_t offset) { - while (depth >= 0) { - const auto ind{offset / 64}; - const auto which{offset % 64}; - const u64 mask{1ULL << which}; - - u64* bit{std::addressof(bit_storages[depth][ind])}; - u64 v{*bit}; - ASSERT((v & mask) != 0); - v &= ~mask; - *bit = v; - if (v) { - break; - } - offset = ind; - depth--; - } - } - - private: - static constexpr s32 GetRequiredDepth(std::size_t region_size) { - s32 depth = 0; - while (true) { - region_size /= 64; - depth++; - if (region_size == 0) { - return depth; - } - } - } - - public: - static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size) { - std::size_t overhead_bits = 0; - for (s32 depth{GetRequiredDepth(region_size) - 1}; depth >= 0; depth--) { - region_size = Common::AlignUp(region_size, 64) / 64; - overhead_bits += region_size; - } - return overhead_bits * sizeof(u64); - } - }; - - private: - Bitmap bitmap; - VAddr heap_address{}; - uintptr_t end_offset{}; - std::size_t block_shift{}; - std::size_t next_block_shift{}; - - public: - constexpr Block() = default; - - constexpr std::size_t GetShift() const { - return block_shift; - } - constexpr std::size_t GetNextShift() const { - return next_block_shift; - } - constexpr std::size_t GetSize() const { - return static_cast<std::size_t>(1) << GetShift(); - } - constexpr std::size_t GetNumPages() const { - return GetSize() / PageSize; - } - constexpr std::size_t GetNumFreeBlocks() const { - return bitmap.GetNumBits(); - } - constexpr std::size_t GetNumFreePages() const { - return GetNumFreeBlocks() * GetNumPages(); - } - - constexpr u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs, - u64* bit_storage) { - // Set shifts - block_shift = bs; - next_block_shift = nbs; - - // Align up the address - VAddr end{addr + size}; - const auto align{(next_block_shift != 0) ? 
(1ULL << next_block_shift) - : (1ULL << block_shift)}; - addr = Common::AlignDown((addr), align); - end = Common::AlignUp((end), align); - - heap_address = addr; - end_offset = (end - addr) / (1ULL << block_shift); - return bitmap.Initialize(bit_storage, end_offset); - } - - constexpr VAddr PushBlock(VAddr address) { - // Set the bit for the free block - std::size_t offset{(address - heap_address) >> GetShift()}; - bitmap.SetBit(offset); - - // If we have a next shift, try to clear the blocks below and return the address - if (GetNextShift()) { - const auto diff{1ULL << (GetNextShift() - GetShift())}; - offset = Common::AlignDown(offset, diff); - if (bitmap.ClearRange(offset, diff)) { - return heap_address + (offset << GetShift()); - } - } - - // We couldn't coalesce, or we're already as big as possible - return 0; - } - - VAddr PopBlock() { - // Find a free block - const s64 soffset{bitmap.FindFreeBlock()}; - if (soffset < 0) { - return 0; - } - const auto offset{static_cast<std::size_t>(soffset)}; - - // Update our tracking and return it - bitmap.ClearBit(offset); - return heap_address + (offset << GetShift()); - } - - public: - static constexpr std::size_t CalculateMetadataOverheadSize(std::size_t region_size, - std::size_t cur_block_shift, - std::size_t next_block_shift) { - const auto cur_block_size{(1ULL << cur_block_shift)}; - const auto next_block_size{(1ULL << next_block_shift)}; - const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size}; - return Bitmap::CalculateMetadataOverheadSize( - (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); - } - }; - -public: - PageHeap() = default; - - constexpr VAddr GetAddress() const { - return heap_address; - } - constexpr std::size_t GetSize() const { - return heap_size; - } - constexpr VAddr GetEndAddress() const { - return GetAddress() + GetSize(); - } - constexpr std::size_t GetPageOffset(VAddr block) const { - return (block - GetAddress()) / PageSize; - } - - void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); - VAddr AllocateBlock(s32 index); - void Free(VAddr addr, std::size_t num_pages); - - void UpdateUsedSize() { - used_size = heap_size - (GetNumFreePages() * PageSize); - } - - static std::size_t CalculateMetadataOverheadSize(std::size_t region_size); - -private: - constexpr std::size_t GetNumFreePages() const { - std::size_t num_free{}; - - for (const auto& block : blocks) { - num_free += block.GetNumFreePages(); - } - - return num_free; - } - - void FreeBlock(VAddr block, s32 index); - - VAddr heap_address{}; - std::size_t heap_size{}; - std::size_t used_size{}; - std::array<Block, NumMemoryBlockPageShifts> blocks{}; - std::vector<u64> metadata; -}; - -} // namespace Kernel::Memory diff --git a/src/core/hle/kernel/memory/system_control.h b/src/core/hle/kernel/memory/system_control.h deleted file mode 100644 index 19cab8cbc4..0000000000 --- a/src/core/hle/kernel/memory/system_control.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace Kernel::Memory::SystemControl { - -u64 GenerateRandomRange(u64 min, u64 max); - -} // namespace Kernel::Memory::SystemControl diff --git a/src/core/hle/kernel/memory/memory_types.h b/src/core/hle/kernel/memory_types.h index a75bf77c00..d458f0ecad 100644 --- a/src/core/hle/kernel/memory/memory_types.h +++ b/src/core/hle/kernel/memory_types.h @@ -8,11 +8,11 @@ #include "common/common_types.h" -namespace Kernel::Memory { +namespace Kernel { constexpr std::size_t PageBits{12}; constexpr std::size_t PageSize{1 << PageBits}; using Page = std::array<u8, PageSize>; -} // namespace Kernel::Memory +} // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 2286b292dc..73b85d6f95 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -14,14 +14,14 @@ #include "core/device_memory.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" -#include "core/hle/kernel/errors.h" +#include "core/hle/kernel/k_memory_block_manager.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" +#include "core/hle/kernel/k_slab_heap.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/memory_block_manager.h" -#include "core/hle/kernel/memory/page_table.h" -#include "core/hle/kernel/memory/slab_heap.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/svc_results.h" #include "core/hle/lock.h" @@ -39,6 +39,7 @@ namespace { */ void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); + ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1)); auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0, owner_process.GetIdealCoreId(), stack_top, &owner_process); @@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, std::shared_ptr<Process> process = std::make_shared<Process>(system); process->name = std::move(name); + + // TODO: This is inaccurate + // The process should hold a reference to the kernel-wide resource limit. process->resource_limit = std::make_shared<KResourceLimit>(kernel, system); process->status = ProcessStatus::Created; process->program_id = 0; @@ -155,6 +159,9 @@ void Process::DecrementThreadCount() { } u64 Process::GetTotalPhysicalMemoryAvailable() const { + // TODO: This is expected to always return the application memory pool size after accurately + // reserving kernel resources. The current workaround uses a process-local resource limit of + // application memory pool size, which is inaccurate. const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) + page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + main_thread_stack_size}; @@ -248,8 +255,8 @@ ResultCode Process::Reset() { KScopedSchedulerLock sl{kernel}; // Validate that we're in a state that we can reset. - R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState); - R_UNLESS(is_signaled, Svc::ResultInvalidState); + R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState); + R_UNLESS(is_signaled, ResultInvalidState); // Clear signaled. 
is_signaled = false; @@ -264,18 +271,29 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, system_resource_size = metadata.GetSystemResourceSize(); image_size = code_size; + // Set initial resource limits + resource_limit->SetLimitValue( + LimitableResource::PhysicalMemory, + kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application)); + KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory, + code_size + system_resource_size); + if (!memory_reservation.Succeeded()) { + LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes", + code_size + system_resource_size); + return ResultResourceLimitedExceeded; + } // Initialize process address space if (const ResultCode result{ page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000, - code_size, Memory::MemoryManager::Pool::Application)}; + code_size, KMemoryManager::Pool::Application)}; result.IsError()) { return result; } // Map process code region - if (const ResultCode result{page_table->MapProcessCode( - page_table->GetCodeRegionStart(), code_size / Memory::PageSize, - Memory::MemoryState::Code, Memory::MemoryPermission::None)}; + if (const ResultCode result{page_table->MapProcessCode(page_table->GetCodeRegionStart(), + code_size / PageSize, KMemoryState::Code, + KMemoryPermission::None)}; result.IsError()) { return result; } @@ -308,21 +326,24 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, // Set initial resource limits resource_limit->SetLimitValue( LimitableResource::PhysicalMemory, - kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application)); + kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application)); + resource_limit->SetLimitValue(LimitableResource::Threads, 608); resource_limit->SetLimitValue(LimitableResource::Events, 700); resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128); resource_limit->SetLimitValue(LimitableResource::Sessions, 894); - ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size)); // Create TLS region tls_region_address = CreateTLSRegion(); + memory_reservation.Commit(); return handle_table.SetSize(capabilities.GetHandleTableSize()); } void Process::Run(s32 main_thread_priority, u64 stack_size) { AllocateMainThreadStack(stack_size); + resource_limit->Reserve(LimitableResource::Threads, 1); + resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size); const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size}; ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError()); @@ -330,8 +351,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) { ChangeStatus(ProcessStatus::Running); SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); - resource_limit->Reserve(LimitableResource::Threads, 1); - resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size); } void Process::PrepareForTermination() { @@ -358,6 +377,11 @@ void Process::PrepareForTermination() { FreeTLSRegion(tls_region_address); tls_region_address = 0; + if (resource_limit) { + resource_limit->Release(LimitableResource::PhysicalMemory, + main_thread_stack_size + image_size); + } + ChangeStatus(ProcessStatus::Exited); } @@ -381,22 +405,22 @@ VAddr Process::CreateTLSRegion() { return *tls_page_iter->ReserveSlot(); } - Memory::Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; + Page* const
tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; ASSERT(tls_page_ptr); const VAddr start{page_table->GetKernelMapRegionStart()}; const VAddr size{page_table->GetKernelMapRegionEnd() - start}; const PAddr tls_map_addr{system.DeviceMemory().GetPhysicalAddr(tls_page_ptr)}; - const VAddr tls_page_addr{ - page_table - ->AllocateAndMapMemory(1, Memory::PageSize, true, start, size / Memory::PageSize, - Memory::MemoryState::ThreadLocal, - Memory::MemoryPermission::ReadAndWrite, tls_map_addr) - .ValueOr(0)}; + const VAddr tls_page_addr{page_table + ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize, + KMemoryState::ThreadLocal, + KMemoryPermission::ReadAndWrite, + tls_map_addr) + .ValueOr(0)}; ASSERT(tls_page_addr); - std::memset(tls_page_ptr, 0, Memory::PageSize); + std::memset(tls_page_ptr, 0, PageSize); tls_pages.emplace_back(tls_page_addr); const auto reserve_result{tls_pages.back().ReserveSlot()}; @@ -423,15 +447,15 @@ void Process::FreeTLSRegion(VAddr tls_address) { void Process::LoadModule(CodeSet code_set, VAddr base_addr) { std::lock_guard lock{HLE::g_hle_lock}; const auto ReprotectSegment = [&](const CodeSet::Segment& segment, - Memory::MemoryPermission permission) { + KMemoryPermission permission) { page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission); }; system.Memory().WriteBlock(*this, base_addr, code_set.memory.data(), code_set.memory.size()); - ReprotectSegment(code_set.CodeSegment(), Memory::MemoryPermission::ReadAndExecute); - ReprotectSegment(code_set.RODataSegment(), Memory::MemoryPermission::Read); - ReprotectSegment(code_set.DataSegment(), Memory::MemoryPermission::ReadAndWrite); + ReprotectSegment(code_set.CodeSegment(), KMemoryPermission::ReadAndExecute); + ReprotectSegment(code_set.RODataSegment(), KMemoryPermission::Read); + ReprotectSegment(code_set.DataSegment(), KMemoryPermission::ReadAndWrite); } bool Process::IsSignaled() const { @@ -440,9 +464,9 @@ bool Process::IsSignaled() const { } Process::Process(Core::System& system) - : KSynchronizationObject{system.Kernel()}, - page_table{std::make_unique<Memory::PageTable>(system)}, handle_table{system.Kernel()}, - address_arbiter{system}, condition_var{system}, state_lock{system.Kernel()}, system{system} {} + : KSynchronizationObject{system.Kernel()}, page_table{std::make_unique<KPageTable>(system)}, + handle_table{system.Kernel()}, address_arbiter{system}, condition_var{system}, + state_lock{system.Kernel()}, system{system} {} Process::~Process() = default; @@ -460,16 +484,15 @@ ResultCode Process::AllocateMainThreadStack(std::size_t stack_size) { ASSERT(stack_size); // The kernel always ensures that the given stack size is page aligned. 
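The process.cpp hunks above replace bare ResourceLimit::Reserve calls with KScopedResourceReservation: reserve in the constructor, check Succeeded(), call Commit() once the last fallible step passes, and rely on the destructor to release the reservation on any early-return path. A hedged sketch of that commit-or-rollback guard (the real class lives in k_scoped_resource_reservation.h, which this diff only includes; member details here are inferred from the call sites):

    // "Limit" stands in for KResourceLimit; only Reserve/Release are assumed.
    template <typename Limit, typename Resource>
    class ScopedReservation {
    public:
        ScopedReservation(Limit& limit, Resource resource, long long count = 1)
            : limit{limit}, resource{resource}, count{count},
              succeeded{limit.Reserve(resource, count)} {}

        ~ScopedReservation() {
            if (succeeded && !committed) {
                limit.Release(resource, count); // roll back on early exit
            }
        }

        ScopedReservation(const ScopedReservation&) = delete;
        ScopedReservation& operator=(const ScopedReservation&) = delete;

        bool Succeeded() const { return succeeded; }
        void Commit() { committed = true; } // destructor then keeps the reservation

    private:
        Limit& limit;
        Resource resource;
        long long count;
        bool succeeded;
        bool committed = false;
    };

Process::LoadFromMetadata above follows exactly this shape: memory_reservation is constructed before the address space is initialized and committed only after the TLS region is created.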
- main_thread_stack_size = Common::AlignUp(stack_size, Memory::PageSize); + main_thread_stack_size = Common::AlignUp(stack_size, PageSize); const VAddr start{page_table->GetStackRegionStart()}; const std::size_t size{page_table->GetStackRegionEnd() - start}; CASCADE_RESULT(main_thread_stack_top, page_table->AllocateAndMapMemory( - main_thread_stack_size / Memory::PageSize, Memory::PageSize, false, start, - size / Memory::PageSize, Memory::MemoryState::Stack, - Memory::MemoryPermission::ReadAndWrite)); + main_thread_stack_size / PageSize, PageSize, false, start, size / PageSize, + KMemoryState::Stack, KMemoryPermission::ReadAndWrite)); main_thread_stack_top += main_thread_stack_size; diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 320b0f3474..45eefb90e2 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -29,16 +29,13 @@ class ProgramMetadata; namespace Kernel { class KernelCore; +class KPageTable; class KResourceLimit; class KThread; class TLSPage; struct CodeSet; -namespace Memory { -class PageTable; -} - enum class MemoryRegion : u16 { APPLICATION = 1, SYSTEM = 2, @@ -104,12 +101,12 @@ public: } /// Gets a reference to the process' page table. - Memory::PageTable& PageTable() { + KPageTable& PageTable() { return *page_table; } /// Gets const a reference to the process' page table. - const Memory::PageTable& PageTable() const { + const KPageTable& PageTable() const { return *page_table; } @@ -385,7 +382,7 @@ private: ResultCode AllocateMainThreadStack(std::size_t stack_size); /// Memory manager for this process - std::unique_ptr<Memory::PageTable> page_table; + std::unique_ptr<KPageTable> page_table; /// Current status of the process ProcessStatus status{}; diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 0566311b69..3fc326eabe 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp @@ -6,10 +6,10 @@ #include "common/bit_util.h" #include "common/logging/log.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process_capability.h" +#include "core/hle/kernel/svc_results.h" namespace Kernel { namespace { @@ -69,7 +69,7 @@ u32 GetFlagBitOffset(CapabilityType type) { ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table) { + KPageTable& page_table) { Clear(); // Allow all cores and priorities. @@ -82,7 +82,7 @@ ResultCode ProcessCapabilities::InitializeForKernelProcess(const u32* capabiliti ResultCode ProcessCapabilities::InitializeForUserProcess(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table) { + KPageTable& page_table) { Clear(); return ParseCapabilities(capabilities, num_capabilities, page_table); @@ -108,7 +108,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() { ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table) { + KPageTable& page_table) { u32 set_flags = 0; u32 set_svc_bits = 0; @@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, // If there's only one, then there's a problem. if (i >= num_capabilities) { LOG_ERROR(Kernel, "Invalid combination! 
i={}", i); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } const auto size_flags = capabilities[i]; if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) { LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table); @@ -155,11 +155,11 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, } ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits, - u32 flag, Memory::PageTable& page_table) { + u32 flag, KPageTable& page_table) { const auto type = GetCapabilityType(flag); if (type == CapabilityType::Unset) { - return ERR_INVALID_CAPABILITY_DESCRIPTOR; + return ResultInvalidCapabilityDescriptor; } // Bail early on ignorable entries, as one would expect, @@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s LOG_ERROR(Kernel, "Attempted to initialize flags that may only be initialized once. set_flags={}", set_flags); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } set_flags |= set_flag; @@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s } LOG_ERROR(Kernel, "Invalid capability type! type={}", type); - return ERR_INVALID_CAPABILITY_DESCRIPTOR; + return ResultInvalidCapabilityDescriptor; } void ProcessCapabilities::Clear() { @@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { if (priority_mask != 0 || core_mask != 0) { LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}", priority_mask, core_mask); - return ERR_INVALID_CAPABILITY_DESCRIPTOR; + return ResultInvalidCapabilityDescriptor; } const u32 core_num_min = (flags >> 16) & 0xFF; @@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { if (core_num_min > core_num_max) { LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}", core_num_min, core_num_max); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } const u32 priority_min = (flags >> 10) & 0x3F; @@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { LOG_ERROR(Kernel, "Priority min is greater than priority max! priority_min={}, priority_max={}", core_num_min, priority_max); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } // The switch only has 4 usable cores. if (core_num_max >= 4) { LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max); - return ERR_INVALID_PROCESSOR_ID; + return ResultInvalidCoreId; } const auto make_mask = [](u64 min, u64 max) { @@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) // If we've already set this svc before, bail. if ((set_svc_bits & svc_bit) != 0) { - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } set_svc_bits |= svc_bit; @@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) if (svc_number >= svc_capabilities.size()) { LOG_ERROR(Kernel, "Process svc capability is out of range! 
svc_number={}", svc_number); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } svc_capabilities[svc_number] = true; @@ -293,12 +293,12 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) } ResultCode ProcessCapabilities::HandleMapPhysicalFlags(u32 flags, u32 size_flags, - Memory::PageTable& page_table) { + KPageTable& page_table) { // TODO(Lioncache): Implement once the memory manager can handle this. return RESULT_SUCCESS; } -ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, Memory::PageTable& page_table) { +ResultCode ProcessCapabilities::HandleMapIOFlags(u32 flags, KPageTable& page_table) { // TODO(Lioncache): Implement once the memory manager can handle this. return RESULT_SUCCESS; } @@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) { if (interrupt >= interrupt_capabilities.size()) { LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}", interrupt); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } interrupt_capabilities[interrupt] = true; @@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) { const u32 reserved = flags >> 17; if (reserved != 0) { LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); - return ERR_RESERVED_VALUE; + return ResultReservedValue; } program_type = static_cast<ProgramType>((flags >> 14) & 0b111); @@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) { LOG_ERROR(Kernel, "Kernel version is non zero or flags are too small! major_version={}, flags={}", major_version, flags); - return ERR_INVALID_CAPABILITY_DESCRIPTOR; + return ResultInvalidCapabilityDescriptor; } kernel_version = flags; @@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) { const u32 reserved = flags >> 26; if (reserved != 0) { LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); - return ERR_RESERVED_VALUE; + return ResultReservedValue; } handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); @@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) { const u32 reserved = flags >> 19; if (reserved != 0) { LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); - return ERR_RESERVED_VALUE; + return ResultReservedValue; } is_debuggable = (flags & 0x20000) != 0; diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h index ea9d12c161..73ad197fa0 100644 --- a/src/core/hle/kernel/process_capability.h +++ b/src/core/hle/kernel/process_capability.h @@ -12,9 +12,7 @@ union ResultCode; namespace Kernel { -namespace Memory { -class PageTable; -} +class KPageTable; /// The possible types of programs that may be indicated /// by the program type capability descriptor. @@ -90,7 +88,7 @@ public: /// otherwise, an error code upon failure. /// ResultCode InitializeForKernelProcess(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table); + KPageTable& page_table); /// Initializes this process capabilities instance for a userland process. /// @@ -103,7 +101,7 @@ public: /// otherwise, an error code upon failure. /// ResultCode InitializeForUserProcess(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table); + KPageTable& page_table); /// Initializes this process capabilities instance for a process that does not /// have any metadata to parse. 
@@ -189,7 +187,7 @@ private: /// @return RESULT_SUCCESS if no errors occur, otherwise an error code. /// ResultCode ParseCapabilities(const u32* capabilities, std::size_t num_capabilities, - Memory::PageTable& page_table); + KPageTable& page_table); /// Attempts to parse a capability descriptor that is only represented by a /// single flag set. @@ -204,7 +202,7 @@ private: /// @return RESULT_SUCCESS if no errors occurred, otherwise an error code. /// ResultCode ParseSingleFlagCapability(u32& set_flags, u32& set_svc_bits, u32 flag, - Memory::PageTable& page_table); + KPageTable& page_table); /// Clears the internal state of this process capability instance. Necessary, /// to have a sane starting point due to us allowing running executables without @@ -228,10 +226,10 @@ private: ResultCode HandleSyscallFlags(u32& set_svc_bits, u32 flags); /// Handles flags related to mapping physical memory pages. - ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, Memory::PageTable& page_table); + ResultCode HandleMapPhysicalFlags(u32 flags, u32 size_flags, KPageTable& page_table); /// Handles flags related to mapping IO pages. - ResultCode HandleMapIOFlags(u32 flags, Memory::PageTable& page_table); + ResultCode HandleMapIOFlags(u32 flags, KPageTable& page_table); /// Handles flags related to the interrupt capability flags. ResultCode HandleInterruptFlags(u32 flags); diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index fe7a483c4e..5d17346ade 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp @@ -5,11 +5,11 @@ #include <tuple> #include "common/assert.h" #include "core/hle/kernel/client_port.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/server_port.h" #include "core/hle/kernel/server_session.h" +#include "core/hle/kernel/svc_results.h" namespace Kernel { @@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default; ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { if (pending_sessions.empty()) { - return ERR_NOT_FOUND; + return ResultNotFound; } auto session = std::move(pending_sessions.back()); diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp index 75304b9619..8830d4e916 100644 --- a/src/core/hle/kernel/session.cpp +++ b/src/core/hle/kernel/session.cpp @@ -4,15 +4,23 @@ #include "common/assert.h" #include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/server_session.h" #include "core/hle/kernel/session.h" namespace Kernel { Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} -Session::~Session() = default; +Session::~Session() { + // Release reserved resource when the Session pair was created. + kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1); +} Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { + // Reserve a new session from the resource limit. 
+ KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(), + LimitableResource::Sessions); + ASSERT(session_reservation.Succeeded()); auto session{std::make_shared<Session>(kernel)}; auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()}; auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()}; @@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { session->client = client_session; session->server = server_session; + session_reservation.Commit(); return std::make_pair(std::move(client_session), std::move(server_session)); } diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp deleted file mode 100644 index 0cd4671106..0000000000 --- a/src/core/hle/kernel/shared_memory.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "core/core.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/page_table.h" -#include "core/hle/kernel/shared_memory.h" - -namespace Kernel { - -SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) - : Object{kernel}, device_memory{device_memory} {} - -SharedMemory::~SharedMemory() = default; - -std::shared_ptr<SharedMemory> SharedMemory::Create( - KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, - Memory::PageLinkedList&& page_list, Memory::MemoryPermission owner_permission, - Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, - std::string name) { - - std::shared_ptr<SharedMemory> shared_memory{ - std::make_shared<SharedMemory>(kernel, device_memory)}; - - shared_memory->owner_process = owner_process; - shared_memory->page_list = std::move(page_list); - shared_memory->owner_permission = owner_permission; - shared_memory->user_permission = user_permission; - shared_memory->physical_address = physical_address; - shared_memory->size = size; - shared_memory->name = name; - - return shared_memory; -} - -ResultCode SharedMemory::Map(Process& target_process, VAddr address, std::size_t size, - Memory::MemoryPermission permissions) { - const u64 page_count{(size + Memory::PageSize - 1) / Memory::PageSize}; - - if (page_list.GetNumPages() != page_count) { - UNIMPLEMENTED_MSG("Page count does not match"); - } - - const Memory::MemoryPermission expected = - &target_process == owner_process ? 
owner_permission : user_permission; - - if (permissions != expected) { - UNIMPLEMENTED_MSG("Permission does not match"); - } - - return target_process.PageTable().MapPages(address, page_list, Memory::MemoryState::Shared, - permissions); -} - -} // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 26650a5134..cc8fa6576f 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -23,25 +23,25 @@ #include "core/cpu_manager.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" -#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/k_address_arbiter.h" #include "core/hle/kernel/k_condition_variable.h" #include "core/hle/kernel/k_event.h" +#include "core/hle/kernel/k_memory_block.h" +#include "core/hle/kernel/k_memory_layout.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/k_scheduler.h" +#include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" +#include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_synchronization_object.h" #include "core/hle/kernel/k_thread.h" #include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/memory_block.h" -#include "core/hle/kernel/memory/memory_layout.h" -#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/physical_core.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/svc.h" #include "core/hle/kernel/svc_results.h" #include "core/hle/kernel/svc_types.h" @@ -67,53 +67,53 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) { // Helper function that performs the common sanity checks for svcMapMemory // and svcUnmapMemory. This is doable, as both functions perform their sanitizing // in the same order. 
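These sanity checks lean on IsValidAddressRange (defined just above) to reject ranges whose end wraps around the 64-bit address space before any region containment is tested. A standalone worked example of that overflow check:

    #include <cstdint>

    // The range [address, address + size) is valid only when the end does not
    // wrap past 2^64; a wrapped (or zero-size) sum compares <= address.
    constexpr bool IsValidAddressRangeSketch(std::uint64_t address, std::uint64_t size) {
        return address + size > address;
    }
    static_assert(IsValidAddressRangeSketch(0x1000, 0x2000));
    static_assert(!IsValidAddressRangeSketch(0xFFFF'FFFF'FFFF'F000ULL, 0x2000)); // wraps to 0x1000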
-ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr dst_addr, - VAddr src_addr, u64 size) { +ResultCode MapUnmapMemorySanityChecks(const KPageTable& manager, VAddr dst_addr, VAddr src_addr, + u64 size) { if (!Common::Is4KBAligned(dst_addr)) { LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(src_addr)) { LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (size == 0) { LOG_ERROR(Kernel_SVC, "Size is 0"); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!IsValidAddressRange(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}", dst_addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!IsValidAddressRange(src_addr, size)) { LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}", src_addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!manager.IsInsideAddressSpace(src_addr, size)) { LOG_ERROR(Kernel_SVC, "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", src_addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (manager.IsOutsideStackRegion(dst_addr, size)) { LOG_ERROR(Kernel_SVC, "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", dst_addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (manager.IsInsideHeapRegion(dst_addr, size)) { @@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds "Destination does not fit within the heap region, addr=0x{:016X}, " "size=0x{:016X}", dst_addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (manager.IsInsideAliasRegion(dst_addr, size)) { @@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds "Destination does not fit within the map region, addr=0x{:016X}, " "size=0x{:016X}", dst_addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } return RESULT_SUCCESS; @@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds enum class ResourceLimitValueType { CurrentValue, LimitValue, + PeakValue, }; ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, @@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_ const auto type = static_cast<LimitableResource>(resource_type); if (!IsValidResourceType(type)) { LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } const auto* const current_process = system.Kernel().CurrentProcess(); @@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_ if (!resource_limit_object) { LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. 
Handle={:08X}", resource_limit); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } - if (value_type == ResourceLimitValueType::CurrentValue) { + switch (value_type) { + case ResourceLimitValueType::CurrentValue: return MakeResult(resource_limit_object->GetCurrentValue(type)); + case ResourceLimitValueType::LimitValue: + return MakeResult(resource_limit_object->GetLimitValue(type)); + case ResourceLimitValueType::PeakValue: + return MakeResult(resource_limit_object->GetPeakValue(type)); + default: + LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type); + return ResultInvalidEnumValue; } - - return MakeResult(resource_limit_object->GetLimitValue(type)); } } // Anonymous namespace @@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s if ((heap_size % 0x200000) != 0) { LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}", heap_size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (heap_size >= 0x200000000) { LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; @@ -208,34 +215,34 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si if (!Common::Is4KBAligned(address)) { LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (size == 0 || !Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.", size); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!IsValidAddressRange(address, size)) { LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})", address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; - if (attributes != static_cast<Memory::MemoryAttribute>(mask) || - (attributes | Memory::MemoryAttribute::Uncached) != Memory::MemoryAttribute::Uncached) { + const auto attributes{static_cast<MemoryAttribute>(mask | attribute)}; + if (attributes != static_cast<MemoryAttribute>(mask) || + (attributes | MemoryAttribute::Uncached) != MemoryAttribute::Uncached) { LOG_ERROR(Kernel_SVC, "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", attribute, mask); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; - return page_table.SetMemoryAttribute(address, size, static_cast<Memory::MemoryAttribute>(mask), - static_cast<Memory::MemoryAttribute>(attribute)); + return page_table.SetMemoryAttribute(address, size, static_cast<KMemoryAttribute>(mask), + static_cast<KMemoryAttribute>(attribute)); } static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask, @@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, LOG_ERROR(Kernel_SVC, "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", port_name_address); - return ERR_NOT_FOUND; + return ResultNotFound; } static constexpr std::size_t PortNameMaxLength = 11; @@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, if (port_name.size() > PortNameMaxLength) { LOG_ERROR(Kernel_SVC, "Port name is too 
long, expected {} but got {}", PortNameMaxLength, port_name.size()); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); @@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, const auto it = kernel.FindNamedPort(port_name); if (!kernel.IsValidNamedPort(it)) { LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); - return ERR_NOT_FOUND; + return ResultNotFound; } - ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1)); - auto client_port = it->second; std::shared_ptr<ClientSession> client_session; @@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); if (!session) { LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); @@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han const Process* const owner_process = thread->GetOwnerProcess(); if (!owner_process) { LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered."); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } *process_id = owner_process->GetProcessID(); @@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han // NOTE: This should also handle debug objects before returning. LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, @@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha LOG_ERROR(Kernel_SVC, "Handle address is not a valid virtual address, handle_address=0x{:016X}", handles_address); - return ERR_INVALID_POINTER; + return ResultInvalidPointer; } static constexpr u64 MaxHandles = 0x40; @@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha if (handle_count > MaxHandles) { LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}", MaxHandles, handle_count); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } auto& kernel = system.Kernel(); @@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha if (object == nullptr) { LOG_ERROR(Kernel_SVC, "Object is a nullptr"); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } objects[i] = object.get(); @@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand // Get the thread from its handle. const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle); + if (!thread) { LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle); return ResultInvalidHandle; @@ -502,7 +508,7 @@ static ResultCode ArbitrateLock(Core::System& system, Handle thread_handle, VAdd thread_handle, address, tag); // Validate the input address. 
- if (Memory::IsKernelAddress(address)) { + if (IsKernelAddress(address)) { LOG_ERROR(Kernel_SVC, "Attempting to arbitrate a lock on a kernel address (address={:08X})", address); return ResultInvalidCurrentMemory; @@ -525,7 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) { LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); // Validate the input address. - if (Memory::IsKernelAddress(address)) { + if (IsKernelAddress(address)) { LOG_ERROR(Kernel_SVC, "Attempting to arbitrate an unlock on a kernel address (address={:08X})", address); @@ -735,7 +741,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (info_sub_id != 0) { LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, info_sub_id); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } const auto& current_process_handle_table = @@ -744,7 +750,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (!process) { LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}", info_id, info_sub_id, handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } switch (info_id_type) { @@ -826,7 +832,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha } LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } case GetInfoType::IsCurrentProcessBeingDebugged: @@ -836,13 +842,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha case GetInfoType::RegisterResourceLimit: { if (handle != 0) { LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } if (info_sub_id != 0) { LOG_ERROR(Kernel, "Info sub id is non zero! 
info_id={}, info_sub_id={}", info_id, info_sub_id); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } Process* const current_process = system.Kernel().CurrentProcess(); @@ -867,13 +873,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (handle != 0) { LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}", handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) { LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}", Process::RANDOM_ENTROPY_SIZE, info_sub_id); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); @@ -890,7 +896,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) { LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus, info_sub_id); - return ERR_INVALID_COMBINATION; + return ResultInvalidCombination; } const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>( @@ -898,7 +904,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha if (!thread) { LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", static_cast<Handle>(handle)); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } const auto& core_timing = system.CoreTiming(); @@ -922,7 +928,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha default: LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } } @@ -945,22 +951,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) if (!Common::Is4KBAligned(addr)) { LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (size == 0) { LOG_ERROR(Kernel_SVC, "Size is zero"); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!(addr < addr + size)) { LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } Process* const current_process{system.Kernel().CurrentProcess()}; @@ -968,21 +974,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) if (current_process->GetSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); - return ERR_INVALID_STATE; + return ResultInvalidState; } if (!page_table.IsInsideAddressSpace(addr, size)) { LOG_ERROR(Kernel_SVC, "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (page_table.IsOutsideAliasRegion(addr, size)) { LOG_ERROR(Kernel_SVC, "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } return page_table.MapPhysicalMemory(addr, size); @@ -999,22 +1005,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size if (!Common::Is4KBAligned(addr)) { LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 
0x{:016X}", addr); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (size == 0) { LOG_ERROR(Kernel_SVC, "Size is zero"); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!(addr < addr + size)) { LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } Process* const current_process{system.Kernel().CurrentProcess()}; @@ -1022,21 +1028,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size if (current_process->GetSystemResourceSize() == 0) { LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); - return ERR_INVALID_STATE; + return ResultInvalidState; } if (!page_table.IsInsideAddressSpace(addr, size)) { LOG_ERROR(Kernel_SVC, "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (page_table.IsOutsideAliasRegion(addr, size)) { LOG_ERROR(Kernel_SVC, "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } return page_table.UnmapPhysicalMemory(addr, size); @@ -1206,31 +1212,30 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han if (!Common::Is4KBAligned(addr)) { LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (size == 0) { LOG_ERROR(Kernel_SVC, "Size is 0"); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!IsValidAddressRange(addr, size)) { LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); - if ((permission_type | Memory::MemoryPermission::Write) != - Memory::MemoryPermission::ReadAndWrite) { + const auto permission_type = static_cast<MemoryPermission>(permissions); + if ((permission_type | MemoryPermission::Write) != MemoryPermission::ReadWrite) { LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", permissions); - return ERR_INVALID_MEMORY_PERMISSIONS; + return ResultInvalidMemoryPermissions; } auto* const current_process{system.Kernel().CurrentProcess()}; @@ -1241,7 +1246,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han "Addr does not fit within the valid region, addr=0x{:016X}, " "size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (page_table.IsInsideHeapRegion(addr, size)) { @@ -1249,7 +1254,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han "Addr does not fit within the heap region, addr=0x{:016X}, " "size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } if (page_table.IsInsideAliasRegion(addr, size)) { @@ -1257,17 +1262,18 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han "Address does not fit within the map region, addr=0x{:016X}, 
" "size=0x{:016X}", addr, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } - auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; + auto shared_memory{current_process->GetHandleTable().Get<KSharedMemory>(shared_memory_handle)}; if (!shared_memory) { LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", shared_memory_handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } - return shared_memory->Map(*current_process, addr, size, permission_type); + return shared_memory->Map(*current_process, addr, size, + static_cast<KMemoryPermission>(permission_type)); } static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr, @@ -1285,7 +1291,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add if (!process) { LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", process_handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } auto& memory{system.Memory()}; @@ -1332,18 +1338,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand if (!Common::Is4KBAligned(src_address)) { LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", src_address); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(dst_address)) { LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", dst_address); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (size == 0 || !Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!IsValidAddressRange(dst_address, size)) { @@ -1351,7 +1357,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand "Destination address range overflows the address space (dst_address=0x{:016X}, " "size=0x{:016X}).", dst_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!IsValidAddressRange(src_address, size)) { @@ -1359,7 +1365,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand "Source address range overflows the address space (src_address=0x{:016X}, " "size=0x{:016X}).", src_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); @@ -1367,7 +1373,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand if (!process) { LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", process_handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } auto& page_table = process->PageTable(); @@ -1376,7 +1382,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand "Source address range is not within the address space (src_address=0x{:016X}, " "size=0x{:016X}).", src_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!page_table.IsInsideASLRRegion(dst_address, size)) { @@ -1384,7 +1390,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " "size=0x{:016X}).", dst_address, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } return 
page_table.MapProcessCodeMemory(dst_address, src_address, size); @@ -1400,18 +1406,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha if (!Common::Is4KBAligned(dst_address)) { LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", dst_address); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(src_address)) { LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", src_address); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (size == 0 || Common::Is4KBAligned(size)) { LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); - return ERR_INVALID_SIZE; + return ResultInvalidSize; } if (!IsValidAddressRange(dst_address, size)) { @@ -1419,7 +1425,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha "Destination address range overflows the address space (dst_address=0x{:016X}, " "size=0x{:016X}).", dst_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!IsValidAddressRange(src_address, size)) { @@ -1427,7 +1433,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha "Source address range overflows the address space (src_address=0x{:016X}, " "size=0x{:016X}).", src_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); @@ -1435,7 +1441,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha if (!process) { LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", process_handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } auto& page_table = process->PageTable(); @@ -1444,7 +1450,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha "Source address range is not within the address space (src_address=0x{:016X}, " "size=0x{:016X}).", src_address, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } if (!page_table.IsInsideASLRRegion(dst_address, size)) { @@ -1452,7 +1458,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " "size=0x{:016X}).", dst_address, size); - return ERR_INVALID_MEMORY_RANGE; + return ResultInvalidMemoryRange; } return page_table.UnmapProcessCodeMemory(dst_address, src_address, size); @@ -1515,8 +1521,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e return ResultInvalidPriority; } - ASSERT(process.GetResourceLimit()->Reserve( - LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000)); + KScopedResourceReservation thread_reservation( + kernel.CurrentProcess(), LimitableResource::Threads, 1, + system.CoreTiming().GetGlobalTimeNs().count() + 100000000); + if (!thread_reservation.Succeeded()) { + LOG_ERROR(Kernel_SVC, "Could not reserve a new thread"); + return ResultResourceLimitedExceeded; + } std::shared_ptr<KThread> thread; { @@ -1536,6 +1547,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e // Set the thread name for debugging purposes. 
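Note the deadline argument in the thread reservation a few lines above: CreateThread passes the current time plus 100ms, letting the reservation wait briefly for another thread to exit and free a slot rather than failing immediately. A hedged usage sketch of the two reservation forms seen in this diff (schematic fragment; types and overloads assumed from the call sites, not compilable on its own):

    // Immediate reservation, as in CreateTransferMemory and SignalEvent:
    KScopedResourceReservation event_res(process, LimitableResource::Events);
    if (!event_res.Succeeded()) {
        return ResultResourceLimitedExceeded;
    }
    // ... fallible work ...
    event_res.Commit();

    // Timed reservation, as in CreateThread: wait up to the deadline for headroom.
    const s64 deadline_ns = system.CoreTiming().GetGlobalTimeNs().count() + 100'000'000;
    KScopedResourceReservation thread_res(process, LimitableResource::Threads, 1, deadline_ns);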
thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); + thread_reservation.Commit(); return RESULT_SUCCESS; } @@ -1625,7 +1637,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr address, cv_key, tag, timeout_ns); // Validate input. - if (Memory::IsKernelAddress(address)) { + if (IsKernelAddress(address)) { LOG_ERROR(Kernel_SVC, "Attempted to wait on kernel address (address={:08X})", address); return ResultInvalidCurrentMemory; } @@ -1707,7 +1719,7 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, Svc::Arbit address, arb_type, value, timeout_ns); // Validate input. - if (Memory::IsKernelAddress(address)) { + if (IsKernelAddress(address)) { LOG_ERROR(Kernel_SVC, "Attempting to wait on kernel address (address={:08X})", address); return ResultInvalidCurrentMemory; } @@ -1752,7 +1764,7 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, Svc::Sign address, signal_type, value, count); // Validate input. - if (Memory::IsKernelAddress(address)) { + if (IsKernelAddress(address)) { LOG_ERROR(Kernel_SVC, "Attempting to signal to a kernel address (address={:08X})", address); return ResultInvalidCurrentMemory; } @@ -1844,7 +1856,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) { LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle); - return Svc::ResultInvalidHandle; + return ResultInvalidHandle; } static ResultCode ResetSignal32(Core::System& system, Handle handle) { @@ -1860,30 +1872,37 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd if (!Common::Is4KBAligned(addr)) { LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!Common::Is4KBAligned(size) || size == 0) { LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size); - return ERR_INVALID_ADDRESS; + return ResultInvalidAddress; } if (!IsValidAddressRange(addr, size)) { LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})", addr, size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } - const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; - if (perms > Memory::MemoryPermission::ReadAndWrite || - perms == Memory::MemoryPermission::Write) { + const auto perms{static_cast<MemoryPermission>(permissions)}; + if (perms > MemoryPermission::ReadWrite || perms == MemoryPermission::Write) { LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", permissions); - return ERR_INVALID_MEMORY_PERMISSIONS; + return ResultInvalidMemoryPermissions; } auto& kernel = system.Kernel(); - auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); + // Reserve a new transfer memory from the process resource limit. 
+ KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(), + LimitableResource::TransferMemory); + if (!trmem_reservation.Succeeded()) { + LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory"); + return ResultResourceLimitedExceeded; + } + auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, + static_cast<KMemoryPermission>(perms)); if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { return reserve_result; @@ -1894,6 +1913,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd if (result.Failed()) { return result.Code(); } + trmem_reservation.Commit(); *handle = *result; return RESULT_SUCCESS; @@ -1989,7 +2009,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw); return set_result; } - return RESULT_SUCCESS; } @@ -2002,8 +2021,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle static ResultCode SignalEvent(Core::System& system, Handle event_handle) { LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle); + auto& kernel = system.Kernel(); // Get the current handle table. - const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); + const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable(); + + // Reserve a new event from the process resource limit. + KScopedResourceReservation event_reservation(kernel.CurrentProcess(), + LimitableResource::Events); + if (!event_reservation.Succeeded()) { + LOG_ERROR(Kernel, "Could not reserve a new event"); + return ResultResourceLimitedExceeded; + } // Get the writable event. auto writable_event = handle_table.Get<KWritableEvent>(event_handle); @@ -2012,6 +2040,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) { return ResultInvalidHandle; } + // Commit the successful reservation.
+ event_reservation.Commit(); + return writable_event->Signal(); } @@ -2043,7 +2074,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) { LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle); - return Svc::ResultInvalidHandle; + return ResultInvalidHandle; } static ResultCode ClearEvent32(Core::System& system, Handle event_handle) { @@ -2106,13 +2137,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_ if (!process) { LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", process_handle); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } const auto info_type = static_cast<InfoType>(type); if (info_type != InfoType::Status) { LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } *out = static_cast<u64>(process->GetStatus()); @@ -2174,7 +2205,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour const auto type = static_cast<LimitableResource>(resource_type); if (!IsValidResourceType(type)) { LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); - return ERR_INVALID_ENUM_VALUE; + return ResultInvalidEnumValue; } auto* const current_process = system.Kernel().CurrentProcess(); @@ -2185,16 +2216,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour if (!resource_limit_object) { LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", resource_limit); - return ERR_INVALID_HANDLE; + return ResultInvalidHandle; } const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); if (set_result.IsError()) { - LOG_ERROR( - Kernel_SVC, - "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", - resource_limit_object->GetLimitValue(type), resource_type, - resource_limit_object->GetCurrentValue(type)); + LOG_ERROR(Kernel_SVC, + "Attempted to lower resource limit ({}) for category '{}' below its current " + "value ({})", + resource_limit_object->GetLimitValue(type), resource_type, + resource_limit_object->GetCurrentValue(type)); return set_result; } @@ -2211,7 +2242,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}", out_process_ids_size); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } const auto& kernel = system.Kernel(); @@ -2221,7 +2252,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, out_process_ids, total_copy_size)) { LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", out_process_ids, out_process_ids + total_copy_size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } auto& memory = system.Memory(); @@ -2250,7 +2281,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd if ((out_thread_ids_size & 0xF0000000) != 0) { LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. 
size={}", out_thread_ids_size); - return ERR_OUT_OF_RANGE; + return ResultOutOfRange; } const auto* const current_process = system.Kernel().CurrentProcess(); @@ -2260,7 +2291,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", out_thread_ids, out_thread_ids + total_copy_size); - return ERR_INVALID_ADDRESS_STATE; + return ResultInvalidCurrentMemory; } auto& memory = system.Memory(); diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h index 204cd989d8..a26d9f2c90 100644 --- a/src/core/hle/kernel/svc_results.h +++ b/src/core/hle/kernel/svc_results.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2018 yuzu emulator team // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -6,21 +6,36 @@ #include "core/hle/result.h" -namespace Kernel::Svc { +namespace Kernel { +// Confirmed Switch kernel error codes + +constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7}; +constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14}; constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57}; constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; +constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101}; constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103}; +constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104}; +constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105}; constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; +constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108}; +constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110}; constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112}; constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113}; constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; +constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115}; constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116}; constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; +constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119}; constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; +constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121}; constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122}; +constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123}; constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; +constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126}; +constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132}; -} // namespace Kernel::Svc +} // namespace Kernel diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 765f408c3c..cad063e4d5 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp @@ -2,8 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "core/hle/kernel/k_page_table.h" +#include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/transfer_memory.h" #include "core/hle/result.h" @@ -17,12 +18,13 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory) TransferMemory::~TransferMemory() { // Release memory region when transfer memory is destroyed Reset(); + owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1); } std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Core::Memory::Memory& memory, VAddr base_address, std::size_t size, - Memory::MemoryPermission permissions) { + KMemoryPermission permissions) { std::shared_ptr<TransferMemory> transfer_memory{ std::make_shared<TransferMemory>(kernel, memory)}; diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index 777799d12e..5219514242 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h @@ -6,7 +6,7 @@ #include <memory> -#include "core/hle/kernel/memory/memory_block.h" +#include "core/hle/kernel/k_memory_block.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/physical_memory.h" @@ -36,7 +36,7 @@ public: static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Core::Memory::Memory& memory, VAddr base_address, std::size_t size, - Memory::MemoryPermission permissions); + KMemoryPermission permissions); TransferMemory(const TransferMemory&) = delete; TransferMemory& operator=(const TransferMemory&) = delete; @@ -82,7 +82,7 @@ private: std::size_t size{}; /// The memory permissions that are applied to this instance. - Memory::MemoryPermission owner_permissions{}; + KMemoryPermission owner_permissions{}; /// The process that this transfer memory instance was created under. 
Process* owner_process{}; diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 3ec0e1ecab..615e20a543 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -508,7 +508,7 @@ public: {1, &IManagerForApplication::GetAccountId, "GetAccountId"}, {2, nullptr, "EnsureIdTokenCacheAsync"}, {3, nullptr, "LoadIdTokenCache"}, - {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"}, + {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"}, {150, nullptr, "CreateAuthorizationRequest"}, {160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"}, {170, nullptr, "LoadNetworkServiceLicenseKindAsync"}, @@ -534,6 +534,22 @@ private: rb.PushRaw<u64>(user_id.GetNintendoID()); } + void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_ACC, "(STUBBED) called"); + + std::vector<u8> nas_user_base_for_application(0x68); + ctx.WriteBuffer(nas_user_base_for_application, 0); + + if (ctx.CanWriteBuffer(1)) { + std::vector<u8> unknown_out_buffer(ctx.GetWriteBufferSize(1)); + ctx.WriteBuffer(unknown_out_buffer, 1); + } + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.PushRaw<u64>(user_id.GetNintendoID()); + } + void StoreOpenContext(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_ACC, "(STUBBED) called"); IPC::ResponseBuilder rb{ctx, 2}; diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index bb77c2569f..8e1fe94387 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) { const u64 offset{rp.Pop<u64>()}; const std::vector<u8> data{ctx.ReadBuffer()}; + const std::size_t size{std::min(data.size(), backing.GetSize() - offset)}; - LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size()); + LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size); - if (data.size() > backing.GetSize() - offset) { + if (offset > backing.GetSize()) { LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}", - backing.GetSize(), data.size(), offset); + backing.GetSize(), size, offset); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERR_SIZE_OUT_OF_BOUNDS); return; } - std::memcpy(backing.GetData().data() + offset, data.data(), data.size()); + std::memcpy(backing.GetData().data() + offset, data.data(), size); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); @@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 offset{rp.Pop<u64>()}; - const std::size_t size{ctx.GetWriteBufferSize()}; + const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)}; LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size); - if (size > backing.GetSize() - offset) { + if (offset > backing.GetSize()) { LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}", backing.GetSize(), size, offset); diff --git a/src/core/hle/service/am/applets/controller.cpp b/src/core/hle/service/am/applets/controller.cpp index d7d3ee99ad..c2bfe698f5 100644 --- a/src/core/hle/service/am/applets/controller.cpp +++ b/src/core/hle/service/am/applets/controller.cpp @@ -211,7 +211,8 @@ void Controller::Execute() { case ControllerSupportMode::ShowControllerFirmwareUpdate: 
UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented", controller_private_arg.mode); - [[fallthrough]]; + ConfigurationComplete(); + break; default: { ConfigurationComplete(); break; diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index 3022438b1d..79b209c6b9 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -121,6 +121,10 @@ void SoftwareKeyboard::ExecuteInteractive() { std::memcpy(&request, data.data(), sizeof(Request)); switch (request) { + case Request::Finalize: + complete = true; + broker.SignalStateChanged(); + break; case Request::Calc: { broker.PushNormalDataFromApplet(std::make_shared<IStorage>(system, std::vector<u8>{1})); broker.SignalStateChanged(); diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index dbf1983454..70b9f38248 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -21,6 +21,7 @@ namespace Service::HID { constexpr s32 HID_JOYSTICK_MAX = 0x7fff; +constexpr s32 HID_TRIGGER_MAX = 0x7fff; [[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff; constexpr std::size_t NPAD_OFFSET = 0x9A00; constexpr u32 BATTERY_FULL = 2; @@ -48,6 +49,8 @@ Controller_NPad::NPadControllerType Controller_NPad::MapSettingsTypeToNPad( return NPadControllerType::JoyRight; case Settings::ControllerType::Handheld: return NPadControllerType::Handheld; + case Settings::ControllerType::GameCube: + return NPadControllerType::GameCube; default: UNREACHABLE(); return NPadControllerType::ProController; @@ -67,6 +70,8 @@ Settings::ControllerType Controller_NPad::MapNPadToSettingsType( return Settings::ControllerType::RightJoycon; case NPadControllerType::Handheld: return Settings::ControllerType::Handheld; + case NPadControllerType::GameCube: + return Settings::ControllerType::GameCube; default: UNREACHABLE(); return Settings::ControllerType::ProController; @@ -209,6 +214,13 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) { controller.assignment_mode = NpadAssignments::Single; controller.footer_type = AppletFooterUiType::JoyRightHorizontal; break; + case NPadControllerType::GameCube: + controller.style_set.gamecube.Assign(1); + // The GC Controller behaves like a wired Pro Controller + controller.device_type.fullkey.Assign(1); + controller.system_properties.is_vertical.Assign(1); + controller.system_properties.use_plus.Assign(1); + break; case NPadControllerType::Pokeball: controller.style_set.palma.Assign(1); controller.device_type.palma.Assign(1); @@ -259,6 +271,7 @@ void Controller_NPad::OnInit() { style.joycon_right.Assign(1); style.joycon_dual.Assign(1); style.fullkey.Assign(1); + style.gamecube.Assign(1); style.palma.Assign(1); } @@ -339,6 +352,7 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { auto& pad_state = npad_pad_states[controller_idx].pad_states; auto& lstick_entry = npad_pad_states[controller_idx].l_stick; auto& rstick_entry = npad_pad_states[controller_idx].r_stick; + auto& trigger_entry = npad_trigger_states[controller_idx]; const auto& button_state = buttons[controller_idx]; const auto& analog_state = sticks[controller_idx]; const auto [stick_l_x_f, stick_l_y_f] = @@ -404,6 +418,17 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) { pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus()); pad_state.left_sr.Assign(button_state[SR - 
BUTTON_HID_BEGIN]->GetStatus()); } + + if (controller_type == NPadControllerType::GameCube) { + trigger_entry.l_analog = static_cast<s32>( + button_state[ZL - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0); + trigger_entry.r_analog = static_cast<s32>( + button_state[ZR - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0); + pad_state.zl.Assign(false); + pad_state.zr.Assign(button_state[R - BUTTON_HID_BEGIN]->GetStatus()); + pad_state.l.Assign(button_state[ZL - BUTTON_HID_BEGIN]->GetStatus()); + pad_state.r.Assign(button_state[ZR - BUTTON_HID_BEGIN]->GetStatus()); + } } void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, @@ -418,6 +443,11 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* &npad.joy_left_states, &npad.joy_right_states, &npad.palma_states, &npad.system_ext_states}; + // There is the possibility of having more controllers with analog triggers + const std::array<TriggerGeneric*, 1> controller_triggers{ + &npad.gc_trigger_states, + }; + for (auto* main_controller : controller_npads) { main_controller->common.entry_count = 16; main_controller->common.total_entry_count = 17; @@ -435,6 +465,21 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* cur_entry.timestamp2 = cur_entry.timestamp; } + for (auto* analog_trigger : controller_triggers) { + analog_trigger->entry_count = 16; + analog_trigger->total_entry_count = 17; + + const auto& last_entry = analog_trigger->trigger[analog_trigger->last_entry_index]; + + analog_trigger->timestamp = core_timing.GetCPUTicks(); + analog_trigger->last_entry_index = (analog_trigger->last_entry_index + 1) % 17; + + auto& cur_entry = analog_trigger->trigger[analog_trigger->last_entry_index]; + + cur_entry.timestamp = last_entry.timestamp + 1; + cur_entry.timestamp2 = cur_entry.timestamp; + } + const auto& controller_type = connected_controllers[i].type; if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) { @@ -444,6 +489,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* RequestPadStateUpdate(npad_index); auto& pad_state = npad_pad_states[npad_index]; + auto& trigger_state = npad_trigger_states[npad_index]; auto& main_controller = npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index]; @@ -456,6 +502,8 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index]; auto& libnx_entry = npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index]; + auto& trigger_entry = + npad.gc_trigger_states.trigger[npad.gc_trigger_states.last_entry_index]; libnx_entry.connection_status.raw = 0; libnx_entry.connection_status.is_connected.Assign(1); @@ -524,6 +572,18 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* libnx_entry.connection_status.is_right_connected.Assign(1); break; + case NPadControllerType::GameCube: + main_controller.connection_status.raw = 0; + main_controller.connection_status.is_connected.Assign(1); + main_controller.connection_status.is_wired.Assign(1); + main_controller.pad.pad_states.raw = pad_state.pad_states.raw; + main_controller.pad.l_stick = pad_state.l_stick; + main_controller.pad.r_stick = pad_state.r_stick; + trigger_entry.l_analog = trigger_state.l_analog; + trigger_entry.r_analog = trigger_state.r_analog; + + libnx_entry.connection_status.is_wired.Assign(1); + break; case
NPadControllerType::Pokeball: pokeball_entry.connection_status.raw = 0; pokeball_entry.connection_status.is_connected.Assign(1); @@ -674,6 +734,7 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing right_sixaxis_entry.orientation = motion_devices[1].orientation; } break; + case NPadControllerType::GameCube: case NPadControllerType::Pokeball: break; } @@ -1135,6 +1196,8 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const return style.joycon_left; case NPadControllerType::JoyRight: return style.joycon_right; + case NPadControllerType::GameCube: + return style.gamecube; case NPadControllerType::Pokeball: return style.palma; default: diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 48bab988c6..bc2e6779d4 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -51,6 +51,7 @@ public: JoyDual, JoyLeft, JoyRight, + GameCube, Pokeball, }; @@ -60,6 +61,7 @@ public: JoyconDual = 5, JoyconLeft = 6, JoyconRight = 7, + GameCube = 8, Pokeball = 9, MaxNpadType = 10, }; @@ -389,6 +391,25 @@ private: }; static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size"); + struct TriggerState { + s64_le timestamp{}; + s64_le timestamp2{}; + s32_le l_analog{}; + s32_le r_analog{}; + }; + static_assert(sizeof(TriggerState) == 0x18, "TriggerState is an invalid size"); + + struct TriggerGeneric { + INSERT_PADDING_BYTES(0x4); + s64_le timestamp; + INSERT_PADDING_BYTES(0x4); + s64_le total_entry_count; + s64_le last_entry_index; + s64_le entry_count; + std::array<TriggerState, 17> trigger{}; + }; + static_assert(sizeof(TriggerGeneric) == 0x1C8, "TriggerGeneric is an invalid size"); + struct NPadSystemProperties { union { s64_le raw{}; @@ -509,7 +530,9 @@ private: AppletFooterUiType footer_type; // nfc_states needs to be checked switchbrew does not match with HW NfcXcdHandle nfc_states; - INSERT_PADDING_BYTES(0xdef); + INSERT_PADDING_BYTES(0x8); // Mutex + TriggerGeneric gc_trigger_states; + INSERT_PADDING_BYTES(0xc1f); }; static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size"); @@ -560,6 +583,7 @@ private: f32 sixaxis_fusion_parameter2{}; bool sixaxis_at_rest{true}; std::array<ControllerPad, 10> npad_pad_states{}; + std::array<TriggerState, 10> npad_trigger_states{}; bool is_in_lr_assignment_mode{false}; Core::System& system; }; diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 51a010a554..ba27bbb05e 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -15,9 +15,9 @@ #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" #include "core/hle/kernel/k_readable_event.h" +#include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_writable_event.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/service/hid/errors.h" #include "core/hle/service/hid/hid.h" #include "core/hle/service/hid/irs.h" @@ -110,6 +110,7 @@ void IAppletResource::DeactivateController(HidController controller) { IAppletResource ::~IAppletResource() { system.CoreTiming().UnscheduleEvent(pad_update_event, 0); + system.CoreTiming().UnscheduleEvent(motion_update_event, 0); } void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { @@ -272,8 +273,8 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} { {204, &Hid::PermitVibration, 
"PermitVibration"}, {205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"}, {206, &Hid::SendVibrationValues, "SendVibrationValues"}, - {207, nullptr, "SendVibrationGcErmCommand"}, - {208, nullptr, "GetActualVibrationGcErmCommand"}, + {207, &Hid::SendVibrationGcErmCommand, "SendVibrationGcErmCommand"}, + {208, &Hid::GetActualVibrationGcErmCommand, "GetActualVibrationGcErmCommand"}, {209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"}, {210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"}, {211, &Hid::IsVibrationDeviceMounted, "IsVibrationDeviceMounted"}, @@ -1092,7 +1093,22 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) { VibrationDeviceInfo vibration_device_info; - vibration_device_info.type = VibrationDeviceType::LinearResonantActuator; + switch (vibration_device_handle.npad_type) { + case Controller_NPad::NpadType::ProController: + case Controller_NPad::NpadType::Handheld: + case Controller_NPad::NpadType::JoyconDual: + case Controller_NPad::NpadType::JoyconLeft: + case Controller_NPad::NpadType::JoyconRight: + default: + vibration_device_info.type = VibrationDeviceType::LinearResonantActuator; + break; + case Controller_NPad::NpadType::GameCube: + vibration_device_info.type = VibrationDeviceType::GcErm; + break; + case Controller_NPad::NpadType::Pokeball: + vibration_device_info.type = VibrationDeviceType::Unknown; + break; + } switch (vibration_device_handle.device_index) { case Controller_NPad::DeviceIndex::Left: @@ -1214,6 +1230,108 @@ void Hid::SendVibrationValues(Kernel::HLERequestContext& ctx) { rb.Push(RESULT_SUCCESS); } +void Hid::SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + Controller_NPad::DeviceHandle vibration_device_handle; + u64 applet_resource_user_id; + VibrationGcErmCommand gc_erm_command; + }; + static_assert(sizeof(Parameters) == 0x18, "Parameters has incorrect size."); + + const auto parameters{rp.PopRaw<Parameters>()}; + + /** + * Note: This uses yuzu-specific behavior such that the StopHard command produces + * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined below, + * in order to differentiate between Stop and StopHard commands. + * This is done to reuse the controller vibration functions made for regular controllers. 
+ */ + const auto vibration_value = [parameters] { + switch (parameters.gc_erm_command) { + case VibrationGcErmCommand::Stop: + return Controller_NPad::VibrationValue{ + .amp_low = 0.0f, + .freq_low = 160.0f, + .amp_high = 0.0f, + .freq_high = 320.0f, + }; + case VibrationGcErmCommand::Start: + return Controller_NPad::VibrationValue{ + .amp_low = 1.0f, + .freq_low = 160.0f, + .amp_high = 1.0f, + .freq_high = 320.0f, + }; + case VibrationGcErmCommand::StopHard: + return Controller_NPad::VibrationValue{ + .amp_low = 0.0f, + .freq_low = 0.0f, + .amp_high = 0.0f, + .freq_high = 0.0f, + }; + default: + return Controller_NPad::DEFAULT_VIBRATION_VALUE; + } + }(); + + applet_resource->GetController<Controller_NPad>(HidController::NPad) + .VibrateController(parameters.vibration_device_handle, vibration_value); + + LOG_DEBUG(Service_HID, + "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}, " + "gc_erm_command={}", + parameters.vibration_device_handle.npad_type, + parameters.vibration_device_handle.npad_id, + parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id, + parameters.gc_erm_command); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); +} + +void Hid::GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + struct Parameters { + Controller_NPad::DeviceHandle vibration_device_handle; + INSERT_PADDING_WORDS_NOINIT(1); + u64 applet_resource_user_id; + }; + + const auto parameters{rp.PopRaw<Parameters>()}; + + const auto last_vibration = applet_resource->GetController<Controller_NPad>(HidController::NPad) + .GetLastVibration(parameters.vibration_device_handle); + + const auto gc_erm_command = [last_vibration] { + if (last_vibration.amp_low != 0.0f || last_vibration.amp_high != 0.0f) { + return VibrationGcErmCommand::Start; + } + + /** + * Note: This uses yuzu-specific behavior such that the StopHard command produces + * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined in the HID function + * SendVibrationGcErmCommand, in order to differentiate between Stop and StopHard commands. + * This is done to reuse the controller vibration functions made for regular controllers. 
+ */ + if (last_vibration.freq_low == 0.0f && last_vibration.freq_high == 0.0f) { + return VibrationGcErmCommand::StopHard; + } + + return VibrationGcErmCommand::Stop; + }(); + + LOG_DEBUG(Service_HID, + "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}", + parameters.vibration_device_handle.npad_type, + parameters.vibration_device_handle.npad_id, + parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id); + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.PushEnum(gc_erm_command); +} + void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto applet_resource_user_id{rp.Pop<u64>()}; diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 7cc0433e24..36ed228c8b 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -14,7 +14,7 @@ struct EventType; } namespace Kernel { -class SharedMemory; +class KSharedMemory; } namespace Service::SM { @@ -69,7 +69,7 @@ private: void UpdateControllers(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); void UpdateMotion(std::uintptr_t user_data, std::chrono::nanoseconds ns_late); - std::shared_ptr<Kernel::SharedMemory> shared_mem; + std::shared_ptr<Kernel::KSharedMemory> shared_mem; std::shared_ptr<Core::Timing::EventType> pad_update_event; std::shared_ptr<Core::Timing::EventType> motion_update_event; @@ -136,6 +136,8 @@ private: void PermitVibration(Kernel::HLERequestContext& ctx); void IsVibrationPermitted(Kernel::HLERequestContext& ctx); void SendVibrationValues(Kernel::HLERequestContext& ctx); + void SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx); + void GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx); void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx); void EndPermitVibrationSession(Kernel::HLERequestContext& ctx); void IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx); @@ -154,7 +156,9 @@ private: void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx); enum class VibrationDeviceType : u32 { + Unknown = 0, LinearResonantActuator = 1, + GcErm = 2, }; enum class VibrationDevicePosition : u32 { @@ -163,6 +167,12 @@ private: Right = 2, }; + enum class VibrationGcErmCommand : u64 { + Stop = 0, + Start = 1, + StopHard = 2, + }; + struct VibrationDeviceInfo { VibrationDeviceType type{}; VibrationDevicePosition position{}; diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index c8413099f1..2dfa936fb9 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -6,8 +6,8 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/service/hid/irs.h" namespace Service::HID { diff --git a/src/core/hle/service/hid/irs.h b/src/core/hle/service/hid/irs.h index be0c486ba0..b0c8c71686 100644 --- a/src/core/hle/service/hid/irs.h +++ b/src/core/hle/service/hid/irs.h @@ -12,7 +12,7 @@ class System; } namespace Kernel { -class SharedMemory; +class KSharedMemory; } namespace Service::HID { @@ -42,7 +42,7 @@ private: void StopImageProcessorAsync(Kernel::HLERequestContext& ctx); void ActivateIrsensorWithFunctionLevel(Kernel::HLERequestContext& ctx); - std::shared_ptr<Kernel::SharedMemory> shared_mem; + std::shared_ptr<Kernel::KSharedMemory> shared_mem; const u32 device_handle{0xABCD}; }; diff --git 
a/src/core/hle/service/ldn/errors.h b/src/core/hle/service/ldn/errors.h new file mode 100644 index 0000000000..a718c5c66b --- /dev/null +++ b/src/core/hle/service/ldn/errors.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/hle/result.h" + +namespace Service::LDN { + +constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22}; + +} // namespace Service::LDN diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index ee908f3990..c630d93cd0 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -6,6 +6,7 @@ #include "core/hle/ipc_helpers.h" #include "core/hle/result.h" +#include "core/hle/service/ldn/errors.h" #include "core/hle/service/ldn/ldn.h" #include "core/hle/service/sm/sm.h" @@ -103,7 +104,7 @@ public: : ServiceFramework{system_, "IUserLocalCommunicationService"} { // clang-format off static const FunctionInfo functions[] = { - {0, nullptr, "GetState"}, + {0, &IUserLocalCommunicationService::GetState, "GetState"}, {1, nullptr, "GetNetworkInfo"}, {2, nullptr, "GetIpv4Address"}, {3, nullptr, "GetDisconnectReason"}, @@ -138,13 +139,38 @@ public: RegisterHandlers(functions); } - void Initialize2(Kernel::HLERequestContext& ctx) { + void GetState(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_LDN, "(STUBBED) called"); - // Result success seem make this services start network and continue. - // If we just pass result error then it will stop and maybe try again and again. + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + + // Indicate a network error, as we do not actually emulate LDN + rb.Push(static_cast<u32>(State::Error)); + } + + void Initialize2(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_LDN, "called"); + + is_initialized = true; + IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(RESULT_UNKNOWN); + rb.Push(RESULT_SUCCESS); } + +private: + enum class State { + None, + Initialized, + AccessPointOpened, + AccessPointCreated, + StationOpened, + StationConnected, + Error, + }; + + bool is_initialized{}; }; class LDNS final : public ServiceFramework<LDNS> { diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 9da786b4ef..d111c13578 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -11,10 +11,10 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/hle/ipc_helpers.h" -#include "core/hle/kernel/errors.h" -#include "core/hle/kernel/memory/page_table.h" -#include "core/hle/kernel/memory/system_control.h" +#include "core/hle/kernel/k_page_table.h" +#include "core/hle/kernel/k_system_control.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/svc_results.h" #include "core/hle/service/ldr/ldr.h" #include "core/hle/service/service.h" #include "core/loader/nro.h" @@ -287,12 +287,11 @@ public: rb.Push(RESULT_SUCCESS); } - bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start, - std::size_t size) const { - constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize}; + bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const { + constexpr std::size_t padding_size{4 * Kernel::PageSize}; const auto start_info{page_table.QueryInfo(start - 1)}; - if (start_info.state != Kernel::Memory::MemoryState::Free) { + if (start_info.state != Kernel::KMemoryState::Free) { return {}; } @@ -302,21 +301,20 @@ public: const auto
end_info{page_table.QueryInfo(start + size)}; - if (end_info.state != Kernel::Memory::MemoryState::Free) { + if (end_info.state != Kernel::KMemoryState::Free) { return {}; } return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); } - VAddr GetRandomMapRegion(const Kernel::Memory::PageTable& page_table, std::size_t size) const { + VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const { VAddr addr{}; const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >> - Kernel::Memory::PageBits}; + Kernel::PageBits}; do { addr = page_table.GetAliasCodeRegionStart() + - (Kernel::Memory::SystemControl::GenerateRandomRange(0, end_pages) - << Kernel::Memory::PageBits); + (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits); } while (!page_table.IsInsideAddressSpace(addr, size) || page_table.IsInsideHeapRegion(addr, size) || page_table.IsInsideAliasRegion(addr, size)); @@ -330,7 +328,7 @@ public: const VAddr addr{GetRandomMapRegion(page_table, size)}; const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)}; - if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { + if (result == Kernel::ResultInvalidCurrentMemory) { continue; } @@ -361,7 +359,7 @@ public: const ResultCode result{ page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)}; - if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { + if (result == Kernel::ResultInvalidCurrentMemory) { continue; } @@ -387,7 +385,7 @@ public: const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset}; const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size}; const VAddr bss_end_addr{ - Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)}; + Common::AlignUp(bss_start + nro_header.bss_size, Kernel::PageSize)}; auto CopyCode{[&](VAddr src_addr, VAddr dst_addr, u64 size) { std::vector<u8> source_data(size); @@ -402,12 +400,12 @@ public: nro_header.segment_headers[DATA_INDEX].memory_size); CASCADE_CODE(process->PageTable().SetCodeMemoryPermission( - text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute)); - CASCADE_CODE(process->PageTable().SetCodeMemoryPermission( - ro_start, data_start - ro_start, Kernel::Memory::MemoryPermission::Read)); + text_start, ro_start - text_start, Kernel::KMemoryPermission::ReadAndExecute)); + CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(ro_start, data_start - ro_start, + Kernel::KMemoryPermission::Read)); return process->PageTable().SetCodeMemoryPermission( - data_start, bss_end_addr - data_start, Kernel::Memory::MemoryPermission::ReadAndWrite); + data_start, bss_end_addr - data_start, Kernel::KMemoryPermission::ReadAndWrite); } void LoadNro(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp index 5d6d256965..2d1d4d67fd 100644 --- a/src/core/hle/service/nfp/nfp.cpp +++ b/src/core/hle/service/nfp/nfp.cpp @@ -215,7 +215,7 @@ private: const auto& amiibo = nfp_interface.GetAmiiboBuffer(); const TagInfo tag_info{ .uuid = amiibo.uuid, - .uuid_length = static_cast<u8>(tag_info.uuid.size()), + .uuid_length = static_cast<u8>(amiibo.uuid.size()), .padding_1 = {}, .protocol = 1, // TODO(ogniK): Figure out actual values .tag_type = 2, diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index b6ac0a81aa..fcd15d81fa 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp @@ 
-19,9 +19,9 @@ #include "core/file_sys/romfs.h" #include "core/file_sys/system_archive/system_archive.h" #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/physical_memory.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/ns/pl_u.h" @@ -131,7 +131,7 @@ struct PL_U::Impl { } /// Handle to shared memory region designated for a shared font - std::shared_ptr<Kernel::SharedMemory> shared_font_mem; + std::shared_ptr<Kernel::KSharedMemory> shared_font_mem; /// Backing memory for the shared font data std::shared_ptr<Kernel::PhysicalMemory> shared_font; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 36970f828c..ecba1dba18 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -34,8 +34,7 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, case 0xa: { if (command.length == 0x1c) { LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); - Tegra::ChCommandHeaderList cmdlist(1); - cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F}; + Tegra::ChCommandHeaderList cmdlist{{0xDEADB33F}}; system.GPU().PushCommandBuffer(cmdlist); } return UnmapBuffer(input, output); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 72499654c7..70849a9bd1 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -28,8 +28,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve return GetWaitbase(input, output); case 0x9: return MapBuffer(input, output); - case 0xa: + case 0xa: { + if (command.length == 0x1c) { + Tegra::ChCommandHeaderList cmdlist{{0xDEADB33F}}; + system.GPU().PushCommandBuffer(cmdlist); + } return UnmapBuffer(input, output); + } default: break; } diff --git a/src/core/hle/service/olsc/olsc.cpp b/src/core/hle/service/olsc/olsc.cpp index 4440135ed4..e2ac71fa14 100644 --- a/src/core/hle/service/olsc/olsc.cpp +++ b/src/core/hle/service/olsc/olsc.cpp @@ -17,7 +17,7 @@ public: static const FunctionInfo functions[] = { {0, &OLSC::Initialize, "Initialize"}, {10, nullptr, "VerifySaveDataBackupLicenseAsync"}, - {13, nullptr, "GetSaveDataBackupSetting"}, + {13, &OLSC::GetSaveDataBackupSetting, "GetSaveDataBackupSetting"}, {14, &OLSC::SetSaveDataBackupSettingEnabled, "SetSaveDataBackupSettingEnabled"}, {15, nullptr, "SetCustomData"}, {16, nullptr, "DeleteSaveDataBackupSetting"}, @@ -52,6 +52,17 @@ private: rb.Push(RESULT_SUCCESS); } + void GetSaveDataBackupSetting(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_OLSC, "(STUBBED) called"); + + // backup_setting is set to 0 since the real value is unknown + constexpr u64 backup_setting = 0; + + IPC::ResponseBuilder rb{ctx, 4}; + rb.Push(RESULT_SUCCESS); + rb.Push(backup_setting); + } + void SetSaveDataBackupSettingEnabled(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_OLSC, "(STUBBED) called"); diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index 0b306b87a2..78e9cd708b 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco return {-1, Errno::MFILE}; } - FileDescriptor& descriptor =
file_descriptors[fd].emplace(); + file_descriptors[fd] = FileDescriptor{}; + FileDescriptor& descriptor = *file_descriptors[fd]; // ENONMEM might be thrown here LOG_INFO(Service, "New socket fd={}", fd); @@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) { return {-1, Translate(bsd_errno)}; } - FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace(); + file_descriptors[new_fd] = FileDescriptor{}; + FileDescriptor& new_descriptor = *file_descriptors[new_fd]; new_descriptor.socket = std::move(result.socket); new_descriptor.is_connection_based = descriptor.is_connection_based; diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp index 858623e2bd..1f7309f6b4 100644 --- a/src/core/hle/service/time/time_manager.cpp +++ b/src/core/hle/service/time/time_manager.cpp @@ -279,6 +279,10 @@ const SharedMemory& TimeManager::GetSharedMemory() const { return impl->shared_memory; } +void TimeManager::Shutdown() { + impl.reset(); +} + void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) { impl->UpdateLocalSystemClockTime(system, posix_time); } diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h index 993c7c2886..4db8cc0e1f 100644 --- a/src/core/hle/service/time/time_manager.h +++ b/src/core/hle/service/time/time_manager.h @@ -61,6 +61,8 @@ public: const SharedMemory& GetSharedMemory() const; + void Shutdown(); + void SetupTimeZoneManager(std::string location_name, Clock::SteadyClockTimePoint time_zone_updated_time_point, std::size_t total_location_name_count, u128 time_zone_rule_version, diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index e0ae9f8748..4d8de81be4 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -22,7 +22,7 @@ SharedMemory::SharedMemory(Core::System& system) : system(system) { SharedMemory::~SharedMemory() = default; -std::shared_ptr<Kernel::SharedMemory> SharedMemory::GetSharedMemoryHolder() const { +std::shared_ptr<Kernel::KSharedMemory> SharedMemory::GetSharedMemoryHolder() const { return shared_memory_holder; } diff --git a/src/core/hle/service/time/time_sharedmemory.h b/src/core/hle/service/time/time_sharedmemory.h index e0c3e63da0..2996805172 100644 --- a/src/core/hle/service/time/time_sharedmemory.h +++ b/src/core/hle/service/time/time_sharedmemory.h @@ -6,8 +6,8 @@ #include "common/common_types.h" #include "common/uuid.h" +#include "core/hle/kernel/k_shared_memory.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/shared_memory.h" #include "core/hle/service/time/clock_types.h" namespace Service::Time { @@ -18,7 +18,7 @@ public: ~SharedMemory(); // Return the shared memory handle - std::shared_ptr<Kernel::SharedMemory> GetSharedMemoryHolder() const; + std::shared_ptr<Kernel::KSharedMemory> GetSharedMemoryHolder() const; // TODO(ogniK): We have to properly simulate memory barriers, how are we going to do this? 
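One conventional answer to the TODO above, shown purely for illustration (yuzu does not implement this here), is a seqlock-style guarded read: the writer bumps a counter around each update, and the reader retries whenever the counter was odd or changed mid-read:

    #include <atomic>

    // Illustrative seqlock-style read; not yuzu code. The plain copy of
    // `slot` is technically racy and stands in for a real atomic copy.
    template <typename T>
    T ReadConsistent(const std::atomic<unsigned>& counter, const T& slot) {
        T value;
        unsigned before = 0;
        unsigned after = 0;
        do {
            before = counter.load(std::memory_order_acquire);
            value = slot;
            std::atomic_thread_fence(std::memory_order_acquire);
            after = counter.load(std::memory_order_relaxed);
        } while (before != after || (before & 1) != 0);
        return value;
    }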
template <typename T, std::size_t Offset> @@ -63,7 +63,7 @@ public: void SetAutomaticCorrectionEnabled(bool is_enabled); private: - std::shared_ptr<Kernel::SharedMemory> shared_memory_holder; + std::shared_ptr<Kernel::KSharedMemory> shared_memory_holder; Core::System& system; Format shared_memory_format{}; }; diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 79ebf11dea..4a10211f61 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -12,8 +12,8 @@ #include "core/file_sys/control_metadata.h" #include "core/file_sys/patch_manager.h" #include "core/file_sys/romfs_factory.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/deconstructed_rom_directory.h" diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index dca1fcb18f..f4a3393909 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -10,7 +10,7 @@ #include "common/file_util.h" #include "common/logging/log.h" #include "core/hle/kernel/code_set.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process.h" #include "core/loader/elf.h" #include "core/memory.h" diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index e162c4ff09..3f4ba233da 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -6,7 +6,7 @@ #include "core/file_sys/kernel_executable.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/code_set.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process.h" #include "core/loader/kip.h" #include "core/memory.h" diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index f976d0a9cc..14618cb40a 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -15,8 +15,8 @@ #include "core/file_sys/romfs_factory.h" #include "core/file_sys/vfs_offset.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/loader/nro.h" diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index ea347ea837..cbd0486951 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -15,8 +15,8 @@ #include "core/core.h" #include "core/file_sys/patch_manager.h" #include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_thread.h" -#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" #include "core/loader/nso.h" #include "core/memory.h" diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 11609682a0..b9dd3e275f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -16,7 +16,7 @@ #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/device_memory.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/process.h" #include "core/memory.h" diff --git a/src/core/memory.h b/src/core/memory.h index 705ebb23d4..6d34fcfe26 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ 
-116,6 +116,11 @@ public: */ u8* GetPointer(VAddr vaddr); + template <typename T> + T* GetPointer(VAddr vaddr) { + return reinterpret_cast<T*>(GetPointer(vaddr)); + } + /** * Gets a pointer to the given address. * @@ -126,6 +131,11 @@ public: */ const u8* GetPointer(VAddr vaddr) const; + template <typename T> + const T* GetPointer(VAddr vaddr) const { + return reinterpret_cast<const T*>(GetPointer(vaddr)); + } + /** * Reads an 8-bit unsigned value from the current process' address space * at the given virtual address. diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index 2dd0eb0f88..8eec567abd 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -10,7 +10,7 @@ #include "core/core_timing.h" #include "core/core_timing_util.h" #include "core/hardware_properties.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/service/hid/controllers/npad.h" #include "core/hle/service/hid/hid.h" diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index f199c33624..74fb328142 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -17,7 +17,7 @@ #include "core/arm/arm_interface.h" #include "core/core.h" #include "core/hle/kernel/hle_ipc.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process.h" #include "core/hle/result.h" #include "core/memory.h" diff --git a/src/core/settings.h b/src/core/settings.h index a324530bdb..d849dded38 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -181,12 +181,13 @@ struct Values { std::string motion_device; std::string udp_input_servers; - bool emulate_analog_keyboard; - + bool mouse_panning; + float mouse_panning_sensitivity; bool mouse_enabled; std::string mouse_device; MouseButtonsRaw mouse_buttons; + bool emulate_analog_keyboard; bool keyboard_enabled; KeyboardKeysRaw keyboard_keys; KeyboardModsRaw keyboard_mods; diff --git a/src/input_common/mouse/mouse_input.cpp b/src/input_common/mouse/mouse_input.cpp index 10786a5413..b864d26f26 100644 --- a/src/input_common/mouse/mouse_input.cpp +++ b/src/input_common/mouse/mouse_input.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2+ // Refer to the license.txt file included.
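For the templated Memory::GetPointer overloads added above, a typical call site would look like the following fragment; NroMagic and the surrounding function are hypothetical, and a valid mapped VAddr is assumed:

    // Hypothetical helper; only GetPointer<T> itself comes from the hunk above.
    struct NroMagic {
        u32 magic;
    };

    bool HasNroMagic(Core::Memory::Memory& memory, VAddr addr) {
        const auto* header = memory.GetPointer<NroMagic>(addr);
        return header != nullptr && header->magic == 0x304F524E; // "NRO0"
    }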
+#include "core/settings.h" #include "input_common/mouse/mouse_input.h" namespace MouseInput { @@ -32,10 +33,18 @@ void Mouse::UpdateThread() { info.motion.UpdateOrientation(update_time * 1000); info.tilt_speed = 0; info.data.motion = info.motion.GetMotion(); + if (Settings::values.mouse_panning) { + info.last_mouse_change *= 0.96f; + info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x), + static_cast<int>(16 * -info.last_mouse_change.y)}; + } } if (configuring) { UpdateYuzuSettings(); } + if (mouse_panning_timout++ > 20) { + StopPanning(); + } std::this_thread::sleep_for(std::chrono::milliseconds(update_time)); } } @@ -65,8 +74,45 @@ void Mouse::PressButton(int x, int y, int button_) { mouse_info[button_index].data.pressed = true; } -void Mouse::MouseMove(int x, int y) { +void Mouse::StopPanning() { for (MouseInfo& info : mouse_info) { + if (Settings::values.mouse_panning) { + info.data.axis = {}; + info.tilt_speed = 0; + info.last_mouse_change = {}; + } + } +} + +void Mouse::MouseMove(int x, int y, int center_x, int center_y) { + for (MouseInfo& info : mouse_info) { + if (Settings::values.mouse_panning) { + auto mouse_change = + (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>(); + mouse_panning_timout = 0; + + if (mouse_change.y == 0 && mouse_change.x == 0) { + continue; + } + const auto mouse_change_length = mouse_change.Length(); + if (mouse_change_length < 3.0f) { + mouse_change /= mouse_change_length / 3.0f; + } + + info.last_mouse_change = (info.last_mouse_change * 0.91f) + (mouse_change * 0.09f); + + const auto last_mouse_change_length = info.last_mouse_change.Length(); + if (last_mouse_change_length > 8.0f) { + info.last_mouse_change /= last_mouse_change_length / 8.0f; + } else if (last_mouse_change_length < 1.0f) { + info.last_mouse_change = mouse_change / mouse_change.Length(); + } + + info.tilt_direction = info.last_mouse_change; + info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity; + continue; + } + if (info.data.pressed) { const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin; const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position; diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h index 58803c1bf2..46aa676c15 100644 --- a/src/input_common/mouse/mouse_input.h +++ b/src/input_common/mouse/mouse_input.h @@ -57,8 +57,10 @@ public: * Signals that mouse has moved. * @param x the x-coordinate of the cursor * @param y the y-coordinate of the cursor + * @param center_x the x-coordinate of the middle of the screen + * @param center_y the y-coordinate of the middle of the screen */ - void MouseMove(int x, int y); + void MouseMove(int x, int y, int center_x, int center_y); /** * Signals that a motion sensor tilt has ended. 
@@ -74,11 +76,13 @@ public: private: void UpdateThread(); void UpdateYuzuSettings(); + void StopPanning(); struct MouseInfo { InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f}; Common::Vec2<int> mouse_origin; Common::Vec2<int> last_mouse_position; + Common::Vec2<float> last_mouse_change; bool is_tilting = false; float sensitivity{0.120f}; @@ -94,5 +98,6 @@ private: Common::SPSCQueue<MouseStatus> mouse_queue; bool configuring{false}; bool update_thread_running{true}; + int mouse_panning_timout{}; }; } // namespace MouseInput diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp index 3d799b2937..bb56787ee3 100644 --- a/src/input_common/mouse/mouse_poller.cpp +++ b/src/input_common/mouse/mouse_poller.cpp @@ -6,6 +6,7 @@ #include <utility> #include "common/threadsafe_queue.h" +#include "core/settings.h" #include "input_common/mouse/mouse_input.h" #include "input_common/mouse/mouse_poller.h" @@ -71,7 +72,7 @@ public: std::lock_guard lock{mutex}; const auto axis_value = static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis)); - return axis_value / (100.0f * range); + return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range); } std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { diff --git a/src/input_common/settings.h b/src/input_common/settings.h index 75486554b6..a59f5d4612 100644 --- a/src/input_common/settings.h +++ b/src/input_common/settings.h @@ -340,6 +340,7 @@ enum class ControllerType { LeftJoycon, RightJoycon, Handheld, + GameCube, }; struct PlayerInput { diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index e7e50d789b..c4afa41747 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -144,6 +144,10 @@ Client::~Client() { Reset(); } +Client::ClientData::ClientData() = default; + +Client::ClientData::~ClientData() = default; + std::vector<Common::ParamPackage> Client::GetInputDevices() const { std::vector<Common::ParamPackage> devices; for (std::size_t client = 0; client < clients.size(); client++) { diff --git a/src/input_common/udp/client.h b/src/input_common/udp/client.h index 822f9c5503..a523f61245 100644 --- a/src/input_common/udp/client.h +++ b/src/input_common/udp/client.h @@ -98,6 +98,9 @@ public: private: struct ClientData { + ClientData(); + ~ClientData(); + std::string host{"127.0.0.1"}; u16 port{26760}; std::size_t pad_index{}; diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index b630281a0e..9829da6f00 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -84,8 +84,8 @@ public: private: const std::string ip; - const u16 port; - const u16 pad; + [[maybe_unused]] const u16 port; + [[maybe_unused]] const u16 pad; CemuhookUDP::Client* client; mutable std::mutex mutex; }; diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 6a5c18945c..4ea0076e96 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,5 +1,6 @@ add_executable(tests common/bit_field.cpp + common/cityhash.cpp common/fibers.cpp common/param_package.cpp common/ring_buffer.cpp diff --git a/src/tests/common/cityhash.cpp b/src/tests/common/cityhash.cpp new file mode 100644 index 0000000000..7a40b6c4ae --- /dev/null +++ b/src/tests/common/cityhash.cpp @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
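In the mouse_poller.cpp hunk above, the new sensitivity setting scales the returned axis value linearly; with the made-up numbers below (none of them are yuzu defaults), the arithmetic works out as follows:

    #include <cstdio>

    int main() {
        const float axis_value = 50.0f;          // accumulated raw axis (example)
        const float panning_sensitivity = 1.0f;  // Settings value (example)
        const float range = 1.0f;                // poller range (example)
        // Same formula as the axis getter in the hunk:
        // value * sensitivity / (100 * range).
        std::printf("%.3f\n", axis_value * panning_sensitivity / (100.0f * range));
        // prints 0.500
    }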
+ +#include <catch2/catch.hpp> + +#include "common/cityhash.h" + +constexpr char msg[] = "The blue frogs are singing under the crimson sky.\n" + "It is time to run, Robert."; + +using namespace Common; + +TEST_CASE("CityHash", "[common]") { + // These test results were built against a known good version. + REQUIRE(CityHash64(msg, sizeof(msg)) == 0x92d5c2e9cbfbbc01); + REQUIRE(CityHash64WithSeed(msg, sizeof(msg), 0xdead) == 0xbfbe93f21a2820dd); + REQUIRE(CityHash64WithSeeds(msg, sizeof(msg), 0xbeef, 0xcafe) == 0xb343317955fc8a06); + REQUIRE(CityHash128(msg, sizeof(msg)) == u128{0x98e60d0423747eaa, 0xd8694c5b6fcaede9}); + REQUIRE(CityHash128WithSeed(msg, sizeof(msg), {0xdead, 0xbeef}) == + u128{0xf0307dba81199ebe, 0xd77764e0c4a9eb74}); +} diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index 651633e9ed..edced69bb2 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp @@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") { REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); } + +TEST_CASE("BufferBase: Cached write") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.CachedCpuWrite(c + PAGE, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.FlushCachedWrites(); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} + +TEST_CASE("BufferBase: Multiple cached write") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.CachedCpuWrite(c + PAGE, PAGE); + buffer.CachedCpuWrite(c + PAGE * 3, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); + buffer.FlushCachedWrites(); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} + +TEST_CASE("BufferBase: Cached write unmarked") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.CachedCpuWrite(c + PAGE, PAGE); + buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.FlushCachedWrites(); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} + +TEST_CASE("BufferBase: Cached write iterated") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.CachedCpuWrite(c + PAGE, PAGE); + int num = 0; + buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); + REQUIRE(num == 0); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.FlushCachedWrites(); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} + +TEST_CASE("BufferBase: Cached write downloads") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 64); + buffer.CachedCpuWrite(c + PAGE, PAGE); + REQUIRE(rasterizer.Count() == 63); + buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); + int num 
= 0; + buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); + buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); + REQUIRE(num == 0); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); + buffer.FlushCachedWrites(); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2cf95937e6..9b931976a1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -2,10 +2,8 @@ add_subdirectory(host_shaders) add_library(video_core STATIC buffer_cache/buffer_base.h - buffer_cache/buffer_block.h + buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.h - buffer_cache/map_interval.cpp - buffer_cache/map_interval.h cdma_pusher.cpp cdma_pusher.h command_classes/codecs/codec.cpp @@ -152,8 +150,6 @@ add_library(video_core STATIC renderer_vulkan/vk_staging_buffer_pool.h renderer_vulkan/vk_state_tracker.cpp renderer_vulkan/vk_state_tracker.h - renderer_vulkan/vk_stream_buffer.cpp - renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp @@ -271,14 +267,13 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) -if (MSVC) - target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) - target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib) -else() - target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR}) - target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES}) +if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) + add_dependencies(video_core ffmpeg-build) endif() +target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) +target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) + add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index ee8602ce97..0c00ae280e 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -19,6 +19,7 @@ namespace VideoCommon { enum class BufferFlagBits { Picked = 1 << 0, + CachedWrites = 1 << 1, }; DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) @@ -40,7 +41,7 @@ class BufferBase { static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; /// Vector tracking modified pages tightly packed with small vector optimization - union WrittenWords { + union WordsArray { /// Returns the pointer to the words state [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { return is_short ? 
&stack : heap; @@ -55,49 +56,59 @@ class BufferBase { u64* heap; ///< Not-small buffers pointer to the storage }; - struct GpuCpuWords { - explicit GpuCpuWords() = default; - explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { + struct Words { + explicit Words() = default; + explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { if (IsShort()) { cpu.stack = ~u64{0}; gpu.stack = 0; + cached_cpu.stack = 0; + untracked.stack = ~u64{0}; } else { // Share allocation between CPU and GPU pages and set their default values const size_t num_words = NumWords(); - u64* const alloc = new u64[num_words * 2]; + u64* const alloc = new u64[num_words * 4]; cpu.heap = alloc; gpu.heap = alloc + num_words; + cached_cpu.heap = alloc + num_words * 2; + untracked.heap = alloc + num_words * 3; std::fill_n(cpu.heap, num_words, ~u64{0}); std::fill_n(gpu.heap, num_words, 0); + std::fill_n(cached_cpu.heap, num_words, 0); + std::fill_n(untracked.heap, num_words, ~u64{0}); } // Clean up tailing bits - const u64 last_local_page = - Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); + const u64 last_word_size = size_bytes % BYTES_PER_WORD; + const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; - u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; - last_word = (last_word << shift) >> shift; + const u64 last_word = (~u64{0} << shift) >> shift; + cpu.Pointer(IsShort())[NumWords() - 1] = last_word; + untracked.Pointer(IsShort())[NumWords() - 1] = last_word; } - ~GpuCpuWords() { + ~Words() { Release(); } - GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { + Words& operator=(Words&& rhs) noexcept { Release(); size_bytes = rhs.size_bytes; cpu = rhs.cpu; gpu = rhs.gpu; + cached_cpu = rhs.cached_cpu; + untracked = rhs.untracked; rhs.cpu.heap = nullptr; return *this; } - GpuCpuWords(GpuCpuWords&& rhs) noexcept - : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { + Words(Words&& rhs) noexcept + : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, + cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { rhs.cpu.heap = nullptr; } - GpuCpuWords& operator=(const GpuCpuWords&) = delete; - GpuCpuWords(const GpuCpuWords&) = delete; + Words& operator=(const Words&) = delete; + Words(const Words&) = delete; /// Returns true when the buffer fits in the small vector optimization [[nodiscard]] bool IsShort() const noexcept { @@ -118,8 +129,17 @@ class BufferBase { } u64 size_bytes = 0; - WrittenWords cpu; - WrittenWords gpu; + WordsArray cpu; + WordsArray gpu; + WordsArray cached_cpu; + WordsArray untracked; + }; + + enum class Type { + CPU, + GPU, + CachedCPU, + Untracked, }; public: @@ -132,68 +152,93 @@ public: BufferBase& operator=(const BufferBase&) = delete; BufferBase(const BufferBase&) = delete; + BufferBase& operator=(BufferBase&&) = default; + BufferBase(BufferBase&&) = default; + /// Returns the inclusive CPU modified range in a begin end pair [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return ModifiedRegion<false>(offset, query_size); + return ModifiedRegion<Type::CPU>(offset, query_size); } /// Returns the inclusive GPU modified range in a begin end pair [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, u64 query_size) const noexcept { const u64 offset = query_cpu_addr - cpu_addr; - return ModifiedRegion<true>(offset, query_size); + return 
ModifiedRegion<Type::GPU>(offset, query_size);
     }

     /// Returns true if a region has been modified from the CPU
     [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<false>(offset, query_size);
+        return IsRegionModified<Type::CPU>(offset, query_size);
     }

     /// Returns true if a region has been modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
         const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<true>(offset, query_size);
+        return IsRegionModified<Type::GPU>(offset, query_size);
     }

     /// Mark region as CPU modified, notifying the rasterizer about this change
     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
     }

     /// Unmark region as CPU modified, notifying the rasterizer about this change
     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
     }

     /// Mark region as modified from the host GPU
     void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
     }

     /// Unmark region as modified from the host GPU
     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
+    }
+
+    /// Mark a region as modified from the CPU,
+    /// but do not report it as CPU-modified until FlushCachedWrites is called.
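The two members introduced just below, CachedCpuWrite and FlushCachedWrites, implement that deferral: a cached write sets bits in the separate cached_cpu plane (and drops rasterizer tracking), and the pages only start reporting as CPU-modified once the flush folds those bits into the cpu and untracked planes. A minimal single-word sketch of the flow; the names are illustrative, not the real BufferBase API, the rasterizer notification is omitted, and clearing the pending plane on flush is a simplification:

    #include <cassert>
    #include <cstdint>

    struct PageWord {
        std::uint64_t cpu = 0;        // pages reported as CPU-modified
        std::uint64_t cached_cpu = 0; // pending writes, not yet reported
        std::uint64_t untracked = 0;  // pages the rasterizer no longer tracks

        void CachedCpuWrite(std::uint64_t bits) {
            cached_cpu |= bits; // record the write, but do not report it yet
        }
        void FlushCachedWrites() {
            // Fold the pending bits into both planes; the real code walks every
            // tracking word and also notifies the rasterizer about the delta.
            untracked |= cached_cpu;
            cpu |= cached_cpu;
            cached_cpu = 0;
        }
        bool IsCpuModified(std::uint64_t bits) const {
            return (cpu & bits) != 0;
        }
    };

    int main() {
        PageWord word;
        word.CachedCpuWrite(1ULL << 1);         // write to page 1
        assert(!word.IsCpuModified(1ULL << 1)); // not visible yet
        word.FlushCachedWrites();
        assert(word.IsCpuModified(1ULL << 1));  // visible after the flush
        return 0;
    }

This is the ordering the new buffer_base tests above assert: IsRegionCpuModified stays false after CachedCpuWrite and becomes true only after FlushCachedWrites.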
+    void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
+        flags |= BufferFlagBits::CachedWrites;
+        ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
+    }
+
+    /// Flushes cached CPU writes and notifies the rasterizer about the deltas
+    void FlushCachedWrites() noexcept {
+        flags &= ~BufferFlagBits::CachedWrites;
+        const u64 num_words = NumWords();
+        const u64* const cached_words = Array<Type::CachedCPU>();
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const cpu_words = Array<Type::CPU>();
+        for (u64 word_index = 0; word_index < num_words; ++word_index) {
+            const u64 cached_bits = cached_words[word_index];
+            NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
+            untracked_words[word_index] |= cached_bits;
+            cpu_words[word_index] |= cached_bits;
+        }
+    }

     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
     template <typename Func>
     void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<false, true>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
     }

     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <typename Func>
     void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<true, false>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
     }

     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
     template <typename Func>
     void ForEachDownloadRange(Func&& func) {
-        ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
+        ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
     }

     /// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
         flags &= ~BufferFlagBits::Picked;
     }

+    /// Increases the likelihood of this being a stream buffer
+    void IncreaseStreamScore(int score) noexcept {
+        stream_score += score;
+    }
+
+    /// Returns the likelihood of this being a stream buffer
+    [[nodiscard]] int StreamScore() const noexcept {
+        return stream_score;
+    }
+
     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
         return True(flags & BufferFlagBits::Picked);
     }

+    /// Returns true when the buffer has pending cached writes
+    [[nodiscard]] bool HasCachedWrites() const noexcept {
+        return True(flags & BufferFlagBits::CachedWrites);
+    }
+
     /// Returns the base CPU address of the buffer
     [[nodiscard]] VAddr CpuAddr() const noexcept {
         return cpu_addr;
@@ -233,26 +293,48 @@ public:
     }

 private:
+    template <Type type>
+    u64* Array() noexcept {
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) {
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
+    template <Type type>
+    const u64* Array() const noexcept {
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) {
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
     /**
      * Change the
state of a range of pages * - * @param written_words Pages to be marked or unmarked as modified * @param dirty_addr Base address to mark or unmark as modified * @param size Size in bytes to mark or unmark as modified - * - * @tparam enable True when the bits will be set to one, false for zero - * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes */ - template <bool enable, bool notify_rasterizer> - void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, - s64 size) noexcept(!notify_rasterizer) { + template <Type type, bool enable> + void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { const s64 difference = dirty_addr - cpu_addr; const u64 offset = std::max<s64>(difference, 0); size += std::min<s64>(difference, 0); if (offset >= SizeBytes() || size < 0) { return; } - u64* const state_words = written_words.Pointer(IsShort()); + u64* const untracked_words = Array<Type::Untracked>(); + u64* const state_words = Array<type>(); const u64 offset_end = std::min(offset + size, SizeBytes()); const u64 begin_page_index = offset / BYTES_PER_PAGE; const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; @@ -268,13 +350,19 @@ private: u64 bits = ~u64{0}; bits = (bits >> right_offset) << right_offset; bits = (bits << left_offset) >> left_offset; - if constexpr (notify_rasterizer) { - NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); } if constexpr (enable) { state_words[word_index] |= bits; + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + untracked_words[word_index] |= bits; + } } else { state_words[word_index] &= ~bits; + if constexpr (type == Type::CPU || type == Type::CachedCPU) { + untracked_words[word_index] &= ~bits; + } } page_index = 0; ++word_index; @@ -291,7 +379,7 @@ private: * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages */ template <bool add_to_rasterizer> - void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { + void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; while (changed_bits != 0) { @@ -315,21 +403,20 @@ private: * @param query_cpu_range Base CPU address to loop over * @param size Size in bytes of the CPU range to loop over * @param func Function to call for each turned off region - * - * @tparam gpu True for host GPU pages, false for CPU pages - * @tparam notify_rasterizer True when the rasterizer should be notified about state changes */ - template <bool gpu, bool notify_rasterizer, typename Func> + template <Type type, typename Func> void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + static_assert(type != Type::Untracked); + const s64 difference = query_cpu_range - cpu_addr; const u64 query_begin = std::max<s64>(difference, 0); size += std::min<s64>(difference, 0); if (query_begin >= SizeBytes() || size < 0) { return; } - const u64* const cpu_words = words.cpu.Pointer(IsShort()); + u64* const untracked_words = Array<Type::Untracked>(); + u64* const state_words = Array<type>(); const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); - u64* const state_words = (gpu ? 
words.gpu : words.cpu).Pointer(IsShort()); u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); @@ -345,7 +432,8 @@ private: const u64 word_index_end = std::distance(state_words, last_modified_word); const unsigned local_page_begin = std::countr_zero(*first_modified_word); - const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); + const unsigned local_page_end = + static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; const u64 query_page_begin = query_begin / BYTES_PER_PAGE; @@ -371,11 +459,13 @@ private: const u64 current_word = state_words[word_index] & bits; state_words[word_index] &= ~bits; - // Exclude CPU modified pages when visiting GPU pages - const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); - if constexpr (notify_rasterizer) { - NotifyRasterizer<true>(word_index, word, ~u64{0}); + if constexpr (type == Type::CPU) { + const u64 current_bits = untracked_words[word_index] & bits; + untracked_words[word_index] &= ~bits; + NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); } + // Exclude CPU modified pages when visiting GPU pages + const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); u64 page = page_begin; page_begin = 0; @@ -416,17 +506,20 @@ private: * @param offset Offset in bytes from the start of the buffer * @param size Size in bytes of the region to query for modifications */ - template <bool gpu> + template <Type type> [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { - const u64* const cpu_words = words.cpu.Pointer(IsShort()); - const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + static_assert(type != Type::Untracked); + + const u64* const untracked_words = Array<Type::Untracked>(); + const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; const u64 word_end = std::min(word_begin + num_query_words, NumWords()); const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { - const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; + const u64 word = state_words[word_index] & ~off_word; if (word == 0) { continue; } @@ -445,13 +538,13 @@ private: * * @param offset Offset in bytes from the start of the buffer * @param size Size in bytes of the region to query for modifications - * - * @tparam gpu True to query GPU modified pages, false for CPU pages */ - template <bool gpu> + template <Type type> [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { - const u64* const cpu_words = words.cpu.Pointer(IsShort()); - const u64* const state_words = (gpu ? 
words.gpu : words.cpu).Pointer(IsShort()); + static_assert(type != Type::Untracked); + + const u64* const untracked_words = Array<Type::Untracked>(); + const u64* const state_words = Array<type>(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; const u64 word_end = std::min(word_begin + num_query_words, NumWords()); @@ -460,7 +553,8 @@ private: u64 begin = std::numeric_limits<u64>::max(); u64 end = 0; for (u64 word_index = word_begin; word_index < word_end; ++word_index) { - const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; + const u64 word = state_words[word_index] & ~off_word; if (word == 0) { continue; } @@ -488,8 +582,9 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; - GpuCpuWords words; + Words words; BufferFlagBits flags{}; + int stream_score = 0; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h deleted file mode 100644 index e9306194af..0000000000 --- a/src/video_core/buffer_cache/buffer_block.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace VideoCommon { - -class BufferBlock { -public: - [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const { - return (cpu_addr < end) && (cpu_addr_end > start); - } - - [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const { - return cpu_addr <= other_start && other_end <= cpu_addr_end; - } - - [[nodiscard]] std::size_t Offset(VAddr in_addr) const { - return static_cast<std::size_t>(in_addr - cpu_addr); - } - - [[nodiscard]] VAddr CpuAddr() const { - return cpu_addr; - } - - [[nodiscard]] VAddr CpuAddrEnd() const { - return cpu_addr_end; - } - - void SetCpuAddr(VAddr new_addr) { - cpu_addr = new_addr; - cpu_addr_end = new_addr + size; - } - - [[nodiscard]] std::size_t Size() const { - return size; - } - - [[nodiscard]] u64 Epoch() const { - return epoch; - } - - void SetEpoch(u64 new_epoch) { - epoch = new_epoch; - } - -protected: - explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { - SetCpuAddr(cpu_addr_); - } - -private: - VAddr cpu_addr{}; - VAddr cpu_addr_end{}; - std::size_t size{}; - u64 epoch{}; -}; - -} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp new file mode 100644 index 0000000000..ab32294c83 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
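The region queries above (ForEachModifiedRange, IsRegionModified, ModifiedRegion) all come down to the same C++20 <bit> scan: one bit per page packed into u64 words, with std::countr_zero locating the first dirty page in a word and std::countl_zero the one past the last. A small standalone illustration with a made-up mask:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr unsigned PAGES_PER_WORD = 64;
        // Bit i set == page i modified; here pages 3 through 9 are dirty.
        const std::uint64_t word = 0b11'1111'1000;
        const unsigned page_begin = std::countr_zero(word);                // 3
        const unsigned page_end = PAGES_PER_WORD - std::countl_zero(word); // 10
        std::printf("dirty pages: [%u, %u)\n", page_begin, page_end);
        return 0;
    }

The multi-word case in ForEachModifiedRange is the same idea applied to the first and last non-zero words of the queried span.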
+ +#include "common/microprofile.h" + +namespace VideoCommon { + +MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128)); +MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); +MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 83b9ee871c..2a6844ab1f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -4,591 +4,1289 @@ #pragma once -#include <list> +#include <algorithm> +#include <array> +#include <deque> #include <memory> #include <mutex> +#include <span> #include <unordered_map> -#include <unordered_set> -#include <utility> #include <vector> #include <boost/container/small_vector.hpp> -#include <boost/icl/interval_set.hpp> -#include <boost/intrusive/set.hpp> -#include "common/alignment.h" -#include "common/assert.h" #include "common/common_types.h" -#include "common/logging/log.h" -#include "core/core.h" +#include "common/div_ceil.h" +#include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/memory.h" #include "core/settings.h" -#include "video_core/buffer_cache/buffer_block.h" -#include "video_core/buffer_cache/map_interval.h" +#include "video_core/buffer_cache/buffer_base.h" +#include "video_core/delayed_destruction_ring.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" namespace VideoCommon { -template <typename Buffer, typename BufferType, typename StreamBuffer> +MICROPROFILE_DECLARE(GPU_PrepareBuffers); +MICROPROFILE_DECLARE(GPU_BindUploadBuffers); +MICROPROFILE_DECLARE(GPU_DownloadMemory); + +using BufferId = SlotId; + +constexpr u32 NUM_VERTEX_BUFFERS = 32; +constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; +constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; +constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; +constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_STAGES = 5; + +template <typename P> class BufferCache { - using IntervalSet = boost::icl::interval_set<VAddr>; - using IntervalType = typename IntervalSet::interval_type; - using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; + // Page size for caching purposes. + // This is unrelated to the CPU page size and it can be changed as it seems optimal. 
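That caching page is what the members defined next build on: PAGE_BITS and PAGE_SIZE fix a 64 KiB granule, and the flat page_table of BufferId declared further down maps every such page to at most one buffer, so walks like ForEachBufferInRange can leap over a whole buffer as soon as any of its pages hits. A self-contained sketch of that walk, with invented addresses, sizes and ids:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    constexpr std::uint32_t PAGE_BITS = 16; // 64 KiB pages, as in the cache
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

    constexpr std::uint64_t DivCeil(std::uint64_t num, std::uint64_t den) {
        return (num + den - 1) / den;
    }

    struct FakeBuffer {
        std::uint64_t cpu_addr;
        std::uint64_t size;
    };

    int main() {
        // Tiny stand-in for the real ((1ULL << 39) >> PAGE_BITS)-entry table.
        std::vector<std::uint32_t> page_table(64, 0); // id 0 == no buffer here
        const FakeBuffer buffer{0x30000, 0x20000};    // covers pages 3 and 4
        const std::uint64_t buffer_end = buffer.cpu_addr + buffer.size;
        for (std::uint64_t page = buffer.cpu_addr >> PAGE_BITS;
             page < DivCeil(buffer_end, PAGE_SIZE); ++page) {
            page_table[page] = 7; // register buffer id 7
        }
        // Query an overlapping range; a hit skips past the whole buffer.
        const std::uint64_t addr = 0x28000;
        const std::uint64_t page_end = DivCeil(addr + 0x40000, PAGE_SIZE);
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end;) {
            const std::uint32_t id = page_table[page];
            if (id == 0) {
                ++page;
                continue;
            }
            std::printf("page %llu -> buffer %u\n",
                        static_cast<unsigned long long>(page),
                        static_cast<unsigned>(id));
            page = DivCeil(buffer_end, PAGE_SIZE); // leap over the buffer
        }
        return 0;
    }

One table entry per page keeps lookups O(1), and the skip-by-buffer-end step keeps range walks proportional to the number of distinct buffers touched rather than the number of pages.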
+ static constexpr u32 PAGE_BITS = 16; + static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS; + + static constexpr bool IS_OPENGL = P::IS_OPENGL; + static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = + P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; + static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = + P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; + static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; + static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + + static constexpr BufferId NULL_BUFFER_ID{0}; + + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + + using Runtime = typename P::Runtime; + using Buffer = typename P::Buffer; + + struct Empty {}; + + struct OverlapResult { + std::vector<BufferId> ids; + VAddr begin; + VAddr end; + bool has_stream_leap = false; + }; - static constexpr u64 WRITE_PAGE_BIT = 11; - static constexpr u64 BLOCK_PAGE_BITS = 21; - static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; + struct Binding { + VAddr cpu_addr{}; + u32 size{}; + BufferId buffer_id; + }; -public: - struct BufferInfo { - BufferType handle; - u64 offset; - u64 address; + static constexpr Binding NULL_BINDING{ + .cpu_addr = 0, + .size = 0, + .buffer_id = NULL_BUFFER_ID, }; - BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool is_written = false, bool use_fast_cbuf = false) { - std::lock_guard lock{mutex}; +public: + static constexpr u32 SKIP_CACHE_SIZE = 4096; - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - return GetEmptyBuffer(size); - } + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + Runtime& runtime_); - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. 
- constexpr std::size_t max_stream_size = 0x800; - if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) { - const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size); - if (use_fast_cbuf) { - u8* dest; - if (is_granular) { - dest = gpu_memory.GetPointer(gpu_addr); - } else { - staging_buffer.resize(size); - dest = staging_buffer.data(); - gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); - } - return ConstBufferUpload(dest, size); - } - if (is_granular) { - u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); - return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { - std::memcpy(dest, host_ptr, size); - }); - } else { - return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) { - gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); - }); - } - } - } + void TickFrame(); - Buffer* const block = GetBlock(*cpu_addr, size); - MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size); - if (!map) { - return GetEmptyBuffer(size); - } - if (is_written) { - map->MarkAsModified(true, GetModifiedTicks()); - if (Settings::IsGPULevelHigh() && - Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - MarkForAsyncFlush(map); - } - if (!map->is_written) { - map->is_written = true; - MarkRegionAsWritten(map->start, map->end - 1); - } - } + void WriteMemory(VAddr cpu_addr, u64 size); - return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; - } + void CachedWriteMemory(VAddr cpu_addr, u64 size); - /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. - BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4) { - std::lock_guard lock{mutex}; - return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { - std::memcpy(dest, raw_pointer, size); - }); - } + void DownloadMemory(VAddr cpu_addr, u64 size); - /// Prepares the buffer cache for data uploading - /// @param max_size Maximum number of bytes that will be uploaded - /// @return True when a stream buffer invalidation was required, false otherwise - void Map(std::size_t max_size) { - std::lock_guard lock{mutex}; + void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); - std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); - buffer_offset = buffer_offset_base; - } + void UpdateGraphicsBuffers(bool is_indexed); - /// Finishes the upload stream - void Unmap() { - std::lock_guard lock{mutex}; - stream_buffer.Unmap(buffer_offset - buffer_offset_base); - } + void UpdateComputeBuffers(); - /// Function called at the end of each frame, inteded for deferred operations - void TickFrame() { - ++epoch; + void BindHostGeometryBuffers(bool is_indexed); - while (!pending_destruction.empty()) { - // Delay at least 4 frames before destruction. - // This is due to triple buffering happening on some drivers. 
- static constexpr u64 epochs_to_destroy = 5; - if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { - break; - } - pending_destruction.pop(); - } - } + void BindHostStageBuffers(size_t stage); - /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + void BindHostComputeBuffers(); - VectorMapInterval objects = GetMapsInRange(addr, size); - std::sort(objects.begin(), objects.end(), - [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); - for (MapInterval* object : objects) { - if (object->is_modified && object->is_registered) { - mutex.unlock(); - FlushMap(object); - mutex.lock(); - } - } - } + void SetEnabledUniformBuffers(size_t stage, u32 enabled); - bool MustFlushRegion(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + void SetEnabledComputeUniformBuffers(u32 enabled); - const VectorMapInterval objects = GetMapsInRange(addr, size); - return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { - return map->is_modified && map->is_registered; - }); - } + void UnbindGraphicsStorageBuffers(size_t stage); - /// Mark the specified region as being invalidated - void InvalidateRegion(VAddr addr, u64 size) { - std::lock_guard lock{mutex}; + void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, + bool is_written); - for (auto& object : GetMapsInRange(addr, size)) { - if (object->is_registered) { - Unregister(object); - } - } - } + void UnbindComputeStorageBuffers(); - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{mutex}; + void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, + bool is_written); - for (MapInterval* object : GetMapsInRange(addr, size)) { - if (object->is_memory_marked && object->is_registered) { - UnmarkMemory(object); - object->is_sync_pending = true; - marked_for_unregister.emplace_back(object); - } - } - } + void FlushCachedWrites(); - void SyncGuestHost() { - std::lock_guard lock{mutex}; + /// Return true when there are uncommitted buffers to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; - for (auto& object : marked_for_unregister) { - if (object->is_registered) { - object->is_sync_pending = false; - Unregister(object); - } + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; + + /// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + + std::mutex mutex; + +private: + template <typename Func> + static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { + for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { + const int disabled_bits = std::countr_zero(enabled_mask); + index += disabled_bits; + enabled_mask >>= disabled_bits; + func(index); } - marked_for_unregister.clear(); } - void CommitAsyncFlushes() { - if (uncommitted_flushes) { - auto commit_list = std::make_shared<std::list<MapInterval*>>(); - for (MapInterval* map : *uncommitted_flushes) { - if (map->is_registered && map->is_modified) { - // TODO(Blinkhawk): Implement backend asynchronous flushing - // AsyncFlushMap(map) - commit_list->push_back(map); - } - } - if (!commit_list->empty()) { - 
committed_flushes.push_back(commit_list); - } else { - committed_flushes.emplace_back(); + template <typename Func> + void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { + const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE); + for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) { + const BufferId buffer_id = page_table[page]; + if (!buffer_id) { + ++page; + continue; } - } else { - committed_flushes.emplace_back(); + Buffer& buffer = slot_buffers[buffer_id]; + func(buffer_id, buffer); + + const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); + page = Common::DivCeil(end_addr, PAGE_SIZE); } - uncommitted_flushes.reset(); } - bool ShouldWaitAsyncFlushes() const { - return !committed_flushes.empty() && committed_flushes.front() != nullptr; + static bool IsRangeGranular(VAddr cpu_addr, size_t size) { + return (cpu_addr & ~Core::Memory::PAGE_MASK) == + ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); } - bool HasUncommittedFlushes() const { - return uncommitted_flushes != nullptr; - } + void BindHostIndexBuffer(); - void PopAsyncFlushes() { - if (committed_flushes.empty()) { - return; - } - auto& flush_list = committed_flushes.front(); - if (!flush_list) { - committed_flushes.pop_front(); - return; - } - for (MapInterval* map : *flush_list) { - if (map->is_registered) { - // TODO(Blinkhawk): Replace this for reading the asynchronous flush - FlushMap(map); - } - } - committed_flushes.pop_front(); - } + void BindHostVertexBuffers(); - virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; + void BindHostGraphicsUniformBuffers(size_t stage); -protected: - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - StreamBuffer& stream_buffer_) - : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, - stream_buffer{stream_buffer_} {} + void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); - ~BufferCache() = default; + void BindHostGraphicsStorageBuffers(size_t stage); - virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; + void BindHostTransformFeedbackBuffers(); - virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { - return {}; - } + void BindHostComputeUniformBuffers(); - /// Register an object into the cache - MapInterval* Register(MapInterval new_map, bool inherit_written = false) { - const VAddr cpu_addr = new_map.start; - if (!cpu_addr) { - LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", - new_map.gpu_addr); - return nullptr; - } - const std::size_t size = new_map.end - new_map.start; - new_map.is_registered = true; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - new_map.is_memory_marked = true; - if (inherit_written) { - MarkRegionAsWritten(new_map.start, new_map.end - 1); - new_map.is_written = true; - } - MapInterval* const storage = mapped_addresses_allocator.Allocate(); - *storage = new_map; - mapped_addresses.insert(*storage); - return storage; - } + void BindHostComputeStorageBuffers(); - void UnmarkMemory(MapInterval* map) { - if (!map->is_memory_marked) { - return; - } - const std::size_t size = map->end - map->start; - rasterizer.UpdatePagesCachedCount(map->start, size, -1); - map->is_memory_marked = false; - } - - /// Unregisters an object from the cache - void Unregister(MapInterval* map) { - UnmarkMemory(map); - map->is_registered = false; - if (map->is_sync_pending) { - 
map->is_sync_pending = false; - marked_for_unregister.remove(map); + void DoUpdateGraphicsBuffers(bool is_indexed); + + void DoUpdateComputeBuffers(); + + void UpdateIndexBuffer(); + + void UpdateVertexBuffers(); + + void UpdateVertexBuffer(u32 index); + + void UpdateUniformBuffers(size_t stage); + + void UpdateStorageBuffers(size_t stage); + + void UpdateTransformFeedbackBuffers(); + + void UpdateTransformFeedbackBuffer(u32 index); + + void UpdateComputeUniformBuffers(); + + void UpdateComputeStorageBuffers(); + + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); + + [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); + + [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); + + void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); + + [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); + + void Register(BufferId buffer_id); + + void Unregister(BufferId buffer_id); + + template <bool insert> + void ChangeRegister(BufferId buffer_id); + + void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); + + void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); + + void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, + std::span<BufferCopy> copies); + + void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, + std::span<const BufferCopy> copies); + + void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); + + void DeleteBuffer(BufferId buffer_id); + + void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); + + void NotifyBufferDeletion(); + + [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + + [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); + + [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); + + [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; + + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + Core::Memory::Memory& cpu_memory; + Runtime& runtime; + + SlotVector<Buffer> slot_buffers; + DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; + + u32 last_index_count = 0; + + Binding index_buffer; + std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; + std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; + std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; + + std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; + std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; + u32 enabled_compute_uniform_buffers = 0; + + std::array<u32, NUM_STAGES> enabled_storage_buffers{}; + std::array<u32, NUM_STAGES> written_storage_buffers{}; + u32 enabled_compute_storage_buffers = 0; + u32 written_compute_storage_buffers = 0; + + std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + + bool has_deleted_buffers = false; + + std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> + dirty_uniform_buffers{}; + + std::vector<BufferId> cached_write_buffer_ids; + + // TODO: This data structure is not optimal and it should be reworked + std::vector<BufferId> 
uncommitted_downloads; + std::deque<std::vector<BufferId>> committed_downloads; + + size_t immediate_buffer_capacity = 0; + std::unique_ptr<u8[]> immediate_buffer_alloc; + + std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; +}; + +template <class P> +BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + Runtime& runtime_) + : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + // Ensure the first slot is used for the null buffer + void(slot_buffers.insert(runtime, NullBufferParams{})); +} + +template <class P> +void BufferCache<P>::TickFrame() { + delayed_destruction_ring.Tick(); +} + +template <class P> +void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { + ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { + buffer.MarkRegionAsCpuModified(cpu_addr, size); + }); +} + +template <class P> +void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { + ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { + if (!buffer.HasCachedWrites()) { + cached_write_buffer_ids.push_back(buffer_id); } - if (map->is_written) { - UnmarkRegionAsWritten(map->start, map->end - 1); + buffer.CachedCpuWrite(cpu_addr, size); + }); +} + +template <class P> +void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { + ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { + boost::container::small_vector<BufferCopy, 1> copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { + copies.push_back(BufferCopy{ + .src_offset = range_offset, + .dst_offset = total_size_bytes, + .size = range_size, + }); + total_size_bytes += range_size; + largest_copy = std::max(largest_copy, range_size); + }); + if (total_size_bytes == 0) { + return; } - const auto it = mapped_addresses.find(*map); - ASSERT(it != mapped_addresses.end()); - mapped_addresses.erase(it); - mapped_addresses_allocator.Release(map); - } - -private: - MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { - const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); - if (overlaps.empty()) { - const VAddr cpu_addr_end = cpu_addr + size; - if (gpu_memory.IsGranularRange(gpu_addr, size)) { - u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); - block->Upload(block->Offset(cpu_addr), size, host_ptr); - } else { - staging_buffer.resize(size); - gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); + MICROPROFILE_SCOPE(GPU_DownloadMemory); + + if constexpr (USE_MEMORY_MAPS) { + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + const u8* const mapped_memory = download_staging.mapped_span.data(); + const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); + for (BufferCopy& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dst_offset += download_staging.offset; } - return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); - } - - const VAddr cpu_addr_end = cpu_addr + size; - if (overlaps.size() == 1) { - MapInterval* const current_map = overlaps[0]; - if (current_map->IsInside(cpu_addr, 
cpu_addr_end)) { - return current_map; + runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); + runtime.Finish(); + for (const BufferCopy& copy : copies) { + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* copy_mapped_memory = mapped_memory + dst_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); + } + } else { + const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); + for (const BufferCopy& copy : copies) { + buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); } } - VAddr new_start = cpu_addr; - VAddr new_end = cpu_addr_end; - bool write_inheritance = false; - bool modified_inheritance = false; - // Calculate new buffer parameters - for (MapInterval* overlap : overlaps) { - new_start = std::min(overlap->start, new_start); - new_end = std::max(overlap->end, new_end); - write_inheritance |= overlap->is_written; - modified_inheritance |= overlap->is_modified; + }); +} + +template <class P> +void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, + u32 size) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + uniform_buffers[stage][index] = NULL_BINDING; + return; + } + const Binding binding{ + .cpu_addr = *cpu_addr, + .size = size, + .buffer_id = BufferId{}, + }; + uniform_buffers[stage][index] = binding; +} + +template <class P> +void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { + MICROPROFILE_SCOPE(GPU_PrepareBuffers); + do { + has_deleted_buffers = false; + DoUpdateGraphicsBuffers(is_indexed); + } while (has_deleted_buffers); +} + +template <class P> +void BufferCache<P>::UpdateComputeBuffers() { + MICROPROFILE_SCOPE(GPU_PrepareBuffers); + do { + has_deleted_buffers = false; + DoUpdateComputeBuffers(); + } while (has_deleted_buffers); +} + +template <class P> +void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { + MICROPROFILE_SCOPE(GPU_BindUploadBuffers); + if (is_indexed) { + BindHostIndexBuffer(); + } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { + const auto& regs = maxwell3d.regs; + if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { + runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count); } - GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; - for (auto& overlap : overlaps) { - Unregister(overlap); + } + BindHostVertexBuffers(); + BindHostTransformFeedbackBuffers(); +} + +template <class P> +void BufferCache<P>::BindHostStageBuffers(size_t stage) { + MICROPROFILE_SCOPE(GPU_BindUploadBuffers); + BindHostGraphicsUniformBuffers(stage); + BindHostGraphicsStorageBuffers(stage); +} + +template <class P> +void BufferCache<P>::BindHostComputeBuffers() { + MICROPROFILE_SCOPE(GPU_BindUploadBuffers); + BindHostComputeUniformBuffers(); + BindHostComputeStorageBuffers(); +} + +template <class P> +void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + if (enabled_uniform_buffers[stage] != enabled) { + dirty_uniform_buffers[stage] = ~u32{0}; } - UpdateBlock(block, new_start, new_end, overlaps); - - const MapInterval new_map{new_start, new_end, new_gpu_addr}; - MapInterval* const map = 
Register(new_map, write_inheritance); - if (!map) { - return nullptr; + } + enabled_uniform_buffers[stage] = enabled; +} + +template <class P> +void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { + enabled_compute_uniform_buffers = enabled; +} + +template <class P> +void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { + enabled_storage_buffers[stage] = 0; + written_storage_buffers[stage] = 0; +} + +template <class P> +void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, + u32 cbuf_offset, bool is_written) { + enabled_storage_buffers[stage] |= 1U << ssbo_index; + written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; + + const auto& cbufs = maxwell3d.state.shader_stages[stage]; + const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; + storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); +} + +template <class P> +void BufferCache<P>::UnbindComputeStorageBuffers() { + enabled_compute_storage_buffers = 0; + written_compute_storage_buffers = 0; +} + +template <class P> +void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, + bool is_written) { + enabled_compute_storage_buffers |= 1U << ssbo_index; + written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; + + const auto& launch_desc = kepler_compute.launch_description; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const auto& cbufs = launch_desc.const_buffer_config; + const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; + compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); +} + +template <class P> +void BufferCache<P>::FlushCachedWrites() { + for (const BufferId buffer_id : cached_write_buffer_ids) { + slot_buffers[buffer_id].FlushCachedWrites(); + } + cached_write_buffer_ids.clear(); +} + +template <class P> +bool BufferCache<P>::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} + +template <class P> +bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} + +template <class P> +void BufferCache<P>::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push_front(uncommitted_downloads); + uncommitted_downloads.clear(); +} + +template <class P> +void BufferCache<P>::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); }); + const std::span<const BufferId> download_ids = committed_downloads.back(); + if (download_ids.empty()) { + return; + } + MICROPROFILE_SCOPE(GPU_DownloadMemory); + + boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + for (const BufferId buffer_id : download_ids) { + slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) { + downloads.push_back({ + BufferCopy{ + .src_offset = range_offset, + .dst_offset = total_size_bytes, + .size = range_size, + }, + buffer_id, + }); + total_size_bytes += range_size; + largest_copy = std::max(largest_copy, range_size); + }); + } + if (downloads.empty()) { + return; + } + if constexpr (USE_MEMORY_MAPS) { + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + for (auto& [copy, buffer_id] : downloads) { + // Have in mind the staging 
buffer offset for the copy + copy.dst_offset += download_staging.offset; + const std::array copies{copy}; + runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); } - if (modified_inheritance) { - map->MarkAsModified(true, GetModifiedTicks()); - if (Settings::IsGPULevelHigh() && - Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - MarkForAsyncFlush(map); - } + runtime.Finish(); + for (const auto [copy, buffer_id] : downloads) { + const Buffer& buffer = slot_buffers[buffer_id]; + const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; + cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); + } + } else { + const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); + for (const auto [copy, buffer_id] : downloads) { + Buffer& buffer = slot_buffers[buffer_id]; + buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); + const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; + cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); } - return map; } - - void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { - const IntervalType base_interval{start, end}; - IntervalSet interval_set{}; - interval_set.add(base_interval); - for (auto& overlap : overlaps) { - const IntervalType subtract{overlap->start, overlap->end}; - interval_set.subtract(subtract); +} + +template <class P> +bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { + const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); + for (u64 page = addr >> PAGE_BITS; page < page_end;) { + const BufferId image_id = page_table[page]; + if (!image_id) { + ++page; + continue; } - for (auto& interval : interval_set) { - const std::size_t size = interval.upper() - interval.lower(); - if (size == 0) { - continue; - } - staging_buffer.resize(size); - cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); - block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); + Buffer& buffer = slot_buffers[image_id]; + if (buffer.IsRegionGpuModified(addr, size)) { + return true; } + const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); + page = Common::DivCeil(end_addr, PAGE_SIZE); } - - VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { - VectorMapInterval result; - if (size == 0) { - return result; + return false; +} + +template <class P> +void BufferCache<P>::BindHostIndexBuffer() { + Buffer& buffer = slot_buffers[index_buffer.buffer_id]; + const u32 offset = buffer.Offset(index_buffer.cpu_addr); + const u32 size = index_buffer.size; + SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); + if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { + runtime.BindIndexBuffer(buffer, offset, size); + } else { + runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, + maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, + buffer, offset, size); + } +} + +template <class P> +void BufferCache<P>::BindHostVertexBuffers() { + auto& flags = maxwell3d.dirty.flags; + for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { + const Binding& binding = vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); + if (!flags[Dirty::VertexBuffer0 
+ index]) { + continue; } + flags[Dirty::VertexBuffer0 + index] = false; - const VAddr addr_end = addr + size; - auto it = mapped_addresses.lower_bound(addr); - if (it != mapped_addresses.begin()) { - --it; + const u32 stride = maxwell3d.regs.vertex_array[index].stride; + const u32 offset = buffer.Offset(binding.cpu_addr); + runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); + } +} + +template <class P> +void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { + u32 dirty = ~0U; + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + dirty = std::exchange(dirty_uniform_buffers[stage], 0); + } + u32 binding_index = 0; + ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + const bool needs_bind = ((dirty >> index) & 1) != 0; + BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); + if constexpr (NEEDS_BIND_UNIFORM_INDEX) { + ++binding_index; } - while (it != mapped_addresses.end() && it->start < addr_end) { - if (it->Overlaps(addr, addr_end)) { - result.push_back(&*it); + }); +} + +template <class P> +void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, + bool needs_bind) { + const Binding& binding = uniform_buffers[stage][index]; + const VAddr cpu_addr = binding.cpu_addr; + const u32 size = binding.size; + Buffer& buffer = slot_buffers[binding.buffer_id]; + if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { + if constexpr (IS_OPENGL) { + if (runtime.HasFastBufferSubData()) { + // Fast path for Nvidia + if (!HasFastUniformBufferBound(stage, binding_index)) { + // We only have to bind when the currently bound buffer is not the fast version + runtime.BindFastUniformBuffer(stage, binding_index, size); + } + const auto span = ImmediateBufferWithData(cpu_addr, size); + runtime.PushFastUniformBuffer(stage, binding_index, span); + return; } - ++it; } - return result; - } + fast_bound_uniform_buffers[stage] |= 1U << binding_index; - /// Returns a ticks counter used for tracking when cached objects were last modified - u64 GetModifiedTicks() { - return ++modified_ticks; + // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan + const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); + cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); + return; } - - void FlushMap(MapInterval* map) { - const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); - ASSERT_OR_EXECUTE(it != blocks.end(), return;); - - std::shared_ptr<Buffer> block = it->second; - - const std::size_t size = map->end - map->start; - staging_buffer.resize(size); - block->Download(block->Offset(map->start), size, staging_buffer.data()); - cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size); - map->MarkAsModified(false, 0); + // Classic cached path + SynchronizeBuffer(buffer, cpu_addr, size); + if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + return; } + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - template <typename Callable> - BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { - AlignBuffer(alignment); - const std::size_t uploaded_offset = buffer_offset; - callable(buffer_ptr); - - buffer_ptr += size; - buffer_offset += size; - return BufferInfo{stream_buffer.Handle(), 
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
+                                                   bool needs_bind) {
+    const Binding& binding = uniform_buffers[stage][index];
+    const VAddr cpu_addr = binding.cpu_addr;
+    const u32 size = binding.size;
+    Buffer& buffer = slot_buffers[binding.buffer_id];
+    if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
+        if constexpr (IS_OPENGL) {
+            if (runtime.HasFastBufferSubData()) {
+                // Fast path for Nvidia
+                if (!HasFastUniformBufferBound(stage, binding_index)) {
+                    // We only have to bind when the currently bound buffer is not the fast version
+                    runtime.BindFastUniformBuffer(stage, binding_index, size);
+                }
+                const auto span = ImmediateBufferWithData(cpu_addr, size);
+                runtime.PushFastUniformBuffer(stage, binding_index, span);
+                return;
             }
-            ++it;
         }
-        return result;
-    }
+        fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-    /// Returns a ticks counter used for tracking when cached objects were last modified
-    u64 GetModifiedTicks() {
-        return ++modified_ticks;
+        // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
+        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+        return;
     }
-
-    void FlushMap(MapInterval* map) {
-        const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
-        ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
-        std::shared_ptr<Buffer> block = it->second;
-
-        const std::size_t size = map->end - map->start;
-        staging_buffer.resize(size);
-        block->Download(block->Offset(map->start), size, staging_buffer.data());
-        cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
-        map->MarkAsModified(false, 0);
+    // Classic cached path
+    SynchronizeBuffer(buffer, cpu_addr, size);
+    if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
+        // Skip binding if it's not needed and if the bound buffer is not the fast version
+        // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+        return;
     }
+    fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-    template <typename Callable>
-    BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
-        AlignBuffer(alignment);
-        const std::size_t uploaded_offset = buffer_offset;
-        callable(buffer_ptr);
-
-        buffer_ptr += size;
-        buffer_offset += size;
-        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
+    const u32 offset = buffer.Offset(cpu_addr);
+    if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+        runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
+    } else {
+        runtime.BindUniformBuffer(buffer, offset, size);
     }
+}
+
+template <class P>
+void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
+    u32 binding_index = 0;
+    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+        const Binding& binding = storage_buffers[stage][index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+            runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
+            ++binding_index;
+        } else {
+            runtime.BindStorageBuffer(buffer, offset, size, is_written);
+        }
+    });
+}
-    void AlignBuffer(std::size_t alignment) {
-        // Align the offset, not the mapped pointer
-        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
-        buffer_ptr += offset_aligned - buffer_offset;
-        buffer_offset = offset_aligned;
+template <class P>
+void BufferCache<P>::BindHostTransformFeedbackBuffers() {
+    if (maxwell3d.regs.tfb_enabled == 0) {
+        return;
     }
+    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+        const Binding& binding = transform_feedback_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        runtime.BindTransformFeedbackBuffer(index, buffer, offset, size);
+    }
+}
-    std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
-        const std::size_t old_size = buffer->Size();
-        const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
-        const VAddr cpu_addr = buffer->CpuAddr();
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
-        new_buffer->CopyFrom(*buffer, 0, 0, old_size);
-        QueueDestruction(std::move(buffer));
-
-        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
+template <class P>
+void BufferCache<P>::BindHostComputeUniformBuffers() {
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        // Mark all uniform buffers as dirty
+        dirty_uniform_buffers.fill(~u32{0});
+    }
+    u32 binding_index = 0;
+    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+        const Binding& binding = compute_uniform_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
+            runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
+            ++binding_index;
+        } else {
+            runtime.BindUniformBuffer(buffer, offset, size);
         }
+    });
+}
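BindHostGraphicsUniformBuffer above picks between three upload strategies. A distilled model of that decision, with illustrative names and an assumed SKIP_CACHE_SIZE threshold:

    #include <cstdint>

    enum class UniformPath { FastPush, MappedStream, CachedBind };

    // Mirrors the branch structure in the diff; the 4 KiB threshold is an assumption.
    UniformPath ChooseUniformPath(std::uint32_t size, bool gpu_modified, bool is_opengl,
                                  bool has_fast_buffer_sub_data) {
        constexpr std::uint32_t SKIP_CACHE_SIZE = 4096;
        if (size <= SKIP_CACHE_SIZE && !gpu_modified) {
            if (is_opengl && has_fast_buffer_sub_data) {
                return UniformPath::FastPush;   // push the data through the fast buffer
            }
            return UniformPath::MappedStream;   // copy into a mapped stream buffer
        }
        return UniformPath::CachedBind;         // synchronize and bind the cached buffer
    }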
+
+template <class P>
+void BufferCache<P>::BindHostComputeStorageBuffers() {
+    u32 binding_index = 0;
+    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+        const Binding& binding = compute_storage_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+        if constexpr (NEEDS_BIND_STORAGE_INDEX) {
+            runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
+            ++binding_index;
+        } else {
+            runtime.BindStorageBuffer(buffer, offset, size, is_written);
+        }
+    });
+}
-        return new_buffer;
+template <class P>
+void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
+    if (is_indexed) {
+        UpdateIndexBuffer();
     }
+    UpdateVertexBuffers();
+    UpdateTransformFeedbackBuffers();
+    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+        UpdateUniformBuffers(stage);
+        UpdateStorageBuffers(stage);
+    }
+}
+
+template <class P>
+void BufferCache<P>::DoUpdateComputeBuffers() {
+    UpdateComputeUniformBuffers();
+    UpdateComputeStorageBuffers();
+}
+
+template <class P>
+void BufferCache<P>::UpdateIndexBuffer() {
+    // We have to check for the dirty flags and index count
+    // The index count is currently changed without updating the dirty flags
+    const auto& index_array = maxwell3d.regs.index_array;
+    auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
+        return;
+    }
+    flags[Dirty::IndexBuffer] = false;
+    last_index_count = index_array.count;
+
+    const GPUVAddr gpu_addr_begin = index_array.StartAddress();
+    const GPUVAddr gpu_addr_end = index_array.EndAddress();
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+    const u32 size = std::min(address_size, draw_size);
+    if (size == 0 || !cpu_addr) {
+        index_buffer = NULL_BINDING;
+        return;
+    }
+    index_buffer = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = FindBuffer(*cpu_addr, size),
+    };
+}
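UpdateIndexBuffer clamps the binding to the smaller of the guest mapping and what the draw actually consumes. A worked example of that clamp:

    #include <algorithm>
    #include <cstdint>

    constexpr std::uint32_t BoundIndexSize(std::uint32_t address_size, std::uint32_t count,
                                           std::uint32_t format_size) {
        return std::min(address_size, count * format_size);
    }
    // 3000 16-bit indices inside a 64 KiB mapping: the draw, not the mapping,
    // bounds the upload at 6000 bytes.
    static_assert(BoundIndexSize(0x10000, 3000, 2) == 6000);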
-    std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
-                                        std::shared_ptr<Buffer> second) {
-        const std::size_t size_1 = first->Size();
-        const std::size_t size_2 = second->Size();
-        const VAddr first_addr = first->CpuAddr();
-        const VAddr second_addr = second->CpuAddr();
-        const VAddr new_addr = std::min(first_addr, second_addr);
-        const std::size_t new_size = size_1 + size_2;
-
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
-        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
-        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
-        QueueDestruction(std::move(first));
-        QueueDestruction(std::move(second));
+template <class P>
+void BufferCache<P>::UpdateVertexBuffers() {
+    auto& flags = maxwell3d.dirty.flags;
+    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+        return;
+    }
+    flags[Dirty::VertexBuffers] = false;
-        const VAddr cpu_addr_end = new_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
-        }
-        return new_buffer;
+    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+        UpdateVertexBuffer(index);
     }
+}
-    Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
-        std::shared_ptr<Buffer> found;
+template <class P>
+void BufferCache<P>::UpdateVertexBuffer(u32 index) {
+    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+        return;
+    }
+    const auto& array = maxwell3d.regs.vertex_array[index];
+    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const GPUVAddr gpu_addr_begin = array.StartAddress();
+    const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
+    const u32 size = address_size; // TODO: Analyze stride and number of vertices
+    if (array.enable == 0 || size == 0 || !cpu_addr) {
+        vertex_buffers[index] = NULL_BINDING;
+        return;
+    }
+    vertex_buffers[index] = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = FindBuffer(*cpu_addr, size),
+    };
+}
+
+template <class P>
+void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
+    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+        Binding& binding = uniform_buffers[stage][index];
+        if (binding.buffer_id) {
+            // Already updated
+            return;
+        }
+        // Mark as dirty
+        if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+            dirty_uniform_buffers[stage] |= 1U << index;
+        }
+        // Resolve buffer
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+    });
+}
+
+template <class P>
+void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
+    const u32 written_mask = written_storage_buffers[stage];
+    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+        // Resolve buffer
+        Binding& binding = storage_buffers[stage][index];
+        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = buffer_id;
+        // Mark buffer as written if needed
+        if (((written_mask >> index) & 1) != 0) {
+            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
-        const VAddr cpu_addr_end = cpu_addr + size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            auto it = blocks.find(page_start);
-            if (it == blocks.end()) {
-                if (found) {
-                    found = EnlargeBlock(found);
-                    continue;
-                }
-                const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
-                found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
-                blocks.insert_or_assign(page_start, found);
-                continue;
-            }
-            if (!found) {
-                found = it->second;
-                continue;
-            }
-            if (found != it->second) {
-                found = MergeBlocks(std::move(found), it->second);
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffers() {
+    if (maxwell3d.regs.tfb_enabled == 0) {
+        return;
+    }
+    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
+        UpdateTransformFeedbackBuffer(index);
+    }
+}
+
+template <class P>
+void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
+    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
+    const u32 size = binding.buffer_size;
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
+        transform_feedback_buffers[index] = NULL_BINDING;
+        return;
+    }
+    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+    transform_feedback_buffers[index] = Binding{
+        .cpu_addr = *cpu_addr,
+        .size = size,
+        .buffer_id = buffer_id,
+    };
+    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeUniformBuffers() {
+    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+        Binding& binding = compute_uniform_buffers[index];
+        binding = NULL_BINDING;
+        const auto& launch_desc = kepler_compute.launch_description;
+        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
+            const auto& cbuf = launch_desc.const_buffer_config[index];
+            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            if (cpu_addr) {
+                binding.cpu_addr = *cpu_addr;
+                binding.size = cbuf.size;
             }
         }
-        return found.get();
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+    });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeStorageBuffers() {
+    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+        // Resolve buffer
+        Binding& binding = compute_storage_buffers[index];
+        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = buffer_id;
+        // Mark as written if needed
+        if (((written_compute_storage_buffers >> index) & 1) != 0) {
+            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
+
+template <class P>
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
+    Buffer& buffer = slot_buffers[buffer_id];
+    buffer.MarkRegionAsGpuModified(cpu_addr, size);
+
+    const bool is_accuracy_high = Settings::IsGPULevelHigh();
+    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
+    if (!is_accuracy_high || !is_async) {
+        return;
+    }
+    if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
+        // Already inserted
+        return;
     }
+    uncommitted_downloads.push_back(buffer_id);
+}
-    void MarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) {
-                ++it->second;
-            }
+template <class P>
+BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
+    if (cpu_addr == 0) {
+        return NULL_BUFFER_ID;
+    }
+    const u64 page = cpu_addr >> PAGE_BITS;
+    const BufferId buffer_id = page_table[page];
+    if (!buffer_id) {
+        return CreateBuffer(cpu_addr, size);
+    }
+    const Buffer& buffer = slot_buffers[buffer_id];
+    if (buffer.IsInBounds(cpu_addr, size)) {
+        return buffer_id;
+    }
+    return CreateBuffer(cpu_addr, size);
+}
+
+template <class P>
+typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
+                                                                       u32 wanted_size) {
+    static constexpr int STREAM_LEAP_THRESHOLD = 16;
+    std::vector<BufferId> overlap_ids;
+    VAddr begin = cpu_addr;
+    VAddr end = cpu_addr + wanted_size;
+    int stream_score = 0;
+    bool has_stream_leap = false;
+    for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
+        const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
+        if (!overlap_id) {
+            continue;
+        }
+        Buffer& overlap = slot_buffers[overlap_id];
+        if (overlap.IsPicked()) {
+            continue;
+        }
+        overlap_ids.push_back(overlap_id);
+        overlap.Pick();
+        const VAddr overlap_cpu_addr = overlap.CpuAddr();
+        if (overlap_cpu_addr < begin) {
+            cpu_addr = begin = overlap_cpu_addr;
+        }
+        end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
+
+        stream_score += overlap.StreamScore();
+        if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
+            // When this memory region has been joined a bunch of times, we assume it's being used
+            // as a stream buffer. Increase the size to skip constantly recreating buffers.
+            has_stream_leap = true;
+            end += PAGE_SIZE * 256;
         }
     }
-
-    void UnmarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            auto it = written_pages.find(page_start);
-            if (it != written_pages.end()) {
-                if (it->second > 1) {
-                    --it->second;
-                } else {
-                    written_pages.erase(it);
-                }
-            }
+    return OverlapResult{
+        .ids = std::move(overlap_ids),
+        .begin = begin,
+        .end = end,
+        .has_stream_leap = has_stream_leap,
+    };
+}
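The stream-leap branch in ResolveOverlaps above is a one-time over-allocation. A small model of it, assuming 64 KiB pages (so the leap is 16 MiB):

    #include <cstdint>

    constexpr int STREAM_LEAP_THRESHOLD = 16;
    constexpr std::uint64_t PAGE_SIZE = 1ULL << 16; // assumed page size

    std::uint64_t GrowEnd(std::uint64_t end, int stream_score, bool& has_stream_leap) {
        if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
            has_stream_leap = true;
            end += PAGE_SIZE * 256; // leap well past the requested end, once
        }
        return end;
    }

Buffers that keep absorbing high-scoring overlaps are treated as stream buffers, so growing far past the requested end amortizes the recreate-join-delete cycle.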
+
+template <class P>
+void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
+                                 bool accumulate_stream_score) {
+    Buffer& new_buffer = slot_buffers[new_buffer_id];
+    Buffer& overlap = slot_buffers[overlap_id];
+    if (accumulate_stream_score) {
+        new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
+    }
+    std::vector<BufferCopy> copies;
+    const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
+    overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
+        copies.push_back(BufferCopy{
+            .src_offset = begin,
+            .dst_offset = dst_base_offset + begin,
+            .size = range_size,
+        });
+        new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
+        new_buffer.MarkRegionAsGpuModified(begin, range_size);
+    });
+    if (!copies.empty()) {
+        runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
+    }
+    ReplaceBufferDownloads(overlap_id, new_buffer_id);
+    DeleteBuffer(overlap_id);
+}
+
+template <class P>
+BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
+    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
+    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
+    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+    for (const BufferId overlap_id : overlap.ids) {
+        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
+    }
+    Register(new_buffer_id);
+    return new_buffer_id;
+}
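JoinOverlap rebases ranges tracked in the dying buffer into the buffer that absorbs it. The offset arithmetic in isolation:

    #include <cstdint>

    // dst_base_offset in the diff: where the old buffer starts inside the new one.
    constexpr std::uint64_t RebaseOffset(std::uint64_t new_base, std::uint64_t old_base,
                                         std::uint64_t range_begin) {
        return (old_base - new_base) + range_begin;
    }
    // A range at offset 0x200 of a buffer at 0x14000, absorbed by one at 0x10000,
    // lands at offset 0x4200.
    static_assert(RebaseOffset(0x10000, 0x14000, 0x200) == 0x4200);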
+
+template <class P>
+void BufferCache<P>::Register(BufferId buffer_id) {
+    ChangeRegister<true>(buffer_id);
+}
+
+template <class P>
+void BufferCache<P>::Unregister(BufferId buffer_id) {
+    ChangeRegister<false>(buffer_id);
+}
+
+template <class P>
+template <bool insert>
+void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
+    const Buffer& buffer = slot_buffers[buffer_id];
+    const VAddr cpu_addr_begin = buffer.CpuAddr();
+    const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes();
+    const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
+    const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
+    for (u64 page = page_begin; page != page_end; ++page) {
+        if constexpr (insert) {
+            page_table[page] = buffer_id;
+        } else {
+            page_table[page] = BufferId{};
         }
     }
+}
-    bool IsRegionWritten(VAddr start, VAddr end) const {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
-            if (written_pages.contains(page_start)) {
-                return true;
+template <class P>
+void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
+    if (buffer.CpuAddr() == 0) {
+        return;
+    }
+    SynchronizeBufferImpl(buffer, cpu_addr, size);
+}
+
+template <class P>
+void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
+    boost::container::small_vector<BufferCopy, 4> copies;
+    u64 total_size_bytes = 0;
+    u64 largest_copy = 0;
+    buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+        copies.push_back(BufferCopy{
+            .src_offset = total_size_bytes,
+            .dst_offset = range_offset,
+            .size = range_size,
+        });
+        total_size_bytes += range_size;
+        largest_copy = std::max(largest_copy, range_size);
+    });
+    if (total_size_bytes == 0) {
+        return;
+    }
+    const std::span<BufferCopy> copies_span(copies.data(), copies.size());
+    UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
+}
+
+template <class P>
+void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
+                                  std::span<BufferCopy> copies) {
+    if constexpr (USE_MEMORY_MAPS) {
+        MappedUploadMemory(buffer, total_size_bytes, copies);
+    } else {
+        ImmediateUploadMemory(buffer, largest_copy, copies);
+    }
+}
+
+template <class P>
+void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
+                                           std::span<const BufferCopy> copies) {
+    std::span<u8> immediate_buffer;
+    for (const BufferCopy& copy : copies) {
+        std::span<const u8> upload_span;
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+        if (IsRangeGranular(cpu_addr, copy.size)) {
+            upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
+        } else {
+            if (immediate_buffer.empty()) {
+                immediate_buffer = ImmediateBuffer(largest_copy);
             }
+            cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+            upload_span = immediate_buffer.subspan(0, copy.size);
         }
-        return false;
+        buffer.ImmediateUpload(copy.dst_offset, upload_span);
     }
-
-    void QueueDestruction(std::shared_ptr<Buffer> buffer) {
-        buffer->SetEpoch(epoch);
-        pending_destruction.push(std::move(buffer));
+}
+
+template <class P>
+void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
+                                        std::span<BufferCopy> copies) {
+    auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+    const std::span<u8> staging_pointer = upload_staging.mapped_span;
+    for (BufferCopy& copy : copies) {
+        u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+        cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+        // Apply the staging offset
+        copy.src_offset += upload_staging.offset;
     }
-
-    void MarkForAsyncFlush(MapInterval* map) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+}
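Both transfer directions juggle the same staging offset: copies are built relative to the mapped span, then rebased by the staging allocation's offset for the GPU copy, and rebased back when the CPU reads results (as in the download path earlier). A sketch of that bookkeeping, with a hypothetical StagingRef type:

    #include <cstdint>

    struct StagingRef {
        std::uint64_t offset; // start of this allocation inside the staging pool
    };

    constexpr std::uint64_t RebaseForGpu(std::uint64_t src_offset, StagingRef staging) {
        return src_offset + staging.offset; // what the GPU-side copy consumes
    }
    constexpr std::uint64_t RebaseForCpu(std::uint64_t dst_offset, StagingRef staging) {
        return dst_offset - staging.offset; // back into mapped_span for the write-back
    }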
+
+template <class P>
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
+    const auto scalar_replace = [buffer_id](Binding& binding) {
+        if (binding.buffer_id == buffer_id) {
+            binding.buffer_id = BufferId{};
+        }
+    };
+    const auto replace = [scalar_replace](std::span<Binding> bindings) {
+        std::ranges::for_each(bindings, scalar_replace);
+    };
+    scalar_replace(index_buffer);
+    replace(vertex_buffers);
+    std::ranges::for_each(uniform_buffers, replace);
+    std::ranges::for_each(storage_buffers, replace);
+    replace(transform_feedback_buffers);
+    replace(compute_uniform_buffers);
+    replace(compute_storage_buffers);
+    std::erase(cached_write_buffer_ids, buffer_id);
+
+    // Mark the whole buffer as CPU written to stop tracking CPU writes
+    Buffer& buffer = slot_buffers[buffer_id];
+    buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+
+    Unregister(buffer_id);
+    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+
+    NotifyBufferDeletion();
+}
+
+template <class P>
+void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
+    const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
+        std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
+        if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
+            buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
         }
-            uncommitted_flushes->insert(map);
+    };
+    replace(uncommitted_downloads);
+    std::ranges::for_each(committed_downloads, replace);
+}
+
+template <class P>
+void BufferCache<P>::NotifyBufferDeletion() {
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        dirty_uniform_buffers.fill(~u32{0});
     }
+    auto& flags = maxwell3d.dirty.flags;
+    flags[Dirty::IndexBuffer] = true;
+    flags[Dirty::VertexBuffers] = true;
+    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
+        flags[Dirty::VertexBuffer0 + index] = true;
+    }
+    has_deleted_buffers = true;
+}
+
+template <class P>
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
+    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr || size == 0) {
+        return NULL_BINDING;
+    }
+    // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range.
+    // It exists because some games, like Astral Chain, operate out of bounds.
+    // Binding the whole map range would be technically correct, but games have large maps that make
+    // this approach unaffordable for now.
+    static constexpr u32 arbitrary_extra_bytes = 0xc000;
+    const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr));
+    const Binding binding{
+        .cpu_addr = *cpu_addr,
+        .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end),
+        .buffer_id = BufferId{},
+    };
+    return binding;
+}
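StorageBufferBinding reads a small guest-side descriptor: a 64-bit GPU address followed by a 32-bit size at offset 8. A sketch of that decode against a raw byte view (layout inferred from the two Read calls; the struct name is illustrative):

    #include <cstdint>
    #include <cstring>

    struct SsboDescriptor {
        std::uint64_t gpu_addr;
        std::uint32_t size;
    };

    SsboDescriptor ReadSsboDescriptor(const std::uint8_t* bytes) {
        SsboDescriptor desc{};
        std::memcpy(&desc.gpu_addr, bytes, sizeof(desc.gpu_addr));
        std::memcpy(&desc.size, bytes + 8, sizeof(desc.size));
        return desc;
    }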
+
+template <class P>
+std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
+    u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
+    if (IsRangeGranular(cpu_addr, size) ||
+        base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) {
+        return std::span(base_pointer, size);
+    } else {
+        const std::span<u8> span = ImmediateBuffer(size);
+        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+        return span;
+    }
+}
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::MemoryManager& gpu_memory;
-    Core::Memory::Memory& cpu_memory;
-    StreamBuffer& stream_buffer;
-
-    u8* buffer_ptr = nullptr;
-    u64 buffer_offset = 0;
-    u64 buffer_offset_base = 0;
-
-    MapIntervalAllocator mapped_addresses_allocator;
-    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
-        mapped_addresses;
-
-    std::unordered_map<u64, u32> written_pages;
-    std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
-
-    std::queue<std::shared_ptr<Buffer>> pending_destruction;
-    u64 epoch = 0;
-    u64 modified_ticks = 0;
-
-    std::vector<u8> staging_buffer;
-
-    std::list<MapInterval*> marked_for_unregister;
-
-    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
-    std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
-
-    std::recursive_mutex mutex;
-};
+template <class P>
+std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
+    if (wanted_capacity > immediate_buffer_capacity) {
+        immediate_buffer_capacity = wanted_capacity;
+        immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity);
+    }
+    return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity);
+}
+
+template <class P>
+bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
+    if constexpr (IS_OPENGL) {
+        return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
+    } else {
+        // Only OpenGL has fast uniform buffers
+        return false;
+    }
+}
 
 } // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
deleted file mode 100644
index 62587e18a1..0000000000
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
-    FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
-    *new_chunk = std::make_unique<Chunk>();
-    FillFreeList(**new_chunk);
-    new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
-    const std::size_t old_size = free_list.size();
-    free_list.resize(old_size + chunk.data.size());
-    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
-                   [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
deleted file mode 100644
index ef974b08ae..0000000000
--- a/src/video_core/buffer_cache/map_interval.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
-    MapInterval() = default;
-
-    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
-    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
-        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
-    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
-        return start <= other_start && other_end <= end;
-    }
-
-    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
-        return start < other_end && other_start < end;
-    }
-
-    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
-        is_modified = is_modified_;
-        ticks = ticks_;
-    }
-
-    boost::intrusive::set_member_hook<> member_hook_;
-    VAddr start = 0;
-    VAddr end = 0;
-    GPUVAddr gpu_addr = 0;
-    u64 ticks = 0;
-    bool is_written = false;
-    bool is_modified = false;
-    bool is_registered = false;
-    bool is_memory_marked = false;
-    bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
-    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
-        return lhs.start < rhs.start;
-    }
-};
-
-class MapIntervalAllocator {
-public:
-    MapIntervalAllocator();
-    ~MapIntervalAllocator();
-
-    MapInterval* Allocate() {
-        if (free_list.empty()) {
-            AllocateNewChunk();
-        }
-        MapInterval* const interval = free_list.back();
-        free_list.pop_back();
-        return interval;
-    }
-
-    void Release(MapInterval* interval) {
-        free_list.push_back(interval);
-    }
-
-private:
-    struct Chunk {
-        std::unique_ptr<Chunk> next;
-        std::array<MapInterval, 0x8000> data;
-    };
-
-    void AllocateNewChunk();
-
-    void FillFreeList(Chunk& chunk);
-
-    std::vector<MapInterval*> free_list;
-
-    Chunk first_chunk;
-
-    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 33b3c060ba..a3fda1094a 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -37,59 +37,43 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
 
 CDmaPusher::~CDmaPusher() = default;
 
-void CDmaPusher::Push(ChCommandHeaderList&& entries) {
-    cdma_queue.push(std::move(entries));
-}
-
-void CDmaPusher::DispatchCalls() {
-    while (!cdma_queue.empty()) {
-        Step();
-    }
-}
-
-void CDmaPusher::Step() {
-    const auto entries{cdma_queue.front()};
-    cdma_queue.pop();
-
-    std::vector<u32> values(entries.size());
-    std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
-
-    for (const u32 value : values) {
+void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
+    for (const auto& value : entries) {
         if (mask != 0) {
             const auto lbs = static_cast<u32>(std::countr_zero(mask));
             mask &= ~(1U << lbs);
-            ExecuteCommand(static_cast<u32>(offset + lbs), value);
+            ExecuteCommand(offset + lbs, value.raw);
             continue;
         } else if (count != 0) {
             --count;
-            ExecuteCommand(static_cast<u32>(offset), value);
+            ExecuteCommand(offset, value.raw);
             if (incrementing) {
                 ++offset;
             }
            continue;
         }
-        const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
+        const auto mode = value.submission_mode.Value();
         switch (mode) {
        case ChSubmissionMode::SetClass: {
-            mask = value & 0x3f;
-            offset = (value >> 16) & 0xfff;
-            current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
+            mask = value.value & 0x3f;
+            offset = value.method_offset;
+            current_class = static_cast<ChClassId>((value.value >> 6) & 0x3ff);
            break;
        }
        case ChSubmissionMode::Incrementing:
        case ChSubmissionMode::NonIncrementing:
-            count = value & 0xffff;
-            offset = (value >> 16) & 0xfff;
+            count = value.value;
+            offset = value.method_offset;
            incrementing = mode == ChSubmissionMode::Incrementing;
            break;
        case ChSubmissionMode::Mask:
-            mask = value & 0xffff;
-            offset = (value >> 16) & 0xfff;
+            mask = value.value;
+            offset = value.method_offset;
            break;
        case ChSubmissionMode::Immediate: {
-            const u32 data = value & 0xfff;
-            offset = (value >> 16) & 0xfff;
-            ExecuteCommand(static_cast<u32>(offset), data);
+            const u32 data = value.value & 0xfff;
+            offset = value.method_offset;
+            ExecuteCommand(offset, data);
            break;
        }
        default:
@@ -102,8 +86,8 @@ void CDmaPusher::Step() {
 void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
     switch (current_class) {
     case ChClassId::NvDec:
-        ThiStateWrite(nvdec_thi_state, state_offset, {data});
-        switch (static_cast<ThiMethod>(state_offset)) {
+        ThiStateWrite(nvdec_thi_state, offset, data);
+        switch (static_cast<ThiMethod>(offset)) {
        case ThiMethod::IncSyncpt: {
            LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
            const auto syncpoint_id = static_cast<u32>(data & 0xFF);
@@ -120,7 +104,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
            LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
                      static_cast<u32>(nvdec_thi_state.method_0));
            nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0),
-                                           {data});
+                                           data);
            break;
        default:
            break;
@@ -144,7 +128,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
        case ThiMethod::SetMethod1:
            LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
                      static_cast<u32>(vic_thi_state.method_0), data);
-            vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), {data});
+            vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data);
            break;
        default:
            break;
@@ -153,7 +137,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
     case ChClassId::Host1x:
        // This device is mainly for syncpoint synchronization
        LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
-        host1x_processor->ProcessMethod(static_cast<Host1x::Method>(state_offset), {data});
+        host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data);
        break;
     default:
        UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
@@ -161,10 +145,9 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
     }
 }
 
-void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset,
-                               const std::vector<u32>& arguments) {
-    u8* const state_offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
-    std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size());
+void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, u32 argument) {
+    u8* const offset_ptr = reinterpret_cast<u8*>(&state) + sizeof(u32) * state_offset;
+    std::memcpy(offset_ptr, &argument, sizeof(u32));
 }
 
 } // namespace Tegra
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index e5f212c1a0..1bada44ddc 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -5,9 +5,7 @@
 #pragma once
 
 #include <memory>
-#include <unordered_map>
 #include <vector>
-#include <queue>
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
@@ -16,9 +14,9 @@ namespace Tegra {
 class GPU;
+class Host1x;
 class Nvdec;
 class Vic;
-class Host1x;
 
 enum class ChSubmissionMode : u32 {
     SetClass = 0,
@@ -48,16 +46,10 @@ enum class ChClassId : u32 {
     NvDec = 0xf0
 };
 
-enum class ChMethod : u32 {
-    Empty = 0,
-    SetMethod = 0x10,
-    SetData = 0x11,
-};
-
 union ChCommandHeader {
     u32 raw;
     BitField<0, 16, u32> value;
-    BitField<16, 12, ChMethod> method_offset;
+    BitField<16, 12, u32> method_offset;
     BitField<28, 4, ChSubmissionMode> submission_mode;
 };
 static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
@@ -99,21 +91,15 @@ public:
     explicit CDmaPusher(GPU& gpu_);
     ~CDmaPusher();
 
-    /// Push NVDEC command buffer entries into queue
-    void Push(ChCommandHeaderList&& entries);
-
-    /// Process queued command buffer entries
-    void DispatchCalls();
-
-    /// Process one queue element
-    void Step();
+    /// Process the command entry
+    void ProcessEntries(ChCommandHeaderList&& entries);
 
+private:
     /// Invoke command class devices to execute the command based on the current state
     void ExecuteCommand(u32 state_offset, u32 data);
 
-private:
     /// Write arguments value to the ThiRegisters member at the specified offset
-    void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
+    void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
 
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;
@@ -124,13 +110,10 @@ private:
     ThiRegisters vic_thi_state{};
     ThiRegisters nvdec_thi_state{};
 
-    s32 count{};
-    s32 offset{};
+    u32 count{};
+    u32 offset{};
     u32 mask{};
     bool incrementing{};
-
-    // Queue of command lists to be processed
-    std::queue<ChCommandHeaderList> cdma_queue;
 };
 
 } // namespace Tegra
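For readers unfamiliar with yuzu's BitField, the ChCommandHeader union above packs three fields into one word. The same decode in plain C++:

    #include <cstdint>

    struct DecodedHeader {
        std::uint32_t value;           // bits 0-15
        std::uint32_t method_offset;   // bits 16-27
        std::uint32_t submission_mode; // bits 28-31
    };

    constexpr DecodedHeader Decode(std::uint32_t raw) {
        return {raw & 0xffffu, (raw >> 16) & 0xfffu, (raw >> 28) & 0xfu};
    }
    // Example: raw 0x20010010 decodes to submission mode 2, method offset 0x1, value 0x10.
    static_assert(Decode(0x20010010).submission_mode == 2);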
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 39bc923a50..d02dc6260d 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -44,8 +44,10 @@ Codec::~Codec() {
 }
 
 void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
-    LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec);
-    current_codec = codec;
+    if (current_codec != codec) {
+        LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
+        current_codec = codec;
+    }
 }
 
 void Codec::StateWrite(u32 offset, u64 arguments) {
@@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) {
 
 void Codec::Decode() {
     bool is_first_frame = false;
-
     if (!initialized) {
        if (current_codec == NvdecCommon::VideoCodec::H264) {
            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index 79e1f4e137..e4f919afd1 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -12,16 +12,16 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
 
 Nvdec::~Nvdec() = default;
 
-void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+void Nvdec::ProcessMethod(Method method, u32 argument) {
     if (method == Method::SetVideoCodec) {
-        codec->StateWrite(static_cast<u32>(method), arguments[0]);
+        codec->StateWrite(static_cast<u32>(method), argument);
     } else {
-        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8);
+        codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
     }
 
     switch (method) {
     case Method::SetVideoCodec:
-        codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
+        codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
        break;
     case Method::Execute:
        Execute();
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e4877c5330..e66be80b88 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -23,7 +23,7 @@ public:
     ~Nvdec();
 
     /// Writes the method into the state, Invoke Execute() if encountered
-    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, u32 argument);
 
     /// Return most recently decoded frame
     [[nodiscard]] AVFramePtr GetFrame();
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 55e632346d..0a8b82f2b8 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -18,18 +18,14 @@ extern "C" {
 namespace Tegra {
 
 Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
-    : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
-Vic::~Vic() = default;
+    : gpu(gpu_),
+      nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}
 
-void Vic::VicStateWrite(u32 offset, u32 arguments) {
-    u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
-    std::memcpy(state_offset, &arguments, sizeof(u32));
-}
+Vic::~Vic() = default;
 
-void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
-    LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method);
-    VicStateWrite(static_cast<u32>(method), arguments[0]);
-    const u64 arg = static_cast<u64>(arguments[0]) << 8;
+void Vic::ProcessMethod(Method method, u32 argument) {
+    LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
+    const u64 arg = static_cast<u64>(argument) << 8;
     switch (method) {
     case Method::Execute:
        Execute();
@@ -53,8 +49,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
 
 void Vic::Execute() {
     if (output_surface_luma_address == 0) {
-        LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
-                  vic_state.output_surface.luma_offset);
+        LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
        return;
     }
     const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
@@ -89,8 +84,10 @@ void Vic::Execute() {
        // Get Converted frame
        const std::size_t linear_size = frame->width * frame->height * 4;
 
-        using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
-        AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
+        // Only allocate frame_buffer once per stream, as the size is not expected to change
+        if (!converted_frame_buffer) {
+            converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
+        }
 
        const int converted_stride{frame->width * 4};
        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
@@ -104,18 +101,16 @@ void Vic::Execute() {
            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
            const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height,
                                                            1, block_height, 0);
-            std::vector<u8> swizzled_data(size);
+            luma_buffer.resize(size);
            Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
-                                           frame->width, 4, swizzled_data.data(),
+                                           frame->width, 4, luma_buffer.data(),
                                           converted_frame_buffer.get(), block_height, 0, 0);
 
-            gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
-            gpu.Maxwell3D().OnMemoryWrite();
+            gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
        } else {
            // send pitch linear frame
            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
                                           linear_size);
-            gpu.Maxwell3D().OnMemoryWrite();
        }
        break;
     }
@@ -134,15 +129,15 @@ void Vic::Execute() {
        const auto stride = frame->linesize[0];
        const auto half_stride = frame->linesize[1];
 
-        std::vector<u8> luma_buffer(aligned_width * surface_height);
-        std::vector<u8> chroma_buffer(aligned_width * half_height);
+        luma_buffer.resize(aligned_width * surface_height);
+        chroma_buffer.resize(aligned_width * half_height);
 
        // Populate luma buffer
        for (std::size_t y = 0; y < surface_height - 1; ++y) {
-            std::size_t src = y * stride;
-            std::size_t dst = y * aligned_width;
+            const std::size_t src = y * stride;
+            const std::size_t dst = y * aligned_width;
 
-            std::size_t size = surface_width;
+            const std::size_t size = surface_width;
 
            for (std::size_t offset = 0; offset < size; ++offset) {
                luma_buffer[dst + offset] = luma_ptr[src + offset];
@@ -153,8 +148,8 @@ void Vic::Execute() {
        // Populate chroma buffer from both channels with interleaving.
        for (std::size_t y = 0; y < half_height; ++y) {
-            std::size_t src = y * half_stride;
-            std::size_t dst = y * aligned_width;
+            const std::size_t src = y * half_stride;
+            const std::size_t dst = y * aligned_width;
 
            for (std::size_t x = 0; x < half_width; ++x) {
                chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
@@ -163,7 +158,6 @@ void Vic::Execute() {
        }
        gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
                                       chroma_buffer.size());
-        gpu.Maxwell3D().OnMemoryWrite();
        break;
     }
     default:
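The chroma loop above weaves FFmpeg's separate Cb and Cr planes into the single interleaved UV plane the output surface expects. A self-contained model of that interleave (sizes and names illustrative):

    #include <cstddef>
    #include <vector>

    std::vector<unsigned char> InterleaveChroma(const unsigned char* cb, const unsigned char* cr,
                                                std::size_t half_width, std::size_t half_height,
                                                std::size_t half_stride, std::size_t out_stride) {
        std::vector<unsigned char> out(out_stride * half_height);
        for (std::size_t y = 0; y < half_height; ++y) {
            const std::size_t src = y * half_stride;
            const std::size_t dst = y * out_stride;
            for (std::size_t x = 0; x < half_width; ++x) {
                out[dst + x * 2] = cb[src + x];     // U sample
                out[dst + x * 2 + 1] = cr[src + x]; // V sample
            }
        }
        return out;
    }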
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 8c4e284a1b..f5a2ed100a 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -15,43 +15,6 @@ namespace Tegra {
 class GPU;
 class Nvdec;
 
-struct PlaneOffsets {
-    u32 luma_offset{};
-    u32 chroma_u_offset{};
-    u32 chroma_v_offset{};
-};
-
-struct VicRegisters {
-    INSERT_PADDING_WORDS(64);
-    u32 nop{};
-    INSERT_PADDING_WORDS(15);
-    u32 pm_trigger{};
-    INSERT_PADDING_WORDS(47);
-    u32 set_application_id{};
-    u32 set_watchdog_timer{};
-    INSERT_PADDING_WORDS(17);
-    u32 context_save_area{};
-    u32 context_switch{};
-    INSERT_PADDING_WORDS(43);
-    u32 execute{};
-    INSERT_PADDING_WORDS(63);
-    std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
-    u32 picture_index{};
-    u32 control_params{};
-    u32 config_struct_offset{};
-    u32 filter_struct_offset{};
-    u32 palette_offset{};
-    u32 hist_offset{};
-    u32 context_id{};
-    u32 fce_ucode_size{};
-    PlaneOffsets output_surface{};
-    u32 fce_ucode_offset{};
-    INSERT_PADDING_WORDS(4);
-    std::array<u32, 8> slot_context_id{};
-    INSERT_PADDING_WORDS(16);
-};
-static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
-
 class Vic {
 public:
     enum class Method : u32 {
@@ -67,14 +30,11 @@ public:
     ~Vic();
 
     /// Write to the device state.
-    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+    void ProcessMethod(Method method, u32 argument);
 
 private:
     void Execute();
 
-    void VicStateWrite(u32 offset, u32 arguments);
-    VicRegisters vic_state{};
-
     enum class VideoPixelFormat : u64_le {
        RGBA8 = 0x1f,
        BGRA8 = 0x20,
@@ -88,8 +48,6 @@ private:
        BitField<9, 2, u64_le> chroma_loc_vert;
        BitField<11, 4, u64_le> block_linear_kind;
        BitField<15, 4, u64_le> block_linear_height_log2;
-        BitField<19, 3, u64_le> reserved0;
-        BitField<22, 10, u64_le> reserved1;
        BitField<32, 14, u64_le> surface_width_minus1;
        BitField<46, 14, u64_le> surface_height_minus1;
     };
@@ -97,6 +55,13 @@ private:
     GPU& gpu;
     std::shared_ptr<Tegra::Nvdec> nvdec_processor;
 
+    /// Avoid reallocation of the following buffers every frame, as their
+    /// size does not change during a stream
+    using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
+    AVMallocPtr converted_frame_buffer;
+    std::vector<u8> luma_buffer;
+    std::vector<u8> chroma_buffer;
+
     GPUVAddr config_struct_address{};
     GPUVAddr output_surface_luma_address{};
     GPUVAddr output_surface_chroma_u_address{};
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index b1eaac00cd..7149af2902 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
 #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))
 
 namespace VideoCommon::Dirty {
-
+namespace {
 using Tegra::Engines::Maxwell3D;
 
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
+    static constexpr std::size_t num_array = 3;
+    for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
+        const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+        const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+        FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
+    }
+}
+
+void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
+}
+
+void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
     FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
     FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+}
 
+void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
     static constexpr std::size_t num_per_rt = NUM(rt[0]);
     static constexpr std::size_t begin = OFF(rt);
     static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
        FillBlock(table, OFF(zeta), NUM(zeta), flag);
     }
 }
+} // Anonymous namespace
+
+void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
+    SetupDirtyVertexBuffers(tables);
+    SetupIndexBuffer(tables);
+    SetupDirtyDescriptors(tables);
+    SetupDirtyRenderTargets(tables);
+}
 
 } // namespace VideoCommon::Dirty
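SetupDirtyFlags works by filling lookup tables that map engine register offsets to dirty flags; FillBlock paints one flag across a contiguous register range. A toy model of the mechanism (table size illustrative, not Maxwell's real register count):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    using Table = std::array<std::uint8_t, 256>;

    // Writes to any register in [begin, begin + num) raise `flag`.
    void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
        for (std::size_t i = begin; i < begin + num; ++i) {
            table[i] = flag;
        }
    }

Two tables exist so a single register write can raise two flags at once, which is how a vertex array write raises both VertexBuffer0 + i and the aggregate VertexBuffers.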
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 875527ddda..702688acec 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
     ColorBuffer7,
     ZetaBuffer,
 
+    VertexBuffers,
+    VertexBuffer0,
+    VertexBuffer31 = VertexBuffer0 + 31,
+
+    IndexBuffer,
+
     LastCommonEntry,
 };
 
@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
     FillBlock(tables[1], begin, num, index_b);
 }
 
-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
 
 } // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 2c8b200241..8b33c04aba 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
     gpu.SyncGuestHost();
-    // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index a01d334add..0f640fdaeb 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {
 
 Fermi2D::~Fermi2D() = default;
 
-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }
 
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0de3280a27..c808a577df 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
     ~Fermi2D();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     /// Write the value to the register identified by method.
     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ba387506ef..a9b75091e0 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage
 
 KeplerCompute::~KeplerCompute() = default;
 
-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }
 
 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
     case KEPLER_COMPUTE_REG_INDEX(data_upload): {
        upload_state.ProcessData(method_argument, is_last_call);
        if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
        }
        break;
     }
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 9f0a7b76dc..7c40cba38c 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
     ~KeplerCompute();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     static constexpr std::size_t NumConstBuffers = 8;
 
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 9911140e93..5605511575 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
     case KEPLERMEMORY_REG_INDEX(data): {
        upload_state.ProcessData(method_argument, is_last_call);
        if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
        }
        break;
     }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 116ad17226..75517a4f7e 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
 
 Maxwell3D::~Maxwell3D() = default;
 
-void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }
 
 void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
     case MAXWELL3D_REG_INDEX(data_upload):
        upload_state.ProcessData(argument, is_last_call);
        if (is_last_call) {
-            OnMemoryWrite();
        }
        return;
     case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
     }
 }
 
-void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
+void Maxwell3D::ProcessCBBind(size_t stage_index) {
     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[stage_index];
-    auto& bind_data = regs.cb_bind[stage_index];
-
-    ASSERT(bind_data.index < Regs::MaxConstBuffers);
-    auto& buffer = shader.const_buffers[bind_data.index];
-
+    const auto& bind_data = regs.cb_bind[stage_index];
+    auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
     buffer.enabled = bind_data.valid.Value() != 0;
     buffer.address = regs.const_buffer.BufferAddress();
     buffer.size = regs.const_buffer.cb_size;
+
+    const bool is_enabled = bind_data.valid.Value() != 0;
+    const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
+    const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
+    rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
 }
 
 void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {
 
     const u32 id = cb_data_state.id;
     memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
-    OnMemoryWrite();
 
     cb_data_state.id = null_cb_data;
     cb_data_state.current = null_cb_data;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 002d1b3f97..ffed42a298 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
     ~Maxwell3D();
 
     /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:
                GPUVAddr LimitAddress() const {
                    return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
-                                                 limit_low) +
-                           1;
+                                                 limit_low);
                }
            } vertex_array_limit[NumVertexArrays];
 
@@ -1403,6 +1402,7 @@ public:
        };
 
        std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+
        u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
     };
 
@@ -1452,11 +1452,6 @@ public:
        return *rasterizer;
     }
 
-    /// Notify a memory write has happened.
-    void OnMemoryWrite() {
-        dirty.flags |= dirty.on_write_stores;
-    }
-
     enum class MMEDrawMode : u32 {
        Undefined,
        Array,
@@ -1478,7 +1473,6 @@ public:
        using Tables = std::array<Table, 2>;
 
        Flags flags;
-        Flags on_write_stores;
        Tables tables{};
     } dirty;
 
@@ -1541,7 +1535,7 @@ private:
     void FinishCBData();
 
     /// Handles a write to the CB_BIND register.
-    void ProcessCBBind(std::size_t stage_index);
+    void ProcessCBBind(size_t stage_index);
 
     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
     void DrawArrays();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ba750748c9..a2f19559fc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
        return;
     }
 
-    // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().OnMemoryWrite();
-
     if (is_src_pitch && is_dst_pitch) {
        CopyPitchToPitch();
     } else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 3512283ff2..f055b61e94 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
     }
 
     bool ShouldWait() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
               query_cache.ShouldWaitAsyncFlushes();
     }
 
     bool ShouldFlush() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
               query_cache.HasUncommittedFlushes();
     }
 
     void PopAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.PopAsyncFlushes();
        buffer_cache.PopAsyncFlushes();
        query_cache.PopAsyncFlushes();
     }
 
     void CommitAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.CommitAsyncFlushes();
        buffer_cache.CommitAsyncFlushes();
        query_cache.CommitAsyncFlushes();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ab06775fd..51c63af4ae 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -30,8 +30,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
     : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
-      dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
-      cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
+      dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_},
      maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
      fermi_2d{std::make_unique<Engines::Fermi2D>()},
      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
@@ -44,8 +43,8 @@ GPU::~GPU() = default;
 
 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
     renderer = std::move(renderer_);
rasterizer = renderer->ReadRasterizer(); - VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); memory_manager->BindRasterizer(rasterizer); maxwell_3d->BindRasterizer(rasterizer); fermi_2d->BindRasterizer(rasterizer); @@ -171,7 +170,7 @@ void GPU::TickWork() { const std::size_t size = request.size; flush_requests.pop_front(); flush_request_mutex.unlock(); - renderer->Rasterizer().FlushRegion(addr, size); + rasterizer->FlushRegion(addr, size); current_flush_fence.store(fence); flush_request_mutex.lock(); } @@ -193,11 +192,11 @@ u64 GPU::GetTicks() const { } void GPU::FlushCommands() { - renderer->Rasterizer().FlushCommands(); + rasterizer->FlushCommands(); } void GPU::SyncGuestHost() { - renderer->Rasterizer().SyncGuestHost(); + rasterizer->SyncGuestHost(); } enum class GpuSemaphoreOperation { @@ -494,8 +493,7 @@ void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { // TODO(ameerj): RE proper async nvdec operation // gpu_thread.SubmitCommandBuffer(std::move(entries)); - cdma_pusher->Push(std::move(entries)); - cdma_pusher->DispatchCalls(); + cdma_pusher->ProcessEntries(std::move(entries)); } void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b4ce6b1549..b2ee454964 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -366,6 +366,7 @@ protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; std::unique_ptr<VideoCore::RendererBase> renderer; + VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; private: diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 7e490bcc30..eb0e43c0cd 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } auto current_context = context.Acquire(); + VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); CommandDataContainer next; while (state.is_running) { @@ -47,18 +48,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, dma_pusher.DispatchCalls(); } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { // NVDEC - cdma_pusher.Push(std::move(command_list->entries)); - cdma_pusher.DispatchCalls(); + cdma_pusher.ProcessEntries(std::move(command_list->entries)); } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { renderer.SwapBuffers(data->framebuffer ? 
&*data->framebuffer : nullptr); } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { - renderer.Rasterizer().ReleaseFences(); + rasterizer->ReleaseFences(); } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { - renderer.Rasterizer().FlushRegion(flush->addr, flush->size); + rasterizer->FlushRegion(flush->addr, flush->size); } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { - renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); + rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { return; } else { @@ -84,6 +84,7 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { + rasterizer = renderer.ReadRasterizer(); thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); } @@ -129,12 +130,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { - system.Renderer().Rasterizer().OnCPUWrite(addr, size); + rasterizer->OnCPUWrite(addr, size); } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important - system.Renderer().Rasterizer().OnCPUWrite(addr, size); + rasterizer->OnCPUWrite(addr, size); } void ThreadManager::WaitIdle() const { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2775629e7f..4cd951169d 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -27,6 +27,7 @@ class System; } // namespace Core namespace VideoCore { +class RasterizerInterface; class RendererBase; } // namespace VideoCore @@ -151,11 +152,12 @@ private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); - SynchState state; Core::System& system; - std::thread thread; - std::thread::id thread_id; const bool is_async; + VideoCore::RasterizerInterface* rasterizer = nullptr; + + SynchState state; + std::thread thread; }; } // namespace VideoCommon::GPUThread diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 28f2b8614d..970120acc5 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,7 +12,6 @@ set(SHADER_FILES vulkan_blit_depth_stencil.frag vulkan_present.frag vulkan_present.vert - vulkan_quad_array.comp vulkan_quad_indexed.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp deleted file mode 100644 index 212f4e9981..0000000000 --- a/src/video_core/host_shaders/vulkan_quad_array.comp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#version 460 core - -layout (local_size_x = 1024) in; - -layout (std430, set = 0, binding = 0) buffer OutputBuffer { - uint output_indexes[]; -}; - -layout (push_constant) uniform PushConstants { - uint first; -}; - -void main() { - uint primitive = gl_GlobalInvocationID.x; - if (primitive * 6 >= output_indexes.length()) { - return; - } - - const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); - for (uint vertex = 0; vertex < 6; ++vertex) { - uint index = first + primitive * 4 + quad_map[vertex]; - output_indexes[primitive * 6 + vertex] = index; - } -} diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index ad74d7af96..872291670c 100644 --- a/src/video_core/host_shaders/vulkan_uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp @@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { uint16_t output_indexes[]; }; +uint AssembleIndex(uint id) { + // Most primitive restart indices are 0xFF + // Hardcode this to 0xFF for now + uint index = uint(input_indexes[id]); + return index == 0xFF ? 0xFFFF : index; +} + void main() { uint id = gl_GlobalInvocationID.x; if (id < input_indexes.length()) { - output_indexes[id] = uint16_t(input_indexes[id]); + output_indexes[id] = uint16_t(AssembleIndex(id)); } } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index c841f3cd7b..4eb71efbdc 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,7 +6,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/hle/kernel/memory/page_table.h" +#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/process.h" #include "core/memory.h" #include "video_core/gpu.h" @@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_) MemoryManager::~MemoryManager() = default; -void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { - rasterizer = &rasterizer_; +void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + rasterizer = rasterizer_; } GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b468a67de2..b3538d503d 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -72,7 +72,7 @@ public: ~MemoryManager(); /// Binds a renderer to the memory manager. 
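Note: the vulkan_uint8.comp change above widens 8-bit indices to 16-bit while preserving primitive restart. The shader comment assumes a restart sentinel of 0xFF, which must be remapped to 0xFFFF or restart would stop triggering in the converted stream. The same rule as a minimal CPU-side sketch:

    #include <cstdint>
    #include <vector>

    // Widen u8 indices to u16, remapping the assumed 0xFF primitive-restart
    // sentinel to 0xFFFF so restart survives the conversion.
    std::vector<std::uint16_t> WidenIndices(const std::vector<std::uint8_t>& input) {
        std::vector<std::uint16_t> output;
        output.reserve(input.size());
        for (const std::uint8_t index : input) {
            output.push_back(index == 0xFF ? std::uint16_t{0xFFFF} : std::uint16_t{index});
        }
        return output;
    }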
- void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; @@ -157,6 +157,8 @@ private: using MapRange = std::pair<GPUVAddr, size_t>; std::vector<MapRange> map_ranges; + + std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue; }; } // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0cb0f387d4..50491b7586 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -7,6 +7,7 @@ #include <atomic> #include <functional> #include <optional> +#include <span> #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -49,6 +50,10 @@ public: /// Records a GPU query and caches it virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; + /// Signal an uniform buffer binding + virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, + u32 size) = 0; + /// Signal a GPU based semaphore as a fence virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 51dde8eb5c..320ee8d30d 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -37,15 +37,11 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); - /// Initialize the renderer - [[nodiscard]] virtual bool Init() = 0; - - /// Shutdown the renderer - virtual void ShutDown() = 0; - /// Finalize rendering the guest frame and draw into the presentation texture virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; + // Getter/setter functions: // ------------------------ @@ -57,14 +53,6 @@ public: return m_current_frame; } - [[nodiscard]] RasterizerInterface& Rasterizer() { - return *rasterizer; - } - - [[nodiscard]] const RasterizerInterface& Rasterizer() const { - return *rasterizer; - } - [[nodiscard]] Core::Frontend::GraphicsContext& Context() { return *context; } @@ -98,7 +86,6 @@ public: protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. - std::unique_ptr<RasterizerInterface> rasterizer; std::unique_ptr<Core::Frontend::GraphicsContext> context; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 5772cad879..6da3906a48 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,98 +2,208 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
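Note: renderer_base.h above drops the owning Rasterizer() accessor in favour of a virtual ReadRasterizer(), and the engines and memory manager now hold a non-owning RasterizerInterface* that stays nullptr until a renderer is bound. A minimal sketch of that ownership layout, with illustrative names rather than yuzu's exact classes:

    #include <cstdint>

    class RasterizerInterface {
    public:
        virtual ~RasterizerInterface() = default;
        virtual void FlushRegion(std::uint64_t addr, std::uint64_t size) = 0;
    };

    class RendererBase {
    public:
        virtual ~RendererBase() = default;
        // Each backend owns its rasterizer and hands out a raw pointer.
        virtual RasterizerInterface* ReadRasterizer() = 0;
    };

    class Engine {
    public:
        void BindRasterizer(RasterizerInterface* rasterizer_) {
            rasterizer = rasterizer_; // non-owning; may be rebound later
        }

    private:
        RasterizerInterface* rasterizer = nullptr;
    };

    // Matches the flow of GPU::BindRenderer: fetch the pointer once,
    // then hand it to every engine and cache that needs it.
    void BindRenderer(RendererBase& renderer, Engine& engine) {
        engine.BindRasterizer(renderer.ReadRasterizer());
    }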
-#include <memory> +#include <span> -#include <glad/glad.h> - -#include "common/assert.h" -#include "common/microprofile.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { +namespace { +struct BindlessSSBO { + GLuint64EXT address; + GLsizei length; + GLsizei padding; +}; +static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4); + +constexpr std::array PROGRAM_LUT{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; +} // Anonymous namespace + +Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} + +Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, + VAddr cpu_addr_, u64 size_bytes_) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { + buffer.Create(); + const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); + glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); + glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); + + if (runtime.has_unified_vertex_buffers) { + glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); + } +} -using Maxwell = Tegra::Engines::Maxwell3D::Regs; +void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { + glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), + static_cast<GLsizeiptr>(data.size_bytes()), data.data()); +} -MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); +void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept { + glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), + static_cast<GLsizeiptr>(data.size_bytes()), data.data()); +} -Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) - : BufferBlock{cpu_addr_, size_} { - gl_buffer.Create(); - glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); - if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { - glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); - glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); +void Buffer::MakeResident(GLenum access) noexcept { + // Abuse GLenum's order to exit early + // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE + if (access <= current_residency_access || buffer.handle == 0) { + return; + } + if (std::exchange(current_residency_access, access) != GL_NONE) { + // If the buffer is already resident, remove its residency before promoting it + glMakeNamedBufferNonResidentNV(buffer.handle); } + glMakeNamedBufferResidentNV(buffer.handle, access); } -Buffer::~Buffer() = default; - -void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { - glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(data_size), data); +BufferCacheRuntime::BufferCacheRuntime(const Device& device_) + : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, + 
use_assembly_shaders{device.UseAssemblyShaders()}, + has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, + stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { + GLint gl_max_attributes; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); + max_attributes = static_cast<u32>(gl_max_attributes); + for (auto& stage_uniforms : fast_uniforms) { + for (OGLBuffer& buffer : stage_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); + } + } + for (auto& stage_uniforms : copy_uniforms) { + for (OGLBuffer& buffer : stage_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); + } + } + for (OGLBuffer& buffer : copy_compute_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); + } } -void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); - const GLintptr gl_offset = static_cast<GLintptr>(offset); - if (read_buffer.handle == 0) { - read_buffer.Create(); - glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, - GL_STREAM_READ); +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies) { + for (const VideoCommon::BufferCopy& copy : copies) { + glCopyNamedBufferSubData( + src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), + static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); } - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); - glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); } -void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t copy_size) { - glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), - static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); +void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, + static_cast<GLsizeiptr>(size)); + } else { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); + index_buffer_offset = offset; + } } -OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, OGLStreamBuffer& stream_buffer_, - StateTracker& state_tracker) - : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { - if (!device.HasFastBufferSubData()) { +void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, + u32 stride) { + if (index >= max_attributes) { return; } - - static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); - glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); - for (const GLuint cbuf : cbufs) { - glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); + 
glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, + buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); + } else { + glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), + static_cast<GLsizei>(stride)); } } -OGLBufferCache::~OGLBufferCache() { - glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); +void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, + u32 offset, u32 size) { + if (use_assembly_shaders) { + GLuint handle; + if (offset != 0) { + handle = copy_uniforms[stage][binding_index].handle; + glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); + } else { + handle = buffer.Handle(); + } + glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, + static_cast<GLsizeiptr>(size)); + } else { + const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } } -std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<Buffer>(device, cpu_addr, size); +void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, + u32 size) { + if (use_assembly_shaders) { + GLuint handle; + if (offset != 0) { + handle = copy_compute_uniforms[binding_index].handle; + glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); + } else { + handle = buffer.Handle(); + } + glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, + static_cast<GLsizeiptr>(size)); + } else { + glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } } -OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { - return {0, 0, 0}; +void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, + u32 offset, u32 size, bool is_written) { + if (use_assembly_shaders) { + const BindlessSSBO ssbo{ + .address = buffer.HostGpuAddr() + offset, + .length = static_cast<GLsizei>(size), + .padding = 0, + }; + buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); + glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); + } else { + const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } } -OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, - std::size_t size) { - DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); - const GLuint cbuf = cbufs[cbuf_cursor++]; +void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, + u32 size, bool is_written) { + if (use_assembly_shaders) { + const BindlessSSBO ssbo{ + .address = buffer.HostGpuAddr() + offset, + .length = static_cast<GLsizei>(size), + .padding = 0, + }; + buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); + glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, + reinterpret_cast<const GLuint*>(&ssbo)); + } else if (size == 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } +} - glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {cbuf, 0, 0}; +void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, + u32 size) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 17ee90316c..d8b20a9af8 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -5,79 +5,157 @@ #pragma once #include <array> -#include <memory> +#include <span> +#include "common/alignment.h" #include "common/common_types.h" +#include "common/dynamic_library.h" #include "video_core/buffer_cache/buffer_cache.h" -#include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -namespace Core { -class System; -} - namespace OpenGL { -class Device; -class OGLStreamBuffer; -class RasterizerOpenGL; -class StateTracker; +class BufferCacheRuntime; -class Buffer : public VideoCommon::BufferBlock { +class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { public: - explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); - ~Buffer(); + explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, + u64 size_bytes); + explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); - void Upload(std::size_t offset, std::size_t data_size, const u8* data); + void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; - void Download(std::size_t offset, std::size_t data_size, u8* data); + void ImmediateDownload(size_t offset, std::span<u8> data) noexcept; - void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t copy_size); + void MakeResident(GLenum access) noexcept; - GLuint Handle() const noexcept { - return gl_buffer.handle; + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { + return address; } - u64 Address() const noexcept { - return gpu_address; + [[nodiscard]] GLuint Handle() const noexcept { + return buffer.handle; } private: - OGLBuffer gl_buffer; - OGLBuffer read_buffer; - u64 gpu_address = 0; + GLuint64EXT address = 0; + OGLBuffer buffer; + GLenum current_residency_access = GL_NONE; }; -using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; -class OGLBufferCache final : public GenericBufferCache { +class BufferCacheRuntime { + friend Buffer; + public: - explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, OGLStreamBuffer& stream_buffer, - StateTracker& state_tracker); - ~OGLBufferCache(); + static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); + + explicit 
BufferCacheRuntime(const Device& device_); + + void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, + std::span<const VideoCommon::BufferCopy> copies); + + void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); + + void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + + void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); + + void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); + + void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, + bool is_written); + + void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, + bool is_written); + + void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + if (use_assembly_shaders) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); + glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); + } else { + const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_UNIFORM_BUFFER, binding, + fast_uniforms[stage][binding_index].handle, 0, + static_cast<GLsizeiptr>(size)); + } + } - BufferInfo GetEmptyBuffer(std::size_t) override; + void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) { + if (use_assembly_shaders) { + glProgramBufferParametersIuivNV( + PABO_LUT[stage], binding_index, 0, + static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)), + reinterpret_cast<const GLuint*>(data.data())); + } else { + glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, + static_cast<GLsizeiptr>(data.size_bytes()), data.data()); + } + } - void Acquire() noexcept { - cbuf_cursor = 0; + std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { + const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); + const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + return mapped_span; } -protected: - std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; + [[nodiscard]] const GLvoid* IndexOffset() const noexcept { + return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset)); + } - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; + [[nodiscard]] bool HasFastBufferSubData() const noexcept { + return has_fast_buffer_sub_data; + } private: - static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + static constexpr std::array PABO_LUT{ + GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, + GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, + GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, + }; const Device& device; - std::size_t cbuf_cursor = 0; - std::array<GLuint, NUM_CBUFS> cbufs{}; + bool has_fast_buffer_sub_data = false; + bool use_assembly_shaders = false; + bool has_unified_vertex_buffers = false; + + u32 max_attributes = 0; + + std::optional<StreamBuffer> 
stream_buffer; + + std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, + VideoCommon::NUM_STAGES> + fast_uniforms; + std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, + VideoCommon::NUM_STAGES> + copy_uniforms; + std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; + + u32 index_buffer_offset = 0; +}; + +struct BufferCacheParams { + using Runtime = OpenGL::BufferCacheRuntime; + using Buffer = OpenGL::Buffer; + + static constexpr bool IS_OPENGL = true; + static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; + static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; + static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; + static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool USE_MEMORY_MAPS = false; }; +using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 04c267ee4b..1ae5f1d624 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -21,9 +21,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { - namespace { - // One uniform block is reserved for emulation purposes constexpr u32 ReservedUniformBlocks = 1; @@ -197,11 +195,13 @@ bool IsASTCSupported() { const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); } - } // Anonymous namespace -Device::Device() - : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { +Device::Device() { + if (!GLAD_GL_VERSION_4_6) { + LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); + throw std::runtime_error{"Insufficient version"}; + } const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); @@ -217,6 +217,9 @@ Device::Device() "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } + + max_uniform_buffers = BuildMaxUniformBuffers(); + base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); @@ -236,6 +239,7 @@ Device::Device() has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); + has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" @@ -272,6 +276,7 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_texture_shadow_lod = true; has_variable_aoffi = true; + has_depth_buffer_float = true; } bool Device::TestVariableAoffi() { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9141de6352..f24bd0c7b7 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -10,11 +10,9 @@ namespace OpenGL { -static constexpr u32 EmulationUniformBlockBinding = 0; - -class Device final { +class Device { public: - struct BaseBindings final { + struct BaseBindings { u32 uniform_buffer{}; u32 shader_storage_buffer{}; u32 sampler{}; @@ -124,6 +122,10 @@ public: return use_driver_cache; } + bool HasDepthBufferFloat() const { + return has_depth_buffer_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -152,6 +154,7 @@ private: bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; bool use_driver_cache{}; + bool has_depth_buffer_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 3e9c922f53..1512901012 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -47,7 +47,7 @@ void GLInnerFence::Wait() { FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, TextureCache& texture_cache_, - OGLBufferCache& buffer_cache_, QueryCache& query_cache_) + BufferCache& buffer_cache_, QueryCache& query_cache_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 30dbee6130..e714aa1156 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -32,14 +32,13 @@ private: }; using Fence = std::shared_ptr<GLInnerFence>; -using GenericFenceManager = - VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; +using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; class FenceManagerOpenGL final : public GenericFenceManager { public: - explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, - QueryCache& query_cache_); + explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + 
TextureCache& texture_cache, BufferCache& buffer_cache, + QueryCache& query_cache); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8aa63d3291..4610fd1604 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -38,37 +38,21 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using GLvec4 = std::array<GLfloat, 4>; using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; -MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); -MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); +MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { -constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; -constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = - NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; -constexpr size_t TOTAL_CONST_BUFFER_BYTES = - NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; - -constexpr size_t MAX_TEXTURES = 192; -constexpr size_t MAX_IMAGES = 48; struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { @@ -104,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } -std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry) { - if (!entry.IsIndirect()) { - return entry.GetSize(); - } - if (buffer.size > Maxwell::MaxConstBufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, - Maxwell::MaxConstBufferSize); - return Maxwell::MaxConstBufferSize; - } - - return buffer.size; -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -150,14 +120,6 @@ void oglEnable(GLenum cap, bool state) { (state ? 
glEnable : glDisable)(cap); } -void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { - if (num_ssbos == 0) { - return; - } - glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), - reinterpret_cast<const GLuint*>(ssbos)); -} - ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { if (entry.is_buffer) { return ImageViewType::Buffer; @@ -204,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), - stream_buffer(device, state_tracker), texture_cache_runtime(device, program_manager, state_tracker), texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), + buffer_cache_runtime(device), + buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), - buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), async_shaders(emu_window_) { - unified_uniform_buffer.Create(); - glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); - - if (device.UseAssemblyShaders()) { - glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); - for (const GLuint cbuf : staging_cbufs) { - glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), - nullptr, 0); - } - } if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); } } -RasterizerOpenGL::~RasterizerOpenGL() { - if (device.UseAssemblyShaders()) { - glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); - } -} +RasterizerOpenGL::~RasterizerOpenGL() = default; -void RasterizerOpenGL::SetupVertexFormat() { +void RasterizerOpenGL::SyncVertexFormats() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { return; } flags[Dirty::VertexFormats] = false; - MICROPROFILE_SCOPE(OpenGL_VAO); - // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables // the first 16 vertex attributes always, as we don't know which ones are actually used until // shader time. 
Note, Tegra technically supports 32, but we're capping this to 16 for now to @@ -277,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() { } } -void RasterizerOpenGL::SetupVertexBuffer() { - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::VertexBuffers]) { - return; - } - flags[Dirty::VertexBuffers] = false; - - MICROPROFILE_SCOPE(OpenGL_VB); - - const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); - - // Upload all guest vertex arrays sequentially to our buffer - const auto& regs = maxwell3d.regs; - for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { - if (!flags[Dirty::VertexBuffer0 + index]) { - continue; - } - flags[Dirty::VertexBuffer0 + index] = false; - - const auto& vertex_array = regs.vertex_array[index]; - if (!vertex_array.IsEnabled()) { - continue; - } - - const GPUVAddr start = vertex_array.StartAddress(); - const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end >= start); - - const GLuint gl_index = static_cast<GLuint>(index); - const u64 size = end - start; - if (size == 0) { - glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); - if (use_unified_memory) { - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); - } - continue; - } - const auto info = buffer_cache.UploadMemory(start, size); - if (use_unified_memory) { - glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, - info.address + info.offset, size); - } else { - glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); - } - } -} - -void RasterizerOpenGL::SetupVertexInstances() { +void RasterizerOpenGL::SyncVertexInstances() { auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexInstances]) { return; @@ -346,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() { } } -GLintptr RasterizerOpenGL::SetupIndexBuffer() { - MICROPROFILE_SCOPE(OpenGL_Index); - const auto& regs = maxwell3d.regs; - const std::size_t size = CalculateIndexBufferSize(); - const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); - return info.offset; -} - -void RasterizerOpenGL::SetupShaders() { - MICROPROFILE_SCOPE(OpenGL_Shader); +void RasterizerOpenGL::SetupShaders(bool is_indexed) { u32 clip_distances = 0; std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; @@ -413,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() { const size_t stage = index == 0 ? 0 : index - 1; shaders[stage] = shader; - SetupDrawConstBuffers(stage, shader); - SetupDrawGlobalMemory(stage, shader); SetupDrawTextures(shader, stage); SetupDrawImages(shader, stage); + buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); + + buffer_cache.UnbindGraphicsStorageBuffers(stage); + u32 ssbo_index = 0; + for (const auto& buffer : shader->GetEntries().global_memory_entries) { + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, + buffer.cbuf_offset, buffer.is_written); + ++ssbo_index; + } + // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). 
To avoid this, enable the @@ -433,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() { SyncClipEnabled(clip_distances); maxwell3d.dirty.flags[Dirty::Shaders] = false; + buffer_cache.UpdateGraphicsBuffers(is_indexed); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.BindHostGeometryBuffers(is_indexed); + size_t image_view_index = 0; size_t texture_index = 0; size_t image_index = 0; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader* const shader = shaders[stage]; - if (shader) { - const auto base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } - } -} - -std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { - const auto& regs = maxwell3d.regs; - - std::size_t size = 0; - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (!regs.vertex_array[index].IsEnabled()) + if (!shader) { continue; - - const GPUVAddr start = regs.vertex_array[index].StartAddress(); - const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - - size += end - start; - ASSERT(end >= start); + } + buffer_cache.BindHostStageBuffers(stage); + const auto& base = device.GetBaseBindings(stage); + BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, + texture_index, image_index); } - - return size; -} - -std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { - return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * - static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, @@ -478,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s } void RasterizerOpenGL::Clear() { + MICROPROFILE_SCOPE(OpenGL_Clears); if (!maxwell3d.ShouldExecute()) { return; } @@ -528,11 +408,9 @@ void RasterizerOpenGL::Clear() { } UNIMPLEMENTED_IF(regs.clear_flags.viewport); - { - auto lock = texture_cache.AcquireLock(); - texture_cache.UpdateRenderTargets(true); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - } + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UpdateRenderTargets(true); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -544,7 +422,6 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); } - ++num_queued_commands; } @@ -553,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - SyncViewport(); - SyncRasterizeEnable(); - SyncPolygonModes(); - SyncColorMask(); - SyncFragmentColorClampState(); - SyncMultiSampleState(); - SyncDepthTestState(); - SyncDepthClamp(); - SyncStencilTestState(); - SyncBlendState(); - SyncLogicOpState(); - SyncCullMode(); - SyncPrimitiveRestart(); - SyncScissorTest(); - SyncPointState(); - SyncLineState(); - SyncPolygonOffset(); - SyncAlphaTest(); - SyncFramebufferSRGB(); - - buffer_cache.Acquire(); - current_cbuf = 0; - - std::size_t buffer_size = CalculateVertexArraysSize(); - - // Add space for index buffer - if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); - } - - // Uniform space for the 5 shader stages - buffer_size = -
Common::AlignUp<std::size_t>(buffer_size, 4) + - (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; - - // Add space for at least 18 constant buffers - buffer_size += Maxwell::MaxConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); - - // Prepare the vertex array. - buffer_cache.Map(buffer_size); - - // Prepare vertex array format. - SetupVertexFormat(); - - // Upload vertex and index data. - SetupVertexBuffer(); - SetupVertexInstances(); - GLintptr index_buffer_offset = 0; - if (is_indexed) { - index_buffer_offset = SetupIndexBuffer(); - } - - // Setup emulation uniform buffer. - if (!device.UseAssemblyShaders()) { - MaxwellUniformData ubo; - ubo.SetFromRegs(maxwell3d); - const auto info = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, - static_cast<GLsizeiptr>(sizeof(ubo))); - } + SyncState(); // Setup shaders and their used resources. - auto lock = texture_cache.AcquireLock(); - SetupShaders(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + SetupShaders(is_indexed); - // Signal the buffer cache that we are not going to upload more things. - buffer_cache.Unmap(); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); program_manager.BindGraphicsPipeline(); @@ -635,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (is_indexed) { const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); - const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); + const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { glDrawElements(primitive_mode, num_vertices, format, offset); @@ -675,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - buffer_cache.Acquire(); - current_cbuf = 0; - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; BindComputeTextures(kernel); - const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); - buffer_cache.Map(buffer_size); - - SetupComputeConstBuffers(kernel); - SetupComputeGlobalMemory(kernel); - - buffer_cache.Unmap(); + const auto& entries = kernel->GetEntries(); + buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); + buffer_cache.UnbindComputeStorageBuffers(); + u32 ssbo_index = 0; + for (const auto& buffer : entries.global_memory_entries) { + buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, + buffer.is_written); + ++ssbo_index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); const auto& launch_desc = kepler_compute.launch_description; glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); @@ -706,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, query_cache.Query(gpu_addr, type, timestamp); } +void 
RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, + u32 size) { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); +} + void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { @@ -714,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { return; } { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - buffer_cache.FlushRegion(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.DownloadMemory(addr, size); + } query_cache.FlushRegion(addr, size); } bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; if (!Settings::IsGPULevelHigh()) { - return buffer_cache.MustFlushRegion(addr, size); + return buffer_cache.IsRegionGpuModified(addr, size); } return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.MustFlushRegion(addr, size); + buffer_cache.IsRegionGpuModified(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { @@ -735,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { return; } { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.WriteMemory(addr, size); + } shader_cache.InvalidateRegion(addr, size); - buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); } @@ -748,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } + shader_cache.OnCPUWrite(addr, size); { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - shader_cache.OnCPUWrite(addr, size); - buffer_cache.OnCPUWrite(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.CachedWriteMemory(addr, size); + } } void RasterizerOpenGL::SyncGuestHost() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - buffer_cache.SyncGuestHost(); shader_cache.SyncGuestHost(); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.FlushCachedWrites(); + } } void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.UnmapMemory(addr, size); } - buffer_cache.OnCPUWrite(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.WriteMemory(addr, size); + } shader_cache.OnCPUWrite(addr, size); } @@ -802,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { } void RasterizerOpenGL::WaitForIdle() { - // Place a barrier on everything that is not framebuffer related. - // This is related to another flag that is not currently implemented. 
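Note: the FlushRegion/InvalidateRegion rewrites above guard each cache with std::scoped_lock on buffer_cache.mutex and texture_cache.mutex, and the fence manager earlier in this diff locks both at once. std::scoped_lock acquires multiple mutexes through a deadlock-avoidance algorithm, so call sites do not have to agree on a global lock order. A small self-contained illustration:

    #include <mutex>

    struct TextureCache {
        std::mutex mutex;
        void DownloadMemory() { /* ... */ }
    };

    struct BufferCache {
        std::mutex mutex;
        void DownloadMemory() { /* ... */ }
    };

    void FlushBoth(TextureCache& texture_cache, BufferCache& buffer_cache) {
        // Both mutexes are locked as one operation (std::lock under the hood),
        // so this is safe even against a thread that names them in the
        // opposite order.
        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.DownloadMemory();
        buffer_cache.DownloadMemory();
    }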
- glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | - GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | - GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | - GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | - GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT | - GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); + glMemoryBarrier(GL_ALL_BARRIER_BITS); } void RasterizerOpenGL::FragmentBarrier() { @@ -834,18 +663,21 @@ void RasterizerOpenGL::TickFrame() { num_queued_commands = 0; fence_manager.TickFrame(); - buffer_cache.TickFrame(); { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.TickFrame(); } + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.TickFrame(); + } } bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.BlitImage(dst, src, copy_config); return true; } @@ -857,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; if (!image_view) { return false; @@ -924,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te } } -void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { - static constexpr std::array PARAMETER_LUT{ - GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, - GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, - GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, - }; - MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& stages = maxwell3d.state.shader_stages; - const auto& shader_stage = stages[stage_index]; - const auto& entries = shader->GetEntries(); - const bool use_unified = entries.use_unified_uniforms; - const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; - - const auto base_bindings = device.GetBaseBindings(stage_index); - u32 binding = device.UseAssemblyShaders() ? 
0 : base_bindings.uniform_buffer; - for (const auto& entry : entries.const_buffers) { - const u32 index = entry.GetIndex(); - const auto& buffer = shader_stage.const_buffers[index]; - SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, - base_unified_offset + index * Maxwell::MaxConstBufferSize); - ++binding; - } - if (use_unified) { - const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + - entries.global_memory_entries.size()); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, - base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); - } -} - -void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { - MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& launch_desc = kepler_compute.launch_description; - const auto& entries = kernel->GetEntries(); - const bool use_unified = entries.use_unified_uniforms; - - u32 binding = 0; - for (const auto& entry : entries.const_buffers) { - const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; - const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); - Tegra::Engines::ConstBufferInfo buffer; - buffer.address = config.Address(); - buffer.size = config.size; - buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, - use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); - ++binding; - } - if (use_unified) { - const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, - NUM_CONST_BUFFERS_BYTES_PER_STAGE); - } -} - -void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, - const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry, bool use_unified, - std::size_t unified_offset) { - if (!buffer.enabled) { - // Set values to zero to unbind buffers - if (device.UseAssemblyShaders()) { - glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); - } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); - } - return; - } - - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 - // UBO alignment requirements. - const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); - - const bool fast_upload = !use_unified && device.HasFastBufferSubData(); - - const std::size_t alignment = use_unified ? 
4 : device.GetUniformBufferAlignment(); - const GPUVAddr gpu_addr = buffer.address; - auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); - - if (device.UseAssemblyShaders()) { - UNIMPLEMENTED_IF(use_unified); - if (info.offset != 0) { - const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); - info.handle = staging_cbuf; - info.offset = 0; - } - glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); - return; - } - - if (use_unified) { - glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, - unified_offset, size); - } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); - } -} - -void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { - static constexpr std::array TARGET_LUT = { - GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, - GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, - }; - const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; - const auto& entries{shader->GetEntries().global_memory_entries}; - - std::array<BindlessSSBO, 32> ssbos; - ASSERT(entries.size() < ssbos.size()); - - const bool assembly_shaders = device.UseAssemblyShaders(); - u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : entries) { - const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; - const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; - const u32 size{gpu_memory.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); - ++binding; - } - if (assembly_shaders) { - UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); - } -} - -void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { - const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; - const auto& entries{kernel->GetEntries().global_memory_entries}; - - std::array<BindlessSSBO, 32> ssbos; - ASSERT(entries.size() < ssbos.size()); - - u32 binding = 0; - for (const auto& entry : entries) { - const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; - const u32 size{gpu_memory.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); - ++binding; - } - if (device.UseAssemblyShaders()) { - UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); - } -} - -void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { - const size_t alignment{device.GetShaderStorageBufferAlignment()}; - const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - if (device.UseAssemblyShaders()) { - *ssbo = BindlessSSBO{ - .address = static_cast<GLuint64EXT>(info.address + info.offset), - .length = static_cast<GLsizei>(size), - .padding = 0, - }; - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, - static_cast<GLsizeiptr>(size)); - } -} - void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { const bool via_header_index = maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; @@ -1131,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { } } +void RasterizerOpenGL::SyncState() { + 
SyncViewport(); + SyncRasterizeEnable(); + SyncPolygonModes(); + SyncColorMask(); + SyncFragmentColorClampState(); + SyncMultiSampleState(); + SyncDepthTestState(); + SyncDepthClamp(); + SyncStencilTestState(); + SyncBlendState(); + SyncLogicOpState(); + SyncCullMode(); + SyncPrimitiveRestart(); + SyncScissorTest(); + SyncPointState(); + SyncLineState(); + SyncPolygonOffset(); + SyncAlphaTest(); + SyncFramebufferSRGB(); + SyncVertexFormats(); + SyncVertexInstances(); +} + void RasterizerOpenGL::SyncViewport() { auto& flags = maxwell3d.dirty.flags; const auto& regs = maxwell3d.regs; @@ -1166,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() { if (regs.screen_y_control.y_negate != 0) { flip_y = !flip_y; } - glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, - regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE - : GL_NEGATIVE_ONE_TO_ONE); + const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne; + const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; + const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE; + state_tracker.ClipControl(origin, depth); + state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); } if (dirty_viewport) { @@ -1191,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() { const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; const GLdouble far_depth = src.translate_z + src.scale_z; - glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + if (device.HasDepthBufferFloat()) { + glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth); + } else { + glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + } if (!GLAD_GL_NV_viewport_swizzle) { continue; @@ -1652,36 +1354,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { SyncTransformFeedback(); } - UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); - - for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { - const auto& binding = regs.tfb_bindings[index]; - if (!binding.buffer_enable) { - if (enabled_transform_feedback_buffers[index]) { - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, - 0); - } - enabled_transform_feedback_buffers[index] = false; - continue; - } - enabled_transform_feedback_buffers[index] = true; - - auto& tfb_buffer = transform_feedback_buffers[index]; - tfb_buffer.Create(); - - const GLuint handle = tfb_buffer.handle; - const std::size_t size = binding.buffer_size; - glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, - static_cast<GLsizeiptr>(size)); - } + UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); // We may have to call BeginTransformFeedbackNV here since they seem to call different // implementations on Nvidia's driver (the pointer is different) but we are using @@ -1695,23 +1374,7 @@ void RasterizerOpenGL::EndTransformFeedback() { if (regs.tfb_enabled == 0) { return; } - glEndTransformFeedback(); - - for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { - const auto& binding = regs.tfb_bindings[index]; - if 
(!binding.buffer_enable) { - continue; - } - UNIMPLEMENTED_IF(binding.buffer_offset != 0); - - const GLuint handle = transform_feedback_buffers[index].handle; - const GPUVAddr gpu_addr = binding.Address(); - const std::size_t size = binding.buffer_size; - const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, - static_cast<GLsizeiptr>(size)); - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 82e03e6773..3745cf6379 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -30,7 +30,6 @@ #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" @@ -72,6 +71,7 @@ public: void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; + void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; bool MustFlushRegion(VAddr addr, u64 size) override; @@ -119,27 +119,6 @@ private: void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, size_t& image_view_index, size_t& texture_index, size_t& image_index); - /// Configures the current constbuffers to use for the draw command. - void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); - - /// Configures the current constbuffers to use for the kernel invocation. - void SetupComputeConstBuffers(Shader* kernel); - - /// Configures a constant buffer. - void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const ConstBufferEntry& entry, bool use_unified, - std::size_t unified_offset); - - /// Configures the current global memory entries to use for the draw command. - void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); - - /// Configures the current global memory entries to use for the kernel invocation. - void SetupComputeGlobalMemory(Shader* kernel); - - /// Configures a global memory buffer. - void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - size_t size, BindlessSSBO* ssbo); - /// Configures the current textures to use for the draw command. void SetupDrawTextures(const Shader* shader, size_t stage_index); @@ -152,6 +131,9 @@ private: /// Configures images in a compute shader. 
void SetupComputeImages(const Shader* shader); + /// Syncs state to match guest's + void SyncState(); + /// Syncs the viewport and depth range to match the guest state void SyncViewport(); @@ -215,6 +197,12 @@ private: /// Syncs the framebuffer sRGB state to match the guest state void SyncFramebufferSRGB(); + /// Syncs vertex formats to match the guest state + void SyncVertexFormats(); + + /// Syncs vertex instances to match the guest state + void SyncVertexInstances(); + /// Syncs transform feedback state to match guest state /// @note Only valid on assembly shaders void SyncTransformFeedback(); @@ -225,19 +213,7 @@ private: /// End a transform feedback void EndTransformFeedback(); - std::size_t CalculateVertexArraysSize() const; - - std::size_t CalculateIndexBufferSize() const; - - /// Updates the current vertex format - void SetupVertexFormat(); - - void SetupVertexBuffer(); - void SetupVertexInstances(); - - GLintptr SetupIndexBuffer(); - - void SetupShaders(); + void SetupShaders(bool is_indexed); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -249,12 +225,12 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - OGLStreamBuffer stream_buffer; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; + BufferCacheRuntime buffer_cache_runtime; + BufferCache buffer_cache; ShaderCacheOpenGL shader_cache; QueryCache query_cache; - OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; VideoCommon::Shader::AsyncShaders async_shaders; @@ -262,20 +238,8 @@ private: boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; - std::array<GLuint, MAX_TEXTURES> texture_handles; - std::array<GLuint, MAX_IMAGES> image_handles; - - std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> - transform_feedback_buffers; - std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> - enabled_transform_feedback_buffers; - - static constexpr std::size_t NUM_CONSTANT_BUFFERS = - Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; - std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; - std::size_t current_cbuf = 0; - OGLBuffer unified_uniform_buffer; + std::array<GLuint, MAX_TEXTURES> texture_handles{}; + std::array<GLuint, MAX_IMAGES> image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. 
std::size_t num_queued_commands = 0; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0e34a0f203..3428e5e214 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -171,12 +171,6 @@ void OGLBuffer::Release() { handle = 0; } -void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { - ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); - - glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); -} - void OGLSync::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f483986697..552d79db47 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -234,9 +234,6 @@ public: /// Deletes the internal OpenGL resource void Release(); - // Converts the buffer into a stream copy buffer with a fixed size - void MakeStreamCopy(std::size_t buffer_size); - GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 529570ff0c..5cf7cd1512 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop const VideoCore::DiskResourceLoadCallback& callback) { disk_cache.BindTitleID(title_id); const std::optional transferable = disk_cache.LoadTransferable(); + + LOG_INFO(Render_OpenGL, "Total Shader Count: {}", + transferable.has_value() ? transferable->size() : 0); + if (!transferable) { return; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c35b71b6b6..ac78d344ca 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); -constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt +constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt #define ftou floatBitsToUint #define itof intBitsToFloat #define utof uintBitsToFloat @@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); - -layout (std140, binding = {}) uniform vs_config {{ - float y_direction; -}}; )"; class ShaderWriter final { @@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } -bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { - const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); - // We waste one UBO for emulation - const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; - return num_ubos > num_available_ubos; -} - struct GenericVaryingDescription { std::string name; u8 first_element = 0; @@ -420,9 +409,8 @@ public: explicit 
GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, ShaderType stage_, std::string_view identifier_, std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, - suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ - UseUnifiedUniforms(device_, ir_, stage_)} { + : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, + identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { if (stage != ShaderType::Compute) { transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); } @@ -516,7 +504,8 @@ private: if (!identifier.empty()) { code.AddLine("// {}", identifier); } - code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); + const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); + code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); if (device.HasShaderBallot()) { code.AddLine("#extension GL_ARB_shader_ballot : require"); @@ -542,7 +531,7 @@ private: code.AddNewLine(); - code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); + code.AddLine(COMMON_DECLARATIONS); } void DeclareVertex() { @@ -865,17 +854,6 @@ private: } void DeclareConstantBuffers() { - if (use_unified_uniforms) { - const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + - static_cast<u32>(ir.GetGlobalMemory().size()); - code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", - binding); - code.AddLine(" uint cbufs[];"); - code.AddLine("}};"); - code.AddNewLine(); - return; - } - u32 binding = device.GetBaseBindings(stage).uniform_buffer; for (const auto& [index, info] : ir.GetConstantBuffers()) { const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); @@ -1081,29 +1059,17 @@ private: if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); - const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - if (use_unified_uniforms) { - return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), - Type::Uint}; - } else { - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - } - - // Indirect access - if (use_unified_uniforms) { - return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, - Visit(offset).AsUint()), + return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4), Type::Uint}; } + // Indirect access const std::string final_offset = code.GenerateTemporary(); code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); @@ -2293,7 +2259,6 @@ private: } } } - if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. 
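The hunk below re-routes YNegate() from the removed vs_config uniform block to the fixed-function front material, which the compatibility profile exposes to shaders as gl_FrontMaterial.ambient.a. A minimal host-side sketch of the matching write (WriteYNegate is a hypothetical free function; in this change the real write lives in StateTracker::SetYNegate, shown later in gl_state_tracker.h):

#include <array>
#include <glad/glad.h>

// Sketch: store the Y-flip sign where a compatibility-profile shader can read
// it back as gl_FrontMaterial.ambient.a, avoiding a dedicated emulation UBO.
void WriteYNegate(bool y_negate) {
    const std::array<GLfloat, 4> ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f};
    glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data());
}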
@@ -2337,7 +2302,8 @@ private: } Expression YNegate(Operation operation) { - return {"y_direction", Type::Float}; + // Y_NEGATE is mapped to this uniform value + return {"gl_FrontMaterial.ambient.a", Type::Float}; } template <u32 element> @@ -2787,7 +2753,6 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; - const bool use_unified_uniforms; std::unordered_map<u8, VaryingTFB> transform_feedback; ShaderWriter code; @@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s for (std::size_t i = 0; i < std::size(clip_distances); ++i) { entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; } + for (const auto& buffer : entries.const_buffers) { + entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); + } entries.shader_length = ir.GetLength(); - entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); return entries; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index be68994bb2..0397a000c2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -55,7 +55,7 @@ struct ShaderEntries { std::vector<ImageEntry> images; std::size_t shader_length{}; u32 clip_distances{}; - bool use_unified_uniforms{}; + u32 enabled_uniform_buffers{}; }; ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 60e6fa39f9..dbdf5230fa 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) { FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); } -void SetupDirtyVertexArrays(Tables& tables) { - static constexpr std::size_t num_array = 3; +void SetupDirtyVertexInstances(Tables& tables) { static constexpr std::size_t instance_base_offset = 3; for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); - const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); - - FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); - FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); - const std::size_t instance_array_offset = array_offset + instance_base_offset; tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); tables[1][instance_array_offset] = VertexInstances; @@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) { StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { auto& dirty = gpu.Maxwell3D().dirty; auto& tables = dirty.tables; - SetupDirtyRenderTargets(tables); + SetupDirtyFlags(tables); SetupDirtyColorMasks(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); - SetupDirtyVertexArrays(tables); + SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); @@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyClipControl(tables); SetupDirtyDepthClampEnabled(tables); SetupDirtyMisc(tables); - - auto& store = dirty.on_write_stores; - store[VertexBuffers] = true; - for (std::size_t i = 0; i < 
Regs::NumVertexArrays; ++i) { - store[VertexBuffer0 + i] = true; - } -} - -void StateTracker::InvalidateStreamBuffer() { - flags[Dirty::VertexBuffers] = true; - for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { - flags[index] = true; - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 574615d3c4..94c9051160 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -28,10 +28,6 @@ enum : u8 { VertexFormat0, VertexFormat31 = VertexFormat0 + 31, - VertexBuffers, - VertexBuffer0, - VertexBuffer31 = VertexBuffer0 + 31, - VertexInstances, VertexInstance0, VertexInstance31 = VertexInstance0 + 31, @@ -92,8 +88,6 @@ class StateTracker { public: explicit StateTracker(Tegra::GPU& gpu); - void InvalidateStreamBuffer(); - void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { return; @@ -110,13 +104,32 @@ public: glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); } + void ClipControl(GLenum new_origin, GLenum new_depth) { + if (new_origin == origin && new_depth == depth) { + return; + } + origin = new_origin; + depth = new_depth; + glClipControl(origin, depth); + } + + void SetYNegate(bool new_y_negate) { + if (new_y_negate == y_negate) { + return; + } + // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a + y_negate = new_y_negate; + const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f}; + glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data()); + } + void NotifyScreenDrawVertexArray() { flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; flags[OpenGL::Dirty::VertexFormat0 + 1] = true; - flags[OpenGL::Dirty::VertexBuffers] = true; - flags[OpenGL::Dirty::VertexBuffer0] = true; + flags[VideoCommon::Dirty::VertexBuffers] = true; + flags[VideoCommon::Dirty::VertexBuffer0] = true; flags[OpenGL::Dirty::VertexInstances] = true; flags[OpenGL::Dirty::VertexInstance0 + 0] = true; @@ -202,6 +215,9 @@ private: GLuint framebuffer = 0; GLuint index_buffer = 0; + GLenum origin = GL_LOWER_LEFT; + GLenum depth = GL_NEGATIVE_ONE_TO_ONE; + bool y_negate = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index e0819cdf27..77b3ee0fe6 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -1,70 +1,64 @@ -// Copyright 2018 Citra Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <tuple> -#include <vector> +#include <array> +#include <memory> +#include <span> + +#include <glad/glad.h> #include "common/alignment.h" #include "common/assert.h" -#include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", - MP_RGB(128, 128, 192)); - namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) - : state_tracker{state_tracker_} { - gl_buffer.Create(); - - static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; - glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); - mapped_ptr = static_cast<u8*>( - glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); - - if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { - glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); +StreamBuffer::StreamBuffer() { + static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + buffer.Create(); + glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); + glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); + mapped_pointer = + static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); + for (OGLSync& sync : fences) { + sync.Create(); } } -OGLStreamBuffer::~OGLStreamBuffer() { - glUnmapNamedBuffer(gl_buffer.handle); - gl_buffer.Release(); -} - -std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= BUFFER_SIZE); - ASSERT(alignment <= BUFFER_SIZE); - mapped_size = size; - - if (alignment > 0) { - buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); +std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { + ASSERT(size < REGION_SIZE); + for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; + ++region) { + fences[region].Create(); } + used_iterator = iterator; - if (buffer_pos + size > BUFFER_SIZE) { - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - glInvalidateBufferData(gl_buffer.handle); - state_tracker.InvalidateStreamBuffer(); - - buffer_pos = 0; + for (size_t region = Region(free_iterator) + 1, + region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region < region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); } - - return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); -} - -void OGLStreamBuffer::Unmap(GLsizeiptr size) { - ASSERT(size <= mapped_size); - - if (size > 0) { - glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); + if (iterator + size >= free_iterator) { + free_iterator = iterator + size; } - - buffer_pos += size; + if (iterator + size > STREAM_BUFFER_SIZE) { + for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { + fences[region].Create(); + } + used_iterator = 0; + iterator = 0; + free_iterator = size; + + for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { + glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); + fences[region].Release(); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return 
{std::span(mapped_pointer + offset, size), offset}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index dd9cf67ebb..6dbb6bfba0 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -1,9 +1,12 @@ -// Copyright 2018 Citra Emulator Project +// Copyright 2021 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once +#include <array> +#include <memory> +#include <span> #include <utility> #include <glad/glad.h> @@ -13,48 +16,35 @@ namespace OpenGL { -class Device; -class StateTracker; +class StreamBuffer { + static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024; + static constexpr size_t NUM_SYNCS = 16; + static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; + static constexpr size_t MAX_ALIGNMENT = 256; + static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0); + static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0); + static_assert(REGION_SIZE % MAX_ALIGNMENT == 0); -class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); - ~OGLStreamBuffer(); - - /* - * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes - * and the optional alignment requirement. - * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, and the offset within the buffer. - * The actual used size must be specified on unmapping the chunk. - */ - std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); - - void Unmap(GLsizeiptr size); - - GLuint Handle() const { - return gl_buffer.handle; - } + explicit StreamBuffer(); - u64 Address() const { - return gpu_address; - } + [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept; - GLsizeiptr Size() const noexcept { - return BUFFER_SIZE; + [[nodiscard]] GLuint Handle() const noexcept { + return buffer.handle; } private: - static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; - - StateTracker& state_tracker; - - OGLBuffer gl_buffer; + [[nodiscard]] static size_t Region(size_t offset) noexcept { + return offset / REGION_SIZE; + } - GLuint64EXT gpu_address = 0; - GLintptr buffer_pos = 0; - GLsizeiptr mapped_size = 0; - u8* mapped_ptr = nullptr; + size_t iterator = 0; + size_t used_iterator = 0; + size_t free_iterator = 0; + u8* mapped_pointer = nullptr; + OGLBuffer buffer; + std::array<OGLSync, NUM_SYNCS> fences; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 546cb6d002..12434db67f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } // Anonymous namespace -ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) - : span(map, size), sync{sync_}, handle{handle_} {} - ImageBufferMap::~ImageBufferMap() { if (sync) { sync->Create(); @@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() { glFinish(); } -ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { +ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { return upload_buffers.RequestMap(size, true); } -ImageBufferMap 
TextureCacheRuntime::MapDownloadBuffer(size_t size) { +ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return download_buffers.RequestMap(size, false); } @@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, } void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, - size_t buffer_offset, std::span<const SwizzleParameters> swizzles) { switch (image.info.type) { case ImageType::e2D: - return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload2D(image, map, swizzles); case ImageType::e3D: - return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); + return util_shaders.BlockLinearUpload3D(image, map, swizzles); case ImageType::Linear: - return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); + return util_shaders.PitchUpload(image, map, swizzles); default: UNREACHABLE(); break; @@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_ bool insert_fence) { const size_t index = RequestBuffer(requested_size); OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; - return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); + return ImageBufferMap{ + .mapped_span = std::span(maps[index], requested_size), + .sync = sync, + .buffer = buffers[index].handle, + }; } size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { @@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); - glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); @@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); } - CopyBufferToImage(copy, buffer_offset); + CopyBufferToImage(copy, map.offset); } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies) { for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, + glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, copy.dst_offset, copy.size); } } -void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, +void Image::DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API - glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); + glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); glPixelStorei(GL_PACK_ALIGNMENT, 1); u32 current_row_length = std::numeric_limits<u32>::max(); @@ -759,7 +759,38 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, current_image_height = copy.buffer_image_height; glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); } - CopyImageToBuffer(copy, 
buffer_offset); + CopyImageToBuffer(copy, map.offset); + } +} + +GLuint Image::StorageHandle() noexcept { + switch (info.format) { + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_SRGB: + if (store_view.handle != 0) { + return store_view.handle; + } + store_view.Create(); + glTextureView(store_view.handle, ImageTarget(info), texture.handle, GL_RGBA8, 0, + info.resources.levels, 0, info.resources.layers); + return store_view.handle; + default: + return texture.handle; } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 15b7c36766..a6172f009e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -31,23 +31,13 @@ using VideoCommon::NUM_RT; using VideoCommon::Offset2D; using VideoCommon::RenderTargets; -class ImageBufferMap { -public: - explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); +struct ImageBufferMap { ~ImageBufferMap(); - GLuint Handle() const noexcept { - return handle; - } - - std::span<u8> Span() const noexcept { - return span; - } - -private: - std::span<u8> span; + std::span<u8> mapped_span; + size_t offset = 0; OGLSync* sync; - GLuint handle; + GLuint buffer; }; struct FormatProperties { @@ -69,9 +59,9 @@ public: void Finish(); - ImageBufferMap MapUploadBuffer(size_t size); + ImageBufferMap UploadStagingBuffer(size_t size); - ImageBufferMap MapDownloadBuffer(size_t size); + ImageBufferMap DownloadStagingBuffer(size_t size); void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); @@ -89,7 +79,7 @@ public: Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Operation operation); - void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void AccelerateImageUpload(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void InsertUploadMemoryBarrier(); @@ -148,14 +138,14 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span<const VideoCommon::BufferCopy> copies); + void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); + + void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); - void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, - std::span<const VideoCommon::BufferImageCopy> copies); + GLuint StorageHandle() noexcept; GLuint Handle() const noexcept { return texture.handle; @@ -167,8 +157,8 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLTextureView store_view; 
OGLBuffer buffer; + OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; GLenum gl_type = GL_NONE; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index cbccfdeb45..f7ad8f3708 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -4,23 +4,10 @@ #pragma once -#include <array> #include <glad/glad.h> -#include "common/common_types.h" -#include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" -namespace OpenGL { - -using GLvec2 = std::array<GLfloat, 2>; -using GLvec3 = std::array<GLfloat, 3>; -using GLvec4 = std::array<GLfloat, 4>; - -using GLuvec2 = std::array<GLuint, 2>; -using GLuvec3 = std::array<GLuint, 3>; -using GLuvec4 = std::array<GLuint, 4>; - -namespace MaxwellToGL { +namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { return GL_ZERO; } -inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { - switch (source) { - case Tegra::Texture::SwizzleSource::Zero: - return GL_ZERO; - case Tegra::Texture::SwizzleSource::R: - return GL_RED; - case Tegra::Texture::SwizzleSource::G: - return GL_GREEN; - case Tegra::Texture::SwizzleSource::B: - return GL_BLUE; - case Tegra::Texture::SwizzleSource::A: - return GL_ALPHA; - case Tegra::Texture::SwizzleSource::OneInt: - case Tegra::Texture::SwizzleSource::OneFloat: - return GL_ONE; - } - UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source); - return GL_ZERO; -} - inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); } -} // namespace MaxwellToGL -} // namespace OpenGL +} // namespace OpenGL::MaxwellToGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 21159e4981..9d2acd4d9b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -29,9 +29,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - constexpr GLint PositionLocation = 0; constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; @@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit break; } } - } // Anonymous namespace RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, @@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, - emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} + emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, + program_manager{device}, + rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { + if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { + glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + glDebugMessageCallback(DebugHandler, nullptr); + } + AddTelemetryFields(); + InitOpenGLObjects(); +} 
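With Init() removed, the constructor above now performs debug-output setup directly. For reference, a minimal standalone sketch of synchronous KHR_debug registration under the same guard (PrintGLMessage is an illustrative callback, not the project's DebugHandler):

#include <cstdio>
#include <glad/glad.h>

// Illustrative callback; the real DebugHandler filters by severity and logs
// through the project's logging framework instead of stderr.
static void APIENTRY PrintGLMessage(GLenum, GLenum, GLuint id, GLenum severity,
                                    GLsizei, const GLchar* message, const void*) {
    std::fprintf(stderr, "GL[%u] severity=0x%x: %s\n", id, severity, message);
}

void EnableDebugOutput() {
    if (!GLAD_GL_KHR_debug) {
        return; // extension unsupported; nothing to enable
    }
    glEnable(GL_DEBUG_OUTPUT);
    // Synchronous delivery keeps the offending GL call on the current stack,
    // which makes driver messages far easier to attribute in a debugger.
    glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
    glDebugMessageCallback(PrintGLMessage, nullptr);
}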
RendererOpenGL::~RendererOpenGL() = default; @@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { ++m_current_frame; - rasterizer->TickFrame(); + rasterizer.TickFrame(); context->SwapBuffers(); render_window.OnFrameDisplayed(); @@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf framebuffer_crop_rect = framebuffer.crop_rect; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { + if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { return; } @@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() { // Enable unified vertex attributes and query vertex buffer address when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, @@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::CreateRasterizer() { - if (rasterizer) { - return; - } - rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device, - screen_info, program_manager, state_tracker); -} - void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { texture.width = framebuffer.width; @@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { program_manager.BindHostPipeline(pipeline.handle); + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { glEnable(GL_FRAMEBUFFER_SRGB); @@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glCullFace(GL_BACK); glFrontFace(GL_CW); glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), static_cast<GLfloat>(layout.height)); glDepthRangeIndexed(0, 0.0, 0.0); @@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() { renderer_settings.screenshot_requested = false; } -bool RendererOpenGL::Init() { - if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { - glEnable(GL_DEBUG_OUTPUT); - glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); - glDebugMessageCallback(DebugHandler, nullptr); - } - - AddTelemetryFields(); - - if (!GLAD_GL_VERSION_4_6) { - return false; - } - - InitOpenGLObjects(); - CreateRasterizer(); - - return true; -} - -void RendererOpenGL::ShutDown() {} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 44e109794d..cc19a110f1 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,6 +10,7 @@ #include "common/math_util.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -63,18 +64,18 @@ public: 
std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererOpenGL() override; - bool Init() override; - void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + VideoCore::RasterizerInterface* ReadRasterizer() override { + return &rasterizer; + } + private: /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); void AddTelemetryFields(); - void CreateRasterizer(); - void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); @@ -98,8 +99,10 @@ private: Core::Memory::Memory& cpu_memory; Tegra::GPU& gpu; - const Device device; - StateTracker state_tracker{gpu}; + Device device; + StateTracker state_tracker; + ProgramManager program_manager; + RasterizerOpenGL rasterizer; // OpenGL object IDs OGLSampler present_sampler; @@ -115,9 +118,6 @@ private: /// Display information for Switch screen ScreenInfo screen_info; - /// Global dummy shader pipeline - ProgramManager program_manager; - /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index eb849cbf2d..31ec68505d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) UtilShaders::~UtilShaders() = default; -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); - glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -91,16 +91,16 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s glUniform1ui(5, params.x_shift); glUniform1ui(6, params.block_height); glUniform1ui(7, params.block_height_mask); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), - input_offset, image.guest_size_bytes - swizzle.buffer_offset); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, + image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, 
image.info.resources.layers); } program_manager.RestoreGuestCompute(); } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); @@ -132,16 +132,16 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s glUniform1ui(7, params.block_height_mask); glUniform1ui(8, params.block_depth); glUniform1ui(9, params.block_depth_mask); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), - input_offset, image.guest_size_bytes - swizzle.buffer_offset); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, + image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); } program_manager.RestoreGuestCompute(); } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, +void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, std::span<const SwizzleParameters> swizzles) { static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr GLuint BINDING_INPUT_BUFFER = 0; @@ -159,21 +159,22 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu "Non-power of two images are not implemented"); program_manager.BindHostCompute(pitch_unswizzle_program.handle); - glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); + glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); glUniform1ui(LOC_PITCH, pitch); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, + format); for (const SwizzleParameters& swizzle : swizzles) { const Extent3D num_tiles = swizzle.num_tiles; - const size_t input_offset = swizzle.buffer_offset + buffer_offset; + const size_t input_offset = swizzle.buffer_offset + map.offset; const u32 num_dispatches_x = 
Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), - input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, + image.guest_size_bytes - swizzle.buffer_offset); glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); } program_manager.RestoreGuestCompute(); @@ -195,9 +196,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); - glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level, - GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); - glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(), + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), + copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); } diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 3599972556..7b1d16b094 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -15,21 +15,22 @@ namespace OpenGL { class Image; -class ImageBufferMap; class ProgramManager; +struct ImageBufferMap; + class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); ~UtilShaders(); - void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); - void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, + void PitchUpload(Image& image, const ImageBufferMap& map, std::span<const VideoCommon::SwizzleParameters> swizzles); void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5be6dabd92..362278f015 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -12,14 +12,15 @@ #include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { namespace { -constexpr std::size_t POINT = 0; -constexpr std::size_t LINE = 1; -constexpr std::size_t POLYGON = 2; +constexpr size_t POINT = 0; +constexpr size_t LINE = 1; +constexpr size_t POLYGON = 2; constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POINT, // Points LINE, // Lines @@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace -void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const std::array enabled_lut = 
{regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable}; +void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, + bool has_extended_dynamic_state) { + const Maxwell& regs = maxwell3d.regs; + const std::array enabled_lut{ + regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable, + }; const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); raw1 = 0; @@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta raw2 = 0; const auto test_func = - regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; + regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - binding_divisors[index] = - regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { + maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + } } - - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast<u32>(input.type.Value())); - attribute.size.Assign(static_cast<u32>(input.size.Value())); - attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { + maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 
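// Refresh consumes a dirty flag before repacking each register group, so an
// unchanged group keeps its previously packed bits across pipeline lookups. A
// reduced sketch of the pattern with hypothetical flag names (the real flags
// live in the Vulkan state tracker):
#include <bitset>
#include <cstddef>

struct DirtyFlagSketch {
    enum : std::size_t { InstanceDivisors, VertexAttributes, Count };
    std::bitset<Count> flags;

    template <typename Repack>
    void RefreshIfDirty(std::size_t flag, Repack&& repack) {
        if (!flags.test(flag)) {
            return; // Packed state from the previous draw is still valid.
        }
        flags.reset(flag); // Consume the flag before rebuilding.
        repack();
    }
};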
0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } } - - for (std::size_t index = 0; index < std::size(attachments); ++index) { - attachments[index].Fill(regs, index); + if (maxwell3d.dirty.flags[Dirty::Blending]) { + maxwell3d.dirty.flags[Dirty::Blending] = false; + for (size_t index = 0; index < attachments.size(); ++index) { + attachments[index].Refresh(regs, index); + } + } + if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) { + maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false; + const auto& transform = regs.viewport_transform; + std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) { + return static_cast<u16>(viewport.swizzle.raw); + }); } - - const auto& transform = regs.viewport_transform; - std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(), - [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); }); - if (!has_extended_dynamic_state) { no_extended_dynamic_state.Assign(1); - dynamic_state.Fill(regs); + dynamic_state.Refresh(regs); } } -void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { +void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; raw = 0; @@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { +void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face @@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { }); } -std::size_t FixedPipelineState::Hash() const noexcept { +size_t FixedPipelineState::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); - return static_cast<std::size_t>(hash); + return static_cast<size_t>(hash); } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 465a55fdba..a0eb83a68d 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -58,7 +58,7 @@ struct FixedPipelineState { BitField<30, 1, u32> enable; }; - void Fill(const Maxwell& regs, std::size_t index); + void Refresh(const Maxwell& regs, size_t index); constexpr std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; @@ -96,8 +96,6 @@ struct FixedPipelineState { BitField<6, 14, u32> offset; BitField<20, 3, u32> type; BitField<23, 6, u32> size; - // Not really an element of a vertex attribute, but it can be packed here - BitField<29, 1, u32> binding_index_enabled; constexpr Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); @@ -108,7 +106,7 @@ struct FixedPipelineState { } }; - template <std::size_t Position> + template <size_t Position> union StencilFace { BitField<Position + 0, 3, u32> action_stencil_fail; BitField<Position + 3, 3, u32> action_depth_fail; @@ -152,7 +150,7 @@ struct FixedPipelineState 
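// Hash() runs CityHash64 over the raw bytes of the packed struct and
// operator== compares those same bytes, which is only sound because the state
// is trivially copyable and every bit (including spare bitfield space) is
// written deterministically. A sketch of the invariant, substituting
// std::hash for Common::CityHash64:
#include <cstddef>
#include <cstring>
#include <functional>
#include <string_view>
#include <type_traits>

struct PackedStateSketch {
    unsigned raw1 = 0; // Bitfields packed into whole words; no stray padding bits.
    unsigned raw2 = 0;

    std::size_t Hash() const noexcept {
        static_assert(std::is_trivially_copyable_v<PackedStateSketch>);
        const std::string_view bytes(reinterpret_cast<const char*>(this), sizeof *this);
        return std::hash<std::string_view>{}(bytes);
    }
    bool operator==(const PackedStateSketch& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};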
{ // Vertex stride is a 12 bits value, we have 4 bits to spare per element std::array<u16, Maxwell::NumVertexArrays> vertex_strides; - void Fill(const Maxwell& regs); + void Refresh(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); @@ -199,9 +197,9 @@ struct FixedPipelineState { std::array<u16, Maxwell::NumViewports> viewport_swizzles; DynamicState dynamic_state; - void Fill(const Maxwell& regs, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept; @@ -209,8 +207,8 @@ struct FixedPipelineState { return !operator==(rhs); } - std::size_t Size() const noexcept { - const std::size_t total_size = sizeof *this; + size_t Size() const noexcept { + const size_t total_size = sizeof *this; return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); } }; @@ -224,7 +222,7 @@ namespace std { template <> struct hash<Vulkan::FixedPipelineState> { - std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { + size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 85121d9fdd..19aaf034fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { return {}; } -VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { +VkIndexType IndexFormat(Maxwell::IndexFormat index_format) { switch (index_format) { case Maxwell::IndexFormat::UnsignedByte: - if (!device.IsExtIndexTypeUint8Supported()) { - UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); - return VK_INDEX_TYPE_UINT16; - } return VK_INDEX_TYPE_UINT8_EXT; case Maxwell::IndexFormat::UnsignedShort: return VK_INDEX_TYPE_UINT16; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7c34b47dc1..e3e06ba38a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); -VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); +VkIndexType IndexFormat(Maxwell::IndexFormat index_format); VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 61796e33a1..1cc720ddd9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext return separated_extensions; } +Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, + VkSurfaceKHR surface) { + const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); + const s32 device_index = Settings::values.vulkan_device.GetValue(); + if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { + LOG_ERROR(Render_Vulkan, "Invalid device index {}!", 
device_index); + throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); + } + const vk::PhysicalDevice physical_device(devices[device_index], dld); + return Device(*instance, physical_device, surface, dld); +} } // Anonymous namespace RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, - std::unique_ptr<Core::Frontend::GraphicsContext> context_) - : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, - cpu_memory{cpu_memory_}, gpu{gpu_} {} + std::unique_ptr<Core::Frontend::GraphicsContext> context_) try + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), + instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + true, Settings::values.renderer_debug)), + debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), + surface(CreateSurface(instance, render_window)), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), + swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, + render_window.GetFramebufferLayout().height, false), + blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, + screen_info), + rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, + memory_allocator, state_tracker, scheduler) { + Report(); +} catch (const vk::Exception& exception) { + LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); + throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; +} RendererVulkan::~RendererVulkan() { - ShutDown(); + void(device.GetLogical().WaitIdle()); } void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; const bool use_accelerated = - rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { - swapchain->Create(layout.width, layout.height, is_srgb); - blit_screen->Recreate(); + if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { + swapchain.Create(layout.width, layout.height, is_srgb); + blit_screen.Recreate(); } - scheduler->WaitWorker(); + scheduler.WaitWorker(); - swapchain->AcquireNextImage(); - const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); + swapchain.AcquireNextImage(); + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - scheduler->Flush(render_semaphore); + scheduler.Flush(render_semaphore); - if (swapchain->Present(render_semaphore)) { - blit_screen->Recreate(); + if (swapchain.Present(render_semaphore)) { + blit_screen.Recreate(); } - - rasterizer->TickFrame(); + rasterizer.TickFrame(); } render_window.OnFrameDisplayed(); } -bool RendererVulkan::Init() try { - library = 
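// The constructor above is a function-try-block: if any member initializer
// (instance, device, swapchain, ...) throws, the catch clause runs after the
// already-built members are destroyed, and the exception is translated into a
// std::runtime_error for the frontend. A minimal sketch of the construct with
// hypothetical Widget/Owner names:
#include <stdexcept>
#include <string>

struct Widget {
    explicit Widget(int id) {
        if (id < 0) {
            throw std::runtime_error{"bad id"};
        }
    }
};

class Owner {
public:
    explicit Owner(int id) try : widget(id) {
        // Reached only when every member initializer succeeded.
    } catch (const std::exception& e) {
        // A constructor's catch block cannot swallow the exception; translate
        // it here (falling off the end would rethrow the original).
        throw std::runtime_error{std::string{"Owner init failed: "} + e.what()};
    }

private:
    Widget widget;
};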
OpenLibrary(); - instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, - true, Settings::values.renderer_debug); - if (Settings::values.renderer_debug) { - debug_callback = CreateDebugCallback(instance); - } - surface = CreateSurface(instance, render_window); - - InitializeDevice(); - Report(); - - memory_allocator = std::make_unique<MemoryAllocator>(*device); - - state_tracker = std::make_unique<StateTracker>(gpu); - - scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); - - const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); - swapchain->Create(framebuffer.width, framebuffer.height, false); - - rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), - cpu_memory, screen_info, *device, - *memory_allocator, *state_tracker, *scheduler); - - blit_screen = - std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, - *memory_allocator, *swapchain, *scheduler, screen_info); - return true; - -} catch (const vk::Exception& exception) { - LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); - return false; -} - -void RendererVulkan::ShutDown() { - if (!device) { - return; - } - if (const auto& dev = device->GetLogical()) { - dev.WaitIdle(); - } - rasterizer.reset(); - blit_screen.reset(); - scheduler.reset(); - swapchain.reset(); - memory_allocator.reset(); - device.reset(); -} - -void RendererVulkan::InitializeDevice() { - const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); - const s32 device_index = Settings::values.vulkan_device.GetValue(); - if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { - LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); - throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); - } - const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); - device = std::make_unique<Device>(*instance, physical_device, *surface, dld); -} - void RendererVulkan::Report() const { - const std::string vendor_name{device->GetVendorName()}; - const std::string model_name{device->GetModelName()}; - const std::string driver_version = GetDriverVersion(*device); + const std::string vendor_name{device.GetVendorName()}; + const std::string model_name{device.GetModelName()}; + const std::string driver_version = GetDriverVersion(device); const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); - const std::string api_version = GetReadableVersion(device->ApiVersion()); + const std::string api_version = GetReadableVersion(device.ApiVersion()); - const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); + const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions()); LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); LOG_INFO(Render_Vulkan, "Device: {}", model_name); @@ -209,21 +179,4 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -std::vector<std::string> RendererVulkan::EnumerateDevices() try { - vk::InstanceDispatch dld; - const Common::DynamicLibrary library = OpenLibrary(); - const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); - const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); - std::vector<std::string> names; - 
names.reserve(physical_devices.size()); - for (const VkPhysicalDevice device : physical_devices) { - names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); - } - return names; - -} catch (const vk::Exception& exception) { - LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); - return {}; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index daf55b9b46..72071316c9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -9,8 +9,14 @@ #include <vector> #include "common/dynamic_library.h" - #include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { @@ -27,20 +33,6 @@ class GPU; namespace Vulkan { -class Device; -class StateTracker; -class MemoryAllocator; -class VKBlitScreen; -class VKSwapchain; -class VKScheduler; - -struct VKScreenInfo { - VkImageView image_view{}; - u32 width{}; - u32 height{}; - bool is_srgb{}; -}; - class RendererVulkan final : public VideoCore::RendererBase { public: explicit RendererVulkan(Core::TelemetrySession& telemtry_session, @@ -49,15 +41,13 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererVulkan() override; - bool Init() override; - void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - static std::vector<std::string> EnumerateDevices(); + VideoCore::RasterizerInterface* ReadRasterizer() override { + return &rasterizer; + } private: - void InitializeDevice(); - void Report() const; Core::TelemetrySession& telemetry_session; @@ -68,18 +58,18 @@ private: vk::InstanceDispatch dld; vk::Instance instance; - + vk::DebugUtilsMessenger debug_callback; vk::SurfaceKHR surface; VKScreenInfo screen_info; - vk::DebugUtilsMessenger debug_callback; - std::unique_ptr<Device> device; - std::unique_ptr<MemoryAllocator> memory_allocator; - std::unique_ptr<StateTracker> state_tracker; - std::unique_ptr<VKScheduler> scheduler; - std::unique_ptr<VKSwapchain> swapchain; - std::unique_ptr<VKBlitScreen> blit_screen; + Device device; + MemoryAllocator memory_allocator; + StateTracker state_tracker; + VKScheduler scheduler; + VKSwapchain swapchain; + VKBlitScreen blit_screen; + RasterizerVulkan rasterizer; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 3e3b895e0c..a1a32aabef 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -18,7 +18,6 @@ #include "video_core/gpu.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" -#include "video_core/rasterizer_interface.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" @@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData { }; 
VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, - Core::Frontend::EmuWindow& render_window_, - VideoCore::RasterizerInterface& rasterizer_, const Device& device_, + Core::Frontend::EmuWindow& render_window_, const Device& device_, MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, VKScheduler& scheduler_, const VKScreenInfo& screen_info_) - : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, - device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, - scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { + : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, + memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, + image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { resource_ticks.resize(image_count); CreateStaticResources(); @@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool SetUniformData(data, framebuffer); SetVertexData(data, framebuffer); - const std::span<u8> map = buffer_commit.Map(); - std::memcpy(map.data(), &data, sizeof(data)); + const std::span<u8> mapped_span = buffer_commit.Map(); + std::memcpy(mapped_span.data(), &data, sizeof(data)); if (!use_accelerated) { const u64 image_offset = GetRawImageOffset(framebuffer, image_index); @@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); const size_t size_bytes = GetSizeInBytes(framebuffer); - rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); Tegra::Texture::UnswizzleTexture( - map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, - framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); const VkBufferImageCopy copy{ .bufferOffset = image_offset, @@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); - return *semaphores[image_index]; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index b52576957b..5e31776857 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -38,12 +38,18 @@ class RasterizerVulkan; class VKScheduler; class VKSwapchain; -class VKBlitScreen final { +struct VKScreenInfo { + VkImageView image_view{}; + u32 width{}; + u32 height{}; + bool is_srgb{}; +}; + +class VKBlitScreen { public: explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, - Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const Device& device, - MemoryAllocator& memory_allocator, VKSwapchain& swapchain, + Core::Frontend::EmuWindow& render_window, const Device& device, + MemoryAllocator& memory_manager, VKSwapchain& swapchain, VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); @@ -84,7 +90,6 @@ private: Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; - 
VideoCore::RasterizerInterface& rasterizer; const Device& device; MemoryAllocator& memory_allocator; VKSwapchain& swapchain; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d8ad40a0f6..668633e7be 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -3,188 +3,304 @@ // Refer to the license.txt file included. #include <algorithm> +#include <array> #include <cstring> -#include <memory> +#include <span> +#include <vector> -#include "core/core.h" #include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - namespace { +VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) { + return VkBufferCopy{ + .srcOffset = copy.src_offset, + .dstOffset = copy.dst_offset, + .size = copy.size, + }; +} -constexpr VkBufferUsageFlags BUFFER_USAGE = - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - -constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = - VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - -constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; +VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) { + if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) { + return VK_INDEX_TYPE_UINT8_EXT; + } + if (num_elements <= 0xffff) { + return VK_INDEX_TYPE_UINT16; + } + return VK_INDEX_TYPE_UINT32; +} -constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = - VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; +size_t BytesPerIndex(VkIndexType index_type) { + switch (index_type) { + case VK_INDEX_TYPE_UINT8_EXT: + return 1; + case VK_INDEX_TYPE_UINT16: + return 2; + case VK_INDEX_TYPE_UINT32: + return 4; + default: + UNREACHABLE_MSG("Invalid index type={}", index_type); + return 1; + } +} +template <typename T> +std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { + std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; + std::ranges::transform(indices, indices.begin(), + [quad, first](u32 index) { return first + index + quad * 4; }); + return indices; +} } // Anonymous namespace -Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, - StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) - : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ - staging_pool_} { - buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ +Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} + 
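// MakeQuadIndices above expands quad n into the two triangles (0,1,2) and
// (0,2,3), shifted by 4 vertices per quad plus the draw's 'first' offset. A
// standalone copy of the helper with a usage check:
#include <algorithm>
#include <array>
#include <cstdint>

template <typename T>
constexpr std::array<T, 6> MakeQuadIndicesSketch(std::uint32_t quad, std::uint32_t first) {
    std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
    std::ranges::transform(indices, indices.begin(), [quad, first](T index) {
        return static_cast<T>(first + index + quad * 4);
    });
    return indices;
}
// Quad #1 with first == 0 draws vertices 4..7 as triangles (4,5,6) and (4,6,7):
static_assert(MakeQuadIndicesSketch<std::uint16_t>(1, 0) ==
              std::array<std::uint16_t, 6>{4, 5, 6, 4, 6, 7});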
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, + VAddr cpu_addr_, u64 size_bytes_) + : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { + buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = static_cast<VkDeviceSize>(size_), - .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .size = SizeBytes(), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }); - commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); + if (runtime.device.HasDebuggingToolAttached()) { + buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); + } + commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); } -Buffer::~Buffer() = default; +BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, + VKScheduler& scheduler_, StagingBufferPool& staging_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKDescriptorPool& descriptor_pool) + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, + staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, + uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} -void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { - const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); - std::memcpy(staging.mapped_span.data(), data, data_size); +StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_pool.Request(size, MemoryUsage::Upload); +} - scheduler.RequestOutsideRenderPassOperationContext(); +StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_pool.Request(size, MemoryUsage::Download); +} - const VkBuffer handle = Handle(); - scheduler.Record([staging = staging.buffer, handle, offset, data_size, - &device = device](vk::CommandBuffer cmdbuf) { - const VkBufferMemoryBarrier read_barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = - VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | - VK_ACCESS_HOST_WRITE_BIT | - (device.IsExtTransformFeedbackSupported() ? 
                                 TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
-            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = handle,
-            .offset = offset,
-            .size = data_size,
-        };
-        const VkBufferMemoryBarrier write_barrier{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = handle,
-            .offset = offset,
-            .size = data_size,
-        };
+void BufferCacheRuntime::Finish() {
+    scheduler.Finish();
+}
+
+void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies) {
+    static constexpr VkMemoryBarrier READ_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+    };
+    static constexpr VkMemoryBarrier WRITE_BARRIER{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+        .pNext = nullptr,
+        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+    };
+    // Measured in a popular game, the number of copies never exceeds this inline capacity
+    // once data is warmed up
+    boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size());
+    std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, read_barrier);
-        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
-                               write_barrier);
+                               0, READ_BARRIER);
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, WRITE_BARRIER);
     });
 }
 
-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
-    scheduler.RequestOutsideRenderPassOperationContext();
+void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
+                                         u32 num_indices, u32 base_vertex, VkBuffer buffer,
+                                         u32 offset, [[maybe_unused]] u32 size) {
+    VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
+    VkDeviceSize vk_offset = offset;
+    VkBuffer vk_buffer = buffer;
+    if (topology == PrimitiveTopology::Quads) {
+        vk_index_type = VK_INDEX_TYPE_UINT32;
+        std::tie(vk_buffer, vk_offset) =
+            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+    } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+        vk_index_type = VK_INDEX_TYPE_UINT16;
+        std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+    }
+    if (vk_buffer == VK_NULL_HANDLE) {
+        // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
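// Taken together, BindIndexBuffer picks the first index representation the
// device can actually draw with. A sketch of that decision chain, assuming
// only the capability flag and compute-pass fallbacks shown above:
enum class IndexKind { Uint8, Uint16, Uint32 };

IndexKind ChooseIndexKind(bool is_quads, IndexKind guest, bool has_uint8_ext) {
    if (is_quads) {
        return IndexKind::Uint32; // Quads are rewritten into uint32 triangle lists on the GPU.
    }
    if (guest == IndexKind::Uint8 && !has_uint8_ext) {
        return IndexKind::Uint16; // Uint8Pass widens 8-bit indices to 16 bits.
    }
    return guest; // Native support; bind the guest buffer directly.
}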
+ ReserveNullIndexBuffer(); + vk_buffer = *null_index_buffer; + } + scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); + }); +} - const VkBuffer handle = Handle(); - scheduler.Record( - [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { - const VkBufferMemoryBarrier barrier{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = handle, - .offset = offset, - .size = data_size, - }; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); - cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); - }); - scheduler.Finish(); +void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { + ReserveQuadArrayLUT(first + count, true); - std::memcpy(data, staging.mapped_span.data(), data_size); + // The LUT has the indices 0, 1, 2, and 3 copied as an array + // To apply these 'first' offsets we can apply an offset based on the modulus. + const VkIndexType index_type = quad_array_lut_index_type; + const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); + const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); + scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer, offset, index_type); + }); } -void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t copy_size) { - scheduler.RequestOutsideRenderPassOperationContext(); +void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, + u32 stride) { + if (device.IsExtExtendedDynamicStateSupported()) { + scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { + const VkDeviceSize vk_offset = offset; + const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? 
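// BindQuadArrayIndexBuffer's offset math above selects one of the four
// 'first % 4' copies inside the LUT and then skips whole quads. A worked
// check of the byte offset, assuming uint16 indices (2 bytes) and a LUT
// sized for 64 indices (16 quads per copy):
#include <cstddef>

constexpr std::size_t QuadLutByteOffset(std::size_t first, std::size_t lut_num_indices,
                                        std::size_t bytes_per_index) {
    const std::size_t sub_first_offset = (first % 4) * (lut_num_indices / 4);
    return (sub_first_offset + first / 4) * 6 * bytes_per_index;
}
// first == 5: copy #1 (5 % 4) starts at quad 16; skipping 5 / 4 == 1 quad lands on quad 17.
static_assert(QuadLutByteOffset(5, 64, 2) == 17 * 6 * 2);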
size : VK_WHOLE_SIZE; + const VkDeviceSize vk_stride = stride; + cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); + }); + } else { + scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffer(index, buffer, offset); + }); + } +} - const VkBuffer dst_buffer = Handle(); - scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, - copy_size](vk::CommandBuffer cmdbuf) { - cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); - - std::array<VkBufferMemoryBarrier, 2> barriers; - barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barriers[0].pNext = nullptr; - barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[0].buffer = src_buffer; - barriers[0].offset = src_offset; - barriers[0].size = copy_size; - barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barriers[1].pNext = nullptr; - barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barriers[1].buffer = dst_buffer; - barriers[1].offset = dst_offset; - barriers[1].size = copy_size; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, - barriers, {}); +void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, + u32 size) { + if (!device.IsExtTransformFeedbackSupported()) { + // Already logged in the rasterizer + return; + } + scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { + const VkDeviceSize vk_offset = offset; + const VkDeviceSize vk_size = size; + cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, - Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, - const Device& device_, MemoryAllocator& memory_allocator_, - VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, - StagingBufferPool& staging_pool_) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, - cpu_memory_, stream_buffer_}, - device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, - staging_pool{staging_pool_} {} - -VKBufferCache::~VKBufferCache() = default; - -std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, - size); +void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { + if (num_indices <= current_num_indices) { + return; + } + if (wait_for_idle) { + scheduler.Finish(); + } + current_num_indices = num_indices; + quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); + + const u32 num_quads = num_indices / 4; + const u32 num_triangle_indices = num_quads * 6; + const u32 num_first_offset_copies = 4; + const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); + const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; + quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = 
nullptr, + .flags = 0, + .size = size_bytes, + .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + if (device.HasDebuggingToolAttached()) { + quad_array_lut.SetObjectNameEXT("Quad LUT"); + } + quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); + + const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); + u8* staging_data = staging.mapped_span.data(); + const size_t quad_size = bytes_per_index * 6; + for (u32 first = 0; first < num_first_offset_copies; ++first) { + for (u32 quad = 0; quad < num_quads; ++quad) { + switch (quad_array_lut_index_type) { + case VK_INDEX_TYPE_UINT8_EXT: + std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); + break; + case VK_INDEX_TYPE_UINT16: + std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); + break; + case VK_INDEX_TYPE_UINT32: + std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); + break; + default: + UNREACHABLE(); + break; + } + staging_data += quad_size; + } + } + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, + dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { + const VkBufferCopy copy{ + .srcOffset = src_offset, + .dstOffset = 0, + .size = size_bytes, + }; + const VkBufferMemoryBarrier write_barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = dst_buffer, + .offset = 0, + .size = size_bytes, + }; + cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + 0, write_barrier); + }); } -VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { - size = std::max(size, std::size_t(4)); - const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); +void BufferCacheRuntime::ReserveNullIndexBuffer() { + if (null_index_buffer) { + return; + } + null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 4, + .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + if (device.HasDebuggingToolAttached()) { + null_index_buffer.SetObjectNameEXT("Null index buffer"); + } + null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal); + scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, size, 0); + scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) { + cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); }); - return {empty.buffer, 0, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 41d5775109..982e92191f 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -4,69 
+4,126 @@ #pragma once -#include <memory> - -#include "common/common_types.h" #include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; +class VKDescriptorPool; class VKScheduler; -class Buffer final : public VideoCommon::BufferBlock { -public: - explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, - StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); - ~Buffer(); - - void Upload(std::size_t offset, std::size_t data_size, const u8* data); +class BufferCacheRuntime; - void Download(std::size_t offset, std::size_t data_size, u8* data); - - void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, - std::size_t copy_size); +class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { +public: + explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); + explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, + VAddr cpu_addr_, u64 size_bytes_); - VkBuffer Handle() const { + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } - u64 Address() const { - return 0; + operator VkBuffer() const noexcept { + return *buffer; } private: - const Device& device; - VKScheduler& scheduler; - StagingBufferPool& staging_pool; - vk::Buffer buffer; MemoryCommit commit; }; -class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { +class BufferCacheRuntime { + friend Buffer; + + using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology; + using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat; + public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, - Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, - const Device& device, MemoryAllocator& memory_allocator, - VKScheduler& scheduler, VKStreamBuffer& stream_buffer, - StagingBufferPool& staging_pool); - ~VKBufferCache(); + explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, + VKScheduler& scheduler_, StagingBufferPool& staging_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKDescriptorPool& descriptor_pool); + + void Finish(); + + [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + + [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, + std::span<const VideoCommon::BufferCopy> copies); + + void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, + u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); + + void BindQuadArrayIndexBuffer(u32 first, u32 count); - BufferInfo GetEmptyBuffer(std::size_t size) override; + void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); -protected: - std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; + void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + + std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, + [[maybe_unused]] u32 binding_index, u32 size) { + 
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); + BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size); + return ref.mapped_span; + } + + void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { + BindBuffer(buffer, offset, size); + } + + void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size, + [[maybe_unused]] bool is_written) { + BindBuffer(buffer, offset, size); + } private: + void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { + update_descriptor_queue.AddBuffer(buffer, offset, size); + } + + void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); + + void ReserveNullIndexBuffer(); + const Device& device; MemoryAllocator& memory_allocator; VKScheduler& scheduler; StagingBufferPool& staging_pool; + VKUpdateDescriptorQueue& update_descriptor_queue; + + vk::Buffer quad_array_lut; + MemoryCommit quad_array_lut_commit; + VkIndexType quad_array_lut_index_type{}; + u32 current_num_indices = 0; + + vk::Buffer null_index_buffer; + MemoryCommit null_index_buffer_commit; + + Uint8Pass uint8_pass; + QuadIndexedPass quad_index_pass; }; +struct BufferCacheParams { + using Runtime = Vulkan::BufferCacheRuntime; + using Buffer = Vulkan::Buffer; + + static constexpr bool IS_OPENGL = false; + static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; + static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; + static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; + static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool USE_MEMORY_MAPS = true; +}; + +using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5eb6a54be9..2f9a7b0284 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -10,7 +10,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" -#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" +#include "common/div_ceil.h" #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -22,30 +22,7 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - namespace { - -VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - return { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }; -} - -VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} - VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { return { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, @@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return set; } -QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - StagingBufferPool& staging_buffer_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), - BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - 
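// BufferCacheParams is the compile-time contract between this backend and the
// shared VideoCommon::BufferCache template: the template reads the aliases
// and boolean flags to select code paths, with no virtual dispatch. A reduced
// sketch of the mechanism with stand-in names:
struct ParamsSketch {
    using Buffer = int; // Stand-in for the backend's buffer type.
    static constexpr bool USE_MEMORY_MAPS = true;
};

template <class P>
struct BufferCacheSketch {
    void Upload() {
        if constexpr (P::USE_MEMORY_MAPS) {
            // Vulkan-style path: stage through a mapped buffer, then copy on the GPU.
        } else {
            // Immediate path: write straight into the destination buffer.
        }
    }
    typename P::Buffer storage{}; // The backend chooses the buffer representation.
};

using SketchCache = BufferCacheSketch<ParamsSketch>;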
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), - scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, - update_descriptor_queue{update_descriptor_queue_} {} - -QuadArrayPass::~QuadArrayPass() = default; - -std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { - const u32 num_triangle_vertices = (num_vertices / 4) * 6; - const std::size_t staging_size = num_triangle_vertices * sizeof(u32); - const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); - - update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - - scheduler.RequestOutsideRenderPassOperationContext(); - - ASSERT(num_vertices % 4 == 0); - const u32 num_quads = num_vertices / 4; - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, - num_quads, first, set](vk::CommandBuffer cmdbuf) { - constexpr u32 dispatch_size = 1024; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); - cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); - }); - return {staging_ref.buffer, 0}; -} - Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, Uint8Pass::~Uint8Pass() = default; -std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, - u64 src_offset) { +std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset) { const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); - const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); + const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { - constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr 
VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); + cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging_ref.buffer, 0}; + return {staging.buffer, staging.offset}; } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, @@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, QuadIndexedPass::~QuadIndexedPass() = default; -std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( +std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, - VkBuffer src_buffer, u64 src_offset) { + VkBuffer src_buffer, u32 src_offset) { const u32 index_shift = [index_format] { switch (index_format) { case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: @@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( const u32 num_tri_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_tri_vertices * sizeof(u32); - const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); + const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); - update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); + update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { - static constexpr u32 dispatch_size = 1024; + static constexpr u32 DISPATCH_SIZE = 1024; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + }; const std::array push_constants = {base_vertex, index_shift}; cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); - 
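// Uint8Pass exists because some devices lack VK_EXT_index_type_uint8: a small
// compute shader widens each 8-bit guest index into a 16-bit one in a staging
// buffer. The CPU-side equivalent of that transform, as a reference sketch:
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint16_t> WidenUint8Indices(const std::uint8_t* src, std::size_t count) {
    std::vector<std::uint16_t> dst(count);
    for (std::size_t i = 0; i < count; ++i) {
        dst[i] = src[i]; // Zero-extension preserves every index value exactly.
    }
    return dst;
}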
cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); - - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = buffer; - barrier.offset = 0; - barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); + cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); }); - return {staging_ref.buffer, 0}; + return {staging.buffer, staging.offset}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index f5c6f5f174..17d781d99c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -41,22 +41,6 @@ private: vk::ShaderModule module; }; -class QuadArrayPass final : public VKComputePass { -public: - explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - StagingBufferPool& staging_buffer_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_); - ~QuadArrayPass(); - - std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); - -private: - VKScheduler& scheduler; - StagingBufferPool& staging_buffer_pool; - VKUpdateDescriptorQueue& update_descriptor_queue; -}; - class Uint8Pass final : public VKComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, @@ -64,7 +48,10 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); - std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); + /// Assemble uint8 indices into a uint16 index buffer + /// Returns a pair with the staging buffer and the offset where the assembled data is + std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, + u32 src_offset); private: VKScheduler& scheduler; @@ -80,9 +67,9 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); - std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, - u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, - u64 src_offset); + std::pair<VkBuffer, VkDeviceSize> Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, + u32 base_vertex, VkBuffer src_buffer, u32 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 6cd00884dd..3bec48d141 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -45,8 +45,8 @@ void InnerFence::Wait() { } VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, - VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + VKQueryCache& query_cache_, const Device& device_, VKScheduler& scheduler_) : GenericFenceManager{rasterizer_, gpu_, texture_cache_,
buffer_cache_, query_cache_}, scheduler{scheduler_} {} diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 9c5e5aa8f3..2f8322d29a 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -22,7 +22,6 @@ class RasterizerInterface; namespace Vulkan { class Device; -class VKBufferCache; class VKQueryCache; class VKScheduler; @@ -45,14 +44,14 @@ private: using Fence = std::shared_ptr<InnerFence>; using GenericFenceManager = - VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; + VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>; class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, - Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, - VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, - VKScheduler& scheduler_); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + TextureCache& texture_cache, BufferCache& buffer_cache, + VKQueryCache& query_cache, const Device& device, + VKScheduler& scheduler); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d50dca604b..fc6dd83eb5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (state.attributes[index].binding_index_enabled == 0) { - continue; - } const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index f336f1862b..2c7ed654db 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -21,7 +21,12 @@ public: /// Returns the current logical tick. [[nodiscard]] u64 CurrentTick() const noexcept { - return current_tick; + return current_tick.load(std::memory_order_relaxed); + } + + /// Returns the last known GPU tick. + [[nodiscard]] u64 KnownGpuTick() const noexcept { + return gpu_tick.load(std::memory_order_relaxed); } /// Returns the timeline semaphore handle. @@ -31,7 +36,7 @@ public: /// Returns true when a tick has been hit by the GPU. [[nodiscard]] bool IsFree(u64 tick) { - return gpu_tick >= tick; + return gpu_tick.load(std::memory_order_relaxed) >= tick; } /// Advance to the logical tick. 
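The MasterSemaphore hunks above replace plain u64 tick reads with relaxed atomic loads, and Refresh() with an atomic store, so other threads can poll GPU progress without holding a lock; relaxed ordering is sufficient because the ticks are monotonic hints rather than synchronization points. A minimal standalone sketch of the pattern follows; TickTracker and the raw counter parameter are illustrative stand-ins for MasterSemaphore and the timeline semaphore's GetCounter():

#include <atomic>
#include <cstdint>

class TickTracker {
public:
    // Logical tick that will be handed to the next submission.
    uint64_t CurrentTick() const noexcept {
        return current_tick.load(std::memory_order_relaxed);
    }
    // Last GPU counter value observed by Refresh(); may be stale, never ahead.
    uint64_t KnownGpuTick() const noexcept {
        return gpu_tick.load(std::memory_order_relaxed);
    }
    // True when the GPU is known to have reached 'tick'.
    bool IsFree(uint64_t tick) const noexcept {
        return KnownGpuTick() >= tick;
    }
    // Publish a counter value freshly read from the timeline semaphore.
    void Refresh(uint64_t counter) noexcept {
        gpu_tick.store(counter, std::memory_order_relaxed);
    }

private:
    std::atomic<uint64_t> current_tick{1}; // advanced on each submission
    std::atomic<uint64_t> gpu_tick{0};     // refreshed from the semaphore counter
};

A stale KnownGpuTick() only makes IsFree() conservatively report false, which is why callers such as ResourcePool::CommitResource (later in this diff) can cache the value once per search instead of re-reading the semaphore every iteration.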
@@ -41,7 +46,7 @@ public: /// Refresh the known GPU tick void Refresh() { - gpu_tick = semaphore.GetCounter(); + gpu_tick.store(semaphore.GetCounter(), std::memory_order_relaxed); } /// Waits for a tick to be hit on the GPU diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0a1118291..dfd38f5759 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -8,8 +8,6 @@ #include <mutex> #include <vector> -#include <boost/container/static_vector.hpp> - #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" @@ -24,7 +22,6 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25 MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); -MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); namespace { +struct DrawParams { + u32 base_instance; + u32 num_instances; + u32 base_vertex; + u32 num_vertices; + bool is_indexed; +}; constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); @@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 
1.0f : 0.0f; - VkViewport viewport{ .x = src.translate_x - src.scale_x, .y = src.translate_y - src.scale_y, @@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in .minDepth = src.translate_z - src.scale_z * reduce_z, .maxDepth = src.translate_z + src.scale_z, }; - if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } - return viewport; } @@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); } -template <size_t N> -std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { - std::array<VkDeviceSize, N> expanded; - std::copy(strides.begin(), strides.end(), expanded.begin()); - return expanded; -} - ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { if (entry.is_buffer) { return ImageViewType::e2D; @@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca } } -} // Anonymous namespace - -class BufferBindings final { -public: - void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { - vertex.buffers[vertex.num_buffers] = buffer; - vertex.offsets[vertex.num_buffers] = offset; - vertex.sizes[vertex.num_buffers] = size; - vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); - ++vertex.num_buffers; - } - - void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { - index.buffer = buffer; - index.offset = offset; - index.type = type; - } - - void Bind(const Device& device, VKScheduler& scheduler) const { - // Use this large switch case to avoid dispatching more memory in the record lambda than - // what we need. It looks horrible, but it's the best we can do on standard C++. 
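// The switch below is the standard trick for lifting a runtime count into a
// template parameter: each instantiation captures arrays of exactly N elements
// in the record lambda instead of all Maxwell::NumVertexArrays slots. A
// compilable sketch of the dispatch pattern, with illustrative names:

#include <array>
#include <cstddef>

template <std::size_t N>
void BindExactly(const std::array<int, 32>& all) {
    // Copy only the first N entries; a lambda capturing 'bound' by value
    // carries N elements, not the full 32.
    std::array<int, N> bound{};
    for (std::size_t i = 0; i < N; ++i) {
        bound[i] = all[i];
    }
}

inline void BindDynamic(std::size_t count, const std::array<int, 32>& all) {
    switch (count) {
    case 0:
        return BindExactly<0>(all);
    case 1:
        return BindExactly<1>(all);
    case 2:
        return BindExactly<2>(all);
    // ... one case per supported count, up to 32 ...
    default:
        break;
    }
}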
- switch (vertex.num_buffers) { - case 0: - return BindStatic<0>(device, scheduler); - case 1: - return BindStatic<1>(device, scheduler); - case 2: - return BindStatic<2>(device, scheduler); - case 3: - return BindStatic<3>(device, scheduler); - case 4: - return BindStatic<4>(device, scheduler); - case 5: - return BindStatic<5>(device, scheduler); - case 6: - return BindStatic<6>(device, scheduler); - case 7: - return BindStatic<7>(device, scheduler); - case 8: - return BindStatic<8>(device, scheduler); - case 9: - return BindStatic<9>(device, scheduler); - case 10: - return BindStatic<10>(device, scheduler); - case 11: - return BindStatic<11>(device, scheduler); - case 12: - return BindStatic<12>(device, scheduler); - case 13: - return BindStatic<13>(device, scheduler); - case 14: - return BindStatic<14>(device, scheduler); - case 15: - return BindStatic<15>(device, scheduler); - case 16: - return BindStatic<16>(device, scheduler); - case 17: - return BindStatic<17>(device, scheduler); - case 18: - return BindStatic<18>(device, scheduler); - case 19: - return BindStatic<19>(device, scheduler); - case 20: - return BindStatic<20>(device, scheduler); - case 21: - return BindStatic<21>(device, scheduler); - case 22: - return BindStatic<22>(device, scheduler); - case 23: - return BindStatic<23>(device, scheduler); - case 24: - return BindStatic<24>(device, scheduler); - case 25: - return BindStatic<25>(device, scheduler); - case 26: - return BindStatic<26>(device, scheduler); - case 27: - return BindStatic<27>(device, scheduler); - case 28: - return BindStatic<28>(device, scheduler); - case 29: - return BindStatic<29>(device, scheduler); - case 30: - return BindStatic<30>(device, scheduler); - case 31: - return BindStatic<31>(device, scheduler); - case 32: - return BindStatic<32>(device, scheduler); - } - UNREACHABLE(); - } - -private: - // Some of these fields are intentionally left uninitialized to avoid initializing them twice. 
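// "Intentionally left uninitialized" works because num_buffers is the
// high-water mark: AddVertexBinding writes every slot in [0, num_buffers)
// before Bind ever reads it, so value-initializing all 32 entries on
// construction would be wasted work. A compilable sketch of the idiom,
// with illustrative names:

#include <array>
#include <cstddef>

struct Bindings {
    std::size_t num = 0;       // the only member that needs an initializer
    std::array<int, 32> slots; // deliberately uninitialized

    void Add(int value) {
        slots[num++] = value; // each slot is written before it is ever read
    }
};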
- struct { - size_t num_buffers = 0; - std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; - std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; - std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; - std::array<u16, Maxwell::NumVertexArrays> strides; - } vertex; - - struct { - VkBuffer buffer = nullptr; - VkDeviceSize offset; - VkIndexType type; - } index; - - template <size_t N> - void BindStatic(const Device& device, VKScheduler& scheduler) const { - if (device.IsExtExtendedDynamicStateSupported()) { - if (index.buffer) { - BindStatic<N, true, true>(scheduler); - } else { - BindStatic<N, false, true>(scheduler); - } - } else { - if (index.buffer) { - BindStatic<N, true, false>(scheduler); - } else { - BindStatic<N, false, false>(scheduler); - } - } - } - - template <size_t N, bool is_indexed, bool has_extended_dynamic_state> - void BindStatic(VKScheduler& scheduler) const { - static_assert(N <= Maxwell::NumVertexArrays); - if constexpr (N == 0) { - return; - } - - std::array<VkBuffer, N> buffers; - std::array<VkDeviceSize, N> offsets; - std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); - std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); - - if constexpr (has_extended_dynamic_state) { - // With extended dynamic states we can specify the length and stride of a vertex buffer - std::array<VkDeviceSize, N> sizes; - std::array<u16, N> strides; - std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); - std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); - - if constexpr (is_indexed) { - scheduler.Record( - [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); - cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), - offsets.data(), sizes.data(), - ExpandStrides(strides).data()); - }); - } else { - scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), - offsets.data(), sizes.data(), - ExpandStrides(strides).data()); - }); - } - return; - } - - if constexpr (is_indexed) { - // Indexed draw - scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); - cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); - }); - } else { - // Array draw - scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); - }); - } - } -}; - -void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { - if (is_indexed) { - cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); - } else { - cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); +DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, + bool is_indexed) { + DrawParams params{ + .base_instance = regs.vb_base_instance, + .num_instances = is_instanced ? num_instances : 1, + .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, + .num_vertices = is_indexed ? 
regs.index_array.count : regs.vertex_buffer.count, + .is_indexed = is_indexed, + }; + if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { + // 6 triangle vertices per quad, base vertex is part of the index + // See BindQuadArrayIndexBuffer for more details + params.num_vertices = (params.num_vertices / 4) * 6; + params.base_vertex = 0; + params.is_indexed = true; } + return params; } +} // Anonymous namespace RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MemoryManager& gpu_memory_, @@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, - state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), + state_tracker{state_tracker_}, scheduler{scheduler_}, staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), update_descriptor_queue(device, scheduler), blit_image(device, scheduler, state_tracker, descriptor_pool), - quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), + buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, + update_descriptor_queue, descriptor_pool), + buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue), - buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, - stream_buffer, staging_pool), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, - fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { scheduler.SetQueryCache(query_cache); if (device.UseAsynchronousShaders()) { @@ -446,52 +267,51 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - GraphicsPipelineCacheKey key; - key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); - - buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); + graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - BufferBindings buffer_bindings; - const DrawParameters draw_params = - SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - auto lock = texture_cache.AcquireLock(); texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); - key.shaders = GetShaderAddresses(shaders); - SetupShaderDescriptors(shaders); + graphics_key.shaders = GetShaderAddresses(shaders); - buffer_cache.Unmap(); + SetupShaderDescriptors(shaders, is_indexed); const Framebuffer* const 
framebuffer = texture_cache.GetFramebuffer(); - key.renderpass = framebuffer->RenderPass(); + graphics_key.renderpass = framebuffer->RenderPass(); - auto* const pipeline = - pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); + VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( + graphics_key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. return; } - buffer_bindings.Bind(device, scheduler); - BeginTransformFeedback(); scheduler.RequestRenderpass(framebuffer); scheduler.BindGraphicsPipeline(pipeline->GetHandle()); UpdateDynamicStates(); - const auto pipeline_layout = pipeline->GetLayout(); - const auto descriptor_set = pipeline->CommitDescriptorSet(); + const auto& regs = maxwell3d.regs; + const u32 num_instances = maxwell3d.mme_draw.instance_count; + const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); + const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); + const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, {}); + DESCRIPTOR_SET, descriptor_set, nullptr); + } + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); } - draw_params.Draw(cmdbuf); }); EndTransformFeedback(); @@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() { return; } - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.UpdateRenderTargets(true); const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); const VkExtent2D render_area = framebuffer->RenderArea(); @@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() { if (use_stencil) { aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } - scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { VkClearAttachment attachment; @@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { auto& pipeline = pipeline_cache.GetComputePipeline({ .shader = code_addr, .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size = - { - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, + .workgroup_size{ + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, }); // Compute dispatches can't be executed inside a renderpass @@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { image_view_indices.clear(); sampler_handles.clear(); - auto lock = texture_cache.AcquireLock(); - texture_cache.SynchronizeComputeDescriptors(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; const auto& entries = pipeline.GetEntries(); + buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); + buffer_cache.UnbindComputeStorageBuffers(); + u32 ssbo_index = 0; + for (const auto& buffer : entries.global_buffers) { + buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, + buffer.is_written); + 
++ssbo_index; + } + buffer_cache.UpdateComputeBuffers(); + + texture_cache.SynchronizeComputeDescriptors(); + SetupComputeUniformTexels(entries); SetupComputeTextures(entries); SetupComputeStorageTexels(entries); @@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - buffer_cache.Map(CalculateComputeStreamBufferSize()); - update_descriptor_queue.Acquire(); - SetupComputeConstBuffers(entries); - SetupComputeGlobalBuffers(entries); + buffer_cache.BindHostComputeBuffers(); ImageViewId* image_view_id_ptr = image_view_ids.data(); VkSampler* sampler_ptr = sampler_handles.data(); PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, sampler_ptr); - buffer_cache.Unmap(); - const VkPipeline pipeline_handle = pipeline.GetHandle(); const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); @@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, query_cache.Query(gpu_addr, type, timestamp); } +void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, + u32 size) { + buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); +} + void RasterizerVulkan::FlushAll() {} void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { @@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { return; } { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - buffer_cache.FlushRegion(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.DownloadMemory(addr, size); + } query_cache.FlushRegion(addr, size); } bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; if (!Settings::IsGPULevelHigh()) { - return buffer_cache.MustFlushRegion(addr, size); + return buffer_cache.IsRegionGpuModified(addr, size); } return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.MustFlushRegion(addr, size); + buffer_cache.IsRegionGpuModified(addr, size); } void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { @@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { return; } { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.WriteMemory(addr, size); + } pipeline_cache.InvalidateRegion(addr, size); - buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); } @@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { if (addr == 0 || size == 0) { return; } + pipeline_cache.OnCPUWrite(addr, size); { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - pipeline_cache.OnCPUWrite(addr, size); - buffer_cache.OnCPUWrite(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.CachedWriteMemory(addr, size); + } } void RasterizerVulkan::SyncGuestHost() { - buffer_cache.SyncGuestHost(); pipeline_cache.SyncGuestHost(); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.FlushCachedWrites(); + } } void 
RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.UnmapMemory(addr, size); } - buffer_cache.OnCPUWrite(addr, size); + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.WriteMemory(addr, size); + } pipeline_cache.OnCPUWrite(addr, size); } @@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() { draw_counter = 0; update_descriptor_queue.TickFrame(); fence_manager.TickFrame(); - buffer_cache.TickFrame(); staging_pool.TickFrame(); { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.TickFrame(); } + { + std::scoped_lock lock{buffer_cache.mutex}; + buffer_cache.TickFrame(); + } } bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; texture_cache.BlitImage(dst, src, copy_config); return true; } @@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!framebuffer_addr) { return false; } - - auto lock = texture_cache.AcquireLock(); + std::scoped_lock lock{texture_cache.mutex}; ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; @@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, - BufferBindings& buffer_bindings, - bool is_indexed, - bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Geometry); - - const auto& regs = maxwell3d.regs; - - SetupVertexArrays(buffer_bindings); - - const u32 base_instance = regs.vb_base_instance; - const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; - const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; - const u32 num_vertices = is_indexed ? 
regs.index_array.count : regs.vertex_buffer.count; - - DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; - SetupIndexBuffer(buffer_bindings, params, is_indexed); - - return params; -} - void RasterizerVulkan::SetupShaderDescriptors( - const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { + const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { image_view_indices.clear(); sampler_handles.clear(); for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { @@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors( if (!shader) { continue; } - const auto& entries = shader->GetEntries(); + const ShaderEntries& entries = shader->GetEntries(); SetupGraphicsUniformTexels(entries, stage); SetupGraphicsTextures(entries, stage); SetupGraphicsStorageTexels(entries, stage); SetupGraphicsImages(entries, stage); + + buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + u32 ssbo_index = 0; + for (const auto& buffer : entries.global_buffers) { + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, + buffer.cbuf_offset, buffer.is_written); + ++ssbo_index; + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache.UpdateGraphicsBuffers(is_indexed); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.BindHostGeometryBuffers(is_indexed); + update_descriptor_queue.Acquire(); ImageViewId* image_view_id_ptr = image_view_ids.data(); @@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors( if (!shader) { continue; } - const auto& entries = shader->GetEntries(); - SetupGraphicsConstBuffers(entries, stage); - SetupGraphicsGlobalBuffers(entries, stage); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); + buffer_cache.BindHostStageBuffers(stage); + PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, + image_view_id_ptr, sampler_ptr); } } @@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() { LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); return; } - UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); - - UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); - UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); - UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); - - const auto& binding = regs.tfb_bindings[0]; - UNIMPLEMENTED_IF(binding.buffer_enable == 0); - UNIMPLEMENTED_IF(binding.buffer_offset != 0); - - const GPUVAddr gpu_addr = binding.Address(); - const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); - const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - - scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { - cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); - cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); - }); + scheduler.Record( + [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } void RasterizerVulkan::EndTransformFeedback() { @@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() { if (!device.IsExtTransformFeedbackSupported()) { return; } - 
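// With this rewrite the transform feedback buffer is no longer uploaded and
// bound here; judging from the Draw path above, BindHostGeometryBuffers in the
// new buffer cache takes over the binding, and the rasterizer only brackets
// the draw with begin/end. A compilable sketch of the recorded-command order
// against raw Vulkan; the function pointers are assumed to be loaded from
// VK_EXT_transform_feedback:

#include <vulkan/vulkan.h>

void RecordTransformFeedbackDraw(VkCommandBuffer cmd, VkBuffer tfb_buffer,
                                 VkDeviceSize offset, VkDeviceSize size,
                                 PFN_vkCmdBindTransformFeedbackBuffersEXT bind_tfb,
                                 PFN_vkCmdBeginTransformFeedbackEXT begin_tfb,
                                 PFN_vkCmdEndTransformFeedbackEXT end_tfb) {
    bind_tfb(cmd, 0, 1, &tfb_buffer, &offset, &size); // now done by the buffer cache
    begin_tfb(cmd, 0, 0, nullptr, nullptr);           // no counter buffers used
    // ... vkCmdDraw / vkCmdDrawIndexed ...
    end_tfb(cmd, 0, 0, nullptr, nullptr);
}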
scheduler.Record( [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { - const auto& regs = maxwell3d.regs; - - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const auto& vertex_array = regs.vertex_array[index]; - if (!vertex_array.IsEnabled()) { - continue; - } - const GPUVAddr start{vertex_array.StartAddress()}; - const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; - - ASSERT(end >= start); - const size_t size = end - start; - if (size == 0) { - buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); - continue; - } - const auto info = buffer_cache.UploadMemory(start, size); - buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); - } -} - -void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, - bool is_indexed) { - if (params.num_vertices == 0) { - return; - } - const auto& regs = maxwell3d.regs; - switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: { - if (!params.is_indexed) { - const auto [buffer, offset] = - quad_array_pass.Assemble(params.num_vertices, params.base_vertex); - buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); - params.base_vertex = 0; - params.num_vertices = params.num_vertices * 6 / 4; - params.is_indexed = true; - break; - } - const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); - VkBuffer buffer = info.handle; - u64 offset = info.offset; - std::tie(buffer, offset) = quad_indexed_pass.Assemble( - regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); - - buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); - params.num_vertices = (params.num_vertices / 4) * 6; - params.base_vertex = 0; - break; - } - default: { - if (!is_indexed) { - break; - } - const GPUVAddr gpu_addr = regs.index_array.IndexStart(); - const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); - VkBuffer buffer = info.handle; - u64 offset = info.offset; - - auto format = regs.index_array.format; - const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; - if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { - std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); - format = Maxwell::IndexFormat::UnsignedShort; - } - - buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); - break; - } - } -} - -void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& shader_stage = maxwell3d.state.shader_stages[stage]; - for (const auto& entry : entries.const_buffers) { - SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); - } -} - -void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - const auto& cbufs{maxwell3d.state.shader_stages[stage]}; - - for (const auto& entry : entries.global_buffers) { - const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); - SetupGlobalBuffer(entry, addr); - } -} - void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_Textures); const auto& regs = 
maxwell3d.regs; const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.uniform_texels) { @@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, } void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_Textures); const auto& regs = maxwell3d.regs; const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.samplers) { @@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_ } void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_Textures); const auto& regs = maxwell3d.regs; const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.storage_texels) { @@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, } void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - MICROPROFILE_SCOPE(Vulkan_Images); const auto& regs = maxwell3d.regs; const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; for (const auto& entry : entries.images) { @@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t } } -void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& launch_desc = kepler_compute.launch_description; - for (const auto& entry : entries.const_buffers) { - const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; - const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); - const Tegra::Engines::ConstBufferInfo info{ - .address = config.Address(), - .size = config.size, - .enabled = mask[entry.GetIndex()], - }; - SetupConstBuffer(entry, info); - } -} - -void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; - for (const auto& entry : entries.global_buffers) { - const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; - SetupGlobalBuffer(entry, addr); - } -} - void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_Textures); const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.uniform_texels) { const TextureHandle handle = @@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_Textures); const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.samplers) { for (size_t index = 0; index < entry.size; ++index) { @@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { } void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_Textures); const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.storage_texels) { const TextureHandle handle = @@ -1149,7 +845,6 @@ void 
RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - MICROPROFILE_SCOPE(Vulkan_Images); const bool via_header_index = kepler_compute.launch_description.linked_tsc; for (const auto& entry : entries.images) { const TextureHandle handle = @@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { } } -void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer) { - if (!buffer.enabled) { - // Set values to zero to unbind buffers - update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); - return; - } - // Align the size to avoid bad std140 interactions - const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); - ASSERT(size <= MaxConstbufferSize); - - const u64 alignment = device.GetUniformBufferAlignment(); - const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); - update_descriptor_queue.AddBuffer(info.handle, info.offset, size); -} - -void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { - const u64 actual_addr = gpu_memory.Read<u64>(address); - const u32 size = gpu_memory.Read<u32>(address + 8); - - if (size == 0) { - // Sometimes global memory pointers don't have a proper size. Upload a dummy entry - // because Vulkan doesn't like empty buffers. - // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the - // default buffer. - static constexpr size_t dummy_size = 4; - const auto info = buffer_cache.GetEmptyBuffer(dummy_size); - update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); - return; - } - - const auto info = buffer_cache.UploadMemory( - actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); - update_descriptor_queue.AddBuffer(info.handle, info.offset, size); -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; @@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), - GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; + GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), + }; scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); } @@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } - const std::array scissors = { + const std::array scissors{ GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), - GetScissorState(regs, 15)}; + GetScissorState(regs, 15), + }; scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); } @@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& 
}); } -size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { - size_t size = CalculateVertexArraysSize(); - if (is_indexed) { - size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); - } - size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); - return size; -} - -size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { - return Tegra::Engines::KeplerCompute::NumConstBuffers * - (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); -} - -size_t RasterizerVulkan::CalculateVertexArraysSize() const { - const auto& regs = maxwell3d.regs; - - size_t size = 0; - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - // This implementation assumes that all attributes are used in the shader. - const GPUVAddr start{regs.vertex_array[index].StartAddress()}; - const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; - DEBUG_ASSERT(end >= start); - - size += (end - start) * regs.vertex_array[index].enable; - } - return size; -} - -size_t RasterizerVulkan::CalculateIndexBufferSize() const { - return static_cast<size_t>(maxwell3d.regs.index_array.count) * - static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); -} - -size_t RasterizerVulkan::CalculateConstBufferSize( - const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { - if (entry.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - return buffer.size; - } else { - // Buffer is accessed directly, upload just what we use - return entry.GetSize(); - } -} - -VkBuffer RasterizerVulkan::DefaultBuffer() { - if (default_buffer) { - return *default_buffer; - } - default_buffer = device.GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = DEFAULT_BUFFER_SIZE, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); - default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); - - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { - cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); - }); - return *default_buffer; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8e261b9bd7..acea1ba2dc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -18,14 +18,13 @@ #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include 
"video_core/shader/async_shaders.h" @@ -49,7 +48,6 @@ namespace Vulkan { struct VKScreenInfo; class StateTracker; -class BufferBindings; class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: @@ -65,6 +63,7 @@ public: void DispatchCompute(GPUVAddr code_addr) override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; + void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; bool MustFlushRegion(VAddr addr, u64 size) override; @@ -107,24 +106,11 @@ private: static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); - struct DrawParameters { - void Draw(vk::CommandBuffer cmdbuf) const; - - u32 base_instance = 0; - u32 num_instances = 0; - u32 base_vertex = 0; - u32 num_vertices = 0; - bool is_indexed = 0; - }; - void FlushWork(); - /// Setups geometry buffers and state. - DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, - bool is_indexed, bool is_instanced); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); + void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, + bool is_indexed); void UpdateDynamicStates(); @@ -132,16 +118,6 @@ private: void EndTransformFeedback(); - void SetupVertexArrays(BufferBindings& buffer_bindings); - - void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); - - /// Setup constant buffers in the graphics pipeline. - void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); - - /// Setup global buffers in the graphics pipeline. - void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); - /// Setup uniform texels in the graphics pipeline. void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); @@ -154,12 +130,6 @@ private: /// Setup images in the graphics pipeline. void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - /// Setup constant buffers in the compute pipeline. - void SetupComputeConstBuffers(const ShaderEntries& entries); - - /// Setup global buffers in the compute pipeline. - void SetupComputeGlobalBuffers(const ShaderEntries& entries); - /// Setup texel buffers in the compute pipeline. void SetupComputeUniformTexels(const ShaderEntries& entries); @@ -172,11 +142,6 @@ private: /// Setup images in the compute pipeline. 
void SetupComputeImages(const ShaderEntries& entries); - void SetupConstBuffer(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer); - - void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -193,19 +158,6 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); - size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; - - size_t CalculateComputeStreamBufferSize() const; - - size_t CalculateVertexArraysSize() const; - - size_t CalculateIndexBufferSize() const; - - size_t CalculateConstBufferSize(const ConstBufferEntry& entry, - const Tegra::Engines::ConstBufferInfo& buffer) const; - - VkBuffer DefaultBuffer(); - Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; @@ -217,24 +169,21 @@ private: StateTracker& state_tracker; VKScheduler& scheduler; - VKStreamBuffer stream_buffer; StagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; - QuadArrayPass quad_array_pass; - QuadIndexedPass quad_indexed_pass; - Uint8Pass uint8_pass; + + GraphicsPipelineCacheKey graphics_key; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; + BufferCacheRuntime buffer_cache_runtime; + BufferCache buffer_cache; VKPipelineCache pipeline_cache; - VKBufferCache buffer_cache; VKQueryCache query_cache; VKFenceManager fence_manager; - vk::Buffer default_buffer; - MemoryCommit default_buffer_commit; vk::Event wfi_event; VideoCommon::Shader::AsyncShaders async_shaders; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index ee274ac598..a8bf7bda8c 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -17,21 +17,21 @@ ResourcePool::~ResourcePool() = default; size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results master_semaphore.Refresh(); - - const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { + const u64 gpu_tick = master_semaphore.KnownGpuTick(); + const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { for (size_t iterator = begin; iterator < end; ++iterator) { - if (master_semaphore.IsFree(ticks[iterator])) { + if (gpu_tick >= ticks[iterator]) { ticks[iterator] = master_semaphore.CurrentTick(); return iterator; } } - return {}; + return std::nullopt; }; // Try to find a free resource from the hinted position to the end. - auto found = search(free_iterator, ticks.size()); + std::optional<size_t> found = search(hint_iterator, ticks.size()); if (!found) { // Search from beginning to the hinted position. - found = search(0, free_iterator); + found = search(0, hint_iterator); if (!found) { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); @@ -41,7 +41,7 @@ size_t ResourcePool::CommitResource() { } } // Free iterator is hinted to the resource after the one that's been committed.
- free_iterator = (*found + 1) % ticks.size(); + hint_iterator = (*found + 1) % ticks.size(); return *found; } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index a018c7ec22..9d0bb3b4d0 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -36,7 +36,7 @@ private: MasterSemaphore& master_semaphore; size_t grow_step = 0; ///< Number of new resources created after an overflow - size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found + size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found std::vector<u64> ticks; ///< Ticks for each resource }; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 66004f9c0c..f35c120b09 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -u64 VKScheduler::CurrentTick() const noexcept { - return master_semaphore->CurrentTick(); -} - -bool VKScheduler::IsFree(u64 tick) const noexcept { - return master_semaphore->IsFree(tick); -} - -void VKScheduler::Wait(u64 tick) { - master_semaphore->Wait(tick); -} - void VKScheduler::Flush(VkSemaphore semaphore) { SubmitExecution(semaphore); AllocateNewContext(); @@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, vk::Span(barriers.data(), num_images)); }); state.renderpass = nullptr; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 15f2987eb4..3ce48e9d28 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -14,6 +14,7 @@ #include "common/alignment.h" #include "common/common_types.h" #include "common/threadsafe_queue.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -21,7 +22,6 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; -class MasterSemaphore; class StateTracker; class VKQueryCache; @@ -32,15 +32,6 @@ public: explicit VKScheduler(const Device& device, StateTracker& state_tracker); ~VKScheduler(); - /// Returns the current command buffer tick. - [[nodiscard]] u64 CurrentTick() const noexcept; - - /// Returns true when a tick has been triggered by the GPU. - [[nodiscard]] bool IsFree(u64 tick) const noexcept; - - /// Waits for the given tick to trigger on the GPU. - void Wait(u64 tick); - /// Sends the current execution context to the GPU. void Flush(VkSemaphore semaphore = nullptr); @@ -82,6 +73,21 @@ public: (void)chunk->Record(command); } + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return master_semaphore->CurrentTick(); + } + + /// Returns true when a tick has been triggered by the GPU. + [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return master_semaphore->IsFree(tick); + } + + /// Waits for the given tick to trigger on the GPU. + void Wait(u64 tick) { + master_semaphore->Wait(tick); + } + /// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { return *master_semaphore; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 61d52b961e..c6846d8861 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1845,13 +1845,21 @@ private: Expression TextureGather(Operation operation) { const auto& meta = std::get<MetaTexture>(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); const Id coords = GetCoordinates(operation, Type::Float); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id texture{}; + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); + } + if (meta.sampler.is_shadow) { texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare))); + AsFloat(Visit(meta.depth_compare)), mask, operands); } else { u32 component_value = 0; if (meta.component) { @@ -1860,7 +1868,7 @@ private: component_value = component->GetValue(); } texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value)); + Constant(t_uint, component_value), mask, operands); } return GetTextureElement(operation, texture, Type::Float); } @@ -1928,13 +1936,22 @@ private: const Id image = GetTextureImage(operation); const Id coords = GetCoordinates(operation, Type::Int); + + spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; + std::vector<Id> operands; Id fetch; + if (meta.lod && !meta.sampler.is_buffer) { - fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod, - AsInt(Visit(meta.lod))); - } else { - fetch = OpImageFetch(t_float4, image, coords); + mask = mask | spv::ImageOperandsMask::Lod; + operands.push_back(AsInt(Visit(meta.lod))); + } + + if (!meta.aoffi.empty()) { + mask = mask | spv::ImageOperandsMask::Offset; + operands.push_back(GetOffsetCoordinates(operation)); } + + fetch = OpImageFetch(t_float4, image, coords, mask, operands); return GetTextureElement(operation, fetch, Type::Float); } @@ -3106,7 +3123,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { entries.const_buffers.emplace_back(cbuf.second, cbuf.first); } for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); + entries.global_buffers.emplace_back(GlobalBufferEntry{ + .cbuf_index = base.cbuf_index, + .cbuf_offset = base.cbuf_offset, + .is_written = usage.is_written, + }); } for (const auto& sampler : ir.GetSamplers()) { if (sampler.is_buffer) { @@ -3127,6 +3148,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { entries.attributes.insert(GetGenericAttributeLocation(attribute)); } } + for (const auto& buffer : entries.const_buffers) { + entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); + } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); entries.uses_warps = ir.UsesWarps(); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 26381e4441..5d94132a51 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -39,24 +39,7 @@ private: u32 index{}; }; -class GlobalBufferEntry 
{ -public: - constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} - - constexpr u32 GetCbufIndex() const { - return cbuf_index; - } - - constexpr u32 GetCbufOffset() const { - return cbuf_offset; - } - - constexpr bool IsWritten() const { - return is_written; - } - -private: +struct GlobalBufferEntry { u32 cbuf_index{}; u32 cbuf_offset{}; bool is_written{}; @@ -78,6 +61,7 @@ struct ShaderEntries { std::set<u32> attributes; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; + u32 enabled_uniform_buffers{}; bool uses_warps{}; }; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc17..7a12324973 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -8,6 +8,7 @@ #include <fmt/format.h> +#include "common/alignment.h" #include "common/assert.h" #include "common/bit_util.h" #include "common/common_types.h" @@ -17,18 +18,119 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +// Maximum potential alignment of a Vulkan buffer +constexpr VkDeviceSize MAX_ALIGNMENT = 256; +// Maximum size to put elements in the stream buffer +constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; +// Stream buffer size in bytes +constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; + +constexpr VkMemoryPropertyFlags HOST_FLAGS = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; +constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; + +bool IsStreamHeap(VkMemoryHeap heap) noexcept { + return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; +} + +std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, + VkMemoryPropertyFlags flags) noexcept { + for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { + if (((type_mask >> type_index) & 1) == 0) { + // Memory type is incompatible + continue; + } + const VkMemoryType& memory_type = props.memoryTypes[type_index]; + if ((memory_type.propertyFlags & flags) != flags) { + // Memory type doesn't have the flags we want + continue; + } + if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { + // Memory heap is not suitable for streaming + continue; + } + // Success! 
+ return type_index; + } + return std::nullopt; +} + +u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { + // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this + std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); + if (type) { + return *type; + } + // Otherwise try without the DEVICE_LOCAL_BIT + type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); + if (type) { + return *type; + } + // This should never happen, and in case it does, signal it as an out of memory situation + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); +} + +size_t Region(size_t iterator) noexcept { + return iterator / REGION_SIZE; +} +} // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + const vk::Device& dev = device.GetLogical(); + stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = STREAM_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + if (device.HasDebuggingToolAttached()) { + stream_buffer.SetObjectNameEXT("Stream Buffer"); + } + VkMemoryDedicatedRequirements dedicated_reqs{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + .pNext = nullptr, + .prefersDedicatedAllocation = VK_FALSE, + .requiresDedicatedAllocation = VK_FALSE, + }; + const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); + const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || + dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; + const VkMemoryDedicatedAllocateInfo dedicated_info{ + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = nullptr, + .image = nullptr, + .buffer = *stream_buffer, + }; + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = make_dedicated ? 
&dedicated_info : nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), + }); + if (device.HasDebuggingToolAttached()) { + stream_memory.SetObjectNameEXT("Stream Buffer Memory"); + } + stream_buffer.BindMemory(*stream_memory, 0); + stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); +} StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { - if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { - return *ref; + if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + return GetStreamBuffer(size); } - return CreateStagingBuffer(size, usage); + return GetStagingBuffer(size, usage); } void StagingBufferPool::TickFrame() { @@ -39,6 +141,52 @@ void StagingBufferPool::TickFrame() { ReleaseCache(MemoryUsage::Download); } +StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { + if (AreRegionsActive(Region(free_iterator) + 1, + std::min(Region(iterator + size) + 1, NUM_SYNCS))) { + // Avoid waiting for the previous usages to be free + return GetStagingBuffer(size, MemoryUsage::Upload); + } + const u64 current_tick = scheduler.CurrentTick(); + std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), + current_tick); + used_iterator = iterator; + free_iterator = std::max(free_iterator, iterator + size); + + if (iterator + size >= STREAM_BUFFER_SIZE) { + std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, + current_tick); + used_iterator = 0; + iterator = 0; + free_iterator = size; + + if (AreRegionsActive(0, Region(size) + 1)) { + // Avoid waiting for the previous usages to be free + return GetStagingBuffer(size, MemoryUsage::Upload); + } + } + const size_t offset = iterator; + iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); + return StagingBufferRef{ + .buffer = *stream_buffer, + .offset = static_cast<VkDeviceSize>(offset), + .mapped_span = std::span<u8>(stream_pointer + offset, size), + }; +} + +bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const { + const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick(); + return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end, + [gpu_tick](u64 sync_tick) { return gpu_tick < sync_tick; }); +}; + +StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) { + if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { + return *ref; + } + return CreateStagingBuffer(size, usage); +} + std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, MemoryUsage usage) { StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47b..69f7618dee 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -19,11 +19,14 @@ class VKScheduler; struct StagingBufferRef { VkBuffer buffer; + VkDeviceSize offset; std::span<u8> mapped_span; }; class StagingBufferPool { public: + static constexpr size_t NUM_SYNCS = 16; + explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler); ~StagingBufferPool(); @@ -33,6 +36,11 @@ public: void TickFrame(); private: 
+ struct StreamBufferCommit { + size_t upper_bound; + u64 tick; + }; + struct StagingBuffer { vk::Buffer buffer; MemoryCommit commit; @@ -42,6 +50,7 @@ private: StagingBufferRef Ref() const noexcept { return { .buffer = *buffer, + .offset = 0, .mapped_span = mapped_span, }; } @@ -56,6 +65,12 @@ private: static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; + StagingBufferRef GetStreamBuffer(size_t size); + + bool AreRegionsActive(size_t region_begin, size_t region_end) const; + + StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage); + std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); @@ -70,6 +85,15 @@ private: MemoryAllocator& memory_allocator; VKScheduler& scheduler; + vk::Buffer stream_buffer; + vk::DeviceMemory stream_memory; + u8* stream_pointer = nullptr; + + size_t iterator = 0; + size_t used_iterator = 0; + size_t free_iterator = 0; + std::array<u64, NUM_SYNCS> sync_ticks{}; + StagingBuffersCache device_local_cache; StagingBuffersCache upload_cache; StagingBuffersCache download_cache; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 1779a2e301..956f868457 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -18,9 +18,7 @@ #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace Vulkan { - namespace { - using namespace Dirty; using namespace VideoCommon::Dirty; using Tegra::Engines::Maxwell3D; @@ -30,15 +28,18 @@ using Table = Maxwell3D::DirtyState::Table; using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { - static constexpr std::array INVALIDATION_FLAGS{ + static constexpr int INVALIDATION_FLAGS[]{ Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { flags[flag] = true; } + for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { + flags[index] = true; + } return flags; } @@ -125,12 +126,40 @@ void SetupDirtyStencilTestEnable(Tables& tables) { tables[0][OFF(stencil_enable)] = StencilTestEnable; } +void SetupDirtyBlending(Tables& tables) { + tables[0][OFF(color_mask_common)] = Blending; + tables[0][OFF(independent_blend_enable)] = Blending; + FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending); + FillBlock(tables[0], OFF(blend), NUM(blend), Blending); + FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); +} + +void SetupDirtyInstanceDivisors(Tables& tables) { + static constexpr size_t divisor_offset = 3; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; + tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = + InstanceDivisors; + } +} + +void SetupDirtyVertexAttributes(Tables& tables) { + FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); +} + +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + 
tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; + } +} } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { auto& tables = gpu.Maxwell3D().dirty.tables; - SetupDirtyRenderTargets(tables); + SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); SetupDirtyDepthBias(tables); @@ -145,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyFrontFace(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); + SetupDirtyBlending(tables); + SetupDirtyInstanceDivisors(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyViewportSwizzles(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index c335d2bdfb..84e918a715 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -35,6 +35,11 @@ enum : u8 { StencilOp, StencilTestEnable, + Blending, + InstanceDivisors, + VertexAttributes, + ViewportSwizzles, + Last }; static_assert(Last <= std::numeric_limits<u8>::max()); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 725a2a05d6..0b63bd6c86 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) - : surface{surface_}, device{device_}, scheduler{scheduler_} {} +VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_, + u32 width, u32 height, bool srgb) + : surface{surface_}, device{device_}, scheduler{scheduler_} { + Create(width, height, srgb); +} VKSwapchain::~VKSwapchain() = default; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2eadd62b3a..a728511e02 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -20,7 +20,8 @@ class VKScheduler; class VKSwapchain { public: - explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); + explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler, + u32 width, u32 height, bool srgb); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. 
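The vk_state_tracker hunks above extend a table-driven dirty-state scheme: each Maxwell3D register offset maps to a flag index, a write to a mapped register raises that flag, and the rasterizer tests-and-clears the flag before re-emitting the corresponding Vulkan state. Below is a minimal sketch of that pattern, illustrative only; NUM_REGS, NotifyWrite, and ExchangeDirty are hypothetical stand-ins, not yuzu's actual API.

#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_REGS = 0x1000; // assumed register-file size, not the real value
enum : std::uint8_t { NoFlag = 0, Blending, ViewportSwizzles, Last };

using Flags = std::bitset<Last>;
using Table = std::array<std::uint8_t, NUM_REGS>;

// Map a contiguous block of register offsets to one dirty flag,
// mirroring the FillBlock calls in the diff above.
void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
    for (std::size_t offset = begin; offset < begin + num; ++offset) {
        table[offset] = flag;
    }
}

// On every guest register write, raise the flag mapped to that offset.
void NotifyWrite(const Table& table, Flags& flags, std::size_t offset) {
    if (const std::uint8_t flag = table[offset]; flag != NoFlag) {
        flags[flag] = true;
    }
}

// Before emitting state, the consumer tests and clears a flag in one step.
bool ExchangeDirty(Flags& flags, std::uint8_t flag) {
    const bool was_dirty = flags[flag];
    flags[flag] = false;
    return was_dirty;
}

MakeInvalidationFlags in the diff covers the complementary case: state that must be re-emitted unconditionally after a rebind, which is why the whole VertexBuffer0..VertexBuffer31 range is folded into the invalidation set.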
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index aa7c5d7c62..22a1014a99 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, VkImageAspectFlags aspect_mask, bool is_initialized, std::span<const VkBufferImageCopy> copies) { - static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = ACCESS_FLAGS, + .srcAccessMask = WRITE_ACCESS_FLAGS, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = image, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, }; const VkImageMemoryBarrier write_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = ACCESS_FLAGS, + .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = image, - .subresourceRange = - { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier); @@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() { scheduler.Finish(); } -ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { - const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); - return { - .handle = staging_ref.buffer, - .span = staging_ref.mapped_span, - }; +StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { + return staging_buffer_pool.Request(size, MemoryUsage::Upload); } -ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { - const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); - return { - .handle = staging_ref.buffer, - .span = staging_ref.mapped_span, - }; +StagingBufferRef 
TextureCacheRuntime::DownloadStagingBuffer(size_t size) { + return staging_buffer_pool.Request(size, MemoryUsage::Download); } void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, @@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, @@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span<const BufferImageCopy> copies) { +void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); - const VkBuffer src_buffer = map.handle; + std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); + const VkBuffer src_buffer = map.buffer; const VkImage vk_image = *image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; const bool is_initialized = std::exchange(initialized, true); @@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, }); } -void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, +void Image::UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); - const VkBuffer src_buffer = map.handle; + std::vector vk_copies = TransformBufferCopies(copies, map.offset); + const VkBuffer src_buffer = map.buffer; const VkBuffer dst_buffer = *buffer; scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { // TODO: Barriers @@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, }); } -void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span<const BufferImageCopy> copies) { - std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); - scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, +void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { + std::vector vk_copies = TransformBufferImageCopies(copies, 
map.offset, aspect_mask); + scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const VkImageMemoryBarrier image_write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, read_barrier); + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, memory_write_barrier, nullptr, image_write_barrier); }); } @@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM .pAttachments = attachments.data(), .width = key.size.width, .height = key.size.height, - .layers = static_cast<u32>(num_layers), + .layers = static_cast<u32>(std::max(num_layers, 1)), }); if (runtime.device.HasDebuggingToolAttached()) { framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 8d29361a1a..b08c234590 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include <compare> #include <span> +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> { namespace Vulkan { -struct ImageBufferMap { - [[nodiscard]] VkBuffer Handle() const noexcept { - return handle; - } - - [[nodiscard]] std::span<u8> Span() const noexcept { - return span; - } - - VkBuffer handle; - std::span<u8> span; -}; - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -76,9 +64,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); + [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 
- [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); + [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const std::array<Offset2D, 2>& dst_region, @@ -94,7 +82,7 @@ struct TextureCacheRuntime { return false; } - void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, + void AccelerateImageUpload(Image&, const StagingBufferRef&, std::span<const VideoCommon::SwizzleParameters>) { UNREACHABLE(); } @@ -112,13 +100,12 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, + void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); - void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, - std::span<const VideoCommon::BufferCopy> copies); + void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); - void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, + void DownloadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferImageCopy> copies); [[nodiscard]] VkImage Handle() const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index f99273c6a9..dc45fdcb19 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; void VKUpdateDescriptorQueue::TickFrame() { - payload.clear(); + payload_cursor = payload.data(); } void VKUpdateDescriptorQueue::Acquire() { // Minimum number of entries required. // This is the maximum number of entries a single draw call might use.
- static constexpr std::size_t MIN_ENTRIES = 0x400; + static constexpr size_t MIN_ENTRIES = 0x400; - if (payload.size() + MIN_ENTRIES >= payload.max_size()) { + if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); scheduler.WaitWorker(); - payload.clear(); + payload_cursor = payload.data(); } - upload_start = &*payload.end(); + upload_start = payload_cursor; } void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index e214f71959..d35e77c447 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -4,8 +4,7 @@ #pragma once -#include <variant> -#include <boost/container/static_vector.hpp> +#include <array> #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -16,13 +15,15 @@ class Device; class VKScheduler; struct DescriptorUpdateEntry { - DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} + struct Empty {}; + DescriptorUpdateEntry() = default; + DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} union { + Empty empty{}; VkDescriptorImageInfo image; VkDescriptorBufferInfo buffer; VkBufferView texel_buffer; @@ -41,39 +42,40 @@ public: void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = sampler, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } void AddImage(VkImageView image_view) { - payload.emplace_back(VkDescriptorImageInfo{ + *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = VK_NULL_HANDLE, .imageView = image_view, .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }); + }; } - void AddBuffer(VkBuffer buffer, u64 offset, size_t size) { - payload.emplace_back(VkDescriptorBufferInfo{ + void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) { + *(payload_cursor++) = VkDescriptorBufferInfo{ .buffer = buffer, .offset = offset, .range = size, - }); + }; } void AddTexelBuffer(VkBufferView texel_buffer) { - payload.emplace_back(texel_buffer); + *(payload_cursor++) = texel_buffer; } private: const Device& device; VKScheduler& scheduler; + DescriptorUpdateEntry* payload_cursor = nullptr; const DescriptorUpdateEntry* upload_start = nullptr; - boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; + std::array<DescriptorUpdateEntry, 0x10000> payload; }; } // namespace Vulkan diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 3b40db9bcb..02adcf9c79 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -64,6 +64,7 @@ void AsyncShaders::FreeWorkers() { void AsyncShaders::KillWorkers() { is_thread_exiting.store(true); + cv.notify_all(); for (auto& thread : worker_threads) { thread.detach(); } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 0dbb1a31fd..7fdff6e568 100644 --- a/src/video_core/shader/async_shaders.h +++ 
b/src/video_core/shader/async_shaders.h @@ -9,16 +9,7 @@ #include <shared_mutex> #include <thread> -// This header includes both Vulkan and OpenGL headers, this has to be fixed -// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues. -// Forcefully include glad early and undefine macros #include <glad/glad.h> -#ifdef CreateEvent -#undef CreateEvent -#endif -#ifdef CreateSemaphore -#undef CreateSemaphore -#endif #include "common/common_types.h" #include "video_core/renderer_opengl/gl_device.h" diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 50f4e7d358..7728f600e6 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Unsigned16: case StoreType::Signed16: { Node address = GetAddress(0); Node memory = (this->*get_memory)(address); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d3ea07aac9..5f88537bc4 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: + uses_y_negate = true; return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 833fa2a39f..c69681e8d8 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); // If enabled, the array index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); @@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is std::vector<Node> coords; for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + coords.push_back( + GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); } const Node array = is_array ? GetRegister(array_register) : nullptr; // When lod is used, it is always in gpr20 const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); + std::vector<Node> aoffi; + if (aoffi_enabled) { + aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); + } + Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; + MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0c6ab0f075..1cd7c14d76 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -139,6 +139,10 @@ public: return uses_legacy_varyings; } + bool UsesYNegate() const { + return uses_y_negate; + } + bool UsesWarps() const { return uses_warps; } @@ -465,6 +469,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_legacy_varyings{}; + bool uses_y_negate{}; bool uses_warps{}; bool uses_indexed_samplers{}; diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index c3c71657d9..693e47158e 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <mutex> #include "video_core/shader_notify.h" using namespace std::chrono_literals; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d1080300f8..b1da69971d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -103,9 +103,6 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); - /// Return an unique mutually exclusive lock for the cache - [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); - /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -179,6 +176,8 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + std::mutex mutex; + private: /// Iterate over all page indices in a range template <typename Func> @@ -212,8 +211,8 @@ private: void RefreshContents(Image& image); /// Upload data from guest to an image - template <typename MapBuffer> - void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); + template <typename StagingBuffer> + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); /// Find or create an image view from a guest descriptor [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); @@ -325,8 +324,6 @@ private: RenderTargets render_targets; - std::mutex mutex; - std::unordered_map<TICEntry, ImageViewId> image_views; std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<RenderTargets, FramebufferId> framebuffers; @@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() { } template <class P> -std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { - return std::unique_lock{mutex}; -} - -template <class P> const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { return slot_image_views[id]; } @@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { }); for (const ImageId image_id : images) { Image& image = slot_images[image_id]; - auto map = 
runtime.MapDownloadBuffer(image.unswizzled_size_bytes); + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, 0, copies); + image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } } @@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() { for (const ImageId image_id : download_ids) { total_size_bytes += slot_images[image_id].unswizzled_size_bytes; } - auto download_map = runtime.MapDownloadBuffer(total_size_bytes); - size_t buffer_offset = 0; + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; for (const ImageId image_id : download_ids) { Image& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, buffer_offset, copies); - buffer_offset += image.unswizzled_size_bytes; + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; } // Wait for downloads to finish runtime.Finish(); - buffer_offset = 0; - const std::span<u8> download_span = download_map.Span(); + download_map.offset = original_offset; + std::span<u8> download_span = download_map.mapped_span; for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - const std::span<u8> image_download_span = download_span.subspan(buffer_offset); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); - buffer_offset += image.unswizzled_size_bytes; + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); } committed_downloads.pop(); } @@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } - auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); - UploadImageContents(image, map, 0); + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); runtime.InsertUploadMemoryBarrier(); } template <class P> -template <typename MapBuffer> -void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { - const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); +template <typename StagingBuffer> +void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span<u8> mapped_span = staging.mapped_span; const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + runtime.AccelerateImageUpload(image, staging, uploads); } else if (True(image.flags & ImageFlagBits::Converted)) { std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else if (image.info.type == 
ImageType::Buffer) { const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(map, buffer_offset, copies); + image.UploadMemory(staging, copies); } } diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index bb2cdef81e..a0bc1f7b69 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -169,40 +169,6 @@ template <u32 GOB_EXTENT> return Common::DivCeil(AdjustMipSize(size, level), block_size); } -[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { - return config.Width() * config.Height() * BytesPerBlock(format); -} - -[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { - switch (type) { - case TextureType::Texture2D: - case TextureType::Texture2DArray: - case TextureType::Texture2DNoMipmap: - case TextureType::Texture3D: - case TextureType::TextureCubeArray: - case TextureType::TextureCubemap: - return true; - case TextureType::Texture1D: - case TextureType::Texture1DArray: - case TextureType::Texture1DBuffer: - return false; - } - return false; -} - -[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { - switch (type) { - case ImageType::e2D: - case ImageType::e3D: - case ImageType::Linear: - return true; - case ImageType::e1D: - case ImageType::Buffer: - return false; - } - UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); -} - [[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { switch (num_samples) { case 1: diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 53444e9455..e1b38c6acf 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -38,19 +38,18 @@ namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); - std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( - system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); - + const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); - const auto scope = context->Acquire(); - - auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); - if (!renderer->Init()) { + auto scope = context->Acquire(); + try { + auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); + gpu->BindRenderer(std::move(renderer)); + return gpu; + } catch (const std::runtime_error& exception) { + LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); return nullptr; } - - gpu->BindRenderer(std::move(renderer)); - return gpu; } u16 GetResolutionScaleFactor(const RendererBase& renderer) { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 51f53bc39d..34d3964340 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -18,27 +18,22 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - namespace { - namespace Alternatives { - -constexpr std::array Depth24UnormS8_UINT{ +constexpr std::array 
DEPTH24_UNORM_STENCIL8_UINT{ VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT, - VkFormat{}, + VK_FORMAT_UNDEFINED, }; -constexpr std::array Depth16UnormS8_UINT{ +constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, - VkFormat{}, + VK_FORMAT_UNDEFINED, }; - } // namespace Alternatives constexpr std::array REQUIRED_EXTENSIONS{ - VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, @@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, + VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, +#ifdef _WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, +#endif +#ifdef __linux__ + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, +#endif }; template <typename T> @@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) { constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { switch (format) { case VK_FORMAT_D24_UNORM_S8_UINT: - return Alternatives::Depth24UnormS8_UINT.data(); + return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); case VK_FORMAT_D16_UNORM_S8_UINT: - return Alternatives::Depth16UnormS8_UINT.data(); + return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data(); default: return nullptr; } @@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, format_properties{GetFormatProperties(physical)} { - CheckSuitability(); + CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(); + const std::vector extensions = LoadExtensions(surface != nullptr); VkPhysicalDeviceFeatures2 features2{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = nullptr, - .features{}, + .features{ + .robustBufferAccess = true, + .fullDrawIndexUint32 = false, + .imageCubeArray = true, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = true, + .sampleRateShading = false, + .dualSrcBlend = false, + .logicOp = false, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = false, + .depthBounds = false, + .wideLines = false, + .largePoints = true, + .alphaToOne = false, + .multiViewport = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = false, + .textureCompressionASTC_LDR = is_optimal_astc_supported, + .textureCompressionBC = false, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = false, + .vertexPipelineStoresAndAtomics = true, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = false, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = is_shader_storage_image_multisample, + .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = false, + .shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance 
= false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .shaderResourceResidency = false, + .shaderResourceMinLod = false, + .sparseBinding = false, + .sparseResidencyBuffer = false, + .sparseResidencyImage2D = false, + .sparseResidencyImage3D = false, + .sparseResidency2Samples = false, + .sparseResidency4Samples = false, + .sparseResidency8Samples = false, + .sparseResidency16Samples = false, + .sparseResidencyAliased = false, + .variableMultisampleRate = false, + .inheritedQueries = false, + }, }; const void* first_next = &features2; void** next = &features2.pNext; - features2.features = { - .robustBufferAccess = false, - .fullDrawIndexUint32 = false, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = false, - .dualSrcBlend = false, - .logicOp = false, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, - .largePoints = true, - .alphaToOne = false, - .multiViewport = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = false, - .textureCompressionASTC_LDR = is_optimal_astc_supported, - .textureCompressionBC = false, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = true, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = is_shader_storage_image_multisample, - .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .shaderResourceResidency = false, - .shaderResourceMinLod = false, - .sparseBinding = false, - .sparseResidencyBuffer = false, - .sparseResidencyImage2D = false, - .sparseResidencyImage3D = false, - .sparseResidency2Samples = false, - .sparseResidency4Samples = false, - .sparseResidency8Samples = false, - .sparseResidency16Samples = false, - .sparseResidencyAliased = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, .pNext = nullptr, @@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } - VkPhysicalDeviceRobustness2FeaturesEXT robustness2; - if (ext_robustness2) { - robustness2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, - .pNext = nullptr, - .robustBufferAccess2 = false, - .robustImageAccess2 = true, - .nullDescriptor = true, - }; - SetNext(next, robustness2); - } else { - LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); - } - if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want return (supported_usage & wanted_usage) == 
wanted_usage; } -void Device::CheckSuitability() const { +void Device::CheckSuitability(bool requires_swapchain) const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; + bool has_swapchain = false; for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { - for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { + const std::string_view name{property.extensionName}; + for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { continue; } - const std::string_view name{property.extensionName}; available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; } + has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME; } for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { if (available_extensions[i]) { @@ -553,6 +542,11 @@ void Device::CheckSuitability() const { LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); } + if (requires_swapchain && !has_swapchain) { + LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain"); + throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); + } + struct LimitTuple { u32 minimum; u32 value; @@ -572,9 +566,20 @@ void Device::CheckSuitability() const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } - const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; + robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + + VkPhysicalDeviceFeatures2 features2{}; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + features2.pNext = &robustness2; + + physical.GetFeatures2KHR(features2); + + const VkPhysicalDeviceFeatures& features{features2.features}; const std::array feature_report{ + std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -589,6 +594,9 @@ void Device::CheckSuitability() const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), + std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), + std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), }; for (const auto& [is_supported, name] : feature_report) { if (is_supported) { @@ -599,17 +607,19 @@ void Device::CheckSuitability() const { } } -std::vector<const char*> Device::LoadExtensions() { +std::vector<const char*> Device::LoadExtensions(bool requires_surface) { std::vector<const char*> extensions; - extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); + extensions.reserve(8 + REQUIRED_EXTENSIONS.size()); extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); + if (requires_surface) { + extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + } bool has_khr_shader_float16_int8{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; - bool has_ext_robustness2{}; for (const VkExtensionProperties& 
extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, bool push) { @@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() { test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); - test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } } - VkPhysicalDeviceFeatures2KHR features; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; @@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() { is_float16_supported = float16_int8_features.shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } - if (has_ext_subgroup_size_control) { VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; subgroup_features.sType = @@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() { } else { is_warp_potentially_bigger = true; } - if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; @@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() { ext_transform_feedback = true; } } - if (has_ext_custom_border_color) { VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; @@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() { ext_custom_border_color = true; } } - if (has_ext_extended_dynamic_state) { VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; @@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() { ext_extended_dynamic_state = true; } } - - if (has_ext_robustness2) { - VkPhysicalDeviceRobustness2FeaturesEXT robustness2; - robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = nullptr; - features.pNext = &robustness2; - physical.GetFeatures2KHR(features); - if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { - extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); - ext_robustness2 = true; - } - } - return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4b66dba7a8..67d70cd224 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer }; const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. -class Device final { +class Device { public: explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, const vk::InstanceDispatch& dld); @@ -227,10 +227,10 @@ public: private: /// Checks if the physical device is suitable. - void CheckSuitability() const; + void CheckSuitability(bool requires_swapchain) const; /// Loads extensions into a vector and stores available ones in this object. 
- std::vector<const char*> LoadExtensions(); + std::vector<const char*> LoadExtensions(bool requires_surface); /// Sets up queue families. void SetupFamilies(VkSurfaceKHR surface); @@ -285,7 +285,6 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 889ecda0c6..bfd6e6addb 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <future> #include <optional> #include <span> #include <utility> @@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } - vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); + // Create the instance on a transient worker thread and block on .get(); presumably a + // workaround for drivers that misbehave when instance creation runs on the GUI thread. + vk::Instance instance = + std::async([&] { + return vk::Instance::Create(required_version, layers, extensions, dld); + }).get(); if (!vk::Load(*instance, dld)) { LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index d6eb3af317..2a8b7a907e 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -7,6 +7,8 @@ #include <optional> #include <vector> +#include <glad/glad.h> + #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" @@ -55,10 +57,24 @@ struct Range { class MemoryAllocation { public: - explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, - VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) - : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, - property_flags{properties}, shifted_memory_type{1U << type} {} + explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, + u64 allocation_size_, u32 type) + : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, + shifted_memory_type{1U << type} {} + +#if defined(_WIN32) || defined(__linux__) + ~MemoryAllocation() { + if (owning_opengl_handle != 0) { + glDeleteMemoryObjectsEXT(1, &owning_opengl_handle); + } + } +#endif + + MemoryAllocation& operator=(const MemoryAllocation&) = delete; + MemoryAllocation(const MemoryAllocation&) = delete; + + MemoryAllocation& operator=(MemoryAllocation&&) = delete; + MemoryAllocation(MemoryAllocation&&) = delete; [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { const std::optional<u64> alloc = FindFreeRegion(size, alignment); @@ -88,6 +104,31 @@ public: return memory_mapped_span; } +#ifdef _WIN32 + [[nodiscard]] u32 ExportOpenGLHandle() { + if (!owning_opengl_handle) { +
glCreateMemoryObjectsEXT(1, &owning_opengl_handle); + glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size, + GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, + memory.GetMemoryWin32HandleKHR()); + } + return owning_opengl_handle; + } +#elif __linux__ + [[nodiscard]] u32 ExportOpenGLHandle() { + if (!owning_opengl_handle) { + glCreateMemoryObjectsEXT(1, &owning_opengl_handle); + glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, + memory.GetMemoryFdKHR()); + } + return owning_opengl_handle; + } +#else + [[nodiscard]] u32 ExportOpenGLHandle() { + return 0; + } +#endif + /// Returns whether this allocation is compatible with the arguments. [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; @@ -118,13 +159,15 @@ private: return candidate; } - const Device& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const u64 allocation_size; ///< Size of this allocation. const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. const u32 shifted_memory_type; ///< Shifted Vulkan memory type. std::vector<Range> commits; ///< All commit ranges done from this allocation. std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. +#if defined(_WIN32) || defined(__linux__) + u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle. +#endif }; MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, @@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() { return span; } +u32 MemoryCommit::ExportOpenGLHandle() const { + return allocation->ExportOpenGLHandle(); +} + void MemoryCommit::Release() { if (allocation) { allocation->Free(begin); } } -MemoryAllocator::MemoryAllocator(const Device& device_) - : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} +MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) + : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, + export_allocations{export_allocations_} {} MemoryAllocator::~MemoryAllocator() = default; @@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { const u32 type = FindType(flags, type_mask).value(); + const VkExportMemoryAllocateInfo export_allocate_info{ + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, +#ifdef _WIN32 + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, +#elif __linux__ + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, +#else + .handleTypes = 0, +#endif + }; vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, + .pNext = export_allocations ? 
&export_allocate_info : nullptr, .allocationSize = size, .memoryTypeIndex = type, }); - allocations.push_back( - std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type)); + allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); } std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index 9e6cfabf96..d1ce294504 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -43,6 +43,9 @@ public: /// It will map the backing allocation if it hasn't been mapped before. std::span<u8> Map(); + /// Returns a non-owning OpenGL handle, creating one if it doesn't exist. + u32 ExportOpenGLHandle() const; + /// Returns the Vulkan memory handler. VkDeviceMemory Memory() const { return memory; } @@ -67,7 +70,15 @@ private: /// Allocates and releases memory allocations on demand. class MemoryAllocator { public: - explicit MemoryAllocator(const Device& device_); + /** + * Construct memory allocator + * + * @param device_ Device to allocate from + * @param export_allocations_ True when allocations have to be exported + * + * @throw vk::Exception on failure + */ + explicit MemoryAllocator(const Device& device_, bool export_allocations_); ~MemoryAllocator(); MemoryAllocator& operator=(const MemoryAllocator&) = delete; @@ -106,8 +117,9 @@ private: /// Returns index to the fastest memory type compatible with the passed requirements. std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; - const Device& device; ///< Device handle. - const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const Device& device; ///< Device handle. + const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool export_allocations; ///< True when memory allocations have to be exported. std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
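    // Export path in brief: with export_allocations set, AllocMemory() chains a
    // VkExportMemoryAllocateInfo into each VkMemoryAllocateInfo, so a later
    // MemoryCommit::ExportOpenGLHandle() can import the backing VkDeviceMemory as a GL
    // memory object via glImportMemoryWin32HandleEXT / glImportMemoryFdEXT.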
}; diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 5e15ad6075..2aa0ffbe65 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkFreeCommandBuffers); X(vkFreeDescriptorSets); X(vkFreeMemory); - X(vkGetBufferMemoryRequirements); + X(vkGetBufferMemoryRequirements2); X(vkGetDeviceQueue); X(vkGetEventStatus); X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); + X(vkGetMemoryFdKHR); +#ifdef _WIN32 + X(vkGetMemoryWin32HandleKHR); +#endif X(vkGetQueryPoolResults); X(vkGetSemaphoreCounterValueKHR); X(vkMapMemory); @@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); } +int DeviceMemory::GetMemoryFdKHR() const { + const VkMemoryGetFdInfoKHR get_fd_info{ + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .pNext = nullptr, + .memory = handle, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + int fd; + Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd)); + return fd; +} + +#ifdef _WIN32 +HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const { + const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{ + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .pNext = nullptr, + .memory = handle, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + }; + HANDLE win32_handle; + Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle)); + return win32_handle; +} +#endif + void DeviceMemory::SetObjectNameEXT(const char* name) const { SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); } @@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { return DeviceMemory(memory, handle, *dld); } -VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { - VkMemoryRequirements requirements; - dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); - return requirements; +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext) const noexcept { + const VkBufferMemoryRequirementsInfo2 info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + .pNext = nullptr, + .buffer = buffer, + }; + VkMemoryRequirements2 requirements{ + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = pnext, + .memoryRequirements{}, + }; + dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); + return requirements.memoryRequirements; } VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 9689de0cb4..3e36d356a3 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -15,8 +15,19 @@ #include <vector> #define VK_NO_PROTOTYPES +#ifdef _WIN32 +#define VK_USE_PLATFORM_WIN32_KHR +#endif #include <vulkan/vulkan.h> +// Sanitize macros +#ifdef CreateEvent +#undef CreateEvent +#endif +#ifdef CreateSemaphore +#undef CreateSemaphore +#endif + #include "common/common_types.h" #ifdef _MSC_VER @@ -174,7 +185,7 @@ struct InstanceDispatch { }; /// Table holding Vulkan device function pointers. 
-struct DeviceDispatch : public InstanceDispatch { +struct DeviceDispatch : InstanceDispatch { PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; @@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; PFN_vkFreeMemory vkFreeMemory{}; - PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; + PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; PFN_vkGetDeviceQueue vkGetDeviceQueue{}; PFN_vkGetEventStatus vkGetEventStatus{}; PFN_vkGetFenceStatus vkGetFenceStatus{}; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; + PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; +#ifdef _WIN32 + PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; +#endif PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; PFN_vkMapMemory vkMapMemory{}; @@ -344,6 +359,9 @@ public: /// Construct an empty handle. Handle() = default; + /// Construct an empty handle. + Handle(std::nullptr_t) {} + /// Copying Vulkan objects is not supported and will never be. Handle(const Handle&) = delete; Handle& operator=(const Handle&) = delete; @@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; public: + int GetMemoryFdKHR() const; + +#ifdef _WIN32 + HANDLE GetMemoryWin32HandleKHR() const; +#endif + /// Set object name. void SetObjectNameEXT(const char* name) const; @@ -847,7 +871,8 @@ public: DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; - VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, + void* pnext = nullptr) const noexcept; VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; @@ -1033,6 +1058,12 @@ public: void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkDependencyFlags dependency_flags, + const VkMemoryBarrier& memory_barrier) const noexcept { + PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {}); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, const VkBufferMemoryBarrier& buffer_barrier) const noexcept { PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); } diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index fb9967c8f2..b025ced1c8 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -151,6 +151,7 @@ add_executable(yuzu util/util.h compatdb.cpp compatdb.h + yuzu.qrc yuzu.rc ) diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp index c680fd2c21..b92cd6886f 100644 --- a/src/yuzu/applets/controller.cpp +++ b/src/yuzu/applets/controller.cpp @@ -67,6 +67,8 @@ bool IsControllerCompatible(Settings::ControllerType controller_type, return parameters.allow_right_joycon; case Settings::ControllerType::Handheld: return parameters.enable_single_mode && parameters.allow_handheld; + case Settings::ControllerType::GameCube: + return parameters.allow_gamecube_controller; default: return false; } @@ -370,7 +372,7 @@ void QtControllerSelectorDialog::SetSupportedControllers() { 
QStringLiteral("image: url(:/controller/applet_joycon_right%0_disabled); ").arg(theme)); } - if (parameters.allow_pro_controller) { + if (parameters.allow_pro_controller || parameters.allow_gamecube_controller) { ui->controllerSupported5->setStyleSheet( QStringLiteral("image: url(:/controller/applet_pro_controller%0); ").arg(theme)); } else { @@ -420,6 +422,10 @@ void QtControllerSelectorDialog::SetEmulatedControllers(std::size_t player_index Settings::ControllerType::Handheld); emulated_controllers[player_index]->addItem(tr("Handheld")); } + + pairs.emplace_back(emulated_controllers[player_index]->count(), + Settings::ControllerType::GameCube); + emulated_controllers[player_index]->addItem(tr("GameCube Controller")); } Settings::ControllerType QtControllerSelectorDialog::GetControllerTypeFromIndex( @@ -461,6 +467,7 @@ void QtControllerSelectorDialog::UpdateControllerIcon(std::size_t player_index) switch (GetControllerTypeFromIndex(emulated_controllers[player_index]->currentIndex(), player_index)) { case Settings::ControllerType::ProController: + case Settings::ControllerType::GameCube: return QStringLiteral("image: url(:/controller/applet_pro_controller%0); "); case Settings::ControllerType::DualJoyconDetached: return QStringLiteral("image: url(:/controller/applet_dual_joycon%0); "); diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index ffdf34a4a0..1c61d419d1 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -64,7 +64,7 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); - system.Renderer().Rasterizer().LoadDiskResources( + system.Renderer().ReadRasterizer()->LoadDiskResources( system.CurrentProcess()->GetTitleID(), stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { emit LoadProgress(stage, value, total); @@ -405,12 +405,17 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { if (event->source() == Qt::MouseEventSynthesizedBySystem) { return; } - auto pos = event->pos(); const auto [x, y] = ScaleTouch(pos); - input_subsystem->GetMouse()->MouseMove(x, y); + const int center_x = width() / 2; + const int center_y = height() / 2; + input_subsystem->GetMouse()->MouseMove(x, y, center_x, center_y); this->TouchMoved(x, y, 0); + if (Settings::values.mouse_panning) { + QCursor::setPos(mapToGlobal({center_x, center_y})); + } + emit MouseActivity(); } @@ -714,6 +719,11 @@ void GRenderWindow::showEvent(QShowEvent* event) { bool GRenderWindow::eventFilter(QObject* object, QEvent* event) { if (event->type() == QEvent::HoverMove) { + if (Settings::values.mouse_panning) { + auto* hover_event = static_cast<QMouseEvent*>(event); + mouseMoveEvent(hover_event); + return false; + } emit MouseActivity(); } return false; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 8d85a1986e..3d6f643008 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -220,7 +220,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default // This must be in alphabetical order according to action name as it must have the same order as // UISetting::values.shortcuts, which is alphabetically ordered. 
// clang-format off -const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{ +const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{ {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, @@ -235,6 +235,7 @@ const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{ {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, + {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("F9"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}}, }}; @@ -507,6 +508,9 @@ void Config::ReadControlValues() { Settings::values.emulate_analog_keyboard = ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); + Settings::values.mouse_panning = ReadSetting(QStringLiteral("mouse_panning"), false).toBool(); + Settings::values.mouse_panning_sensitivity = + ReadSetting(QStringLiteral("mouse_panning_sensitivity"), 1).toFloat(); ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true); ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), @@ -610,12 +614,6 @@ void Config::ReadDataStorageValues() { QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir))) .toString() .toStdString()); - FS::GetUserPath(FS::UserPath::CacheDir, - qt_config - ->value(QStringLiteral("cache_directory"), - QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir))) - .toString() - .toStdString()); Settings::values.gamecard_inserted = ReadSetting(QStringLiteral("gamecard_inserted"), false).toBool(); Settings::values.gamecard_current_game = @@ -778,14 +776,14 @@ void Config::ReadRendererValues() { ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); ReadSettingGlobal(Settings::values.use_disk_shader_cache, QStringLiteral("use_disk_shader_cache"), true); - ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); + ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1); ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, QStringLiteral("use_asynchronous_gpu_emulation"), true); ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), true); ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), - true); + false); ReadSettingGlobal(Settings::values.use_asynchronous_shaders, QStringLiteral("use_asynchronous_shaders"), false); ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), @@ -1184,7 +1182,9 @@ void Config::SaveControlValues() { 
WriteSetting(QStringLiteral("keyboard_enabled"), Settings::values.keyboard_enabled, false); WriteSetting(QStringLiteral("emulate_analog_keyboard"), Settings::values.emulate_analog_keyboard, false); - + WriteSetting(QStringLiteral("mouse_panning"), Settings::values.mouse_panning, false); + WriteSetting(QStringLiteral("mouse_panning_sensitivity"), + Settings::values.mouse_panning_sensitivity, 1.0f); qt_config->endGroup(); } @@ -1212,9 +1212,6 @@ void Config::SaveDataStorageValues() { WriteSetting(QStringLiteral("dump_directory"), QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir)), QString::fromStdString(FS::GetUserPath(FS::UserPath::DumpDir))); - WriteSetting(QStringLiteral("cache_directory"), - QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir)), - QString::fromStdString(FS::GetUserPath(FS::UserPath::CacheDir))); WriteSetting(QStringLiteral("gamecard_inserted"), Settings::values.gamecard_inserted, false); WriteSetting(QStringLiteral("gamecard_current_game"), Settings::values.gamecard_current_game, false); @@ -1345,14 +1342,14 @@ void Config::SaveRendererValues() { Settings::values.use_disk_shader_cache, true); WriteSettingGlobal(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), - Settings::values.gpu_accuracy.UsingGlobal(), 0); + Settings::values.gpu_accuracy.UsingGlobal(), 1); WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), Settings::values.use_asynchronous_gpu_emulation, true); WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, true); WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), - Settings::values.use_assembly_shaders, true); + Settings::values.use_assembly_shaders, false); WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), Settings::values.use_asynchronous_shaders, false); WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 8a600e19d5..949c4eb134 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -42,7 +42,7 @@ public: default_mouse_buttons; static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; - static const std::array<UISettings::Shortcut, 16> default_hotkeys; + static const std::array<UISettings::Shortcut, 17> default_hotkeys; private: void Initialize(const std::string& config_name); diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp index 7ab4a80f73..bde2d46202 100644 --- a/src/yuzu/configuration/configure_filesystem.cpp +++ b/src/yuzu/configuration/configure_filesystem.cpp @@ -26,8 +26,6 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent) [this] { SetDirectory(DirectoryTarget::Dump, ui->dump_path_edit); }); connect(ui->load_path_button, &QToolButton::pressed, this, [this] { SetDirectory(DirectoryTarget::Load, ui->load_path_edit); }); - connect(ui->cache_directory_button, &QToolButton::pressed, this, - [this] { SetDirectory(DirectoryTarget::Cache, ui->cache_directory_edit); }); connect(ui->reset_game_list_cache, &QPushButton::pressed, this, &ConfigureFilesystem::ResetMetadata); @@ -50,8 +48,6 @@ void ConfigureFilesystem::setConfiguration() { 
QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::DumpDir))); ui->load_path_edit->setText( QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::LoadDir))); - ui->cache_directory_edit->setText( - QString::fromStdString(Common::FS::GetUserPath(Common::FS::UserPath::CacheDir))); ui->gamecard_inserted->setChecked(Settings::values.gamecard_inserted); ui->gamecard_current_game->setChecked(Settings::values.gamecard_current_game); @@ -72,9 +68,6 @@ void ConfigureFilesystem::applyConfiguration() { ui->dump_path_edit->text().toStdString()); Common::FS::GetUserPath(Common::FS::UserPath::LoadDir, ui->load_path_edit->text().toStdString()); - Common::FS::GetUserPath(Common::FS::UserPath::CacheDir, - ui->cache_directory_edit->text().toStdString()); - Settings::values.gamecard_path = ui->gamecard_path_edit->text().toStdString(); Settings::values.gamecard_inserted = ui->gamecard_inserted->isChecked(); Settings::values.gamecard_current_game = ui->gamecard_current_game->isChecked(); @@ -103,9 +96,6 @@ void ConfigureFilesystem::SetDirectory(DirectoryTarget target, QLineEdit* edit) case DirectoryTarget::Load: caption = tr("Select Mod Load Directory..."); break; - case DirectoryTarget::Cache: - caption = tr("Select Cache Directory..."); - break; } QString str; diff --git a/src/yuzu/configuration/configure_filesystem.h b/src/yuzu/configuration/configure_filesystem.h index a793037604..2147cd4050 100644 --- a/src/yuzu/configuration/configure_filesystem.h +++ b/src/yuzu/configuration/configure_filesystem.h @@ -32,7 +32,6 @@ private: Gamecard, Dump, Load, - Cache, }; void SetDirectory(DirectoryTarget target, QLineEdit* edit); diff --git a/src/yuzu/configuration/configure_filesystem.ui b/src/yuzu/configuration/configure_filesystem.ui index 84bea06001..62b9abc7ab 100644 --- a/src/yuzu/configuration/configure_filesystem.ui +++ b/src/yuzu/configuration/configure_filesystem.ui @@ -198,40 +198,7 @@ <string>Caching</string> </property> <layout class="QGridLayout" name="gridLayout_5"> - <item row="0" column="0"> - <widget class="QLabel" name="label_10"> - <property name="text"> - <string>Cache Directory</string> - </property> - </widget> - </item> - <item row="0" column="1"> - <spacer name="horizontalSpacer_3"> - <property name="orientation"> - <enum>Qt::Horizontal</enum> - </property> - <property name="sizeType"> - <enum>QSizePolicy::Fixed</enum> - </property> - <property name="sizeHint" stdset="0"> - <size> - <width>40</width> - <height>20</height> - </size> - </property> - </spacer> - </item> - <item row="0" column="2"> - <widget class="QLineEdit" name="cache_directory_edit"/> - </item> - <item row="0" column="3"> - <widget class="QToolButton" name="cache_directory_button"> - <property name="text"> - <string>...</string> - </property> - </widget> - </item> - <item row="1" column="0" colspan="4"> + <item row="0" column="0" colspan="2"> <layout class="QHBoxLayout" name="horizontalLayout_2"> <item> <widget class="QCheckBox" name="cache_game_list"> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index b78a5dff0c..9ff32aec4d 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -2,6 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+// Include this early to include Vulkan headers how we want to +#include "video_core/vulkan_common/vulkan_wrapper.h" + #include <QColorDialog> #include <QComboBox> #include <QVulkanInstance> @@ -11,7 +14,8 @@ #include "core/core.h" #include "core/settings.h" #include "ui_configure_graphics.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_library.h" #include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_graphics.h" @@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() { ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } -void ConfigureGraphics::RetrieveVulkanDevices() { +void ConfigureGraphics::RetrieveVulkanDevices() try { + using namespace Vulkan; + + vk::InstanceDispatch dld; + const Common::DynamicLibrary library = OpenLibrary(); + const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); + const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); + vulkan_devices.clear(); - for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { + vulkan_devices.reserve(physical_devices.size()); + for (const VkPhysicalDevice device : physical_devices) { + const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName; vulkan_devices.push_back(QString::fromStdString(name)); } + +} catch (const Vulkan::vk::Exception& exception) { + LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); } Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { diff --git a/src/yuzu/configuration/configure_input_advanced.cpp b/src/yuzu/configuration/configure_input_advanced.cpp index 4e557bc6f2..a1a0eb676c 100644 --- a/src/yuzu/configuration/configure_input_advanced.cpp +++ b/src/yuzu/configuration/configure_input_advanced.cpp @@ -122,6 +122,9 @@ void ConfigureInputAdvanced::ApplyConfiguration() { Settings::values.mouse_enabled = ui->mouse_enabled->isChecked(); Settings::values.keyboard_enabled = ui->keyboard_enabled->isChecked(); Settings::values.emulate_analog_keyboard = ui->emulate_analog_keyboard->isChecked(); + Settings::values.mouse_panning = ui->mouse_panning->isChecked(); + Settings::values.mouse_panning_sensitivity = + static_cast<float>(ui->mouse_panning_sensitivity->value()); Settings::values.touchscreen.enabled = ui->touchscreen_enabled->isChecked(); } @@ -149,6 +152,8 @@ void ConfigureInputAdvanced::LoadConfiguration() { ui->mouse_enabled->setChecked(Settings::values.mouse_enabled); ui->keyboard_enabled->setChecked(Settings::values.keyboard_enabled); ui->emulate_analog_keyboard->setChecked(Settings::values.emulate_analog_keyboard); + ui->mouse_panning->setChecked(Settings::values.mouse_panning); + ui->mouse_panning_sensitivity->setValue(Settings::values.mouse_panning_sensitivity); ui->touchscreen_enabled->setChecked(Settings::values.touchscreen.enabled); UpdateUIEnabled(); diff --git a/src/yuzu/configuration/configure_input_advanced.ui b/src/yuzu/configuration/configure_input_advanced.ui index f207e5d3be..173130d8db 100644 --- a/src/yuzu/configuration/configure_input_advanced.ui +++ b/src/yuzu/configuration/configure_input_advanced.ui @@ -2546,27 +2546,65 @@ </property> </widget> </item> - <item row="1" column="0"> - <widget class="QCheckBox" name="emulate_analog_keyboard"> - <property name="minimumSize"> - <size> - <width>0</width> - <height>23</height> - </size> - </property> 
- <property name="text"> - <string>Emulate Analog with Keyboard Input</string> - </property> - </widget> - </item> - <item row="5" column="2"> + <item row="1" column="0"> + <widget class="QCheckBox" name="emulate_analog_keyboard"> + <property name="minimumSize"> + <size> + <width>0</width> + <height>23</height> + </size> + </property> + <property name="text"> + <string>Emulate Analog with Keyboard Input</string> + </property> + </widget> + </item> + <item row="2" column="0"> + <widget class="QCheckBox" name="mouse_panning"> + <property name="minimumSize"> + <size> + <width>0</width> + <height>23</height> + </size> + </property> + <property name="text"> + <string>Enable mouse panning</string> + </property> + </widget> + </item> + <item row="2" column="2"> + <widget class="QDoubleSpinBox" name="mouse_panning_sensitivity"> + <property name="toolTip"> + <string>Mouse sensitivity</string> + </property> + <property name="alignment"> + <set>Qt::AlignCenter</set> + </property> + <property name="decimals"> + <number>2</number> + </property> + <property name="minimum"> + <double>0.100000000000000</double> + </property> + <property name="maximum"> + <double>16.000000000000000</double> + </property> + <property name="singleStep"> + <double>0.010000000000000</double> + </property> + <property name="value"> + <double>1.000000000000000</double> + </property> + </widget> + </item> + <item row="6" column="2"> <widget class="QPushButton" name="touchscreen_advanced"> <property name="text"> <string>Advanced</string> </property> </widget> </item> - <item row="2" column="1"> + <item row="3" column="1"> <spacer name="horizontalSpacer_8"> <property name="orientation"> <enum>Qt::Horizontal</enum> @@ -2582,21 +2620,21 @@ </property> </spacer> </item> - <item row="2" column="2"> + <item row="3" column="2"> <widget class="QPushButton" name="mouse_advanced"> <property name="text"> <string>Advanced</string> </property> </widget> </item> - <item row="5" column="0"> + <item row="6" column="0"> <widget class="QCheckBox" name="touchscreen_enabled"> <property name="text"> <string>Touchscreen</string> </property> </widget> </item> - <item row="2" column="0"> + <item row="3" column="0"> <widget class="QCheckBox" name="mouse_enabled"> <property name="minimumSize"> <size> @@ -2609,28 +2647,28 @@ </property> </widget> </item> - <item row="7" column="0"> + <item row="8" column="0"> <widget class="QLabel" name="motion_touch"> <property name="text"> <string>Motion / Touch</string> </property> </widget> </item> - <item row="7" column="2"> + <item row="8" column="2"> <widget class="QPushButton" name="buttonMotionTouch"> <property name="text"> <string>Configure</string> </property> </widget> </item> - <item row="6" column="0"> + <item row="7" column="0"> <widget class="QCheckBox" name="debug_enabled"> <property name="text"> <string>Debug Controller</string> </property> </widget> </item> - <item row="6" column="2"> + <item row="7" column="2"> <widget class="QPushButton" name="debug_configure"> <property name="text"> <string>Configure</string> diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index c9d19c948c..21d0d34499 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -467,10 +467,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i UpdateControllerIcon(); UpdateControllerAvailableButtons(); + UpdateControllerEnabledButtons(); + UpdateControllerButtonNames(); 
UpdateMotionButtons(); connect(ui->comboControllerType, qOverload<int>(&QComboBox::currentIndexChanged), [this](int) { UpdateControllerIcon(); UpdateControllerAvailableButtons(); + UpdateControllerEnabledButtons(); + UpdateControllerButtonNames(); UpdateMotionButtons(); }); @@ -558,9 +562,6 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i &ConfigureInputPlayer::SaveProfile); LoadConfiguration(); - - // TODO(wwylele): enable this when we actually emulate it - ui->buttonHome->setEnabled(false); ui->controllerFrame->SetPlayerInput(player_index, buttons_param, analogs_param); ui->controllerFrame->SetConnectedStatus(ui->groupConnectedController->isChecked()); } @@ -924,6 +925,12 @@ void ConfigureInputPlayer::SetConnectableControllers() { Settings::ControllerType::Handheld); ui->comboControllerType->addItem(tr("Handheld")); } + + if (enable_all || npad_style_set.gamecube == 1) { + index_controller_type_pairs.emplace_back(ui->comboControllerType->count(), + Settings::ControllerType::GameCube); + ui->comboControllerType->addItem(tr("GameCube Controller")); + } }; Core::System& system{Core::System::GetInstance()}; @@ -1014,7 +1021,7 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() { // List of all the widgets that will be hidden by any of the following layouts that need // "unhidden" after the controller type changes - const std::array<QWidget*, 9> layout_show = { + const std::array<QWidget*, 11> layout_show = { ui->buttonShoulderButtonsSLSR, ui->horizontalSpacerShoulderButtonsWidget, ui->horizontalSpacerShoulderButtonsWidget2, @@ -1024,6 +1031,8 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() { ui->buttonShoulderButtonsRight, ui->buttonMiscButtonsPlusHome, ui->bottomRight, + ui->buttonMiscButtonsMinusGroup, + ui->buttonMiscButtonsScreenshotGroup, }; for (auto* widget : layout_show) { @@ -1056,6 +1065,14 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() { ui->bottomLeft, }; break; + case Settings::ControllerType::GameCube: + layout_hidden = { + ui->buttonShoulderButtonsSLSR, + ui->horizontalSpacerShoulderButtonsWidget2, + ui->buttonMiscButtonsMinusGroup, + ui->buttonMiscButtonsScreenshotGroup, + }; + break; } for (auto* widget : layout_hidden) { @@ -1063,6 +1080,52 @@ void ConfigureInputPlayer::UpdateControllerAvailableButtons() { } } +void ConfigureInputPlayer::UpdateControllerEnabledButtons() { + auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); + if (debug) { + layout = Settings::ControllerType::ProController; + } + + // List of all the widgets that will be disabled by any of the following layouts that need + // "enabled" after the controller type changes + const std::array<QWidget*, 4> layout_enable = { + ui->buttonHome, + ui->buttonLStickPressedGroup, + ui->groupRStickPressed, + ui->buttonShoulderButtonsButtonLGroup, + }; + + for (auto* widget : layout_enable) { + widget->setEnabled(true); + } + + std::vector<QWidget*> layout_disable; + switch (layout) { + case Settings::ControllerType::ProController: + case Settings::ControllerType::DualJoyconDetached: + case Settings::ControllerType::Handheld: + case Settings::ControllerType::LeftJoycon: + case Settings::ControllerType::RightJoycon: + // TODO(wwylele): enable this when we actually emulate it + layout_disable = { + ui->buttonHome, + }; + break; + case Settings::ControllerType::GameCube: + layout_disable = { + ui->buttonHome, + ui->buttonLStickPressedGroup, + ui->groupRStickPressed, + ui->buttonShoulderButtonsButtonLGroup, + }; + 
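+        // The GameCube pad has no Home button or clickable sticks, and its physical L
+        // trigger is surfaced through the ZL group (retitled in
+        // UpdateControllerButtonNames), so these mapping groups stay disabled.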
break; + } + + for (auto* widget : layout_disable) { + widget->setEnabled(false); + } +} + void ConfigureInputPlayer::UpdateMotionButtons() { if (debug) { // Motion isn't used with the debug controller, hide both groupboxes. @@ -1085,6 +1148,11 @@ void ConfigureInputPlayer::UpdateMotionButtons() { ui->buttonMotionLeftGroup->hide(); ui->buttonMotionRightGroup->show(); break; + case Settings::ControllerType::GameCube: + // Hide both "Motion 1/2". + ui->buttonMotionLeftGroup->hide(); + ui->buttonMotionRightGroup->hide(); + break; case Settings::ControllerType::DualJoyconDetached: default: // Show both "Motion 1/2". @@ -1094,6 +1162,36 @@ void ConfigureInputPlayer::UpdateMotionButtons() { } } +void ConfigureInputPlayer::UpdateControllerButtonNames() { + auto layout = GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); + if (debug) { + layout = Settings::ControllerType::ProController; + } + + switch (layout) { + case Settings::ControllerType::ProController: + case Settings::ControllerType::DualJoyconDetached: + case Settings::ControllerType::Handheld: + case Settings::ControllerType::LeftJoycon: + case Settings::ControllerType::RightJoycon: + ui->buttonMiscButtonsPlusGroup->setTitle(tr("Plus")); + ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("ZL")); + ui->buttonShoulderButtonsZRGroup->setTitle(tr("ZR")); + ui->buttonShoulderButtonsRGroup->setTitle(tr("R")); + ui->LStick->setTitle(tr("Left Stick")); + ui->RStick->setTitle(tr("Right Stick")); + break; + case Settings::ControllerType::GameCube: + ui->buttonMiscButtonsPlusGroup->setTitle(tr("Start / Pause")); + ui->buttonShoulderButtonsButtonZLGroup->setTitle(tr("L")); + ui->buttonShoulderButtonsZRGroup->setTitle(tr("R")); + ui->buttonShoulderButtonsRGroup->setTitle(tr("Z")); + ui->LStick->setTitle(tr("Control Stick")); + ui->RStick->setTitle(tr("C-Stick")); + break; + } +} + void ConfigureInputPlayer::UpdateMappingWithDefaults() { if (ui->comboDevices->currentIndex() == 0) { return; diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h index da2b891368..efe953fbc9 100644 --- a/src/yuzu/configuration/configure_input_player.h +++ b/src/yuzu/configuration/configure_input_player.h @@ -143,9 +143,15 @@ private: /// Hides and disables controller settings based on the current controller type. void UpdateControllerAvailableButtons(); + /// Disables controller settings based on the current controller type. + void UpdateControllerEnabledButtons(); + /// Shows or hides motion groupboxes based on the current controller type. void UpdateMotionButtons(); + /// Alters the button names based on the current controller type. + void UpdateControllerButtonNames(); + /// Gets the default controller mapping for this device and auto configures the input to match. 
void UpdateMappingWithDefaults(); diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp index e3e8bde486..61ba91cefb 100644 --- a/src/yuzu/configuration/configure_input_player_widget.cpp +++ b/src/yuzu/configuration/configure_input_player_widget.cpp @@ -37,7 +37,8 @@ void PlayerControlPreview::SetPlayerInput(std::size_t index, const ButtonParam& Input::CreateDevice<Input::AnalogDevice>); UpdateColors(); } -void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_, +void PlayerControlPreview::SetPlayerInputRaw(std::size_t index, + const Settings::ButtonsRaw& buttons_, Settings::AnalogsRaw analogs_) { player_index = index; std::transform(buttons_.begin() + Settings::NativeButton::BUTTON_HID_BEGIN, @@ -226,6 +227,9 @@ void PlayerControlPreview::paintEvent(QPaintEvent* event) { case Settings::ControllerType::RightJoycon: DrawRightController(p, center); break; + case Settings::ControllerType::GameCube: + DrawGCController(p, center); + break; case Settings::ControllerType::ProController: default: DrawProController(p, center); @@ -517,14 +521,15 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center) { // Draw joysticks using namespace Settings::NativeAnalog; - DrawJoystick(p, center + QPointF(-65, -65) + (axis_values[LStick].value * 7), 1.62f, - button_values[Settings::NativeButton::LStick]); - DrawJoystick(p, center + QPointF(65, 12) + (axis_values[RStick].value * 7), 1.62f, - button_values[Settings::NativeButton::RStick]); - DrawRawJoystick(p, center + QPointF(-180, 90), axis_values[LStick].raw_value, - axis_values[LStick].properties); - DrawRawJoystick(p, center + QPointF(180, 90), axis_values[RStick].raw_value, - axis_values[RStick].properties); + const auto& l_stick = axis_values[LStick]; + const auto l_button = button_values[Settings::NativeButton::LStick]; + const auto& r_stick = axis_values[RStick]; + const auto r_button = button_values[Settings::NativeButton::RStick]; + + DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button); + DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button); + DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties); + DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties); } using namespace Settings::NativeButton; @@ -603,14 +608,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen { // Draw joysticks using namespace Settings::NativeAnalog; - DrawJoystick(p, center + QPointF(-171, -41) + (axis_values[LStick].value * 4), 1.0f, - button_values[Settings::NativeButton::LStick]); - DrawJoystick(p, center + QPointF(171, 8) + (axis_values[RStick].value * 4), 1.0f, - button_values[Settings::NativeButton::RStick]); - DrawRawJoystick(p, center + QPointF(-50, 0), axis_values[LStick].raw_value, - axis_values[LStick].properties); - DrawRawJoystick(p, center + QPointF(50, 0), axis_values[RStick].raw_value, - axis_values[RStick].properties); + const auto& l_stick = axis_values[LStick]; + const auto l_button = button_values[Settings::NativeButton::LStick]; + const auto& r_stick = axis_values[RStick]; + const auto r_button = button_values[Settings::NativeButton::RStick]; + + DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button); + DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button); + DrawRawJoystick(p, center + QPointF(-50, 0), 
l_stick.raw_value, l_stick.properties); + DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties); } using namespace Settings::NativeButton; @@ -699,9 +705,9 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center) { // Draw joysticks using namespace Settings::NativeAnalog; - DrawProJoystick(p, center + QPointF(-111, -55) + (axis_values[LStick].value * 11), + DrawProJoystick(p, center + QPointF(-111, -55), axis_values[LStick].value, 11, button_values[Settings::NativeButton::LStick]); - DrawProJoystick(p, center + QPointF(51, 0) + (axis_values[RStick].value * 11), + DrawProJoystick(p, center + QPointF(51, 0), axis_values[RStick].value, 11, button_values[Settings::NativeButton::RStick]); DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value, axis_values[LStick].properties); @@ -1002,12 +1008,6 @@ constexpr std::array<float, 3 * 2> up_arrow_symbol = { 0.0f, -3.0f, -3.0f, 2.0f, 3.0f, 2.0f, }; -constexpr std::array<float, 13 * 2> up_arrow = { - 9.4f, -9.8f, 9.4f, -10.2f, 8.9f, -29.8f, 8.5f, -30.0f, 8.1f, - -30.1f, 7.7f, -30.1f, -8.6f, -30.0f, -9.0f, -29.8f, -9.3f, -29.5f, - -9.5f, -29.1f, -9.5f, -28.7f, -9.1f, -9.1f, -8.8f, -8.8f, -}; - constexpr std::array<float, 64 * 2> trigger_button = { 5.5f, -12.6f, 5.8f, -12.6f, 6.7f, -12.5f, 8.1f, -12.3f, 8.6f, -12.2f, 9.2f, -12.0f, 9.5f, -11.9f, 9.9f, -11.8f, 10.6f, -11.5f, 11.0f, -11.3f, 11.2f, -11.2f, 11.4f, -11.1f, @@ -1457,15 +1457,18 @@ void PlayerControlPreview::DrawProBody(QPainter& p, const QPointF center) { constexpr int radius1 = 32; for (std::size_t point = 0; point < pro_left_handle.size() / 2; ++point) { - qleft_handle[point] = - center + QPointF(pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); - qright_handle[point] = - center + QPointF(-pro_left_handle[point * 2], pro_left_handle[point * 2 + 1]); + const float left_x = pro_left_handle[point * 2 + 0]; + const float left_y = pro_left_handle[point * 2 + 1]; + + qleft_handle[point] = center + QPointF(left_x, left_y); + qright_handle[point] = center + QPointF(-left_x, left_y); } for (std::size_t point = 0; point < pro_body.size() / 2; ++point) { - qbody[point] = center + QPointF(pro_body[point * 2], pro_body[point * 2 + 1]); - qbody[pro_body.size() - 1 - point] = - center + QPointF(-pro_body[point * 2], pro_body[point * 2 + 1]); + const float body_x = pro_body[point * 2 + 0]; + const float body_y = pro_body[point * 2 + 1]; + + qbody[point] = center + QPointF(body_x, body_y); + qbody[pro_body.size() - 1 - point] = center + QPointF(-body_x, body_y); } // Draw left handle body @@ -1496,21 +1499,25 @@ void PlayerControlPreview::DrawGCBody(QPainter& p, const QPointF center) { constexpr float angle = 2 * 3.1415f / 8; for (std::size_t point = 0; point < gc_left_body.size() / 2; ++point) { - qleft_handle[point] = - center + QPointF(gc_left_body[point * 2], gc_left_body[point * 2 + 1]); - qright_handle[point] = - center + QPointF(-gc_left_body[point * 2], gc_left_body[point * 2 + 1]); + const float body_x = gc_left_body[point * 2 + 0]; + const float body_y = gc_left_body[point * 2 + 1]; + + qleft_handle[point] = center + QPointF(body_x, body_y); + qright_handle[point] = center + QPointF(-body_x, body_y); } for (std::size_t point = 0; point < gc_body.size() / 2; ++point) { - qbody[point] = center + QPointF(gc_body[point * 2], gc_body[point * 2 + 1]); - qbody[gc_body.size() - 1 - point] = - center + QPointF(-gc_body[point * 2], gc_body[point * 2 + 1]); + const float body_x = gc_body[point * 2 + 0]; + const float 
body_y = gc_body[point * 2 + 1]; + + qbody[point] = center + QPointF(body_x, body_y); + qbody[gc_body.size() - 1 - point] = center + QPointF(-body_x, body_y); } for (std::size_t point = 0; point < 8; ++point) { - left_hex[point] = - center + QPointF(34 * std::cos(point * angle) - 111, 34 * std::sin(point * angle) - 44); - right_hex[point] = - center + QPointF(26 * std::cos(point * angle) + 61, 26 * std::sin(point * angle) + 37); + const float point_cos = std::cos(point * angle); + const float point_sin = std::sin(point * angle); + + left_hex[point] = center + QPointF(34 * point_cos - 111, 34 * point_sin - 44); + right_hex[point] = center + QPointF(26 * point_cos + 61, 26 * point_sin + 37); } // Draw body @@ -1631,32 +1638,36 @@ void PlayerControlPreview::DrawDualBody(QPainter& p, const QPointF center) { constexpr float offset = 209.3f; for (std::size_t point = 0; point < left_joycon_body.size() / 2; ++point) { - left_joycon[point] = center + QPointF(left_joycon_body[point * 2] * size + offset, - left_joycon_body[point * 2 + 1] * size - 1); - right_joycon[point] = center + QPointF(-left_joycon_body[point * 2] * size - offset, - left_joycon_body[point * 2 + 1] * size - 1); + const float body_x = left_joycon_body[point * 2 + 0]; + const float body_y = left_joycon_body[point * 2 + 1]; + + left_joycon[point] = center + QPointF(body_x * size + offset, body_y * size - 1); + right_joycon[point] = center + QPointF(-body_x * size - offset, body_y * size - 1); } for (std::size_t point = 0; point < left_joycon_slider.size() / 2; ++point) { - qleft_joycon_slider[point] = - center + QPointF(left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); - qright_joycon_slider[point] = - center + QPointF(-left_joycon_slider[point * 2], left_joycon_slider[point * 2 + 1]); + const float slider_x = left_joycon_slider[point * 2 + 0]; + const float slider_y = left_joycon_slider[point * 2 + 1]; + + qleft_joycon_slider[point] = center + QPointF(slider_x, slider_y); + qright_joycon_slider[point] = center + QPointF(-slider_x, slider_y); } for (std::size_t point = 0; point < left_joycon_topview.size() / 2; ++point) { + const float top_view_x = left_joycon_topview[point * 2 + 0]; + const float top_view_y = left_joycon_topview[point * 2 + 1]; + qleft_joycon_topview[point] = - center + QPointF(left_joycon_topview[point * 2] * size2 - 52, - left_joycon_topview[point * 2 + 1] * size2 - 52); + center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52); qright_joycon_topview[point] = - center + QPointF(-left_joycon_topview[point * 2] * size2 + 52, - left_joycon_topview[point * 2 + 1] * size2 - 52); + center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52); } for (std::size_t point = 0; point < left_joycon_slider_topview.size() / 2; ++point) { + const float top_view_x = left_joycon_slider_topview[point * 2 + 0]; + const float top_view_y = left_joycon_slider_topview[point * 2 + 1]; + qleft_joycon_slider_topview[point] = - center + QPointF(left_joycon_slider_topview[point * 2] * size2 - 52, - left_joycon_slider_topview[point * 2 + 1] * size2 - 52); + center + QPointF(top_view_x * size2 - 52, top_view_y * size2 - 52); qright_joycon_slider_topview[point] = - center + QPointF(-left_joycon_slider_topview[point * 2] * size2 + 52, - left_joycon_slider_topview[point * 2 + 1] * size2 - 52); + center + QPointF(-top_view_x * size2 + 52, top_view_y * size2 - 52); } // right joycon body @@ -1905,18 +1916,19 @@ void PlayerControlPreview::DrawProTriggers(QPainter& p, const QPointF center, bo std::array<QPointF, 
pro_body_top.size()> qbody_top;
     for (std::size_t point = 0; point < pro_left_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(pro_left_trigger[point * 2],
-                             pro_left_trigger[point * 2 + 1] + (left_pressed ? 2 : 0));
-        qright_trigger[point] =
-            center + QPointF(-pro_left_trigger[point * 2],
-                             pro_left_trigger[point * 2 + 1] + (right_pressed ? 2 : 0));
+        const float trigger_x = pro_left_trigger[point * 2 + 0];
+        const float trigger_y = pro_left_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 2 : 0));
+        qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 2 : 0));
     }
     for (std::size_t point = 0; point < pro_body_top.size() / 2; ++point) {
-        qbody_top[pro_body_top.size() - 1 - point] =
-            center + QPointF(-pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
-        qbody_top[point] = center + QPointF(pro_body_top[point * 2], pro_body_top[point * 2 + 1]);
+        const float top_x = pro_body_top[point * 2 + 0];
+        const float top_y = pro_body_top[point * 2 + 1];
+
+        qbody_top[pro_body_top.size() - 1 - point] = center + QPointF(-top_x, top_y);
+        qbody_top[point] = center + QPointF(top_x, top_y);
     }
 
     // Pro body detail
@@ -1939,12 +1951,11 @@ void PlayerControlPreview::DrawGCTriggers(QPainter& p, const QPointF center, boo
     std::array<QPointF, left_gc_trigger.size() / 2> qright_trigger;
 
     for (std::size_t point = 0; point < left_gc_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(left_gc_trigger[point * 2],
-                             left_gc_trigger[point * 2 + 1] + (left_pressed ? 10 : 0));
-        qright_trigger[point] =
-            center + QPointF(-left_gc_trigger[point * 2],
-                             left_gc_trigger[point * 2 + 1] + (right_pressed ? 10 : 0));
+        const float trigger_x = left_gc_trigger[point * 2 + 0];
+        const float trigger_y = left_gc_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(trigger_x, trigger_y + (left_pressed ? 10 : 0));
+        qright_trigger[point] = center + QPointF(-trigger_x, trigger_y + (right_pressed ? 10 : 0));
     }
 
     // Left trigger
@@ -1973,12 +1984,13 @@ void PlayerControlPreview::DrawHandheldTriggers(QPainter& p, const QPointF cente
     std::array<QPointF, left_joycon_trigger.size() / 2> qright_trigger;
 
     for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
+        const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+        const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
         qleft_trigger[point] =
-            center + QPointF(left_joycon_trigger[point * 2],
-                             left_joycon_trigger[point * 2 + 1] + (left_pressed ? 0.5f : 0));
+            center + QPointF(left_trigger_x, left_trigger_y + (left_pressed ? 0.5f : 0));
         qright_trigger[point] =
-            center + QPointF(-left_joycon_trigger[point * 2],
-                             left_joycon_trigger[point * 2 + 1] + (right_pressed ? 0.5f : 0));
+            center + QPointF(-left_trigger_x, left_trigger_y + (right_pressed ? 0.5f : 0));
     }
 
     // Left trigger
@@ -1998,12 +2010,14 @@ void PlayerControlPreview::DrawDualTriggers(QPainter& p, const QPointF center, b
     constexpr float size = 1.62f;
     constexpr float offset = 210.6f;
     for (std::size_t point = 0; point < left_joycon_trigger.size() / 2; ++point) {
-        qleft_trigger[point] =
-            center + QPointF(left_joycon_trigger[point * 2] * size + offset,
-                             left_joycon_trigger[point * 2 + 1] * size + (left_pressed ? 0.5f : 0));
-        qright_trigger[point] = center + QPointF(-left_joycon_trigger[point * 2] * size - offset,
-                                                 left_joycon_trigger[point * 2 + 1] * size +
-                                                     (right_pressed ? 0.5f : 0));
+        const float left_trigger_x = left_joycon_trigger[point * 2 + 0];
+        const float left_trigger_y = left_joycon_trigger[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(left_trigger_x * size + offset,
+                                                left_trigger_y * size + (left_pressed ? 0.5f : 0));
+        qright_trigger[point] =
+            center + QPointF(-left_trigger_x * size - offset,
+                             left_trigger_y * size + (right_pressed ? 0.5f : 0));
     }
 
     // Left trigger
@@ -2023,13 +2037,16 @@ void PlayerControlPreview::DrawDualTriggersTopView(QPainter& p, const QPointF ce
     constexpr float size = 0.9f;
 
     for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
-        qleft_trigger[point] = center + QPointF(left_joystick_L_topview[point * 2] * size - 50,
-                                                left_joystick_L_topview[point * 2 + 1] * size - 52);
+        const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+        const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+        qleft_trigger[point] = center + QPointF(top_view_x * size - 50, top_view_y * size - 52);
     }
     for (std::size_t point = 0; point < left_joystick_L_topview.size() / 2; ++point) {
-        qright_trigger[point] =
-            center + QPointF(-left_joystick_L_topview[point * 2] * size + 50,
-                             left_joystick_L_topview[point * 2 + 1] * size - 52);
+        const float top_view_x = left_joystick_L_topview[point * 2 + 0];
+        const float top_view_y = left_joystick_L_topview[point * 2 + 1];
+
+        qright_trigger[point] = center + QPointF(-top_view_x * size + 50, top_view_y * size - 52);
    }
 
     p.setPen(colors.outline);
@@ -2273,15 +2290,39 @@ void PlayerControlPreview::DrawJoystickSideview(QPainter& p, const QPointF cente
     p.drawLine(p2.at(32), p2.at(71));
 }
 
-void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, bool pressed) {
+void PlayerControlPreview::DrawProJoystick(QPainter& p, const QPointF center, const QPointF offset,
+                                           float offset_scalar, bool pressed) {
+    const float radius1 = 24.0f;
+    const float radius2 = 17.0f;
+
+    const QPointF offset_center = center + offset * offset_scalar;
+
+    const auto amplitude = static_cast<float>(
+        1.0 - std::sqrt((offset.x() * offset.x()) + (offset.y() * offset.y())) * 0.1f);
+
+    const float rotation =
+        ((offset.x() == 0) ? atan(1) * 2 : atan(offset.y() / offset.x())) * (180 / (atan(1) * 4));
+
+    p.save();
+    p.translate(offset_center);
+    p.rotate(rotation);
+
     // Outer circle
     p.setPen(colors.outline);
     p.setBrush(pressed ? colors.highlight : colors.button);
-    DrawCircle(p, center, 24.0f);
+    p.drawEllipse(QPointF(0, 0), radius1 * amplitude, radius1);
 
     // Inner circle
     p.setBrush(pressed ? colors.highlight2 : colors.button2);
-    DrawCircle(p, center, 17.0f);
+
+    const float inner_offset =
+        (radius1 - radius2) * 0.4f * ((offset.x() == 0 && offset.y() < 0) ? -1.0f : 1.0f);
+    const float offset_factor = (1.0f - amplitude) / 0.1f;
+
+    p.drawEllipse(QPointF((offset.x() < 0) ? -inner_offset : inner_offset, 0) * offset_factor,
+                  radius2 * amplitude, radius2);
+
+    p.restore();
 }
 
 void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, bool pressed) {
@@ -2299,7 +2340,7 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
 }
 
 void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
-                                           const Input::AnalogProperties properties) {
+                                           const Input::AnalogProperties& properties) {
     constexpr float size = 45.0f;
     const float range = size * properties.range;
     const float deadzone = size * properties.deadzone;
@@ -2422,17 +2463,16 @@ void PlayerControlPreview::DrawArrowButtonOutline(QPainter& p, const QPointF cen
     std::array<QPointF, (arrow_points - 1) * 4> arrow_button_outline;
 
     for (std::size_t point = 0; point < arrow_points - 1; ++point) {
-        arrow_button_outline[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                       up_arrow_button[point * 2 + 1] * size);
+        const float up_arrow_x = up_arrow_button[point * 2 + 0];
+        const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
+        arrow_button_outline[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
         arrow_button_outline[(arrow_points - 1) * 2 - point - 1] =
-            center +
-            QPointF(up_arrow_button[point * 2 + 1] * size, up_arrow_button[point * 2] * size);
+            center + QPointF(up_arrow_y * size, up_arrow_x * size);
         arrow_button_outline[(arrow_points - 1) * 2 + point] =
-            center +
-            QPointF(-up_arrow_button[point * 2] * size, -up_arrow_button[point * 2 + 1] * size);
+            center + QPointF(-up_arrow_x * size, -up_arrow_y * size);
         arrow_button_outline[(arrow_points - 1) * 4 - point - 1] =
-            center +
-            QPointF(-up_arrow_button[point * 2 + 1] * size, -up_arrow_button[point * 2] * size);
+            center + QPointF(-up_arrow_y * size, -up_arrow_x * size);
     }
 
     // Draw arrow button outline
     p.setPen(colors.outline);
@@ -2446,22 +2486,21 @@ void PlayerControlPreview::DrawArrowButton(QPainter& p, const QPointF center,
     QPoint offset;
 
     for (std::size_t point = 0; point < up_arrow_button.size() / 2; ++point) {
+        const float up_arrow_x = up_arrow_button[point * 2 + 0];
+        const float up_arrow_y = up_arrow_button[point * 2 + 1];
+
         switch (direction) {
         case Direction::Up:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                   up_arrow_button[point * 2 + 1] * size);
+            arrow_button[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
             break;
         case Direction::Left:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2 + 1] * size,
-                                                   up_arrow_button[point * 2] * size);
+            arrow_button[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Right:
-            arrow_button[point] = center + QPointF(-up_arrow_button[point * 2 + 1] * size,
-                                                   up_arrow_button[point * 2] * size);
+            arrow_button[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Down:
-            arrow_button[point] = center + QPointF(up_arrow_button[point * 2] * size,
-                                                   -up_arrow_button[point * 2 + 1] * size);
+            arrow_button[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
             break;
         case Direction::None:
             break;
@@ -2500,17 +2539,17 @@ void PlayerControlPreview::DrawTriggerButton(QPainter& p, const QPointF center,
                                              const Direction direction, bool pressed) {
     std::array<QPointF, trigger_button.size() / 2> qtrigger_button;
-    QPoint offset;
 
     for (std::size_t point = 0; point < trigger_button.size() / 2; ++point) {
+        const float trigger_button_x = trigger_button[point * 2 + 0];
+        const float trigger_button_y = trigger_button[point * 2 + 1];
+
         switch (direction) {
         case Direction::Left:
-            qtrigger_button[point] =
-                center + QPointF(-trigger_button[point * 2], trigger_button[point * 2 + 1]);
+            qtrigger_button[point] = center + QPointF(-trigger_button_x, trigger_button_y);
             break;
         case Direction::Right:
-            qtrigger_button[point] =
-                center + QPointF(trigger_button[point * 2], trigger_button[point * 2 + 1]);
+            qtrigger_button[point] = center + QPointF(trigger_button_x, trigger_button_y);
             break;
         case Direction::Up:
         case Direction::Down:
@@ -2633,22 +2672,21 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
     std::array<QPointF, up_arrow_symbol.size() / 2> arrow_symbol;
 
     for (std::size_t point = 0; point < up_arrow_symbol.size() / 2; ++point) {
+        const float up_arrow_x = up_arrow_symbol[point * 2 + 0];
+        const float up_arrow_y = up_arrow_symbol[point * 2 + 1];
+
         switch (direction) {
         case Direction::Up:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
-                                                   up_arrow_symbol[point * 2 + 1] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_x * size, up_arrow_y * size);
             break;
         case Direction::Left:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2 + 1] * size,
-                                                   up_arrow_symbol[point * 2] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_y * size, up_arrow_x * size);
             break;
        case Direction::Right:
-            arrow_symbol[point] = center + QPointF(-up_arrow_symbol[point * 2 + 1] * size,
-                                                   up_arrow_symbol[point * 2] * size);
+            arrow_symbol[point] = center + QPointF(-up_arrow_y * size, up_arrow_x * size);
             break;
         case Direction::Down:
-            arrow_symbol[point] = center + QPointF(up_arrow_symbol[point * 2] * size,
-                                                   -up_arrow_symbol[point * 2 + 1] * size);
+            arrow_symbol[point] = center + QPointF(up_arrow_x * size, -up_arrow_y * size);
             break;
         case Direction::None:
             break;
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 39565f795c..91c3343f15 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -25,7 +25,7 @@ public:
     void SetPlayerInput(std::size_t index, const ButtonParam& buttons_param,
                         const AnalogParam& analogs_param);
-    void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw buttons_,
+    void SetPlayerInputRaw(std::size_t index, const Settings::ButtonsRaw& buttons_,
                            Settings::AnalogsRaw analogs_);
     void SetConnectedStatus(bool checked);
     void SetControllerType(Settings::ControllerType type);
@@ -138,9 +138,9 @@ private:
     // Draw joystick functions
     void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
     void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
-    void DrawRawJoystick(QPainter& p, QPointF center, const QPointF value,
-                         const Input::AnalogProperties properties);
-    void DrawProJoystick(QPainter& p, QPointF center, bool pressed);
+    void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
+                         const Input::AnalogProperties& properties);
+    void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
     void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);
 
     // Draw button functions
diff --git a/src/yuzu/debugger/controller.cpp b/src/yuzu/debugger/controller.cpp
index 85724a8f36..2731d948d7 100644
--- a/src/yuzu/debugger/controller.cpp
+++ b/src/yuzu/debugger/controller.cpp
@@ -42,7 +42,7 @@ void ControllerDialog::refreshConfiguration() {
 QAction* ControllerDialog::toggleViewAction() {
     if (toggle_view_action == nullptr) {
-        toggle_view_action = new QAction(windowTitle(), this);
+        toggle_view_action = new QAction(tr("&Controller P1"), this);
         toggle_view_action->setCheckable(true);
         toggle_view_action->setChecked(isVisible());
         connect(toggle_view_action, &QAction::toggled, this, &ControllerDialog::setVisible);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ef92c25bc6..0ba7c07cc9 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -850,6 +850,16 @@ void GMainWindow::InitializeHotkeys() {
     connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
             &QShortcut::activated, this,
             [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
+
+    connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this),
+            &QShortcut::activated, this, [&] {
+                Settings::values.mouse_panning = !Settings::values.mouse_panning;
+                if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
+                    mouse_hide_timer.start();
+                    render_window->installEventFilter(render_window);
+                    render_window->setAttribute(Qt::WA_Hover, true);
+                }
+            });
 }
 
 void GMainWindow::SetDefaultUIGeometry() {
@@ -1197,7 +1207,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
     multicore_status_button->setDisabled(true);
     renderer_status_button->setDisabled(true);
 
-    if (UISettings::values.hide_mouse) {
+    if (UISettings::values.hide_mouse || Settings::values.mouse_panning) {
         mouse_hide_timer.start();
         render_window->installEventFilter(render_window);
         render_window->setAttribute(Qt::WA_Hover, true);
@@ -2359,7 +2369,7 @@ void GMainWindow::OnConfigure() {
 
     config->Save();
 
-    if (UISettings::values.hide_mouse && emulation_running) {
+    if ((UISettings::values.hide_mouse || Settings::values.mouse_panning) && emulation_running) {
         render_window->installEventFilter(render_window);
         render_window->setAttribute(Qt::WA_Hover, true);
         mouse_hide_timer.start();
@@ -2480,6 +2490,11 @@ void GMainWindow::OnCaptureScreenshot() {
                            .arg(title_id, 16, 16, QLatin1Char{'0'})
                            .arg(date);
 
+    if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
+        OnStartGame();
+        return;
+    }
+
 #ifdef _WIN32
     if (UISettings::values.enable_screenshot_save_as) {
         filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
@@ -2600,7 +2615,8 @@ void GMainWindow::UpdateUISettings() {
 }
 
 void GMainWindow::HideMouseCursor() {
-    if (emu_thread == nullptr || UISettings::values.hide_mouse == false) {
+    if (emu_thread == nullptr ||
+        (!UISettings::values.hide_mouse && !Settings::values.mouse_panning)) {
         mouse_hide_timer.stop();
         ShowMouseCursor();
         return;
@@ -2610,13 +2626,16 @@ void GMainWindow::HideMouseCursor() {
 
 void GMainWindow::ShowMouseCursor() {
     render_window->unsetCursor();
-    if (emu_thread != nullptr && UISettings::values.hide_mouse) {
+    if (emu_thread != nullptr &&
+        (UISettings::values.hide_mouse || Settings::values.mouse_panning)) {
         mouse_hide_timer.start();
     }
 }
 
 void GMainWindow::OnMouseActivity() {
-    ShowMouseCursor();
+    if (!Settings::values.mouse_panning) {
+        ShowMouseCursor();
+    }
 }
 
 void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string details) {
@@ -2751,7 +2770,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
                 .arg(errors));
     }
 
-    QProgressDialog prog;
+    QProgressDialog prog(this);
     prog.setRange(0, 0);
     prog.setLabelText(tr("Deriving keys...\nThis may take up to a minute depending \non your "
                          "system's performance."));
@@ -2933,7 +2952,7 @@ void GMainWindow::filterBarSetChecked(bool state) {
 }
 
 void GMainWindow::UpdateUITheme() {
-    const QString default_icons = QStringLiteral(":/icons/default");
+    const QString default_icons = QStringLiteral("default");
     const QString& current_theme = UISettings::values.theme;
     const bool is_default_theme = current_theme == QString::fromUtf8(UISettings::themes[0].second);
     QStringList theme_paths(default_theme_paths);
@@ -2949,7 +2968,6 @@ void GMainWindow::UpdateUITheme() {
             qApp->setStyleSheet({});
             setStyleSheet({});
         }
-        theme_paths.append(default_icons);
         QIcon::setThemeName(default_icons);
     } else {
         const QString theme_uri(QLatin1Char{':'} + current_theme + QStringLiteral("/style.qss"));
@@ -2961,10 +2979,7 @@ void GMainWindow::UpdateUITheme() {
         } else {
             LOG_ERROR(Frontend, "Unable to set style, stylesheet file not found");
         }
-
-        const QString theme_name = QStringLiteral(":/icons/") + current_theme;
-        theme_paths.append({default_icons, theme_name});
-        QIcon::setThemeName(theme_name);
+        QIcon::setThemeName(current_theme);
     }
 
     QIcon::setThemeSearchPaths(theme_paths);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index e2ad5baf69..0488706875 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -14,8 +14,8 @@
    <string>yuzu</string>
   </property>
   <property name="windowIcon">
-   <iconset>
-    <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
+   <iconset resource="yuzu.qrc">
+    <normaloff>:/img/yuzu.ico</normaloff>:/img/yuzu.ico</iconset>
   </property>
   <property name="tabShape">
    <enum>QTabWidget::Rounded</enum>
@@ -303,6 +303,8 @@
    </property>
   </action>
  </widget>
- <resources/>
+ <resources>
+  <include location="yuzu.qrc"/>
+ </resources>
  <connections/>
 </ui>
diff --git a/src/yuzu/yuzu.qrc b/src/yuzu/yuzu.qrc
new file mode 100644
index 0000000000..5733cac988
--- /dev/null
+++ b/src/yuzu/yuzu.qrc
@@ -0,0 +1,5 @@
+<RCC>
+  <qresource prefix="/img">
+    <file alias="yuzu.ico">../../dist/yuzu.ico</file>
+  </qresource>
+</RCC>
diff --git a/src/yuzu_cmd/CMakeLists.txt b/src/yuzu_cmd/CMakeLists.txt
index 0b3f2cb54f..8461f8896e 100644
--- a/src/yuzu_cmd/CMakeLists.txt
+++ b/src/yuzu_cmd/CMakeLists.txt
@@ -1,5 +1,15 @@
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
 
+function(create_resource file output filename)
+    # Read hex data from file
+    file(READ ${file} filedata HEX)
+    # Convert hex data for C compatibility
+    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
+    # Write data to output file
+    set(RESOURCES_DIR "${PROJECT_BINARY_DIR}/dist" PARENT_SCOPE)
+    file(WRITE "${PROJECT_BINARY_DIR}/dist/${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
+endfunction()
+
 add_executable(yuzu-cmd
     config.cpp
     config.h
@@ -24,6 +34,9 @@ if (MSVC)
 endif()
 target_link_libraries(yuzu-cmd PRIVATE ${PLATFORM_LIBRARIES} SDL2 Threads::Threads)
 
+create_resource("../../dist/yuzu.bmp" "yuzu_cmd/yuzu_icon.h" "yuzu_icon")
+target_include_directories(yuzu-cmd PRIVATE ${RESOURCES_DIR})
+
 target_include_directories(yuzu-cmd PRIVATE ../../externals/Vulkan-Headers/include)
 
 if(UNIX AND NOT APPLE)
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index f76102459a..6d8bc55099 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -329,9 +329,6 @@ void Config::ReadValues() {
     FS::GetUserPath(
         FS::UserPath::DumpDir,
         sdl2_config->Get("Data Storage", "dump_directory", FS::GetUserPath(FS::UserPath::DumpDir)));
-    FS::GetUserPath(FS::UserPath::CacheDir,
-                    sdl2_config->Get("Data Storage", "cache_directory",
-                                     FS::GetUserPath(FS::UserPath::CacheDir)));
     Settings::values.gamecard_inserted =
         sdl2_config->GetBoolean("Data Storage", "gamecard_inserted", false);
     Settings::values.gamecard_current_game =
@@ -388,7 +385,7 @@ void Config::ReadValues() {
         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)));
     Settings::values.use_disk_shader_cache.SetValue(
         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
-    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
+    const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
     Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
     Settings::values.use_asynchronous_gpu_emulation.SetValue(
         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index 7843d51672..7e391ab895 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -12,6 +12,7 @@
 #include "input_common/mouse/mouse_input.h"
 #include "input_common/sdl/sdl.h"
 #include "yuzu_cmd/emu_window/emu_window_sdl2.h"
+#include "yuzu_cmd/yuzu_icon.h"
 
 EmuWindow_SDL2::EmuWindow_SDL2(InputCommon::InputSubsystem* input_subsystem_)
     : input_subsystem{input_subsystem_} {
@@ -30,7 +31,8 @@ EmuWindow_SDL2::~EmuWindow_SDL2() {
 
 void EmuWindow_SDL2::OnMouseMotion(s32 x, s32 y) {
     TouchMoved((unsigned)std::max(x, 0), (unsigned)std::max(y, 0), 0);
-    input_subsystem->GetMouse()->MouseMove(x, y);
+
+    input_subsystem->GetMouse()->MouseMove(x, y, 0, 0);
 }
 
 void EmuWindow_SDL2::OnMouseButton(u32 button, u8 state, s32 x, s32 y) {
@@ -193,6 +195,22 @@ void EmuWindow_SDL2::WaitEvent() {
     }
 }
 
+void EmuWindow_SDL2::SetWindowIcon() {
+    SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size);
+    if (yuzu_icon_stream == nullptr) {
+        LOG_WARNING(Frontend, "Failed to create yuzu icon stream.");
+        return;
+    }
+    SDL_Surface* const window_icon = SDL_LoadBMP_RW(yuzu_icon_stream, 1);
+    if (window_icon == nullptr) {
+        LOG_WARNING(Frontend, "Failed to read BMP from stream.");
+        return;
+    }
+    // The icon is attached to the window pointer
+    SDL_SetWindowIcon(render_window, window_icon);
+    SDL_FreeSurface(window_icon);
+}
+
 void EmuWindow_SDL2::OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) {
     SDL_SetWindowMinimumSize(render_window, minimal_size.first, minimal_size.second);
 }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index a931412401..51a12a6a97 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -32,6 +32,9 @@ public:
     /// Wait for the next event on the main thread.
     void WaitEvent();
 
+    // Sets the window icon from yuzu.bmp
+    void SetWindowIcon();
+
 protected:
     /// Called by WaitEvent when a key is pressed or released.
     void OnKeyEvent(int key, u8 state);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index deddea9ee8..a02485c14e 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -107,6 +107,8 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
     dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
                                     SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
 
+    SetWindowIcon();
+
     if (fullscreen) {
         Fullscreen();
     }
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 3ba657c007..6f9b00461d 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -35,6 +35,8 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
         std::exit(EXIT_FAILURE);
     }
 
+    SetWindowIcon();
+
     switch (wm.subsystem) {
 #ifdef SDL_VIDEO_DRIVER_WINDOWS
     case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 0e1f3bdb31..982c417859 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
     // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
     system.GPU().Start();
 
-    system.Renderer().Rasterizer().LoadDiskResources(
+    system.Renderer().ReadRasterizer()->LoadDiskResources(
         system.CurrentProcess()->GetTitleID(), false,
         [](VideoCore::LoadCallbackStage, size_t value, size_t total) {});