From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- .../frontend/maxwell/translate_program.cpp | 203 +++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.cpp (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 0000000000..e52170e3e2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <ranges> +#include <vector> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional<PixelImap> imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + Optimization::LowerFp16ToFp32(program); + + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::VerificationPass(program); + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + } + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell -- cgit v1.2.3-70-g09d2 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 8 +++++--- .../frontend/maxwell/translate_program.h | 3 ++- src/shader_recompiler/host_translate_info.h | 18 ++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 9 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/host_translate_info.h (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72c..164e940713 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/translate.h frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.h + host_translate_info.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e2..5250509c18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo RemoveUnreachableBlocks(program); // Replace instructions before the SSA rewrite - Optimization::LowerFp16ToFp32(program); - + if (!host_info.support_float16) { + Optimization::LowerFp16ToFp32(program); + } Optimization::SsaRewritePass(program); Optimization::GlobalMemoryToStorageBufferPass(program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h index 1e55364438..a84814811e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -8,13 +8,14 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 0000000000..94a584219c --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d283..b459397f5e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d909..6952a1f2cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr<ShaderWorker> workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e8..72e6f42077 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector<u32> code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2ed..42da2960b3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf81..0d8c6cd088 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3-70-g09d2 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- .../backend/glsl/emit_context.cpp | 26 +++++++--------------- src/shader_recompiler/backend/glsl/emit_context.h | 2 -- .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ .../backend/glsl/emit_glsl_special.cpp | 18 +++++++-------- .../backend/spirv/emit_context.cpp | 3 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- .../frontend/maxwell/translate_program.cpp | 4 +++- .../ir_opt/collect_shader_info_pass.cpp | 6 +++-- src/shader_recompiler/runtime_info.h | 8 +++++-- src/shader_recompiler/shader_info.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 ++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +++++++++---- 12 files changed, 62 insertions(+), 41 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index bd40356a1b..14c0095359 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -327,11 +327,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; - if (generic.used) { - header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, - InputArrayDecorator(stage)); + if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + continue; } + header += + fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,10 +350,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } } for (size_t index = 0; index < info.stores_generics.size(); ++index) { - // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || StageInitializesVaryings()) { - DefineGenericOutput(index, program.invocations); + if (!info.stores_generics[index]) { + continue; } + DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -362,17 +363,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } -bool EmitContext::StageInitializesVaryings() const noexcept { - switch (stage) { - case Stage::VertexA: - case Stage::VertexB: - case Stage::Geometry: - return true; - default: - return false; - } -} - void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 4a50556e14..8fa87c02cb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,8 +136,6 @@ public: code += '\n'; } - [[nodiscard]] bool StageInitializesVaryings() const noexcept; - std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index a241d18fed..663ff37532 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { @@ -179,6 +180,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + ctx.AddF32("{}=0.f;", inst, attr); + return; + } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index f8e8aaa67b..1a2d3dcead 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -12,11 +12,12 @@ namespace Shader::Backend::GLSL { namespace { -void InitializeVaryings(EmitContext& ctx) { - ctx.Add("gl_Position=vec4(0,0,0,1);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < 16; ++index) { + if (ctx.info.stores_generics[index]) { ctx.Add("out_attr{}=vec4(0,0,0,1);", index); } } @@ -56,9 +57,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.StageInitializesVaryings()) { - InitializeVaryings(ctx); - } + InitializeOutputVaryings(ctx); + if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { for (size_t index = 0; index < ctx.info.stores_frag_color.size(); ++index) { if (ctx.info.stores_frag_color[index]) { @@ -73,7 +73,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); - InitializeVaryings(ctx); + InitializeOutputVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 007b796504..612d087ad1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1209,6 +1209,9 @@ void EmitContext::DefineInputs(const Info& info) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { + if (!runtime_info.previous_stage_stores_generic[index]) { + continue; + } const InputVarying generic{info.input_generics[index]}; if (!generic.used) { continue; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 42fff74e39..4ac1fbae5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -286,7 +286,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; - if (!type) { + if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c18..ed8729fcaf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 47933df970..bab32b58b9 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,7 +79,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { void SetAttribute(Info& info, IR::Attribute attr) { if (IR::IsGeneric(attr)) { - info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; + info.stores_generics[IR::GenericAttributeIndex(attr)] = true; return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { @@ -956,7 +956,9 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } if (info.stores_indexed_attributes) { for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); + if (header.vtg.IsOutputGenericVectorActive(i)) { + info.stores_generics[i] = true; + } } info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; info.stores_position |= header.vtg.omap_systemb.position != 0; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index d4b047b4d4..63fe2afafc 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <bitset> #include <optional> #include <vector> @@ -59,6 +60,8 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array<AttributeType, 32> generic_input_types{}; + std::bitset<32> previous_stage_stores_generic{}; + bool convert_depth_mode{}; bool force_early_z{}; @@ -72,11 +75,12 @@ struct RuntimeInfo { std::optional<CompareFunction> alpha_test_func; float alpha_test_reference{}; - // Static y negate value + /// Static Y negate value bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM + /// Use storage buffers instead of global pointers on GLASM bool glasm_use_storage_buffers{}; + /// Transform feedback state for each varying std::vector<TransformFeedbackVarying> xfb_varyings; }; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e9ebc16a4b..a20e15d2e6 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -140,7 +140,7 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::array<bool, 32> stores_generics{}; + std::bitset<32> stores_generics{}; bool stores_layer{}; bool stores_viewport_index{}; bool stores_point_size{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b459397f5e..b8b24dd3d3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -58,8 +58,15 @@ auto MakeSpan(Container& container) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } switch (program.stage) { case Shader::Stage::VertexB: case Shader::Stage::Geometry: @@ -400,6 +407,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array<std::string, 5> sources; Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { @@ -413,12 +421,13 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{ - MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } + previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f42077..dc028306a9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; -- cgit v1.2.3-70-g09d2 From 1091995f8e5ba79d659ab39fe4dbbca26ad01488 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 16 Jun 2021 05:02:19 -0300 Subject: shader: Simplify MergeDualVertexPrograms --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ed8729fcaf..e728b43cc6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -174,14 +174,12 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); result.blocks = GenerateBlocks(result.syntax_list); result.post_order_blocks = vertex_b.post_order_blocks; for (const auto& block : vertex_a.post_order_blocks) { -- cgit v1.2.3-70-g09d2 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 6 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc6..c084f34002 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo if (!host_info.support_float16) { Optimization::LowerFp16ToFp32(program); } + if (!host_info.support_int64) { + Optimization::LowerInt64ToInt32(program); + } Optimization::SsaRewritePass(program); Optimization::GlobalMemoryToStorageBufferPass(program); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 5838fc02fd..b1b5ba1ab9 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -159,6 +159,7 @@ Device::Device() { has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0c9d6fe311..0bd277d38d 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -124,6 +124,10 @@ public: return has_nv_gpu_shader_5; } + bool HasShaderInt64() const { + return has_shader_int64; + } + bool HasAmdShaderHalfFloat() const { return has_amd_shader_half_float; } @@ -172,6 +176,7 @@ private: bool use_driver_cache{}; bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; + bool has_shader_int64{}; bool has_amd_shader_half_float{}; bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c36b0d8cf4..f2f18b18ae 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,7 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo }, host_info{ .support_float16 = false, - .support_int64 = true, + .support_int64 = device.HasShaderInt64(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e83628c132..ec06b124f3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,7 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw }; host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), - .support_int64 = true, + .support_int64 = device.IsShaderInt64Supported(), }; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 49605752d4..40d00a52f1 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -164,6 +164,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int64 is supported. + bool IsShaderInt64Supported() const { + return is_shader_int64_supported; + } + /// Returns true if shader int16 is supported. bool IsShaderInt16Supported() const { return is_shader_int16_supported; -- cgit v1.2.3-70-g09d2 From ecd6b4356b3a12c0963c4fd6ec6d273a7064ac2f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 23 Jun 2021 03:31:49 -0300 Subject: shader: Only verify shader when graphics debugging is enabled --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c084f34002..a8b727f1a3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -7,6 +7,7 @@ #include <ranges> #include <vector> +#include "common/settings.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" @@ -164,7 +165,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::ConstantPropagationPass(program); Optimization::DeadCodeEliminationPass(program); - Optimization::VerificationPass(program); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(program); + } Optimization::CollectShaderInfoPass(env, program); CollectInterpolationInfo(env, program); AddNVNStorageBuffers(program); @@ -200,7 +203,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); - Optimization::VerificationPass(result); + if (Settings::values.renderer_debug) { + Optimization::VerificationPass(result); + } Optimization::CollectShaderInfoPass(env_vertex_b, result); return result; } -- cgit v1.2.3-70-g09d2 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 97 +++++----- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 +++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 +++++++ src/video_core/engines/maxwell_3d.h | 7 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- src/video_core/vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 345 insertions(+), 331 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9d..b5b7e5e833 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c2..80dad9ff34 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f1301..2b96977b36 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248c..02c9dc6d79 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; if (IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c0095359..0d7f7bc3ba 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da21..8deaf5760f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eeec..16e2a8502e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa219..298881c7bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129e..af4fb0c69b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector<Sirit::Literal> literals; std::vector<Id> labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast<u32>(i))}; + const auto type{AttrTypes(*this, static_cast<u32>(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector<Sirit::Literal> literals; std::vector<Id> labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast<u32>(i)); + literals.push_back(base_attribute_value + static_cast<u32>(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { continue; } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { + if (!loads.Generic(index)) { continue; } - const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional<u32> invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb81..e277bc3580 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8f..ddb86d0701 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if (program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd723895..77fbb2b2fd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08e..8369d0d843 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array<u32, 8> gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30d..ca11994943 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d9..ebcaa8bc2d 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a3..6b4b0ce5bd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional<PixelImap> imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.output_topology = sph.common3.output_topology; program.output_vertices = sph.common4.max_output_vertices; program.invocations = sph.common2.threads_per_input_primitive; + program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; + if (program.is_geometry_passthrough) { + const auto mask{env.GpPassthroughMask()}; + program.info.passthrough.mask |= ~Common::BitCast<std::bitset<256>>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152c..5e32ac7844 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be16388..ee1887b569 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aae..bd6c2bfb5e 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + std::array<u8, 16> imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array<u8, 16> omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afafc..f3f83a258c 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include <vector> #include "common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array<AttributeType, 32> generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e6..4ef4dbd406 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include <boost/container/small_vector.hpp> #include <boost/container/static_vector.hpp> @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array<bool, 30> uses_patches{}; - std::array<InputVarying, 32> input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array<Interpolation, 32> interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 0000000000..9d7b24a763 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> +#include <cstddef> + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast<size_t>(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast<size_t>(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || + mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && + mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded6715..471d5686aa 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array<u32, 8> gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b77451..06e39a5032 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e657..6d664ed6b4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f3..7aaa40ef27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast<float>(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b560..429cab30de 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast<const char*>(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f82..da4721e6b9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d3..d0adc01275 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3-70-g09d2 From 8612b5fec5d39b904f9fddbbee3e06437d49429c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Thu, 24 Jun 2021 17:42:07 -0300 Subject: shader: Use std::bit_cast instead of Common::BitCast for passthrough --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 6b4b0ce5bd..2bb1d24a4a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <bit> #include <memory> #include <ranges> #include <vector> @@ -142,8 +143,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.invocations = sph.common2.threads_per_input_primitive; program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; if (program.is_geometry_passthrough) { - const auto mask{env.GpPassthroughMask()}; - program.info.passthrough.mask |= ~Common::BitCast<std::bitset<256>>(mask); + const auto& mask{env.GpPassthroughMask()}; + program.info.passthrough.mask |= ~std::bit_cast<std::bitset<256>>(mask); } break; } -- cgit v1.2.3-70-g09d2 From 2235a51b5d987cf8297211bb1778d75e6b794324 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 11 Jul 2021 01:10:38 -0300 Subject: shader: Manually convert from array<u32> to bitset instead of using bit_cast --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2bb1d24a4a..83c77967d3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <algorithm> -#include <bit> #include <memory> #include <ranges> #include <vector> @@ -144,7 +143,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; if (program.is_geometry_passthrough) { const auto& mask{env.GpPassthroughMask()}; - program.info.passthrough.mask |= ~std::bit_cast<std::bitset<256>>(mask); + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } } break; } -- cgit v1.2.3-70-g09d2 From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: shader: Avoid usage of C++20 ranges to build in clang --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 7 +++++-- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 9 +++++++-- .../frontend/maxwell/control_flow.cpp | 13 ++++++------- .../frontend/maxwell/structured_control_flow.cpp | 8 ++++---- .../frontend/maxwell/translate_program.cpp | 20 +++++++++++++------- .../ir_opt/constant_propagation_pass.cpp | 5 +++-- .../ir_opt/dead_code_elimination_pass.cpp | 2 -- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 6 ------ .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 1 - .../ir_opt/lower_int64_to_int32.cpp | 5 +++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 10 ++++++---- 11 files changed, 47 insertions(+), 39 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/translate_program.cpp') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 64787b3537..a5e8c9b6e0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> +#include <algorithm> #include <string> #include <tuple> @@ -196,7 +196,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index c5e819a0a2..8a430d5739 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> +#include <algorithm> #include <string> +#include <tuple> +#include <type_traits> #include "common/div_ceil.h" #include "common/settings.h" @@ -120,7 +122,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e7abea82f4..1a954a509f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <array> #include <optional> -#include <ranges> #include <string> #include <utility> @@ -151,18 +150,18 @@ std::pair<Location, Stack> Stack::Pop(Token token) const { } std::optional<Location> Stack::Peek(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - if (it == reverse_entries.end()) { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { return std::nullopt; } return it->target; } Stack Stack::Remove(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - const auto pos{std::distance(reverse_entries.begin(), it)}; + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; Stack result; result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); return result; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 221454b995..8b3e0a15c7 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <memory> -#include <ranges> #include <string> #include <unordered_map> #include <utility> @@ -167,7 +166,7 @@ std::string DumpExpr(const Statement* stmt) { } } -std::string DumpTree(const Tree& tree, u32 indentation = 0) { +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { std::string ret; std::string indent(indentation, ' '); for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { @@ -315,8 +314,9 @@ class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 83c77967d3..c067d459cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <memory> -#include <ranges> #include <vector> #include "common/settings.h" @@ -20,12 +19,19 @@ namespace Shader::Maxwell { namespace { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } return blocks; } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3c72203add..8dd6d6c2c8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <algorithm> -#include <ranges> #include <tuple> #include <type_traits> @@ -599,7 +598,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 1e4a3fdaed..4008363016 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> - #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index 3d2c205c2b..055ba9c54d 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -2,12 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> -#include <ranges> - -#include "common/bit_cast.h" -#include "common/bit_util.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f9de17b250..4197b0095d 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <compare> #include <optional> -#include <ranges> #include <queue> #include <boost/container/flat_set.hpp> diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index abf7c87c79..e80d3d1d94 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> #include <utility> #include "shader_recompiler/exception.h" @@ -207,7 +206,9 @@ void Lower(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void LowerInt64ToInt32(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { Lower(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index dcaced83f3..53145fb5e3 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,6 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // -#include <ranges> #include <span> #include <variant> #include <vector> @@ -243,7 +242,9 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& [variant, phi] : it->second) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -373,8 +374,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { - VisitBlock(pass, block); + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); } } -- cgit v1.2.3-70-g09d2