From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Tue, 15 Jun 2021 17:23:57 -0400
Subject: glsl: Address rest of feedback

---
 .../backend/glsl/emit_context.cpp                  | 44 +++++++++++++++++-----
 src/shader_recompiler/backend/glsl/emit_context.h  |  2 +
 .../backend/glsl/emit_glsl_context_get_set.cpp     |  2 +-
 .../backend/glsl/emit_glsl_special.cpp             | 22 +++++++----
 src/shader_recompiler/ir_opt/texture_pass.cpp      | 11 +++++-
 src/shader_recompiler/profile.h                    |  2 +
 src/shader_recompiler/shader_info.h                |  2 +
 src/video_core/renderer_opengl/gl_device.cpp       |  1 +
 src/video_core/renderer_opengl/gl_device.h         |  5 +++
 .../renderer_opengl/gl_graphics_pipeline.cpp       | 32 ++++++++--------
 src/video_core/renderer_opengl/gl_shader_cache.cpp |  1 +
 11 files changed, 86 insertions(+), 38 deletions(-)

(limited to 'src')

diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 0e8fe017d2..d224c4d845 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) {
     }
 }
 
+std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
+    if (is_written && !is_read) {
+        return "writeonly ";
+    }
+    if (is_read && !is_written) {
+        return "readonly ";
+    }
+    return "";
+}
+
 std::string_view GetTessMode(TessPrimitive primitive) {
     switch (primitive) {
     case TessPrimitive::Triangles:
@@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
 EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
                          const RuntimeInfo& runtime_info_)
     : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
-    header += "#pragma optionNV(fastmath off)\n";
+    if (profile.need_fastmath_off) {
+        header += "#pragma optionNV(fastmath off)\n";
+    }
     SetupExtensions();
     stage = program.stage;
     switch (program.stage) {
@@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
     }
     for (size_t index = 0; index < info.stores_generics.size(); ++index) {
         // TODO: Properly resolve attribute issues
-        if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) {
+        if (info.stores_generics[index] || StageInitializesVaryings()) {
             DefineGenericOutput(index, program.invocations);
         }
     }
@@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
     DefineConstants();
 }
 
+bool EmitContext::StageInitializesVaryings() const noexcept {
+    switch (stage) {
+    case Stage::VertexA:
+    case Stage::VertexB:
+    case Stage::Geometry:
+        return true;
+    default:
+        return false;
+    }
+}
+
 void EmitContext::SetupExtensions() {
     if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
         header += "#extension GL_EXT_texture_shadow_lod : enable\n";
@@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() {
         header += "#extension GL_NV_shader_atomic_float : enable\n";
     }
     if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
-        header += "#extension NV_shader_atomic_fp16_vector : enable\n";
+        header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
     }
     if (info.uses_fp16) {
         if (profile.support_gl_nv_gpu_shader_5) {
@@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() {
     if (info.stores_viewport_mask && profile.support_viewport_mask) {
         header += "#extension GL_NV_viewport_array2 : enable\n";
     }
-    if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) {
+    if (info.uses_typeless_image_reads) {
         header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
     }
     if (info.uses_derivatives && profile.support_gl_derivative_control) {
@@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
                     "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
                     "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
     }
-    write_func += "}";
-    write_func_64 += "}";
-    write_func_128 += "}";
+    write_func += '}';
+    write_func_64 += '}';
+    write_func_128 += '}';
     load_func += "return 0u;}";
     load_func_64 += "return uvec2(0);}";
     load_func_128 += "return uvec4(0);}";
@@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) {
     for (const auto& desc : info.image_buffer_descriptors) {
         image_buffers.push_back({bindings.image, desc.count});
         const auto format{ImageFormatString(desc.format)};
+        const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
-        header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image,
-                              format, bindings.image, array_decorator);
+        header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
+                              bindings.image, format, qualifier, bindings.image, array_decorator);
         bindings.image += desc.count;
     }
     images.reserve(info.image_descriptors.size());
@@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) {
         images.push_back({bindings.image, desc.count});
         const auto format{ImageFormatString(desc.format)};
         const auto image_type{ImageType(desc.type)};
-        const auto qualifier{desc.is_written ? "" : "readonly "};
+        const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
         header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
                               qualifier, image_type, bindings.image, array_decorator);
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h
index 8fa87c02cb..4a50556e14 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.h
+++ b/src/shader_recompiler/backend/glsl/emit_context.h
@@ -136,6 +136,8 @@ public:
         code += '\n';
     }
 
+    [[nodiscard]] bool StageInitializesVaryings() const noexcept;
+
     std::string header;
     std::string code;
     VarAlloc var_alloc;
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index edeecc26e1..a241d18fed 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
         ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
         break;
     case IR::Attribute::FogCoordinate:
-        ctx.Add("gl_FogFragCoord.x={};", value);
+        ctx.Add("gl_FogFragCoord={};", value);
         break;
     case IR::Attribute::ClipDistance0:
     case IR::Attribute::ClipDistance1:
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
index cfef58d79d..59ca52f07f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -10,6 +10,17 @@
 #include "shader_recompiler/frontend/ir/value.h"
 
 namespace Shader::Backend::GLSL {
+namespace {
+void InitializeVaryings(EmitContext& ctx) {
+    ctx.Add("gl_Position=vec4(0,0,0,1);");
+    // TODO: Properly resolve attribute issues
+    for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
+        if (!ctx.info.stores_generics[index]) {
+            ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
+        }
+    }
+}
+} // Anonymous namespace
 
 void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
     const size_t num_args{phi.NumArgs()};
@@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
 }
 
 void EmitPrologue(EmitContext& ctx) {
-    if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) {
-        ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);");
-        // TODO: Properly resolve attribute issues
-        for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
-            if (!ctx.info.stores_generics[index]) {
-                ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
-            }
-        }
+    if (ctx.StageInitializesVaryings()) {
+        InitializeVaryings(ctx);
     }
 }
 
@@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {}
 
 void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
     ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
+    InitializeVaryings(ctx);
 }
 
 void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index e9098239da..737f186abb 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -312,11 +312,14 @@ public:
     }
 
     u32 Add(const ImageBufferDescriptor& desc) {
-        return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
+        const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
             return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
                    desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
                    desc.size_shift == existing.size_shift;
-        });
+        })};
+        image_buffer_descriptors[index].is_written |= desc.is_written;
+        image_buffer_descriptors[index].is_read |= desc.is_read;
+        return index;
     }
 
     u32 Add(const TextureDescriptor& desc) {
@@ -339,6 +342,7 @@ public:
                    desc.size_shift == existing.size_shift;
         })};
         image_descriptors[index].is_written |= desc.is_written;
+        image_descriptors[index].is_read |= desc.is_read;
         return index;
     }
 
@@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) {
                 throw NotImplementedException("Unexpected separate sampler");
             }
             const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
+            const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead};
             if (flags.type == TextureType::Buffer) {
                 index = descriptors.Add(ImageBufferDescriptor{
                     .format = flags.image_format,
                     .is_written = is_written,
+                    .is_read = is_read,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
                     .count = cbuf.count,
@@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) {
                     .type = flags.type,
                     .format = flags.image_format,
                     .is_written = is_written,
+                    .is_read = is_read,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
                     .count = cbuf.count,
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index e8cfc03af3..a3c412a0fe 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -97,6 +97,8 @@ struct Profile {
     /// Fragment outputs have to be declared even if they are not written to avoid undefined values.
     /// See Ori and the Blind Forest's main menu for reference.
     bool need_declared_frag_colors{};
+    /// Prevents fast math optimizations that may cause inaccuracies
+    bool need_fastmath_off{};
 
     /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 74d7a6a945..e9ebc16a4b 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes
 struct ImageBufferDescriptor {
     ImageFormat format;
     bool is_written;
+    bool is_read;
     u32 cbuf_index;
     u32 cbuf_offset;
     u32 count;
@@ -99,6 +100,7 @@ struct ImageDescriptor {
     TextureType type;
     ImageFormat format;
     bool is_written;
+    bool is_read;
     u32 cbuf_index;
     u32 cbuf_offset;
     u32 count;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index bf08a6d93c..5838fc02fd 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -162,6 +162,7 @@ Device::Device() {
     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
     has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
     warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+    need_fastmath_off = is_nvidia;
 
     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
     // uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 0b59c9df04..0c9d6fe311 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -136,6 +136,10 @@ public:
         return warp_size_potentially_larger_than_guest;
     }
 
+    bool NeedsFastmathOff() const {
+        return need_fastmath_off;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
@@ -171,6 +175,7 @@ private:
     bool has_amd_shader_half_float{};
     bool has_sparse_texture_2{};
     bool warp_size_potentially_larger_than_guest{};
+    bool need_fastmath_off{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index d27a3cf464..8d11fbc558 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
     std::ranges::transform(infos, stage_infos.begin(),
                            [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
     auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
-        if (device.UseAssemblyShaders()) {
-            for (size_t stage = 0; stage < 5; ++stage) {
-                const auto code{sources[stage]};
-                if (code.empty()) {
-                    continue;
-                }
+        if (!device.UseAssemblyShaders()) {
+            program.handle = glCreateProgram();
+        }
+        for (size_t stage = 0; stage < 5; ++stage) {
+            const auto code{sources[stage]};
+            if (code.empty()) {
+                continue;
+            }
+            if (device.UseAssemblyShaders()) {
                 assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
                 enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
-            }
-        } else {
-            program.handle = glCreateProgram();
-            for (size_t stage = 0; stage < 5; ++stage) {
-                const auto code{sources[stage]};
-                if (code.empty()) {
-                    continue;
-                }
+            } else {
                 AttachShader(Stage(stage), program.handle, code);
             }
-            LinkProgram(program.handle);
         }
-        if (shader_notify) {
-            shader_notify->MarkShaderComplete();
+        if (!device.UseAssemblyShaders()) {
+            LinkProgram(program.handle);
         }
         u32 num_textures{};
         u32 num_images{};
@@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
         if (assembly_shaders && xfb_state) {
             GenerateTransformFeedbackState(*xfb_state);
         }
+        if (shader_notify) {
+            shader_notify->MarkShaderComplete();
+        }
         is_built.store(true, std::memory_order_relaxed);
     }};
     if (thread_worker) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index fedbce2f05..620666622a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
 
           .lower_left_origin_mode = true,
           .need_declared_frag_colors = true,
+          .need_fastmath_off = device.NeedsFastmathOff(),
 
           .has_broken_spirv_clamp = true,
           .has_broken_unsigned_image_offsets = true,
-- 
cgit v1.2.3-70-g09d2