From b84d429c2ec59e54a89d9d4e34b0df9f22172e8f Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Mon, 27 Dec 2021 23:59:32 -0500
Subject: glsl_context_get_set: Add alternative cbuf type for broken drivers

Some drivers have a bug when bitwise-converting floating-point cbuf values to uint variables. This adds a workaround for those drivers that declares all cbufs as uint and converts to floating point as needed.
---
 .../backend/glsl/emit_glsl_context_get_set.cpp     | 35 +++++++++++++---------
 .../backend/glsl/glsl_emit_context.cpp             |  7 +++--
 src/shader_recompiler/profile.h                    |  2 ++
 src/video_core/renderer_opengl/gl_device.cpp       |  9 ++----
 src/video_core/renderer_opengl/gl_device.h         |  5 ++++
 src/video_core/renderer_opengl/gl_shader_cache.cpp |  1 +
 6 files changed, 35 insertions(+), 24 deletions(-)

(limited to 'src')

diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 5ef46d6343..0c1fbc7b1f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
 
 void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
-    GetCbuf(ctx, ret, binding, offset, 32);
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
@@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
             return;
         }
         if (u32_offset % 2 == 0) {
-            ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+            ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
                          OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
         } else {
-            ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
-                         OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
-                         OffsetSwizzle(u32_offset + 4));
+            ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
+                         u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
+                         (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
         }
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
-                     inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
+                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
+                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
                 "xyzw"[(swizzle + 1) % 4]);
     }
 }
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index bc9d2a904a..bb7f1a0fd3 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
         return;
     }
     for (const auto& desc : info.constant_buffer_descriptors) {
-        header += fmt::format(
-            "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
-            bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+        const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
+        header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
+                              bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
+                              stage_name, desc.index, 4 * 1024);
         bindings.uniform_buffer += desc.count;
     }
 }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0c3b3b172..9deb3f4bb7 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -65,6 +65,8 @@ struct Profile {
     bool has_gl_component_indexing_bug{};
     /// The precise type qualifier is broken in the fragment stage of some drivers
     bool has_gl_precise_bug{};
+    /// Some drivers do not properly support floatBitsToUint when used on cbufs
+    bool has_gl_cbuf_ftou_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e01..32736126f6 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,12 @@ Device::Device() {
         shader_backend = Settings::ShaderBackend::GLSL;
     }
 
-    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
-        !Settings::values.renderer_debug) {
+    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
         const std::string_view driver_version = version.substr(13);
         const int version_major =
             std::atoi(driver_version.substr(0, driver_version.find(".")).data());
-
         if (version_major >= 495) {
-            LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
-                                       "with yuzu. Forcing GLASM as a mitigation.");
-            shader_backend = Settings::ShaderBackend::GLASM;
-            use_assembly_shaders = true;
+            has_cbuf_ftou_bug = true;
         }
     }
 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659d..fe53ef9913 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,10 @@ public:
         return need_fastmath_off;
     }
 
+    bool HasCbufFtouBug() const {
+        return has_cbuf_ftou_bug;
+    }
+
     Settings::ShaderBackend GetShaderBackend() const {
         return shader_backend;
     }
@@ -200,6 +204,7 @@ private:
     bool has_sparse_texture_2{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
+    bool has_cbuf_ftou_bug{};
 
     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f3..1efcc35620 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_broken_fp16_float_controls = false,
           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
           .has_gl_precise_bug = device.HasPreciseBug(),
+          .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
       },
-- 
cgit v1.2.3-70-g09d2