From 8d8ce24f20649be639dbb3cc0f3edc90c6a6481e Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Thu, 3 Jun 2021 19:15:36 -0400
Subject: glsl: Implement Load/WriteGlobal

along with some other misc changes and fixes
---
 .../backend/glsl/emit_context.cpp                  | 86 +++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

(limited to 'src/shader_recompiler/backend/glsl/emit_context.cpp')

diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 5048c8b689..f68f332129 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -9,6 +9,14 @@
 
 namespace Shader::Backend::GLSL {
 namespace {
+u32 CbufIndex(u32 offset) { // Maps an offset to a component index in the range 0-3.
+    return (offset / 4) % 4; // offset / 4 counts 4-unit words; % 4 wraps within a group of four.
+}
+
+char OffsetSwizzle(u32 offset) { // Returns the swizzle letter ('x', 'y', 'z' or 'w') for an offset.
+    return "xyzw"[CbufIndex(offset)]; // CbufIndex yields 0-3, so the lookup stays inside "xyzw".
+}
+
 std::string_view InterpDecorator(Interpolation interp) {
     switch (interp) {
     case Interpolation::Smooth:
@@ -382,6 +390,8 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
 }
 
 void EmitContext::DefineHelperFunctions() {
+    header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
+              "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
     if (info.uses_global_increment || info.uses_shared_increment) {
         header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n";
     }
@@ -391,7 +401,7 @@ void EmitContext::DefineHelperFunctions() {
     }
     if (info.uses_atomic_f32_add) {
         header += "uint CasFloatAdd(uint op_a,float op_b){return "
-                  "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n";
+                  "ftou(utof(op_a)+op_b);}\n";
     }
     if (info.uses_atomic_f32x2_add) {
         header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return "
@@ -423,6 +433,80 @@ void EmitContext::DefineHelperFunctions() {
     if (info.uses_atomic_s32_max) {
         header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
     }
+    if (info.uses_global_memory) {
+        std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){\n"};
+        std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){\n"};
+        std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){\n"};
+
+        std::string load_func{"uint LoadGlobal32(uint64_t addr){\n"};
+        std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){\n"};
+        std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){\n"};
+        const size_t num_buffers{info.storage_buffers_descriptors.size()};
+        for (size_t index = 0; index < num_buffers; ++index) {
+            if (!info.nvn_buffer_used[index]) {
+                continue;
+            }
+            const auto& ssbo{info.storage_buffers_descriptors[index]};
+            const u32 size_cbuf_offset{ssbo.cbuf_offset + 8};
+            const auto ssbo_addr{fmt::format("ssbo_addr{}", index)};
+            const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)};
+            const auto cbuf_value{fmt::format(
+                "uint64_t {}=packUint2x32(uvec2(ftou({}[{}].{}),ftou({}[{}].{})));", ssbo_addr,
+                cbuf, ssbo.cbuf_offset / 16, OffsetSwizzle(ssbo.cbuf_offset), cbuf,
+                (ssbo.cbuf_offset + 4) / 16, OffsetSwizzle(ssbo.cbuf_offset + 4))};
+
+            write_func += cbuf_value;
+            write_func_64 += cbuf_value;
+            write_func_128 += cbuf_value;
+            load_func += cbuf_value;
+            load_func_64 += cbuf_value;
+            load_func_128 += cbuf_value;
+            const auto ssbo_size{fmt::format("ftou({}[{}].{}),ftou({}[{}].{})", cbuf,
+                                             size_cbuf_offset / 16, OffsetSwizzle(size_cbuf_offset),
+                                             cbuf, (size_cbuf_offset + 4) / 16,
+                                             OffsetSwizzle(size_cbuf_offset + 4))};
+            const auto comparison{fmt::format("if((addr>={})&&(addr<({}+\nuint64_t(uvec2({}))))){{",
+                                              ssbo_addr, ssbo_addr, ssbo_size)};
+            write_func += comparison;
+            write_func_64 += comparison;
+            write_func_128 += comparison;
+            load_func += comparison;
+            load_func_64 += comparison;
+            load_func_128 += comparison;
+
+            const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)};
+            write_func += fmt::format("{}[uint(addr-{})>>2]=data;return;}}", ssbo_name, ssbo_addr);
+            write_func_64 +=
+                fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;return;}}",
+                            ssbo_name, ssbo_addr, ssbo_name, ssbo_addr);
+            write_func_128 +=
+                fmt::format("{}[uint(addr-{})>>2]=data.x;{}[uint(addr-{}+4)>>2]=data.y;{}[uint("
+                            "addr-{}+8)>>2]=data.z;{}[uint(addr-{}+12)>>2]=data.w;return;}}",
+                            ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name, ssbo_addr,
+                            ssbo_name, ssbo_addr);
+            load_func += fmt::format("return {}[uint(addr-{})>>2];}}", ssbo_name, ssbo_addr);
+            load_func_64 +=
+                fmt::format("return uvec2({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2]);}}",
+                            ssbo_name, ssbo_addr, ssbo_name, ssbo_addr);
+            load_func_128 += fmt::format("return "
+                                         "uvec4({}[uint(addr-{})>>2],{}[uint(addr-{}+4)>>2],{}["
+                                         "uint(addr-{}+8)>>2],{}[uint(addr-{}+12)>>2]);}}",
+                                         ssbo_name, ssbo_addr, ssbo_name, ssbo_addr, ssbo_name,
+                                         ssbo_addr, ssbo_name, ssbo_addr);
+        }
+        write_func += "}\n";
+        write_func_64 += "}\n";
+        write_func_128 += "}\n";
+        load_func += "return 0u;}\n";
+        load_func_64 += "return uvec2(0);}\n";
+        load_func_128 += "return uvec4(0);}\n";
+        header += write_func;
+        header += write_func_64;
+        header += write_func_128;
+        header += load_func;
+        header += load_func_64;
+        header += load_func_128;
+    }
 }
 
 void EmitContext::SetupImages(Bindings& bindings) {
-- 
cgit v1.2.3-70-g09d2