From e35ffbbeb0f85f676416fcb8f0bb0207671f379d Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Sun, 30 May 2021 00:53:26 -0400
Subject: glsl: Implement VOTE for subgroup size potentially larger than guest

---
 .../backend/glsl/emit_context.cpp                  | 12 ++++--
 .../backend/glsl/emit_glsl_warp.cpp                | 43 ++++++++++++++--------
 2 files changed, 36 insertions(+), 19 deletions(-)

(limited to 'src/shader_recompiler/backend')

diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 5456d4e5be..c6325e55f7 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
 
 void EmitContext::SetupExtensions(std::string&) {
     header += "#extension GL_ARB_separate_shader_objects : enable\n";
-    header += "#extension GL_ARB_sparse_texture2 : enable\n";
-    header += "#extension GL_EXT_texture_shadow_lod : enable\n";
-    // header += "#extension GL_ARB_texture_cube_map_array : enable\n";
+    if (stage != Stage::Compute) {
+        // TODO: track actual usage of these extensions instead of enabling them per stage
+        header += "#extension GL_ARB_sparse_texture2 : enable\n";
+        header += "#extension GL_EXT_texture_shadow_lod : enable\n";
+    }
     if (info.uses_int64) {
         header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
     }
@@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) {
         info.uses_subgroup_shuffles || info.uses_fswzadd) {
         header += "#extension GL_ARB_shader_ballot : enable\n";
         header += "#extension GL_ARB_shader_group_vote : enable\n";
+        header += "#extension GL_KHR_shader_subgroup_basic : enable\n";
+        if (!info.uses_int64) {
+            header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
+        }
     }
 }
 
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index e462c977c2..8a018acb5d 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in
 }
 
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=allInvocationsARB({});", inst, pred);
+    } else { // Host subgroup may be wider than the guest warp: use this lane's ballot word
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID>>5]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID>>5]", pred)};
+        ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+    }
 }
 
 void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
+    } else { // Host subgroup may be wider than the guest warp: use this lane's ballot word
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID>>5]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID>>5]", pred)};
+        ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask);
+    }
 }
 
 void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
+    } else { // Host subgroup may be wider than the guest warp: use this lane's ballot word
+        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID>>5]")};
+        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID>>5]", pred)};
+        const auto value{fmt::format("({}^{})", ballot, active_mask)};
+        ctx.AddU1("{}=({}==0u)||({}=={});", inst, value, value, active_mask);
+    }
 }
 
 void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
-    // TODO:
-    // if (ctx.profile.warp_size_potentially_larger_than_guest) {
-    // }
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
+    } else { // Host subgroup may be wider than the guest warp: use this lane's ballot word
+        ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID>>5];", inst, pred);
+    }
 }
 
 void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
-- 
cgit v1.2.3-70-g09d2