diff options
author | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-05-19 01:00:51 -0400 |
---|---|---|
committer | ameerj <52414509+ameerj@users.noreply.github.com> | 2021-07-22 21:51:33 -0400 |
commit | 36d040da7059e438fa35f1a5de5d5aed4cef5ca4 (patch) | |
tree | 304f0b94407b689627c4966212c06dc3e8ad65d5 /src/shader_recompiler/backend/glasm/emit_glasm.cpp | |
parent | 3da7b98d376cc0b8ec00de80755d9e90fc90e3a8 (diff) |
glasm: Implement FSWZADD
Diffstat (limited to 'src/shader_recompiler/backend/glasm/emit_glasm.cpp')
-rw-r--r-- | src/shader_recompiler/backend/glasm/emit_glasm.cpp | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 4fc7d2f2fb..f110fd7f88 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -281,7 +281,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "OPTION NV_shader_atomic_fp16_vector;"; } - if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) { + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_fswzadd) { header += "OPTION NV_shader_thread_group;"; } if (info.uses_subgroup_shuffles) { @@ -416,12 +417,25 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi if (program.local_memory_size > 0) { header += fmt::format("lmem[{}],", program.local_memory_size); } + if (program.info.uses_fswzadd) { + header += "FSWZA[4],FSWZB[4],"; + } header += "RC;" "LONG TEMP "; for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { header += fmt::format("D{},", index); } header += "DC;"; + if (program.info.uses_fswzadd) { + header += "MOV.F FSWZA[0],-1;" + "MOV.F FSWZA[1],1;" + "MOV.F FSWZA[2],-1;" + "MOV.F FSWZA[3],0;" + "MOV.F FSWZB[0],-1;" + "MOV.F FSWZB[1],-1;" + "MOV.F FSWZB[2],1;" + "MOV.F FSWZB[3],-1;"; + } ctx.code.insert(0, header); ctx.code += "END"; return ctx.code; |