aboutsummaryrefslogtreecommitdiff
path: root/Ryujinx.Graphics/Shader/ShaderHeader.cs
diff options
context:
space:
mode:
authorgdkchan <gab.dark.100@gmail.com>2019-04-17 20:57:08 -0300
committerjduncanator <1518948+jduncanator@users.noreply.github.com>2019-04-18 09:57:08 +1000
commit6b23a2c125b9c48b5ebea92716004ef68698bb0f (patch)
tree69332df6fbbd8e2bddc522ba682fcc5c7a69e101 /Ryujinx.Graphics/Shader/ShaderHeader.cs
parentb2e88b04a85b41cc60af3485d88c90429e84a218 (diff)
New shader translator implementation (#654)
* Start implementing a new shader translator * Fix shift instructions and a typo * Small refactoring on StructuredProgram, move RemovePhis method to a separate class * Initial geometry shader support * Implement TLD4 * Fix -- There's no negation on FMUL32I * Add constant folding and algebraic simplification optimizations, nits * Some leftovers from constant folding * Avoid cast for constant assignments * Add a branch elimination pass, and misc small fixes * Remove redundant branches, add expression propagation and other improvements on the code * Small leftovers -- add missing break and continue, remove unused properties, other improvements * Add null check to handle empty block cases on block visitor * Add HADD2 and HMUL2 half float shader instructions * Optimize pack/unpack sequences, some fixes related to half float instructions * Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen * Fix copy paste mistake that caused RZ to be ignored on the AST instruction * Add workaround for conditional exit, and fix half float instruction with constant buffer * Add missing 0.0 source for TLDS.LZ variants * Simplify the switch for TLDS.LZ * Texture instructions related fixes * Implement the HFMA instruction, and some misc. fixes * Enable constant folding on UnpackHalf2x16 instructions * Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods * Remove the old shader translator * Remove ShaderDeclInfo and other unused things * Add dual vertex shader support * Add ShaderConfig, used to pass shader type and maximum cbuffer size * Move and rename some instruction enums * Move texture instructions into a separate file * Move operand GetExpression and locals management to OperandManager * Optimize opcode decoding using a simple list and binary search * Add missing condition for do-while on goto elimination * Misc. 
fixes on texture instructions * Simplify TLDS switch * Address PR feedback, and a nit
Diffstat (limited to 'Ryujinx.Graphics/Shader/ShaderHeader.cs')
-rw-r--r--Ryujinx.Graphics/Shader/ShaderHeader.cs166
1 files changed, 166 insertions, 0 deletions
diff --git a/Ryujinx.Graphics/Shader/ShaderHeader.cs b/Ryujinx.Graphics/Shader/ShaderHeader.cs
new file mode 100644
index 00000000..53abdc56
--- /dev/null
+++ b/Ryujinx.Graphics/Shader/ShaderHeader.cs
@@ -0,0 +1,166 @@
+using Ryujinx.Graphics.Gal;
+using Ryujinx.Graphics.Shader.Decoders;
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Per-render-target set of flags, one for each of the red, green, blue and alpha components.
+    /// </summary>
+    struct OutputMapTarget
+    {
+        /// <summary>Flag for the red component (component index 0).</summary>
+        public bool Red { get; }
+
+        /// <summary>Flag for the green component (component index 1).</summary>
+        public bool Green { get; }
+
+        /// <summary>Flag for the blue component (component index 2).</summary>
+        public bool Blue { get; }
+
+        /// <summary>Flag for the alpha component (component index 3).</summary>
+        public bool Alpha { get; }
+
+        /// <summary>
+        /// True when at least one of the four component flags is set.
+        /// </summary>
+        public bool Enabled
+        {
+            get
+            {
+                return Red || Green || Blue || Alpha;
+            }
+        }
+
+        /// <summary>
+        /// Creates a target with the given component flags.
+        /// </summary>
+        /// <param name="red">Value for <see cref="Red"/>.</param>
+        /// <param name="green">Value for <see cref="Green"/>.</param>
+        /// <param name="blue">Value for <see cref="Blue"/>.</param>
+        /// <param name="alpha">Value for <see cref="Alpha"/>.</param>
+        public OutputMapTarget(bool red, bool green, bool blue, bool alpha)
+        {
+            Red   = red;
+            Green = green;
+            Blue  = blue;
+            Alpha = alpha;
+        }
+
+        /// <summary>
+        /// Looks up a component flag by index (0 = red, 1 = green, 2 = blue, 3 = alpha).
+        /// </summary>
+        /// <param name="component">Component index, from 0 to 3 inclusive.</param>
+        /// <returns>The flag stored for the requested component.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown when <paramref name="component"/> is outside the 0 to 3 range.
+        /// </exception>
+        public bool ComponentEnabled(int component)
+        {
+            // Reject anything that does not name one of the four components up front.
+            if (component < 0 || component > 3)
+            {
+                throw new ArgumentOutOfRangeException(nameof(component));
+            }
+
+            if (component == 0)
+            {
+                return Red;
+            }
+
+            if (component == 1)
+            {
+                return Green;
+            }
+
+            if (component == 2)
+            {
+                return Blue;
+            }
+
+            return Alpha;
+        }
+    }
+
+    /// <summary>
+    /// Decoded fields of a shader header read from memory at a given address.
+    /// </summary>
+    /// <remarks>
+    /// Five 32-bit "common" words are read at byte offsets 0 to 16, and two more words
+    /// (the output map) at byte offsets 72 and 76. Individual fields are pulled out of
+    /// those words with the Extract helpers.
+    /// NOTE(review): the Extract arguments are assumed to be (bit offset) and
+    /// (bit offset, bit count); confirm against the helper's definition.
+    /// </remarks>
+    class ShaderHeader
+    {
+        // Fields taken from common word 0.
+        public int  SphType         { get; }
+        public int  Version         { get; }
+        public int  ShaderType      { get; }
+        public bool MrtEnable       { get; }
+        public bool KillsPixels     { get; }
+        public bool DoesGlobalStore { get; }
+        public int  SassVersion     { get; }
+        public bool DoesLoadOrStore { get; }
+        public bool DoesFp64        { get; }
+        public int  StreamOutMask   { get; }
+
+        // Fields taken from common word 1.
+        public int ShaderLocalMemoryLowSize { get; }
+        public int PerPatchAttributeCount   { get; }
+
+        // Fields taken from common word 2.
+        public int ShaderLocalMemoryHighSize { get; }
+        public int ThreadsPerInputPrimitive  { get; }
+
+        // Fields taken from common word 3.
+        public int ShaderLocalMemoryCrsSize { get; }
+        public int OutputTopology           { get; }
+
+        // Fields taken from common word 4.
+        public int MaxOutputVertexCount { get; }
+        public int StoreReqStart        { get; }
+        public int StoreReqEnd          { get; }
+
+        /// <summary>
+        /// Eight output map targets, each built from four consecutive flags of the
+        /// word read at byte offset 72.
+        /// </summary>
+        public OutputMapTarget[] OmapTargets { get; }
+
+        // Flags taken from the word read at byte offset 76.
+        public bool OmapSampleMask { get; }
+        public bool OmapDepth      { get; }
+
+        /// <summary>
+        /// Reads the header words from <paramref name="memory"/> and decodes every field.
+        /// </summary>
+        /// <param name="memory">Memory accessor used to read the 32-bit header words.</param>
+        /// <param name="address">Address of the first header word.</param>
+        public ShaderHeader(IGalMemory memory, ulong address)
+        {
+            // ReadInt32 is called with a signed address, so convert once and reuse.
+            long headerBase = (long)address;
+
+            int word0 = memory.ReadInt32(headerBase + 0);
+            int word1 = memory.ReadInt32(headerBase + 4);
+            int word2 = memory.ReadInt32(headerBase + 8);
+            int word3 = memory.ReadInt32(headerBase + 12);
+            int word4 = memory.ReadInt32(headerBase + 16);
+
+            SphType         = word0.Extract(0, 5);
+            Version         = word0.Extract(5, 5);
+            ShaderType      = word0.Extract(10, 4);
+            MrtEnable       = word0.Extract(14);
+            KillsPixels     = word0.Extract(15);
+            DoesGlobalStore = word0.Extract(16);
+            SassVersion     = word0.Extract(17, 4);
+            DoesLoadOrStore = word0.Extract(26);
+            DoesFp64        = word0.Extract(27);
+            StreamOutMask   = word0.Extract(28, 4);
+
+            ShaderLocalMemoryLowSize = word1.Extract(0, 24);
+            PerPatchAttributeCount   = word1.Extract(24, 8);
+
+            ShaderLocalMemoryHighSize = word2.Extract(0, 24);
+            ThreadsPerInputPrimitive  = word2.Extract(24, 8);
+
+            ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
+            OutputTopology           = word3.Extract(24, 4);
+
+            MaxOutputVertexCount = word4.Extract(0, 12);
+            StoreReqStart        = word4.Extract(12, 8);
+            StoreReqEnd          = word4.Extract(24, 8);
+
+            int omapTargetWord = memory.ReadInt32(headerBase + 72);
+            int omapWord       = memory.ReadInt32(headerBase + 76);
+
+            OutputMapTarget[] targets = new OutputMapTarget[8];
+
+            for (int index = 0; index < targets.Length; index++)
+            {
+                // Each target owns four consecutive flags: red, green, blue, alpha.
+                int firstFlag = index * 4;
+
+                bool red   = omapTargetWord.Extract(firstFlag + 0);
+                bool green = omapTargetWord.Extract(firstFlag + 1);
+                bool blue  = omapTargetWord.Extract(firstFlag + 2);
+                bool alpha = omapTargetWord.Extract(firstFlag + 3);
+
+                targets[index] = new OutputMapTarget(red, green, blue, alpha);
+            }
+
+            OmapTargets = targets;
+
+            OmapSampleMask = omapWord.Extract(0);
+            OmapDepth      = omapWord.Extract(1);
+        }
+
+        /// <summary>
+        /// Index computed as the number of enabled color components across all
+        /// output map targets, plus one.
+        /// </summary>
+        public int DepthRegister
+        {
+            get
+            {
+                int enabledComponents = 0;
+
+                foreach (OutputMapTarget target in OmapTargets)
+                {
+                    for (int component = 0; component < 4; component++)
+                    {
+                        enabledComponents += target.ComponentEnabled(component) ? 1 : 0;
+                    }
+                }
+
+                // Depth register is always two registers after the last color output.
+                return enabledComponents + 1;
+            }
+        }
+    }
+} \ No newline at end of file