path: root/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
author    TSR Berry <20988865+TSRBerry@users.noreply.github.com>    2023-04-08 01:22:00 +0200
committer Mary <thog@protonmail.com>    2023-04-27 23:51:14 +0200
commit    cee712105850ac3385cd0091a923438167433f9f (patch)
tree      4a5274b21d8b7f938c0d0ce18736d3f2993b11b1    /src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
parent    cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs')
-rw-r--r--    src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs    541
1 files changed, 541 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..c73c6b2a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
@@ -0,0 +1,541 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum MemoryRegion
+ {
+ Local,
+ Shared
+ }
+
+ public static void Atom(EmitterContext context)
+ {
+ InstAtom op = context.GetOp<InstAtom>();
+
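+ // Sign-extend the 20-bit immediate byte offset.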
+ int sOffset = (op.Imm20 << 12) >> 12;
+
+ (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);
+
+ Operand value = GetSrcReg(context, op.SrcB);
+
+ Operand res = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, value);
+
+ context.Copy(GetDest(op.Dest), res);
+ }
+
+ public static void Atoms(EmitterContext context)
+ {
+ InstAtoms op = context.GetOp<InstAtoms>();
+
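+ // Shared memory is accessed by word offset here: shift the byte address right by 2, then add the sign-extended 22-bit immediate as a word offset.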
+ Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));
+
+ int sOffset = (op.Imm22 << 10) >> 10;
+
+ offset = context.IAdd(offset, Const(sOffset));
+
+ Operand value = GetSrcReg(context, op.SrcB);
+
+ AtomSize size = op.AtomsSize switch
+ {
+ AtomsSize.S32 => AtomSize.S32,
+ AtomsSize.U64 => AtomSize.U64,
+ AtomsSize.S64 => AtomSize.S64,
+ _ => AtomSize.U32
+ };
+
+ Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value);
+
+ context.Copy(GetDest(op.Dest), res);
+ }
+
+ public static void Ldc(EmitterContext context)
+ {
+ InstLdc op = context.GetOp<InstLdc>();
+
+ if (op.LsSize > LsSize2.B64)
+ {
+ context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
+ return;
+ }
+
+ bool isSmallInt = op.LsSize < LsSize2.B32;
+
+ int count = op.LsSize == LsSize2.B64 ? 2 : 1;
+
+ Operand slot = Const(op.CbufSlot);
+ Operand srcA = GetSrcReg(context, op.SrcA);
+
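+ // IS/ISL addressing: the upper 16 bits of the register select the constant buffer slot, the lower 16 bits are used as the byte offset.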
+ if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
+ {
+ slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
+ srcA = context.BitwiseAnd(srcA, Const(0xffff));
+ }
+
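+ // Split the byte address into a word offset for the constant buffer access and a bit offset for sub-word extraction.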
+ Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
+ Operand wordOffset = context.ShiftRightU32(addr, Const(2));
+ Operand bitOffset = GetBitOffset(context, addr);
+
+ for (int index = 0; index < count; index++)
+ {
+ Register dest = new Register(op.Dest + index, RegisterType.Gpr);
+
+ if (dest.IsRZ)
+ {
+ break;
+ }
+
+ Operand offset = context.IAdd(wordOffset, Const(index));
+ Operand value = context.LoadConstant(slot, offset);
+
+ if (isSmallInt)
+ {
+ value = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, value);
+ }
+
+ context.Copy(Register(dest), value);
+ }
+ }
+
+ public static void Ldg(EmitterContext context)
+ {
+ InstLdg op = context.GetOp<InstLdg>();
+
+ EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
+ }
+
+ public static void Ldl(EmitterContext context)
+ {
+ InstLdl op = context.GetOp<InstLdl>();
+
+ EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
+ }
+
+ public static void Lds(EmitterContext context)
+ {
+ InstLds op = context.GetOp<InstLds>();
+
+ EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
+ }
+
+ public static void Red(EmitterContext context)
+ {
+ InstRed op = context.GetOp<InstRed>();
+
+ (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, op.Imm20);
+
+ EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
+ }
+
+ public static void Stg(EmitterContext context)
+ {
+ InstStg op = context.GetOp<InstStg>();
+
+ EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
+ }
+
+ public static void Stl(EmitterContext context)
+ {
+ InstStl op = context.GetOp<InstStl>();
+
+ EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
+ }
+
+ public static void Sts(EmitterContext context)
+ {
+ InstSts op = context.GetOp<InstSts>();
+
+ EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
+ }
+
+ private static Operand EmitAtomicOp(
+ EmitterContext context,
+ StorageKind storageKind,
+ AtomOp op,
+ AtomSize type,
+ Operand addrLow,
+ Operand addrHigh,
+ Operand value)
+ {
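+ // Result defaults to 0 for operation/type combinations that are not handled below.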
+ Operand res = Const(0);
+
+ switch (op)
+ {
+ case AtomOp.Add:
+ if (type == AtomSize.S32 || type == AtomSize.U32)
+ {
+ res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ case AtomOp.And:
+ if (type == AtomSize.S32 || type == AtomSize.U32)
+ {
+ res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ case AtomOp.Xor:
+ if (type == AtomSize.S32 || type == AtomSize.U32)
+ {
+ res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ case AtomOp.Or:
+ if (type == AtomSize.S32 || type == AtomSize.U32)
+ {
+ res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ case AtomOp.Max:
+ if (type == AtomSize.S32)
+ {
+ res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
+ }
+ else if (type == AtomSize.U32)
+ {
+ res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ case AtomOp.Min:
+ if (type == AtomSize.S32)
+ {
+ res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
+ }
+ else if (type == AtomSize.U32)
+ {
+ res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
+ }
+ else
+ {
+ context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+ }
+ break;
+ }
+
+ return res;
+ }
+
+ private static void EmitLoad(
+ EmitterContext context,
+ MemoryRegion region,
+ LsSize2 size,
+ Operand srcA,
+ int rd,
+ int offset)
+ {
+ if (size > LsSize2.B128)
+ {
+ context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
+ return;
+ }
+
+ bool isSmallInt = size < LsSize2.B32;
+
+ int count = 1;
+
+ switch (size)
+ {
+ case LsSize2.B64: count = 2; break;
+ case LsSize2.B128: count = 4; break;
+ }
+
+ Operand baseOffset = context.IAdd(srcA, Const(offset));
+ Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
+ Operand bitOffset = GetBitOffset(context, baseOffset);
+
+ for (int index = 0; index < count; index++)
+ {
+ Register dest = new Register(rd + index, RegisterType.Gpr);
+
+ if (dest.IsRZ)
+ {
+ break;
+ }
+
+ Operand elemOffset = context.IAdd(wordOffset, Const(index));
+ Operand value = null;
+
+ switch (region)
+ {
+ case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
+ case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
+ }
+
+ if (isSmallInt)
+ {
+ value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
+ }
+
+ context.Copy(Register(dest), value);
+ }
+ }
+
+ private static void EmitLdg(
+ EmitterContext context,
+ LsSize size,
+ int ra,
+ int rd,
+ int offset,
+ bool extended)
+ {
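+ // Global memory is read 32 bits at a time; B64/B128 read 2 or 4 consecutive words, while smaller sizes are extracted from a single word.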
+ bool isSmallInt = size < LsSize.B32;
+
+ int count = GetVectorCount(size);
+
+ (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+
+ Operand bitOffset = GetBitOffset(context, addrLow);
+
+ for (int index = 0; index < count; index++)
+ {
+ Register dest = new Register(rd + index, RegisterType.Gpr);
+
+ if (dest.IsRZ)
+ {
+ break;
+ }
+
+ Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
+
+ if (isSmallInt)
+ {
+ value = ExtractSmallInt(context, size, bitOffset, value);
+ }
+
+ context.Copy(Register(dest), value);
+ }
+ }
+
+ private static void EmitStore(
+ EmitterContext context,
+ MemoryRegion region,
+ LsSize2 size,
+ Operand srcA,
+ int rd,
+ int offset)
+ {
+ if (size > LsSize2.B128)
+ {
+ context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+ return;
+ }
+
+ bool isSmallInt = size < LsSize2.B32;
+
+ int count = 1;
+
+ switch (size)
+ {
+ case LsSize2.B64: count = 2; break;
+ case LsSize2.B128: count = 4; break;
+ }
+
+ Operand baseOffset = context.IAdd(srcA, Const(offset));
+ Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
+ Operand bitOffset = GetBitOffset(context, baseOffset);
+
+ for (int index = 0; index < count; index++)
+ {
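+ // Register indices past RZ are invalid; the base register is used instead in that case.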
+ bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
+
+ Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
+ Operand elemOffset = context.IAdd(wordOffset, Const(index));
+
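+ // Local memory has no sub-word stores: read the word, merge in the small value, then write the whole word back.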
+ if (isSmallInt && region == MemoryRegion.Local)
+ {
+ Operand word = context.LoadLocal(elemOffset);
+
+ value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
+ }
+
+ if (region == MemoryRegion.Local)
+ {
+ context.StoreLocal(elemOffset, value);
+ }
+ else if (region == MemoryRegion.Shared)
+ {
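+ // Shared memory has dedicated 8-bit and 16-bit stores that take the byte address directly.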
+ switch (size)
+ {
+ case LsSize2.U8:
+ case LsSize2.S8:
+ context.StoreShared8(baseOffset, value);
+ break;
+ case LsSize2.U16:
+ case LsSize2.S16:
+ context.StoreShared16(baseOffset, value);
+ break;
+ default:
+ context.StoreShared(elemOffset, value);
+ break;
+ }
+ }
+ }
+ }
+
+ private static void EmitStg(
+ EmitterContext context,
+ LsSize2 size,
+ int ra,
+ int rd,
+ int offset,
+ bool extended)
+ {
+ if (size > LsSize2.B128)
+ {
+ context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+ return;
+ }
+
+ int count = GetVectorCount((LsSize)size);
+
+ (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+
+ Operand bitOffset = GetBitOffset(context, addrLow);
+
+ for (int index = 0; index < count; index++)
+ {
+ bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
+
+ Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
+
+ Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
+
+ if (size == LsSize2.U8 || size == LsSize2.S8)
+ {
+ context.StoreGlobal8(addrLowOffset, addrHigh, value);
+ }
+ else if (size == LsSize2.U16 || size == LsSize2.S16)
+ {
+ context.StoreGlobal16(addrLowOffset, addrHigh, value);
+ }
+ else
+ {
+ context.StoreGlobal(addrLowOffset, addrHigh, value);
+ }
+ }
+ }
+
+ private static int GetVectorCount(LsSize size)
+ {
+ switch (size)
+ {
+ case LsSize.B64:
+ return 2;
+ case LsSize.B128:
+ case LsSize.UB128:
+ return 4;
+ }
+
+ return 1;
+ }
+
+ private static (Operand, Operand) Get40BitsAddress(
+ EmitterContext context,
+ Register ra,
+ bool extended,
+ int offset)
+ {
+ Operand addrLow = Register(ra);
+ Operand addrHigh;
+
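+ // With the E (extended) modifier, the next register holds the upper bits of the 40-bit address.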
+ if (extended && !ra.IsRZ)
+ {
+ addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
+ }
+ else
+ {
+ addrHigh = Const(0);
+ }
+
+ Operand offs = Const(offset);
+
+ addrLow = context.IAdd(addrLow, offs);
+
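+ // The offset addition may wrap the low 32 bits; propagate the carry into the high word.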
+ if (extended)
+ {
+ Operand carry = context.ICompareLessUnsigned(addrLow, offs);
+
+ addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
+ }
+
+ return (addrLow, addrHigh);
+ }
+
+ private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
+ {
+ // Note: bit offset = (baseOffset & 0b11) * 8.
+ // Addresses should be always aligned to the integer type,
+ // so we don't need to take unaligned addresses into account.
+ return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
+ }
+
+ private static Operand ExtractSmallInt(
+ EmitterContext context,
+ LsSize size,
+ Operand bitOffset,
+ Operand value)
+ {
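+ // Shift the target bits down to bit 0, then zero- or sign-extend to 32 bits based on the size.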
+ value = context.ShiftRightU32(value, bitOffset);
+
+ switch (size)
+ {
+ case LsSize.U8: value = ZeroExtendTo32(context, value, 8); break;
+ case LsSize.U16: value = ZeroExtendTo32(context, value, 16); break;
+ case LsSize.S8: value = SignExtendTo32(context, value, 8); break;
+ case LsSize.S16: value = SignExtendTo32(context, value, 16); break;
+ }
+
+ return value;
+ }
+
+ private static Operand InsertSmallInt(
+ EmitterContext context,
+ LsSize size,
+ Operand bitOffset,
+ Operand word,
+ Operand value)
+ {
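+ // Mask the value to the store width and insert it into the existing word at the bit offset.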
+ switch (size)
+ {
+ case LsSize.U8:
+ case LsSize.S8:
+ value = context.BitwiseAnd(value, Const(0xff));
+ value = context.BitfieldInsert(word, value, bitOffset, Const(8));
+ break;
+
+ case LsSize.U16:
+ case LsSize.S16:
+ value = context.BitwiseAnd(value, Const(0xffff));
+ value = context.BitfieldInsert(word, value, bitOffset, Const(16));
+ break;
+ }
+
+ return value;
+ }
+ }
+}
\ No newline at end of file