aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormpnico <mpnico@gmail.com>2021-08-26 23:50:28 +0200
committerGitHub <noreply@github.com>2021-08-26 23:50:28 +0200
commit8e1adb95cf7f67b976f105f4cac26d3ff2986057 (patch)
treef56ee0f92495fc1bd1e307c3bd51a2d1240d197b
parent5cab8ea4ad2388bd035150e79f241ae5df95ab3b (diff)
Add support for HLE macros and accelerate MultiDrawElementsIndirectCount #2 (#2557)
* Add support for HLE macros and accelerate MultiDrawElementsIndirectCount * Add missing barrier * Fix index buffer count * Add support check for each macro hle before use * Add missing xml doc Co-authored-by: gdkchan <gab.dark.100@gmail.com>
-rw-r--r--Ryujinx.Common/Hash128.cs6
-rw-r--r--Ryujinx.Graphics.GAL/Capabilities.cs3
-rw-r--r--Ryujinx.Graphics.GAL/IPipeline.cs5
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs9
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs30
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs19
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs29
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs51
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs142
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs11
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs89
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs8
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs2
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs8
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs59
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs21
-rw-r--r--Ryujinx.Graphics.Gpu/GraphicsConfig.cs5
-rw-r--r--Ryujinx.Graphics.Gpu/Memory/BufferCache.cs12
-rw-r--r--Ryujinx.Graphics.OpenGL/HwCapabilities.cs2
-rw-r--r--Ryujinx.Graphics.OpenGL/Pipeline.cs58
-rw-r--r--Ryujinx.Graphics.OpenGL/Renderer.cs1
-rw-r--r--Ryujinx.Graphics.OpenGL/VertexArray.cs22
22 files changed, 552 insertions, 40 deletions
diff --git a/Ryujinx.Common/Hash128.cs b/Ryujinx.Common/Hash128.cs
index 99cd015c..04457bd0 100644
--- a/Ryujinx.Common/Hash128.cs
+++ b/Ryujinx.Common/Hash128.cs
@@ -9,6 +9,12 @@ namespace Ryujinx.Common
public ulong Low;
public ulong High;
+ public Hash128(ulong low, ulong high)
+ {
+ Low = low;
+ High = high;
+ }
+
public override string ToString()
{
return $"{High:x16}{Low:x16}";
diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs
index 937c3f5b..78a99554 100644
--- a/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.GAL
public bool SupportsNonConstantTextureOffset { get; }
public bool SupportsTextureShadowLod { get; }
public bool SupportsViewportSwizzle { get; }
+ public bool SupportsIndirectParameters { get; }
public int MaximumComputeSharedMemorySize { get; }
public float MaximumSupportedAnisotropy { get; }
@@ -25,6 +26,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsNonConstantTextureOffset,
bool supportsTextureShadowLod,
bool supportsViewportSwizzle,
+ bool supportsIndirectParameters,
int maximumComputeSharedMemorySize,
float maximumSupportedAnisotropy,
int storageBufferOffsetAlignment)
@@ -37,6 +39,7 @@ namespace Ryujinx.Graphics.GAL
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportSwizzle = supportsViewportSwizzle;
+ SupportsIndirectParameters = supportsIndirectParameters;
MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs
index b2f9d5cb..a5af6391 100644
--- a/Ryujinx.Graphics.GAL/IPipeline.cs
+++ b/Ryujinx.Graphics.GAL/IPipeline.cs
@@ -19,6 +19,8 @@ namespace Ryujinx.Graphics.GAL
int stencilValue,
int stencilMask);
+ void CommandBufferBarrier();
+
void CopyBuffer(BufferHandle source, BufferHandle destination, int srcOffset, int dstOffset, int size);
void DispatchCompute(int groupsX, int groupsY, int groupsZ);
@@ -33,6 +35,9 @@ namespace Ryujinx.Graphics.GAL
void EndTransformFeedback();
+ void MultiDrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride);
+ void MultiDrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride);
+
void SetAlphaTest(bool enable, float reference, CompareOp op);
void SetBlendState(int index, BlendDescriptor blend);
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
index 28822f4e..fe49b0f2 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
@@ -161,6 +161,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <param name="argument">Method call argument</param>
public void SetReference(int argument)
{
+ _context.Renderer.Pipeline.CommandBufferBarrier();
+
_context.CreateHostSyncIfNeeded();
}
@@ -195,10 +197,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// Pushes an argument to a macro.
/// </summary>
/// <param name="index">Index of the macro</param>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="argument">Argument to be pushed to the macro</param>
- public void MmePushArgument(int index, int argument)
+ public void MmePushArgument(int index, ulong gpuVa, int argument)
{
- _macros[index].PushArgument(argument);
+ _macros[index].PushArgument(gpuVa, argument);
}
/// <summary>
@@ -208,7 +211,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <param name="argument">Initial argument passed to the macro</param>
public void MmeStart(int index, int argument)
{
- _macros[index].StartExecution(argument);
+ _macros[index].StartExecution(_context, _parent, _macroCode, argument);
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
index ada3bc4b..b3de738d 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -54,11 +54,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Fetch the command buffer.
/// </summary>
- public void Fetch(MemoryManager memoryManager)
+ /// <param name="flush">If true, flushes potential GPU written data before reading the command buffer</param>
+ public void Fetch(MemoryManager memoryManager, bool flush = true)
{
if (Words == null)
{
- Words = MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, true)).ToArray();
+ Words = MemoryMarshal.Cast<byte, int>(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, flush)).ToArray();
}
}
}
@@ -73,6 +74,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
private readonly AutoResetEvent _event;
private bool _interrupt;
+ private int _flushSkips;
/// <summary>
/// Creates a new instance of the GPU General Purpose FIFO device.
@@ -188,8 +190,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
// Process command buffers.
while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
{
+ bool flushCommandBuffer = true;
+
+ if (_flushSkips != 0)
+ {
+ _flushSkips--;
+ flushCommandBuffer = false;
+ }
+
_currentCommandBuffer = entry;
- _currentCommandBuffer.Fetch(entry.Processor.MemoryManager);
+ _currentCommandBuffer.Fetch(entry.Processor.MemoryManager, flushCommandBuffer);
// If we are changing the current channel,
// we need to force all the host state to be updated.
@@ -199,13 +209,25 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
entry.Processor.ForceAllDirty();
}
- entry.Processor.Process(_currentCommandBuffer.Words);
+ entry.Processor.Process(entry.EntryAddress, _currentCommandBuffer.Words);
}
_interrupt = false;
}
/// <summary>
+ /// Sets the number of flushes that should be skipped for subsequent command buffers.
+ /// </summary>
+ /// <remarks>
+ /// This can improve performance when command buffer data only needs to be consumed by the GPU.
+ /// </remarks>
+ /// <param name="count">The amount of flushes that should be skipped</param>
+ internal void SetFlushSkips(int count)
+ {
+ _flushSkips = count;
+ }
+
+ /// <summary>
/// Interrupts command processing. This will break out of the DispatchCalls loop.
/// </summary>
public void Interrupt()
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
index ea34d6cd..096b795c 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs
@@ -29,6 +29,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
public MemoryManager MemoryManager => _channel.MemoryManager;
/// <summary>
+ /// 3D Engine.
+ /// </summary>
+ public ThreedClass ThreedClass => _3dClass;
+
+ /// <summary>
/// Internal GPFIFO state.
/// </summary>
private struct DmaState
@@ -70,13 +75,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Processes a command buffer.
/// </summary>
+ /// <param name="baseGpuVa">Base GPU virtual address of the command buffer</param>
/// <param name="commandBuffer">Command buffer</param>
- public void Process(ReadOnlySpan<int> commandBuffer)
+ public void Process(ulong baseGpuVa, ReadOnlySpan<int> commandBuffer)
{
for (int index = 0; index < commandBuffer.Length; index++)
{
int command = commandBuffer[index];
+ ulong gpuVa = baseGpuVa + (ulong)index * 4;
+
if (_state.MethodCount != 0)
{
if (TryFastI2mBufferUpdate(commandBuffer, ref index))
@@ -84,7 +92,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
continue;
}
- Send(_state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
+ Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1);
if (!_state.NonIncrementing)
{
@@ -120,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
_state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod;
break;
case SecOp.ImmdDataMethod:
- Send(meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
+ Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true);
break;
}
}
@@ -198,8 +206,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
/// <summary>
/// Sends a uncompressed method for processing by the graphics pipeline.
/// </summary>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="meth">Method to be processed</param>
- private void Send(int offset, int argument, int subChannel, bool isLastCall)
+ private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall)
{
if (offset < 0x60)
{
@@ -243,7 +252,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
if ((offset & 1) != 0)
{
- _fifoClass.MmePushArgument(macroIndex, argument);
+ _fifoClass.MmePushArgument(macroIndex, gpuVa, argument);
}
else
{
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
index b957de08..640687f0 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs
@@ -5,6 +5,33 @@ using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
+ /// FIFO word.
+ /// </summary>
+ struct FifoWord
+ {
+ /// <summary>
+ /// GPU virtual address where the word is located in memory.
+ /// </summary>
+ public ulong GpuVa { get; }
+
+ /// <summary>
+ /// Word value.
+ /// </summary>
+ public int Word { get; }
+
+ /// <summary>
+ /// Creates a new FIFO word.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the word is located in memory</param>
+ /// <param name="word">Word value</param>
+ public FifoWord(ulong gpuVa, int word)
+ {
+ GpuVa = gpuVa;
+ Word = word;
+ }
+ }
+
+ /// <summary>
/// Macro Execution Engine interface.
/// </summary>
interface IMacroEE
@@ -12,7 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
- Queue<int> Fifo { get; }
+ Queue<FifoWord> Fifo { get; }
/// <summary>
/// Should execute the GPU Macro code being passed.
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
index 1a79afb9..9d1dbc8f 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs
@@ -1,4 +1,6 @@
using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using Ryujinx.Graphics.Gpu.Memory;
using System;
namespace Ryujinx.Graphics.Gpu.Engine.MME
@@ -13,10 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// </summary>
public int Position { get; }
+ private IMacroEE _executionEngine;
private bool _executionPending;
private int _argument;
-
- private readonly IMacroEE _executionEngine;
+ private MacroHLEFunctionName _hleFunction;
/// <summary>
/// Creates a new instance of the GPU cached macro program.
@@ -26,28 +28,47 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
{
Position = position;
+ _executionEngine = null;
_executionPending = false;
_argument = 0;
-
- if (GraphicsConfig.EnableMacroJit)
- {
- _executionEngine = new MacroJit();
- }
- else
- {
- _executionEngine = new MacroInterpreter();
- }
+ _hleFunction = MacroHLEFunctionName.None;
}
/// <summary>
/// Sets the first argument for the macro call.
/// </summary>
+ /// <param name="context">GPU context where the macro code is being executed</param>
+ /// <param name="processor">GPU GP FIFO command processor</param>
+ /// <param name="code">Code to be executed</param>
/// <param name="argument">First argument</param>
- public void StartExecution(int argument)
+ public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan<int> code, int argument)
{
_argument = argument;
_executionPending = true;
+
+ if (_executionEngine == null)
+ {
+ if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction))
+ {
+ _executionEngine = new MacroHLE(processor, _hleFunction);
+ }
+ else if (GraphicsConfig.EnableMacroJit)
+ {
+ _executionEngine = new MacroJit();
+ }
+ else
+ {
+ _executionEngine = new MacroInterpreter();
+ }
+ }
+
+ if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+ {
+ // We don't consume the parameter buffer value, so we don't need to flush it.
+ // Doing so improves performance if the value was written by a GPU shader.
+ context.GPFifo.SetFlushSkips(2);
+ }
}
/// <summary>
@@ -60,7 +81,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
if (_executionPending)
{
_executionPending = false;
-
_executionEngine?.Execute(code.Slice(Position), state, _argument);
}
}
@@ -68,10 +88,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Pushes an argument to the macro call argument FIFO.
/// </summary>
+ /// <param name="gpuVa">GPU virtual address where the command word is located</param>
/// <param name="argument">Argument to be pushed</param>
- public void PushArgument(int argument)
+ public void PushArgument(ulong gpuVa, int argument)
{
- _executionEngine?.Fifo.Enqueue(argument);
+ _executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument));
}
}
}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
new file mode 100644
index 00000000..77b44e81
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs
@@ -0,0 +1,142 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.GPFifo;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Macro High-level emulation.
+ /// </summary>
+ class MacroHLE : IMacroEE
+ {
+ private readonly GPFifoProcessor _processor;
+ private readonly MacroHLEFunctionName _functionName;
+
+ /// <summary>
+ /// Arguments FIFO.
+ /// </summary>
+ public Queue<FifoWord> Fifo { get; }
+
+ /// <summary>
+ /// Creates a new instance of the HLE macro handler.
+ /// </summary>
+ /// <param name="processor">
+ /// GPU GP FIFO command processor, used to reach the GPU memory manager and the 3D engine
+ /// </param>
+ /// <param name="functionName">Name of the HLE macro function to be called</param>
+ public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName)
+ {
+ _processor = processor;
+ _functionName = functionName;
+
+ Fifo = new Queue<FifoWord>();
+ }
+
+ /// <summary>
+ /// Executes a macro program until it exits.
+ /// </summary>
+ /// <param name="code">Code of the program to execute</param>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">Optional argument passed to the program, 0 if not used</param>
+ public void Execute(ReadOnlySpan<int> code, IDeviceState state, int arg0)
+ {
+ switch (_functionName)
+ {
+ case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
+ MultiDrawElementsIndirectCount(state, arg0);
+ break;
+ default:
+ throw new NotImplementedException(_functionName.ToString());
+ }
+ }
+
+ /// <summary>
+ /// Performs an indirect multi-draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="state">GPU state at the time of the call</param>
+ /// <param name="arg0">First argument of the call</param>
+ private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0)
+ {
+ int arg1 = FetchParam().Word;
+ int arg2 = FetchParam().Word;
+ int arg3 = FetchParam().Word;
+
+ int startOffset = arg0;
+ int endOffset = arg1;
+ var topology = (PrimitiveTopology)arg2;
+ int paddingWords = arg3;
+ int maxDrawCount = endOffset - startOffset;
+ int stride = paddingWords * 4 + 0x14;
+ int indirectBufferSize = maxDrawCount * stride;
+
+ ulong parameterBufferGpuVa = FetchParam().GpuVa;
+ ulong indirectBufferGpuVa = 0;
+
+ int indexCount = 0;
+
+ for (int i = 0; i < maxDrawCount; i++)
+ {
+ var count = FetchParam();
+ var instanceCount = FetchParam();
+ var firstIndex = FetchParam();
+ var baseVertex = FetchParam();
+ var baseInstance = FetchParam();
+
+ if (i == 0)
+ {
+ indirectBufferGpuVa = count.GpuVa;
+ }
+
+ indexCount = Math.Max(indexCount, count.Word + firstIndex.Word);
+
+ if (i != maxDrawCount - 1)
+ {
+ for (int j = 0; j < paddingWords; j++)
+ {
+ FetchParam();
+ }
+ }
+ }
+
+ // It should be empty at this point, but clear it just to be safe.
+ Fifo.Clear();
+
+ var parameterBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, parameterBufferGpuVa, 4);
+ var indirectBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, indirectBufferGpuVa, (ulong)indirectBufferSize);
+
+ _processor.ThreedClass.MultiDrawIndirectCount(indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
+
+ /// <summary>
+ /// Fetches an argument from the arguments FIFO.
+ /// </summary>
+ /// <returns>The call argument, or a 0 value with null address if the FIFO is empty</returns>
+ private FifoWord FetchParam()
+ {
+ if (!Fifo.TryDequeue(out var value))
+ {
+ Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
+
+ return new FifoWord(0UL, 0);
+ }
+
+ return value;
+ }
+
+ /// <summary>
+ /// Performs a GPU method call.
+ /// </summary>
+ /// <param name="state">Current GPU state</param>
+ /// <param name="methAddr">Address, in words, of the method</param>
+ /// <param name="value">Call argument</param>
+ private static void Send(IDeviceState state, int methAddr, int value)
+ {
+ state.Write(methAddr * 4, value);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
new file mode 100644
index 00000000..60354a9b
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Name of the High-level implementation of a Macro function.
+ /// </summary>
+ enum MacroHLEFunctionName
+ {
+ None,
+ MultiDrawElementsIndirectCount
+ }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
new file mode 100644
index 00000000..77d041ad
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs
@@ -0,0 +1,89 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine.MME
+{
+ /// <summary>
+ /// Table with information about High-level implementations of GPU Macro code.
+ /// </summary>
+ static class MacroHLETable
+ {
+ /// <summary>
+ /// Macro High-level implementation table entry.
+ /// </summary>
+ struct TableEntry
+ {
+ /// <summary>
+ /// Name of the Macro function.
+ /// </summary>
+ public MacroHLEFunctionName Name { get; }
+
+ /// <summary>
+ /// Hash of the original binary Macro function code.
+ /// </summary>
+ public Hash128 Hash { get; }
+
+ /// <summary>
+ /// Size (in bytes) of the original binary Macro function code.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Creates a new table entry.
+ /// </summary>
+ /// <param name="name">Name of the Macro function</param>
+ /// <param name="hash">Hash of the original binary Macro function code</param>
+ /// <param name="length">Size (in bytes) of the original binary Macro function code</param>
+ public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length)
+ {
+ Name = name;
+ Hash = hash;
+ Length = length;
+ }
+ }
+
+ private static readonly TableEntry[] Table = new TableEntry[]
+ {
+ new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C)
+ };
+
+ private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
+ {
+ if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
+ {
+ return caps.SupportsIndirectParameters;
+ }
+
+ return false;
+ }
+
+ /// <summary>
+ /// Checks if there's a fast, High-level implementation of the specified Macro code available.
+ /// </summary>
+ /// <param name="code">Macro code to be checked; table entries longer than this code are never matched</param>
+ /// <param name="caps">Renderer capabilities to check for this macro HLE support</param>
+ /// <param name="name">Name of the function if an implementation is available and supported, otherwise <see cref="MacroHLEFunctionName.None"/></param>
+ /// <returns>True if there is an implementation available and supported, false otherwise</returns>
+ public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name)
+ {
+ var mc = MemoryMarshal.Cast<int, byte>(code);
+
+ for (int i = 0; i < Table.Length; i++)
+ {
+ ref var entry = ref Table[i];
+ // Skip entries that would slice out of bounds, and never report an unsupported function by name.
+ bool matches = mc.Length >= entry.Length && XXHash128.ComputeHash(mc.Slice(0, entry.Length)) == entry.Hash;
+ if (matches && IsMacroHLESupported(caps, entry.Name))
+ {
+ name = entry.Name;
+ return true;
+ }
+ }
+
+ name = MacroHLEFunctionName.None;
+ return false;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
index 0173a7fb..df6ee040 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs
@@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
- public Queue<int> Fifo { get; }
+ public Queue<FifoWord> Fifo { get; }
private int[] _gprs;
@@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// </summary>
public MacroInterpreter()
{
- Fifo = new Queue<int>();
+ Fifo = new Queue<FifoWord>();
_gprs = new int[8];
}
@@ -364,14 +364,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <returns>The call argument, or 0 if the FIFO is empty</returns>
private int FetchParam()
{
- if (!Fifo.TryDequeue(out int value))
+ if (!Fifo.TryDequeue(out var value))
{
Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
return 0;
}
- return value;
+ return value.Word;
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
index f0393dd1..4077f74e 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs
@@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
- public Queue<int> Fifo => _context.Fifo;
+ public Queue<FifoWord> Fifo => _context.Fifo;
private MacroJitCompiler.MacroExecute _execute;
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
index aa31c9ee..52c2a11b 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs
@@ -12,22 +12,22 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <summary>
/// Arguments FIFO.
/// </summary>
- public Queue<int> Fifo { get; } = new Queue<int>();
+ public Queue<FifoWord> Fifo { get; } = new Queue<FifoWord>();
/// <summary>
/// Fetches a arguments from the arguments FIFO.
/// </summary>
- /// <returns></returns>
+ /// <returns>The call argument, or 0 if the FIFO is empty</returns>
public int FetchParam()
{
- if (!Fifo.TryDequeue(out int value))
+ if (!Fifo.TryDequeue(out var value))
{
Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument.");
return 0;
}
- return value;
+ return value.Word;
}
/// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
index d58f175d..2443917c 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
@@ -26,6 +26,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
private int _instanceIndex;
+ private const int IndexBufferCountMethodOffset = 0x5f8;
+
/// <summary>
/// Creates a new instance of the draw manager.
/// </summary>
@@ -305,6 +307,63 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
}
/// <summary>
+ /// Performs an indirect multi-draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="engine">3D engine where this method is being called</param>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="indirectBuffer">GPU buffer with the draw parameters, such as count, first index, etc</param>
+ /// <param name="parameterBuffer">GPU buffer with the draw count</param>
+ /// <param name="maxDrawCount">Maximum number of draws that can be made</param>
+ /// <param name="stride">Distance in bytes between each element on the <paramref name="indirectBuffer"/> array</param>
+ public void MultiDrawIndirectCount(
+ ThreedClass engine,
+ int indexCount,
+ PrimitiveTopology topology,
+ BufferRange indirectBuffer,
+ BufferRange parameterBuffer,
+ int maxDrawCount,
+ int stride)
+ {
+ engine.Write(IndexBufferCountMethodOffset * 4, indexCount);
+
+ _context.Renderer.Pipeline.SetPrimitiveTopology(topology);
+ _drawState.Topology = topology;
+
+ ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable(
+ _context,
+ _channel.MemoryManager,
+ _state.State.RenderEnableAddress,
+ _state.State.RenderEnableCondition);
+
+ if (renderEnable == ConditionalRenderEnabled.False)
+ {
+ _drawState.DrawIndexed = false;
+ return;
+ }
+
+ _drawState.FirstIndex = _state.State.IndexBufferState.First;
+ _drawState.IndexCount = indexCount;
+
+ engine.UpdateState();
+
+ if (_drawState.DrawIndexed)
+ {
+ _context.Renderer.Pipeline.MultiDrawIndexedIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
+ else
+ {
+ _context.Renderer.Pipeline.MultiDrawIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
+
+ _drawState.DrawIndexed = false;
+
+ if (renderEnable == ConditionalRenderEnabled.Host)
+ {
+ _context.Renderer.Pipeline.EndHostConditionalRendering();
+ }
+ }
+
+ /// <summary>
/// Perform any deferred draws.
/// This is used for instanced draws.
/// Since each instance is a separate draw, we defer the draw and accumulate the instance count.
diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
index 37c8fec2..3d02af96 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs
@@ -1,4 +1,5 @@
using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using System;
using System.Collections.Generic;
@@ -433,5 +434,25 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
return 0;
}
+
+ /// <summary>
+ /// Performs an indirect multi-draw, with parameters from a GPU buffer.
+ /// </summary>
+ /// <param name="indexCount">Index Buffer Count</param>
+ /// <param name="topology">Primitive topology</param>
+ /// <param name="indirectBuffer">GPU buffer with the draw parameters, such as count, first index, etc</param>
+ /// <param name="parameterBuffer">GPU buffer with the draw count</param>
+ /// <param name="maxDrawCount">Maximum number of draws that can be made</param>
+ /// <param name="stride">Distance in bytes between each element on the <paramref name="indirectBuffer"/> array</param>
+ public void MultiDrawIndirectCount(
+ int indexCount,
+ PrimitiveTopology topology,
+ BufferRange indirectBuffer,
+ BufferRange parameterBuffer,
+ int maxDrawCount,
+ int stride)
+ {
+ _drawManager.MultiDrawIndirectCount(this, indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride);
+ }
}
}
diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
index 7ef102e2..d58b8da7 100644
--- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
+++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
@@ -34,6 +34,11 @@ namespace Ryujinx.Graphics.Gpu
public static bool EnableMacroJit = true;
/// <summary>
+ /// Enables or disables high-level emulation of common GPU Macro code.
+ /// </summary>
+ public static bool EnableMacroHLE = true;
+
+ /// <summary>
/// Title id of the current running game.
/// Used by the shader cache.
/// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
index 58dd838e..63d22150 100644
--- a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs
@@ -298,6 +298,18 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
+ /// Gets a buffer sub-range for a given GPU memory range.
+ /// </summary>
+ /// <param name="memoryManager">GPU memory manager where the buffer is mapped</param>
+ /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
+ /// <param name="size">Size in bytes of the buffer</param>
+ /// <returns>The buffer sub-range for the given range</returns>
+ public BufferRange GetGpuBufferRange(MemoryManager memoryManager, ulong gpuVa, ulong size)
+ {
+ return GetBufferRange(TranslateAndCreateBuffer(memoryManager, gpuVa, size), size);
+ }
+
+ /// <summary>
/// Gets a buffer sub-range starting at a given memory address.
/// </summary>
/// <param name="address">Start address of the memory range</param>
diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
index 44365ca7..dd917b7b 100644
--- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
+++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs
@@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.OpenGL
private static readonly Lazy<bool> _supportsSeamlessCubemapPerTexture = new Lazy<bool>(() => HasExtension("GL_ARB_seamless_cubemap_per_texture"));
private static readonly Lazy<bool> _supportsTextureShadowLod = new Lazy<bool>(() => HasExtension("GL_EXT_texture_shadow_lod"));
private static readonly Lazy<bool> _supportsViewportSwizzle = new Lazy<bool>(() => HasExtension("GL_NV_viewport_swizzle"));
+ private static readonly Lazy<bool> _supportsIndirectParameters = new Lazy<bool>(() => HasExtension("GL_ARB_indirect_parameters"));
private static readonly Lazy<int> _maximumComputeSharedMemorySize = new Lazy<int>(() => GetLimit(All.MaxComputeSharedMemorySize));
private static readonly Lazy<int> _storageBufferOffsetAlignment = new Lazy<int>(() => GetLimit(All.ShaderStorageBufferOffsetAlignment));
@@ -46,6 +47,7 @@ namespace Ryujinx.Graphics.OpenGL
public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value;
public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value;
public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value;
+ public static bool SupportsIndirectParameters => _supportsIndirectParameters.Value; // Result of the lazy HasExtension("GL_ARB_indirect_parameters") check
public static bool SupportsMismatchingViewFormat => _gpuVendor.Value != GpuVendor.AmdWindows && _gpuVendor.Value != GpuVendor.IntelWindows;
public static bool SupportsNonConstantTextureOffset => _gpuVendor.Value == GpuVendor.Nvidia;
diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs
index be526fa9..24dd97f9 100644
--- a/Ryujinx.Graphics.OpenGL/Pipeline.cs
+++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs
@@ -166,6 +166,11 @@ namespace Ryujinx.Graphics.OpenGL
}
}
+ public void CommandBufferBarrier() // Inserts a GL memory barrier for buffer-sourced command data
+ {
+ GL.MemoryBarrier(MemoryBarrierFlags.CommandBarrierBit); // GL_COMMAND_BARRIER_BIT: per the OpenGL reference, indirect draw/dispatch commands issued after this see command data written by shaders before it
+ }
+
public void CopyBuffer(BufferHandle source, BufferHandle destination, int srcOffset, int dstOffset, int size)
{
Buffer.Copy(source, destination, srcOffset, dstOffset, size);
@@ -543,6 +548,57 @@ namespace Ryujinx.Graphics.OpenGL
_tfEnabled = false;
}
+ public void MultiDrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) // Non-indexed multi-draw whose commands and draw count are both read from buffers
+ {
+ if (!_program.IsLinked) // Nothing is drawn when the current program is not linked
+ {
+ Logger.Debug?.Print(LogClass.Gpu, "Draw error, shader not linked.");
+ return;
+ }
+
+ PreDraw();
+
+ GL.BindBuffer((BufferTarget)All.DrawIndirectBuffer, indirectBuffer.Handle.ToInt32()); // Buffer the draw commands are read from
+ GL.BindBuffer((BufferTarget)All.ParameterBuffer, parameterBuffer.Handle.ToInt32()); // Buffer the draw count is read from
+
+ GL.MultiDrawArraysIndirectCount(
+ _primitiveType,
+ (IntPtr)indirectBuffer.Offset, // Offset into the bound indirect buffer, not a client pointer
+ (IntPtr)parameterBuffer.Offset, // Offset into the bound parameter buffer where the count is stored
+ maxDrawCount, // Per the GL reference, the number of draws executed is min(count in buffer, maxDrawCount)
+ stride); // Distance between consecutive commands in the indirect buffer
+
+ PostDraw();
+ }
+
+ public void MultiDrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) // Indexed multi-draw whose commands and draw count are both read from buffers
+ {
+ if (!_program.IsLinked) // Nothing is drawn when the current program is not linked
+ {
+ Logger.Debug?.Print(LogClass.Gpu, "Draw error, shader not linked.");
+ return;
+ }
+
+ PreDraw();
+
+ _vertexArray.SetRangeOfIndexBuffer(); // Binds a scratch copy of the current index range (copied to offset 0); presumably needed because the call below takes no index buffer offset - confirm
+
+ GL.BindBuffer((BufferTarget)All.DrawIndirectBuffer, indirectBuffer.Handle.ToInt32()); // Buffer the draw commands are read from
+ GL.BindBuffer((BufferTarget)All.ParameterBuffer, parameterBuffer.Handle.ToInt32()); // Buffer the draw count is read from
+
+ GL.MultiDrawElementsIndirectCount(
+ _primitiveType,
+ (Version46)_elementsType, // Current index element type, cast to the enum type passed to this call
+ (IntPtr)indirectBuffer.Offset, // Offset into the bound indirect buffer, not a client pointer
+ (IntPtr)parameterBuffer.Offset, // Offset into the bound parameter buffer where the count is stored
+ maxDrawCount, // Per the GL reference, the number of draws executed is min(count in buffer, maxDrawCount)
+ stride); // Distance between consecutive commands in the indirect buffer
+
+ _vertexArray.RestoreIndexBuffer(); // Rebinds the index buffer that was set through SetIndexBuffer
+
+ PostDraw();
+ }
+ }
+
public void SetAlphaTest(bool enable, float reference, CompareOp op)
{
if (!enable)
@@ -741,7 +797,7 @@ namespace Ryujinx.Graphics.OpenGL
EnsureVertexArray();
- _vertexArray.SetIndexBuffer(buffer.Handle);
+ _vertexArray.SetIndexBuffer(buffer);
}
public void SetLogicOpState(bool enable, LogicalOp op)
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index 01072176..6b620bb8 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -107,6 +107,7 @@ namespace Ryujinx.Graphics.OpenGL
HwCapabilities.SupportsNonConstantTextureOffset,
HwCapabilities.SupportsTextureShadowLod,
HwCapabilities.SupportsViewportSwizzle,
+ HwCapabilities.SupportsIndirectParameters,
HwCapabilities.MaximumComputeSharedMemorySize,
HwCapabilities.MaximumSupportedAnisotropy,
HwCapabilities.StorageBufferOffsetAlignment);
diff --git a/Ryujinx.Graphics.OpenGL/VertexArray.cs b/Ryujinx.Graphics.OpenGL/VertexArray.cs
index f2fcba1f..bdf14481 100644
--- a/Ryujinx.Graphics.OpenGL/VertexArray.cs
+++ b/Ryujinx.Graphics.OpenGL/VertexArray.cs
@@ -20,12 +20,17 @@ namespace Ryujinx.Graphics.OpenGL
private uint _vertexAttribsInUse;
private uint _vertexBuffersInUse;
+ private BufferRange _indexBuffer; // Last range given to SetIndexBuffer; read by SetRangeOfIndexBuffer and RestoreIndexBuffer
+ private BufferHandle _tempIndexBuffer; // Scratch buffer that SetRangeOfIndexBuffer resizes, fills and binds
+
public VertexArray()
{
Handle = GL.GenVertexArray();
_vertexAttribs = new VertexAttribDescriptor[Constants.MaxVertexAttribs];
_vertexBuffers = new VertexBufferDescriptor[Constants.MaxVertexBuffers];
+
+ _tempIndexBuffer = Buffer.Create(); // Scratch buffer used by SetRangeOfIndexBuffer. NOTE(review): no release of this buffer is visible in this chunk - confirm it is deleted when the vertex array is disposed
}
public void Bind()
@@ -120,9 +125,22 @@ namespace Ryujinx.Graphics.OpenGL
}
}
- public void SetIndexBuffer(BufferHandle buffer)
+ public void SetIndexBuffer(BufferRange range) // Records the index buffer range and binds its buffer as the element array buffer
+ {
+ _indexBuffer = range; // Kept so SetRangeOfIndexBuffer / RestoreIndexBuffer can refer to it later
+ GL.BindBuffer(BufferTarget.ElementArrayBuffer, range.Handle.ToInt32()); // Only the handle is bound; the range's offset and size are not applied here
+ }
+
+ public void SetRangeOfIndexBuffer() // Binds a scratch buffer holding only the recorded index range, starting at offset 0
+ {
+ Buffer.Resize(_tempIndexBuffer, _indexBuffer.Size); // Scratch buffer is sized to the recorded range on every call
+ Buffer.Copy(_indexBuffer.Handle, _tempIndexBuffer, _indexBuffer.Offset, 0, _indexBuffer.Size); // Copies [Offset, Offset + Size) of the index buffer to the start of the scratch buffer
+ GL.BindBuffer(BufferTarget.ElementArrayBuffer, _tempIndexBuffer.ToInt32()); // Stays bound until RestoreIndexBuffer (or another SetIndexBuffer) rebinds
+ }
+
+ public void RestoreIndexBuffer() // Rebinds the buffer recorded by SetIndexBuffer as the element array buffer
{
- GL.BindBuffer(BufferTarget.ElementArrayBuffer, buffer.ToInt32());
+ GL.BindBuffer(BufferTarget.ElementArrayBuffer, _indexBuffer.Handle.ToInt32()); // Undoes the scratch-buffer binding made by SetRangeOfIndexBuffer
}
public void Validate()