using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Gpu.Engine.Compute
{
///
/// Represents a compute engine class.
///
class ComputeClass : IDeviceState
{
private readonly GpuContext _context;
private readonly GpuChannel _channel;
private readonly ThreedClass _3dEngine;
private readonly DeviceState _state;
private readonly InlineToMemoryClass _i2mClass;
///
/// Creates a new instance of the compute engine class.
///
/// GPU context
/// GPU channel
/// 3D engine
public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
{
_context = context;
_channel = channel;
_3dEngine = threedEngine;
_state = new DeviceState(new Dictionary
{
{ nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
{ nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
{ nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) }
});
_i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
}
///
/// Reads data from the class registers.
///
/// Register byte offset
/// Data at the specified offset
public int Read(int offset) => _state.Read(offset);
///
/// Writes data to the class registers.
///
/// Register byte offset
/// Data to be written
public void Write(int offset, int data) => _state.Write(offset, data);
///
/// Launches the Inline-to-Memory DMA copy operation.
///
/// Method call argument
private void LaunchDma(int argument)
{
_i2mClass.LaunchDma(ref Unsafe.As(ref _state.State), argument);
}
///
/// Pushes a block of data to the Inline-to-Memory engine.
///
/// Data to push
public void LoadInlineData(ReadOnlySpan data)
{
_i2mClass.LoadInlineData(data);
}
///
/// Pushes a word of data to the Inline-to-Memory engine.
///
/// Method call argument
private void LoadInlineData(int argument)
{
_i2mClass.LoadInlineData(argument);
}
///
/// Performs the compute dispatch operation.
///
/// Method call argument
private void SendSignalingPcasB(int argument)
{
var memoryManager = _channel.MemoryManager;
// Since we're going to change the state, make sure any pending instanced draws are done.
_3dEngine.PerformDeferredDraws();
// Make sure all pending uniform buffer data is written to memory.
_3dEngine.FlushUboDirty();
uint qmdAddress = _state.State.SendPcasA;
var qmd = _channel.MemoryManager.Read((ulong)qmdAddress << 8);
ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
shaderGpuVa += (uint)qmd.ProgramOffset;
int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
{
if (!qmd.ConstantBufferValid(index))
{
continue;
}
ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
ulong size = (ulong)qmd.ConstantBufferSize(index);
_channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
}
ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
GpuChannelPoolState poolState = new GpuChannelPoolState(
texturePoolGpuVa,
_state.State.SetTexHeaderPoolCMaximumIndex,
_state.State.SetBindlessTextureConstantBufferSlotSelect);
GpuChannelComputeState computeState = new GpuChannelComputeState(
qmd.CtaThreadDimension0,
qmd.CtaThreadDimension1,
qmd.CtaThreadDimension2,
localMemorySize,
sharedMemorySize,
_channel.BufferManager.HasUnalignedStorageBuffers);
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
_channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
_channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
_channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
ShaderProgramInfo info = cs.Shaders[0].Info;
bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(sb.SbCbSlot);
sbDescAddress += (ulong)sb.SbCbOffset * 4;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read(sbDescAddress);
uint size;
if (sb.SbCbSlot == Constants.DriverReservedUniformBuffer)
{
// Only trust the SbDescriptor size if it comes from slot 0.
size = (uint)sbDescriptor.Size;
}
else
{
// TODO: Use full mapped size and somehow speed up buffer sync.
size = (uint)_channel.MemoryManager.GetMappedSize(sbDescriptor.PackAddress(), Constants.MaxUnknownStorageSize);
}
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), size, sb.Flags);
}
if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
{
// Refetch the shader, as assumptions about storage buffer alignment have changed.
cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
info = cs.Shaders[0].Info;
}
_channel.BufferManager.SetComputeBufferBindings(cs.Bindings);
_channel.TextureManager.SetComputeBindings(cs.Bindings);
// Should never return false for mismatching spec state, since the shader was fetched above.
_channel.TextureManager.CommitComputeBindings(cs.SpecializationState);
_channel.BufferManager.CommitComputeBindings();
_context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
_3dEngine.ForceShaderUpdate();
}
}
}