using Ryujinx.Common.Configuration;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Memory cache of shader code.
    /// </summary>
    class ShaderCache : IDisposable
    {
        /// <summary>
        /// Default flags used on the shader translation process.
        /// </summary>
        public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;

        /// <summary>
        /// Result of translating a single shader stage: the stage data kept on the cache and the translated program.
        /// </summary>
        private readonly struct TranslatedShader
        {
            public readonly CachedShaderStage Shader;
            public readonly ShaderProgram Program;

            public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
            {
                Shader = shader;
                Program = program;
            }
        }

        /// <summary>
        /// Result of translating a "Vertex A" + "Vertex B" pair: both cached stages and the single translated program.
        /// </summary>
        private readonly struct TranslatedShaderVertexPair
        {
            public readonly CachedShaderStage VertexA;
            public readonly CachedShaderStage VertexB;
            public readonly ShaderProgram Program;

            public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
            {
                VertexA = vertexA;
                VertexB = vertexB;
                Program = program;
            }
        }

        private readonly GpuContext _context;

        private readonly ShaderDumper _dumper;

        // Compute programs, keyed by the GPU virtual address of the shader code.
        private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;

        // Graphics programs, keyed by the GPU virtual addresses of all stages.
        private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;

        /// <summary>
        /// Program waiting on the queue to be written to the disk cache.
        /// </summary>
        private readonly struct ProgramToSave
        {
            public readonly CachedShaderProgram CachedProgram;
            public readonly IProgram HostProgram;
            public readonly byte[] BinaryCode;

            public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode)
            {
                CachedProgram = cachedProgram;
                HostProgram = hostProgram;
                BinaryCode = binaryCode;
            }
        }

        // Only assigned on the constructor, so it can be readonly like the other collections.
        private readonly Queue<ProgramToSave> _programsToSaveQueue;

        private readonly ComputeShaderCacheHashTable _computeShaderCache;
        private readonly ShaderCacheHashTable _graphicsShaderCache;
        private readonly DiskCacheHostStorage _diskCacheHostStorage;

        // Null when the disk cache is disabled (see constructor).
        private readonly BackgroundDiskCacheWriter _cacheWriter;

        /// <summary>
        /// Event for signalling shader cache loading progress.
        /// </summary>
        public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;

        /// <summary>
        /// Creates a new instance of the shader cache.
        /// </summary>
        /// <param name="context">GPU context that the shader cache belongs to</param>
        public ShaderCache(GpuContext context)
        {
            _context = context;

            _dumper = new ShaderDumper();

            _cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
            _gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();

            _programsToSaveQueue = new Queue<ProgramToSave>();

            string diskCacheTitleId = GetDiskCachePath();

            _computeShaderCache = new ComputeShaderCacheHashTable();
            _graphicsShaderCache = new ShaderCacheHashTable();
            _diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId);

            // The background writer is only created when the disk cache is enabled.
            if (_diskCacheHostStorage.CacheEnabled)
            {
                _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage);
            }
        }

        /// <summary>
        /// Gets the path where the disk cache for the current application is stored.
        /// </summary>
        /// <returns>
        /// The disk cache path, or null if the shader cache is disabled on the configuration or no title ID is set
        /// </returns>
        private static string GetDiskCachePath()
        {
            return GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null
                ? Path.Combine(AppDataManager.GamesDirPath, GraphicsConfig.TitleId, "cache", "shader")
                : null;
        }

        /// <summary>
        /// Processes the queue of shaders that must save their binaries to the disk cache.
        /// </summary>
        public void ProcessShaderCacheQueue()
        {
            // Check to see if the binaries for previously compiled shaders are ready, and save them out.
            while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave))
            {
                ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false);

                if (result != ProgramLinkStatus.Incomplete)
                {
                    if (result == ProgramLinkStatus.Success)
                    {
                        // Use the binary packed at enqueue time when there is one,
                        // otherwise request the binary from the host program.
                        _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary());
                    }

                    _programsToSaveQueue.Dequeue();
                }
                else
                {
                    // The program at the front of the queue is still incomplete, stop here and retry on a later call.
                    break;
                }
            }
        }

        /// <summary>
        /// Initialize the cache.
        /// </summary>
        /// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
        internal void Initialize(CancellationToken cancellationToken)
        {
            if (_diskCacheHostStorage.CacheEnabled)
            {
                ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
                    _context,
                    _graphicsShaderCache,
                    _computeShaderCache,
                    _diskCacheHostStorage,
                    cancellationToken,
                    ShaderCacheStateUpdate);

                loader.LoadShaders();

                int errorCount = loader.ErrorCount;
                if (errorCount != 0)
                {
                    Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache.");
                }
            }
        }

        /// <summary>
        /// Shader cache state update handler.
        /// </summary>
        /// <param name="state">Current state of the shader cache load process</param>
        /// <param name="current">Number of the current shader being processed</param>
        /// <param name="total">Total number of shaders to process</param>
        private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
        {
            ShaderCacheStateChanged?.Invoke(state, current, total);
        }

        /// <summary>
        /// Gets a compute shader from the cache.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="computeState">Compute engine state</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>Compiled compute shader code</returns>
        public CachedShaderProgram GetComputeShader(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelComputeState computeState,
            ulong gpuVa)
        {
            // First try the per-address table, validating that the code in memory and the state still match.
            if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
            {
                return cpShader;
            }

            // Then try the hash table; on a miss it may still hand back the guest code it read.
            if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
            {
                _cpPrograms[gpuVa] = cpShader;
                return cpShader;
            }

            ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState);
            GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState);
            GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState);

            TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa);
            TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);

            ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) };
            ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info);
            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info);

            cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);

            _computeShaderCache.Add(cpShader);
            EnqueueProgramToSave(cpShader, hostProgram, shaderSourcesArray);
            _cpPrograms[gpuVa] = cpShader;

            return cpShader;
        }

        /// <summary>
        /// Updates the shader pipeline state based on the current GPU state.
        /// </summary>
        /// <param name="state">Current GPU 3D engine state</param>
        /// <param name="pipeline">Shader pipeline state to be updated</param>
        /// <param name="graphicsState">Current graphics state</param>
        /// <param name="channel">Current GPU channel</param>
        private void UpdatePipelineInfo(
            ref ThreedClassState state,
            ref ProgramPipelineState pipeline,
            GpuChannelGraphicsState graphicsState,
            GpuChannel channel)
        {
            channel.TextureManager.UpdateRenderTargets();

            var rtControl = state.RtControl;
            var msaaMode = state.RtMsaaMode;

            pipeline.SamplesCount = msaaMode.SamplesInX() * msaaMode.SamplesInY();

            int count = rtControl.UnpackCount();

            for (int index = 0; index < Constants.TotalRenderTargets; index++)
            {
                int rtIndex = rtControl.UnpackPermutationIndex(index);

                var colorState = state.RtColorState[rtIndex];

                // Targets past the enabled count, or with no format/size, are disabled
                // and get a placeholder format.
                if (index >= count || colorState.Format == 0 || colorState.WidthOrStride == 0)
                {
                    pipeline.AttachmentEnable[index] = false;
                    pipeline.AttachmentFormats[index] = Format.R8G8B8A8Unorm;
                }
                else
                {
                    pipeline.AttachmentEnable[index] = true;
                    pipeline.AttachmentFormats[index] = colorState.Format.Convert().Format;
                }
            }

            pipeline.DepthStencilEnable = state.RtDepthStencilEnable;
            pipeline.DepthStencilFormat = pipeline.DepthStencilEnable ? state.RtDepthStencilState.Format.Convert().Format : Format.D24UnormS8Uint;

            pipeline.VertexBufferCount = Constants.TotalVertexBuffers;
            pipeline.Topology = graphicsState.Topology;
        }

        /// <summary>
        /// Gets a graphics shader program from the shader cache.
        /// This includes all the specified shader stages.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="state">GPU state</param>
        /// <param name="pipeline">Pipeline state</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="graphicsState">3D engine state</param>
        /// <param name="addresses">Addresses of the shaders for each stage</param>
        /// <returns>Compiled graphics shader code</returns>
        public CachedShaderProgram GetGraphicsShader(
            ref ThreedClassState state,
            ref ProgramPipelineState pipeline,
            GpuChannel channel,
            ref GpuChannelPoolState poolState,
            ref GpuChannelGraphicsState graphicsState,
            ShaderAddresses addresses)
        {
            // First try the per-address table, validating that the code in memory and the state still match.
            if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, ref poolState, ref graphicsState, gpShaders, addresses))
            {
                return gpShaders;
            }

            // Then try the hash table; on a miss it may still hand back the guest code it read.
            if (_graphicsShaderCache.TryFind(channel, ref poolState, ref graphicsState, addresses, out gpShaders, out var cachedGuestCode))
            {
                _gpPrograms[addresses] = gpShaders;
                return gpShaders;
            }

            TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);

            UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel);

            ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors);
            GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);

            ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();

            // Slot 0 holds the optional "Vertex A" context, slot (stageIndex + 1) holds each regular stage.
            TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
            TranslatorContext nextStage = null;

            TargetApi api = _context.Capabilities.Api;

            // Decode from the last stage to the first, so that each stage can be linked to the one that follows it.
            for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
            {
                ulong gpuVa = addressesSpan[stageIndex + 1];

                if (gpuVa != 0)
                {
                    GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
                    TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa);

                    if (nextStage != null)
                    {
                        currentStage.SetNextStage(nextStage);
                    }

                    if (stageIndex == 0 && addresses.VertexA != 0)
                    {
                        translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
                    }

                    translatorContexts[stageIndex + 1] = currentStage;
                    nextStage = currentStage;
                }
            }

            if (!_context.Capabilities.SupportsGeometryShader)
            {
                TryRemoveGeometryStage(translatorContexts);
            }

            CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
            List<ShaderSource> shaderSources = new List<ShaderSource>();

            TranslatorContext previousStage = null;

            ShaderInfoBuilder infoBuilder = new ShaderInfoBuilder(_context, transformFeedbackDescriptors != null);

            for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
            {
                TranslatorContext currentStage = translatorContexts[stageIndex + 1];

                if (currentStage != null)
                {
                    ShaderProgram program;

                    if (stageIndex == 0 && translatorContexts[0] != null)
                    {
                        // Both vertex shaders are present, they are translated together into one program.
                        TranslatedShaderVertexPair translatedShader = TranslateShader(
                            _dumper,
                            channel,
                            currentStage,
                            translatorContexts[0],
                            cachedGuestCode.VertexACode,
                            cachedGuestCode.VertexBCode);

                        shaders[0] = translatedShader.VertexA;
                        shaders[1] = translatedShader.VertexB;
                        program = translatedShader.Program;
                    }
                    else
                    {
                        byte[] code = cachedGuestCode.GetByIndex(stageIndex);

                        TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);

                        shaders[stageIndex + 1] = translatedShader.Shader;
                        program = translatedShader.Program;
                    }

                    if (program != null)
                    {
                        shaderSources.Add(CreateShaderSource(program));
                        infoBuilder.AddStageInfo(program.Info);
                    }

                    previousStage = currentStage;
                }
                else if (
                    previousStage != null &&
                    previousStage.LayerOutputWritten &&
                    stageIndex == 3 &&
                    !_context.Capabilities.SupportsLayerVertexTessellation)
                {
                    // Stage index 3 is the geometry stage (see StageToStageIndex). There is no geometry shader here,
                    // but the previous stage writes the layer output and the host reports no support for that,
                    // so a passthrough geometry shader generated from the previous stage is added.
                    shaderSources.Add(CreateShaderSource(previousStage.GenerateGeometryPassthrough()));
                }
            }

            ShaderSource[] shaderSourcesArray = shaderSources.ToArray();

            ShaderInfo info = infoBuilder.Build(pipeline);

            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info);

            gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);

            _graphicsShaderCache.Add(gpShaders);
            EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
            _gpPrograms[addresses] = gpShaders;

            return gpShaders;
        }

        /// <summary>
        /// Tries to eliminate the geometry stage from the array of translator contexts.
        /// </summary>
        /// <param name="translatorContexts">Array of translator contexts</param>
        public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts)
        {
            // Slot 4 is the geometry stage (stage index 3, plus one for the "Vertex A" slot).
            if (translatorContexts[4] != null)
            {
                // We have a geometry shader, but geometry shaders are not supported.
                // Try to eliminate the geometry shader.

                ShaderProgramInfo info = translatorContexts[4].Translate().Info;

                if (info.Identification == ShaderIdentification.GeometryLayerPassthrough)
                {
                    // We managed to identify that this geometry shader is only used to set the output Layer value,
                    // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it.

                    for (int i = 3; i >= 1; i--)
                    {
                        if (translatorContexts[i] != null)
                        {
                            translatorContexts[i].SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute);
                            translatorContexts[i].SetLastInVertexPipeline();
                            break;
                        }
                    }

                    translatorContexts[4] = null;
                }
            }
        }

        /// <summary>
        /// Creates a shader source for use with the backend from a translated shader program.
        /// </summary>
        /// <param name="program">Translated shader program</param>
        /// <returns>Shader source</returns>
        public static ShaderSource CreateShaderSource(ShaderProgram program)
        {
            return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language);
        }

        /// <summary>
        /// Puts a program on the queue of programs to be saved on the disk cache.
        /// </summary>
        /// <remarks>
        /// This will not do anything if disk shader cache is disabled.
        /// </remarks>
        /// <param name="program">Cached shader program</param>
        /// <param name="hostProgram">Host program</param>
        /// <param name="sources">Source for each shader stage</param>
        private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources)
        {
            if (_diskCacheHostStorage.CacheEnabled)
            {
                // On Vulkan the sources are packed right away; on other APIs the binary is left null
                // and is requested from the host program when the queue is processed.
                byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(sources) : null;
                ProgramToSave programToSave = new ProgramToSave(program, hostProgram, binaryCode);

                _programsToSaveQueue.Enqueue(programToSave);
            }
        }

        /// <summary>
        /// Gets transform feedback state from the current GPU state.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <returns>
        /// One descriptor per transform feedback buffer (<see cref="Constants.TotalTransformFeedbackBuffers"/> entries),
        /// or null if transform feedback is disabled
        /// </returns>
        private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
        {
            bool tfEnable = state.TfEnable;
            if (!tfEnable)
            {
                return null;
            }

            TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];

            for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++)
            {
                var tf = state.TfState[i];

                descs[i] = new TransformFeedbackDescriptor(
                    tf.BufferIndex,
                    tf.Stride,
                    tf.VaryingsCount,
                    ref state.TfVaryingLocations[i]);
            }

            return descs;
        }

        /// <summary>
        /// Checks if compute shader code in memory is equal to the cached shader.
        /// </summary>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="poolState">GPU channel state to verify shader compatibility</param>
        /// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
        /// <param name="cpShader">Cached compute shader</param>
        /// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
        /// <returns>True if the code in memory is equal and the specialization state matches, false otherwise</returns>
        private static bool IsShaderEqual(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelComputeState computeState,
            CachedShaderProgram cpShader,
            ulong gpuVa)
        {
            if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
            {
                return cpShader.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true);
            }

            return false;
        }

        /// <summary>
        /// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
        /// </summary>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="poolState">GPU channel state to verify shader compatibility</param>
        /// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param>
        /// <param name="gpShaders">Cached graphics shaders</param>
        /// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
        /// <returns>True if the code of all stages is equal and the specialization state matches, false otherwise</returns>
        private static bool IsShaderEqual(
            GpuChannel channel,
            ref GpuChannelPoolState poolState,
            ref GpuChannelGraphicsState graphicsState,
            CachedShaderProgram gpShaders,
            ShaderAddresses addresses)
        {
            ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();

            for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++)
            {
                CachedShaderStage shader = gpShaders.Shaders[stageIndex];

                ulong gpuVa = addressesSpan[stageIndex];

                if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa))
                {
                    return false;
                }
            }

            // Slot 1 is the vertex ("Vertex B") stage; it may be absent, in which case draw parameters are not used.
            bool usesDrawParameters = gpShaders.Shaders[1]?.Info.UsesDrawParameters ?? false;

            return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true);
        }

        /// <summary>
        /// Checks if the code of the specified cached shader is equal to the code in memory.
        /// </summary>
        /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
        /// <param name="shader">Cached shader to compare with</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>True if the code is equal (or there is no cached shader to compare), false otherwise</returns>
        private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
        {
            // A missing stage has nothing to compare, so it is treated as equal.
            if (shader == null)
            {
                return true;
            }

            ReadOnlySpan<byte> memoryCode = memoryManager.GetSpanMapped(gpuVa, shader.Code.Length);

            return memoryCode.SequenceEqual(shader.Code);
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <param name="gpuAccessor">GPU state accessor</param>
        /// <param name="api">Graphics API that will be used with the shader</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>The generated translator context</returns>
        public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, TargetApi api, ulong gpuVa)
        {
            var options = CreateTranslationOptions(api, DefaultFlags | TranslationFlags.Compute);
            return Translator.CreateContext(gpuVa, gpuAccessor, options);
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <remarks>
        /// This only decodes one stage. The "Vertex A" stage is decoded with <see cref="TranslationFlags.VertexA"/>
        /// set on <paramref name="flags"/>, and is combined with "Vertex B" later, when the shader is translated.
        /// </remarks>
        /// <param name="gpuAccessor">GPU state accessor</param>
        /// <param name="api">Graphics API that will be used with the shader</param>
        /// <param name="flags">Flags that controls shader translation</param>
        /// <param name="gpuVa">GPU virtual address of the shader code</param>
        /// <returns>The generated translator context</returns>
        public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TargetApi api, TranslationFlags flags, ulong gpuVa)
        {
            var options = CreateTranslationOptions(api, flags);
            return Translator.CreateContext(gpuVa, gpuAccessor, options);
        }

        /// <summary>
        /// Translates a previously generated translator context to something that the host API accepts.
        /// </summary>
        /// <param name="dumper">Optional shader code dumper</param>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="currentStage">Translator context of the stage to be translated</param>
        /// <param name="vertexA">Translator context of the "Vertex A" shader that should be combined</param>
        /// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
        /// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
        /// <returns>Cached stages for both vertex shaders, and the single program translated from them</returns>
        private static TranslatedShaderVertexPair TranslateShader(
            ShaderDumper dumper,
            GpuChannel channel,
            TranslatorContext currentStage,
            TranslatorContext vertexA,
            byte[] codeA,
            byte[] codeB)
        {
            ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);

            var memoryManager = channel.MemoryManager;

            // Code not supplied by the caller is read from GPU memory.
            codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
            codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
            byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray();
            byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray();

            ShaderDumpPaths pathsA = default;
            ShaderDumpPaths pathsB = default;

            if (dumper != null)
            {
                pathsA = dumper.Dump(codeA, compute: false);
                pathsB = dumper.Dump(codeB, compute: false);
            }

            ShaderProgram program = currentStage.Translate(vertexA);

            pathsB.Prepend(program);
            pathsA.Prepend(program);

            // Only the Vertex B stage carries the program info, as there is a single combined program.
            CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA);
            CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB);

            return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program);
        }

        /// <summary>
        /// Translates a previously generated translator context to something that the host API accepts.
        /// </summary>
        /// <param name="dumper">Optional shader code dumper</param>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="context">Translator context of the stage to be translated</param>
        /// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
        /// <returns>Cached stage and the program translated from it</returns>
        private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
        {
            var memoryManager = channel.MemoryManager;

            ulong cb1DataAddress = context.Stage == ShaderStage.Compute
                ? channel.BufferManager.GetComputeUniformBufferAddress(1)
                : channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);

            byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray();

            // Code not supplied by the caller is read from GPU memory.
            code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();

            ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
            ShaderProgram program = context.Translate();

            paths.Prepend(program);

            return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program);
        }

        /// <summary>
        /// Gets the index of a stage from a <see cref="ShaderStage"/>.
        /// </summary>
        /// <param name="stage">Stage to get the index from</param>
        /// <returns>Stage index</returns>
        private static int StageToStageIndex(ShaderStage stage)
        {
            return stage switch
            {
                ShaderStage.TessellationControl => 1,
                ShaderStage.TessellationEvaluation => 2,
                ShaderStage.Geometry => 3,
                ShaderStage.Fragment => 4,
                _ => 0
            };
        }

        /// <summary>
        /// Creates shader translation options with the requested graphics API and flags.
        /// The shader language is chosen based on the current configuration and graphics API.
        /// </summary>
        /// <param name="api">Target graphics API</param>
        /// <param name="flags">Translation flags</param>
        /// <returns>Translation options</returns>
        private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags)
        {
            TargetLanguage lang = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan
                ? TargetLanguage.Spirv
                : TargetLanguage.Glsl;

            return new TranslationOptions(lang, api, flags);
        }

        /// <summary>
        /// Disposes the shader cache, deleting all the cached shaders.
        /// It's an error to use the shader cache after disposal.
        /// </summary>
        public void Dispose()
        {
            foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms())
            {
                program.Dispose();
            }

            foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms())
            {
                program.Dispose();
            }

            _cacheWriter?.Dispose();
        }
    }
}