diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs')
-rw-r--r-- | src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs | 725 |
1 files changed, 725 insertions, 0 deletions
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;

namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Loads shader programs from the disk cache.
    /// Guest programs are translated on a fixed pool of worker threads, the translated programs are
    /// submitted to the host renderer, and programs that link successfully are added to the in-memory caches.
    /// </summary>
    class ParallelDiskCacheLoader
    {
        /// <summary>
        /// Number of translation worker threads, and also the bounded capacity of the translation queue.
        /// </summary>
        private const int ThreadCount = 8;

        private readonly GpuContext _context;
        private readonly ShaderCacheHashTable _graphicsCache;
        private readonly ComputeShaderCacheHashTable _computeCache;
        private readonly DiskCacheHostStorage _hostStorage;
        private readonly CancellationToken _cancellationToken;
        private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;

        /// <summary>
        /// Indicates if the cache should be loaded.
        /// </summary>
        public bool Active => !_cancellationToken.IsCancellationRequested;

        private bool _needsHostRegen;

        // Incremented from the translation worker threads as well as from the loading thread,
        // so every update goes through Interlocked.
        private int _errorCount;

        /// <summary>
        /// Number of shaders that failed to compile from the cache.
        /// </summary>
        public int ErrorCount => Volatile.Read(ref _errorCount);

        /// <summary>
        /// Program validation entry.
        /// </summary>
        private readonly struct ProgramEntry
        {
            /// <summary>
            /// Cached shader program.
            /// </summary>
            public readonly CachedShaderProgram CachedProgram;

            /// <summary>
            /// Optional binary code. If not null, it is used instead of the backend host binary.
            /// </summary>
            public readonly byte[] BinaryCode;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Indicates if the program is a host binary shader.
            /// </summary>
            public readonly bool IsBinary;

            /// <summary>
            /// Creates a new program validation entry.
            /// </summary>
            /// <param name="cachedProgram">Cached shader program</param>
            /// <param name="binaryCode">Optional binary code. If not null, it is used instead of the backend host binary</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            /// <param name="isBinary">Indicates if the program is a host binary shader</param>
            public ProgramEntry(
                CachedShaderProgram cachedProgram,
                byte[] binaryCode,
                int programIndex,
                bool isCompute,
                bool isBinary)
            {
                CachedProgram = cachedProgram;
                BinaryCode = binaryCode;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
                IsBinary = isBinary;
            }
        }

        /// <summary>
        /// Translated shader compilation entry.
        /// </summary>
        private readonly struct ProgramCompilation
        {
            /// <summary>
            /// Translated shader stages.
            /// </summary>
            public readonly ShaderProgram[] TranslatedStages;

            /// <summary>
            /// Cached shaders.
            /// </summary>
            public readonly CachedShaderStage[] Shaders;

            /// <summary>
            /// Specialization state.
            /// </summary>
            public readonly ShaderSpecializationState SpecializationState;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Creates a new translated shader compilation entry.
            /// </summary>
            /// <param name="translatedStages">Translated shader stages</param>
            /// <param name="shaders">Cached shaders</param>
            /// <param name="specState">Specialization state</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            public ProgramCompilation(
                ShaderProgram[] translatedStages,
                CachedShaderStage[] shaders,
                ShaderSpecializationState specState,
                int programIndex,
                bool isCompute)
            {
                TranslatedStages = translatedStages;
                Shaders = shaders;
                SpecializationState = specState;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
            }
        }

        /// <summary>
        /// Program translation entry.
        /// </summary>
        private readonly struct AsyncProgramTranslation
        {
            /// <summary>
            /// Guest code for each active stage.
            /// </summary>
            public readonly GuestCodeAndCbData?[] GuestShaders;

            /// <summary>
            /// Specialization state.
            /// </summary>
            public readonly ShaderSpecializationState SpecializationState;

            /// <summary>
            /// Program index.
            /// </summary>
            public readonly int ProgramIndex;

            /// <summary>
            /// Indicates if the program is a compute shader.
            /// </summary>
            public readonly bool IsCompute;

            /// <summary>
            /// Creates a new program translation entry.
            /// </summary>
            /// <param name="guestShaders">Guest code for each active stage</param>
            /// <param name="specState">Specialization state</param>
            /// <param name="programIndex">Program index</param>
            /// <param name="isCompute">Indicates if the program is a compute shader</param>
            public AsyncProgramTranslation(
                GuestCodeAndCbData?[] guestShaders,
                ShaderSpecializationState specState,
                int programIndex,
                bool isCompute)
            {
                GuestShaders = guestShaders;
                SpecializationState = specState;
                ProgramIndex = programIndex;
                IsCompute = isCompute;
            }
        }

        // Programs submitted to the host renderer whose link status has not been checked yet.
        private readonly Queue<ProgramEntry> _validationQueue;

        // Programs translated by the worker threads, waiting to be submitted to the host renderer.
        private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;

        // Guest programs waiting to be translated by the worker threads.
        private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;

        // Successfully compiled programs, keyed (and therefore ordered) by program index.
        private readonly SortedList<int, (CachedShaderProgram, byte[])> _programList;

        private readonly int _backendParallelCompileThreads;

        // Incremented from the translation worker threads as well as from the loading thread,
        // so every update goes through Interlocked.
        private int _compiledCount;
        private int _totalCount;

        /// <summary>
        /// Creates a new parallel disk cache loader.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="graphicsCache">Graphics shader cache</param>
        /// <param name="computeCache">Compute shader cache</param>
        /// <param name="hostStorage">Disk cache host storage</param>
        /// <param name="cancellationToken">Cancellation token</param>
        /// <param name="stateChangeCallback">Function to be called when there is a state change, reporting state, compiled and total shaders count</param>
        public ParallelDiskCacheLoader(
            GpuContext context,
            ShaderCacheHashTable graphicsCache,
            ComputeShaderCacheHashTable computeCache,
            DiskCacheHostStorage hostStorage,
            CancellationToken cancellationToken,
            Action<ShaderCacheState, int, int> stateChangeCallback)
        {
            _context = context;
            _graphicsCache = graphicsCache;
            _computeCache = computeCache;
            _hostStorage = hostStorage;
            _cancellationToken = cancellationToken;
            _stateChangeCallback = stateChangeCallback;
            _validationQueue = new Queue<ProgramEntry>();
            _compilationQueue = new ConcurrentQueue<ProgramCompilation>();
            _asyncTranslationQueue = new BlockingCollection<AsyncProgramTranslation>(ThreadCount);
            _programList = new SortedList<int, (CachedShaderProgram, byte[])>();
            _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code.
        }

        /// <summary>
        /// Loads all shaders from the cache.
        /// </summary>
        /// <remarks>
        /// Starts the translation worker threads, asks the host storage to load the shaders,
        /// waits for the workers and for all pending host compilations to finish, and finally
        /// rebuilds the cache files if any program could not be loaded from its host binary
        /// or if the cache could not be read.
        /// </remarks>
        public void LoadShaders()
        {
            Thread[] workThreads = new Thread[ThreadCount];

            for (int index = 0; index < ThreadCount; index++)
            {
                workThreads[index] = new Thread(ProcessAsyncQueue)
                {
                    Name = $"GPU.AsyncTranslationThread.{index}"
                };
            }

            int programCount = _hostStorage.GetProgramCount();

            _compiledCount = 0;
            _totalCount = programCount;

            _stateChangeCallback(ShaderCacheState.Start, 0, programCount);

            Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache...");

            for (int index = 0; index < ThreadCount; index++)
            {
                workThreads[index].Start(_cancellationToken);
            }

            try
            {
                _hostStorage.LoadShaders(_context, this);
            }
            catch (DiskCacheLoadException diskCacheLoadException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}");

                // If we can't even access the file, then we also can't rebuild.
                if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess)
                {
                    _needsHostRegen = true;
                }
            }
            catch (InvalidDataException invalidDataException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}");
                _needsHostRegen = true;
            }
            catch (IOException ioException)
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}");
                _needsHostRegen = true;
            }

            // No more guest programs will be queued from here on; this lets the worker threads
            // drain the queue and exit their consuming loops.
            _asyncTranslationQueue.CompleteAdding();

            for (int index = 0; index < ThreadCount; index++)
            {
                workThreads[index].Join();
            }

            CheckCompilationBlocking();

            if (_needsHostRegen && Active)
            {
                // Rebuild both shared and host cache files.
                // Rebuilding shared is required because the shader information returned by the translator
                // might have changed, and so we have to reconstruct the file with the new information.
                try
                {
                    _hostStorage.ClearSharedCache();
                    _hostStorage.ClearHostCache(_context);

                    if (_programList.Count != 0)
                    {
                        Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders...");

                        using var streams = _hostStorage.GetOutputStreams(_context);

                        int rebuiltCount = 0;

                        foreach (var kv in _programList)
                        {
                            if (!Active)
                            {
                                break;
                            }

                            (CachedShaderProgram program, byte[] binaryCode) = kv.Value;
                            _hostStorage.AddShader(_context, program, binaryCode, streams);
                            rebuiltCount++;
                        }

                        // Only report success when the loop was not cut short by cancellation.
                        if (rebuiltCount == _programList.Count)
                        {
                            Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {rebuiltCount} shaders successfully.");
                        }
                    }
                    else
                    {
                        _hostStorage.ClearGuestCache();

                        Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption.");
                    }
                }
                catch (DiskCacheLoadException diskCacheLoadException)
                {
                    Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}");
                }
                catch (IOException ioException)
                {
                    Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. {ioException.Message}");
                }
            }

            Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");

            _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount);
        }

        /// <summary>
        /// Enqueues a host program for compilation.
        /// </summary>
        /// <param name="cachedProgram">Cached program</param>
        /// <param name="binaryCode">Host binary code</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute)
        {
            EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true));
        }

        /// <summary>
        /// Enqueues a guest program for compilation.
        /// </summary>
        /// <remarks>
        /// The translation queue is bounded, so this call blocks while the queue is full.
        /// If the load is cancelled while waiting, the program is silently dropped.
        /// </remarks>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
        {
            try
            {
                AsyncProgramTranslation asyncTranslation = new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute);
                _asyncTranslationQueue.Add(asyncTranslation, _cancellationToken);
            }
            catch (OperationCanceledException)
            {
                // Loading was cancelled; there is nothing left to do for this program.
            }
        }

        /// <summary>
        /// Check the state of programs that have already been compiled,
        /// and add to the cache if the compilation was successful.
        /// </summary>
        public void CheckCompilation()
        {
            ProcessCompilationQueue();

            // Process programs that already finished compiling.
            // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation.
            while (_validationQueue.TryPeek(out ProgramEntry entry))
            {
                ProgramLinkStatus result = entry.CachedProgram.HostProgram.CheckProgramLink(false);

                if (result != ProgramLinkStatus.Incomplete)
                {
                    ProcessCompiledProgram(ref entry, result);
                    _validationQueue.Dequeue();
                }
                else
                {
                    break;
                }
            }
        }

        /// <summary>
        /// Waits until all programs finish compiling, then adds the ones
        /// with successful compilation to the cache.
        /// </summary>
        private void CheckCompilationBlocking()
        {
            ProcessCompilationQueue();

            while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active)
            {
                ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false);
            }
        }

        /// <summary>
        /// Process a compiled program result.
        /// </summary>
        /// <param name="entry">Compiled program entry</param>
        /// <param name="result">Compilation result</param>
        /// <param name="asyncCompile">For failed host compilations, indicates if a guest compilation should be done asynchronously</param>
        private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true)
        {
            if (result == ProgramLinkStatus.Success)
            {
                // Compilation successful, add to memory cache.
                if (entry.IsCompute)
                {
                    _computeCache.Add(entry.CachedProgram);
                }
                else
                {
                    _graphicsCache.Add(entry.CachedProgram);
                }

                // The program was not loaded from a host binary, so the cache files must be rebuilt.
                if (!entry.IsBinary)
                {
                    _needsHostRegen = true;
                }

                // Fetch the binary code from the backend if it isn't already present.
                byte[] binaryCode = entry.BinaryCode ?? entry.CachedProgram.HostProgram.GetBinary();

                _programList.Add(entry.ProgramIndex, (entry.CachedProgram, binaryCode));
                SignalCompiled();
            }
            else if (entry.IsBinary)
            {
                // If this is a host binary and compilation failed,
                // we still have a chance to recompile from the guest binary.
                CachedShaderProgram program = entry.CachedProgram;

                GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[program.Shaders.Length];

                for (int index = 0; index < program.Shaders.Length; index++)
                {
                    CachedShaderStage shader = program.Shaders[index];

                    if (shader != null)
                    {
                        guestShaders[index] = new GuestCodeAndCbData(shader.Code, shader.Cb1Data);
                    }
                }

                if (asyncCompile)
                {
                    QueueGuestProgram(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
                }
                else
                {
                    RecompileFromGuestCode(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute);
                    ProcessCompilationQueue();
                }
            }
            else
            {
                // Failed to compile from both host and guest binary.
                Interlocked.Increment(ref _errorCount);
                SignalCompiled();
            }
        }

        /// <summary>
        /// Processes the queue of translated guest programs that should be compiled on the host.
        /// </summary>
        private void ProcessCompilationQueue()
        {
            while (_compilationQueue.TryDequeue(out ProgramCompilation compilation) && Active)
            {
                ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length];

                // Stays at -1 when the program has no fragment stage.
                int fragmentOutputMap = -1;

                for (int index = 0; index < compilation.TranslatedStages.Length; index++)
                {
                    ShaderProgram shader = compilation.TranslatedStages[index];
                    shaderSources[index] = CreateShaderSource(shader);

                    if (shader.Info.Stage == ShaderStage.Fragment)
                    {
                        fragmentOutputMap = shader.Info.FragmentOutputMap;
                    }
                }

                ShaderInfo shaderInfo = compilation.SpecializationState.PipelineState.HasValue
                    ? new ShaderInfo(fragmentOutputMap, compilation.SpecializationState.PipelineState.Value, fromCache: true)
                    : new ShaderInfo(fragmentOutputMap, fromCache: true);

                IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, shaderInfo);
                CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders);

                // Vulkan's binary code is the SPIR-V used for compilation, so it is ready immediately. Other APIs get this after compilation.
                byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? ShaderBinarySerializer.Pack(shaderSources) : null;

                EnqueueForValidation(new ProgramEntry(program, binaryCode, compilation.ProgramIndex, compilation.IsCompute, isBinary: false));
            }
        }

        /// <summary>
        /// Enqueues a program for validation, which will check if the program was compiled successfully.
        /// </summary>
        /// <param name="newEntry">Program entry to be validated</param>
        private void EnqueueForValidation(ProgramEntry newEntry)
        {
            _validationQueue.Enqueue(newEntry);

            // Do not allow more than N shader compilation in-flight, where N is the maximum number of threads
            // the driver will be using for parallel compilation.
            // Submitting more seems to cause NVIDIA OpenGL driver to crash.
            if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry))
            {
                ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false);
            }
        }

        /// <summary>
        /// Processes the queue of programs that should be translated from guest code.
        /// This is the entry point of each translation worker thread.
        /// </summary>
        /// <param name="state">Cancellation token</param>
        private void ProcessAsyncQueue(object state)
        {
            CancellationToken ct = (CancellationToken)state;

            try
            {
                foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct))
                {
                    RecompileFromGuestCode(
                        asyncCompilation.GuestShaders,
                        asyncCompilation.SpecializationState,
                        asyncCompilation.ProgramIndex,
                        asyncCompilation.IsCompute);
                }
            }
            catch (OperationCanceledException)
            {
                // Loading was cancelled; let the worker thread exit.
            }
        }

        /// <summary>
        /// Recompiles a program from guest code.
        /// </summary>
        /// <remarks>
        /// Any exception thrown by the translation is logged and counted as a failed program
        /// rather than propagated. This method runs on the translation worker threads, and also on
        /// the thread that calls <see cref="ProcessCompiledProgram"/> with asynchronous compilation disabled.
        /// </remarks>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        /// <param name="isCompute">Indicates if the program is a compute shader</param>
        private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
        {
            try
            {
                if (isCompute)
                {
                    RecompileComputeFromGuestCode(guestShaders, specState, programIndex);
                }
                else
                {
                    RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex);
                }
            }
            catch (Exception exception)
            {
                Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {exception.Message}");

                // Several worker threads can fail at the same time, so the increment must be atomic.
                Interlocked.Increment(ref _errorCount);
                SignalCompiled();
            }
        }

        /// <summary>
        /// Recompiles a graphics program from guest code.
        /// </summary>
        /// <remarks>
        /// Stages are decoded from last to first so that each stage can be linked to the one that follows it,
        /// then translated from first to last. The result is pushed to the compilation queue.
        /// </remarks>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
        {
            ShaderSpecializationState newSpecState = new ShaderSpecializationState(
                ref specState.GraphicsState,
                specState.PipelineState,
                specState.TransformFeedbackDescriptors);

            ResourceCounts counts = new ResourceCounts();

            // Slot 0 is reserved for the optional stage decoded with the VertexA flag;
            // regular stage N lives at slot N + 1, mirroring the layout of guestShaders.
            TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
            TranslatorContext nextStage = null;

            TargetApi api = _context.Capabilities.Api;

            for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
            {
                if (guestShaders[stageIndex + 1].HasValue)
                {
                    GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value;

                    byte[] guestCode = shader.Code;
                    byte[] cb1Data = shader.Cb1Data;

                    DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex);
                    TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0);

                    if (nextStage != null)
                    {
                        currentStage.SetNextStage(nextStage);
                    }

                    if (stageIndex == 0 && guestShaders[0].HasValue)
                    {
                        byte[] guestCodeA = guestShaders[0].Value.Code;
                        byte[] cb1DataA = guestShaders[0].Value.Cb1Data;

                        DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0);
                        translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
                    }

                    translatorContexts[stageIndex + 1] = currentStage;
                    nextStage = currentStage;
                }
            }

            if (!_context.Capabilities.SupportsGeometryShader)
            {
                ShaderCache.TryRemoveGeometryStage(translatorContexts);
            }

            CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
            List<ShaderProgram> translatedStages = new List<ShaderProgram>();

            TranslatorContext previousStage = null;

            for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
            {
                TranslatorContext currentStage = translatorContexts[stageIndex + 1];

                if (currentStage != null)
                {
                    ShaderProgram program;

                    byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
                    byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data;

                    if (stageIndex == 0 && guestShaders[0].HasValue)
                    {
                        program = currentStage.Translate(translatorContexts[0]);

                        byte[] guestCodeA = guestShaders[0].Value.Code;
                        byte[] cb1DataA = guestShaders[0].Value.Cb1Data;

                        shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA);
                        shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
                    }
                    else
                    {
                        program = currentStage.Translate();

                        shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
                    }

                    // NOTE(review): program.Info was already read above, so a null program would have
                    // thrown before reaching this check. Confirm whether Translate can return null.
                    if (program != null)
                    {
                        translatedStages.Add(program);
                    }

                    previousStage = currentStage;
                }
                else if (
                    previousStage != null &&
                    previousStage.LayerOutputWritten &&
                    stageIndex == 3 && // NOTE(review): presumably the geometry stage index; confirm.
                    !_context.Capabilities.SupportsLayerVertexTessellation)
                {
                    translatedStages.Add(previousStage.GenerateGeometryPassthrough());
                }
            }

            _compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false));
        }

        /// <summary>
        /// Recompiles a compute program from guest code.
        /// </summary>
        /// <remarks>
        /// Only the first entry of <paramref name="guestShaders"/> is read, and it must have a value.
        /// The result is pushed to the compilation queue.
        /// </remarks>
        /// <param name="guestShaders">Guest code for each active stage</param>
        /// <param name="specState">Specialization state</param>
        /// <param name="programIndex">Program index</param>
        private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
        {
            GuestCodeAndCbData shader = guestShaders[0].Value;
            ResourceCounts counts = new ResourceCounts();
            ShaderSpecializationState newSpecState = new ShaderSpecializationState(ref specState.ComputeState);
            DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0);

            TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0);

            ShaderProgram program = translatorContext.Translate();

            CachedShaderStage[] shaders = new[] { new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data) };

            _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true));
        }

        /// <summary>
        /// Signals that compilation of a program has been finished successfully,
        /// or that it failed and guest recompilation has also been attempted.
        /// </summary>
        private void SignalCompiled()
        {
            // Called from the translation worker threads as well as from the loading thread,
            // so the counter is advanced atomically to avoid lost or duplicated progress values.
            _stateChangeCallback(ShaderCacheState.Loading, Interlocked.Increment(ref _compiledCount), _totalCount);
        }
    }
}
\ No newline at end of file |