aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md6
-rw-r--r--Ryujinx.Common/AsyncWorkQueue.cs100
-rw-r--r--Ryujinx.Common/Logging/LogClass.cs5
-rw-r--r--Ryujinx.Common/Memory/ArrayPtr.cs123
-rw-r--r--Ryujinx.Common/Memory/IArray.cs21
-rw-r--r--Ryujinx.Common/Memory/Ptr.cs68
-rw-r--r--Ryujinx.Common/Memory/StructArrayHelpers.cs518
-rw-r--r--Ryujinx.Cpu/MemoryManager.cs32
-rw-r--r--Ryujinx.Cpu/WritableRegion.cs29
-rw-r--r--Ryujinx.Graphics.Device/AccessControl.cs10
-rw-r--r--Ryujinx.Graphics.Device/DeviceState.cs124
-rw-r--r--Ryujinx.Graphics.Device/IDeviceState.cs8
-rw-r--r--Ryujinx.Graphics.Device/RegisterAttribute.cs15
-rw-r--r--Ryujinx.Graphics.Device/RwCallback.cs16
-rw-r--r--Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj7
-rw-r--r--Ryujinx.Graphics.Device/SizeCalculator.cs63
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/Compute.cs2
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs10
-rw-r--r--Ryujinx.Graphics.Gpu/Engine/Methods.cs2
-rw-r--r--Ryujinx.Graphics.Gpu/GpuContext.cs2
-rw-r--r--Ryujinx.Graphics.Gpu/Image/TextureManager.cs2
-rw-r--r--Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs36
-rw-r--r--Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs60
-rw-r--r--Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs12
-rw-r--r--Ryujinx.Graphics.Host1x/ClassId.cs20
-rw-r--r--Ryujinx.Graphics.Host1x/Devices.cs32
-rw-r--r--Ryujinx.Graphics.Host1x/Host1xClass.cs33
-rw-r--r--Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs41
-rw-r--r--Ryujinx.Graphics.Host1x/Host1xDevice.cs123
-rw-r--r--Ryujinx.Graphics.Host1x/OpCode.cs21
-rw-r--r--Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj20
-rw-r--r--Ryujinx.Graphics.Host1x/SyncptIncrManager.cs99
-rw-r--r--Ryujinx.Graphics.Host1x/ThiDevice.cs96
-rw-r--r--Ryujinx.Graphics.Host1x/ThiRegisters.cs22
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/Decoder.cs40
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs51
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs121
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj23
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs159
-rw-r--r--Ryujinx.Graphics.Nvdec.H264/Surface.cs33
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs9
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs56
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs59
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs94
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs25
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Constants.cs71
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs1190
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs1159
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs164
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs325
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs949
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs12
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs1379
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs2868
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs73
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs237
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs54
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Idct.cs536
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs15
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs14
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs418
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Luts.cs1612
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs389
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs203
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs234
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs761
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj20
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs15
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs21
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs8
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs27
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs24
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs13
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs179
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs21
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs66
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs14
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs189
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs8
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs17
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs12
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs9
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs14
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs21
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs8
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs451
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs11
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs71
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs80
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs85
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs12
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs11
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs11
-rw-r--r--Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs334
-rw-r--r--Ryujinx.Graphics.Nvdec/CdmaProcessor.cs103
-rw-r--r--Ryujinx.Graphics.Nvdec/ChClassId.cs20
-rw-r--r--Ryujinx.Graphics.Nvdec/ChCommandEntry.cs18
-rw-r--r--Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs13
-rw-r--r--Ryujinx.Graphics.Nvdec/CodecId.cs13
-rw-r--r--Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs16
-rw-r--r--Ryujinx.Graphics.Nvdec/H264Decoder.cs43
-rw-r--r--Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs151
-rw-r--r--Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs26
-rw-r--r--Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs133
-rw-r--r--Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs126
-rw-r--r--Ryujinx.Graphics.Nvdec/MemoryExtensions.cs28
-rw-r--r--Ryujinx.Graphics.Nvdec/NvdecDevice.cs55
-rw-r--r--Ryujinx.Graphics.Nvdec/NvdecRegisters.cs41
-rw-r--r--Ryujinx.Graphics.Nvdec/ResourceManager.cs17
-rw-r--r--Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj12
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs120
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs72
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs139
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs12
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs20
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs11
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs85
-rw-r--r--Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs14
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs75
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs17
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs168
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs14
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs79
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs238
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs8
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs34
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs281
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs19
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs879
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs79
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs10
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs31
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs38
-rw-r--r--Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs134
-rw-r--r--Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs69
-rw-r--r--Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs33
-rw-r--r--Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs8
-rw-r--r--Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs94
-rw-r--r--Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs12
-rw-r--r--Ryujinx.Graphics.Nvdec/Vp9Decoder.cs92
-rw-r--r--Ryujinx.Graphics.Texture/LayoutConverter.cs168
-rw-r--r--Ryujinx.Graphics.Texture/OffsetCalculator.cs13
-rw-r--r--Ryujinx.Graphics.Vic/Blender.cs157
-rw-r--r--Ryujinx.Graphics.Vic/Image/BufferPool.cs103
-rw-r--r--Ryujinx.Graphics.Vic/Image/InputSurface.cs17
-rw-r--r--Ryujinx.Graphics.Vic/Image/Pixel.cs10
-rw-r--r--Ryujinx.Graphics.Vic/Image/Surface.cs46
-rw-r--r--Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs33
-rw-r--r--Ryujinx.Graphics.Vic/Image/SurfaceReader.cs253
-rw-r--r--Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs361
-rw-r--r--Ryujinx.Graphics.Vic/ResourceManager.cs19
-rw-r--r--Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj23
-rw-r--r--Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs39
-rw-r--r--Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs27
-rw-r--r--Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs17
-rw-r--r--Ryujinx.Graphics.Vic/Types/ConfigStruct.cs14
-rw-r--r--Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs17
-rw-r--r--Ryujinx.Graphics.Vic/Types/MatrixStruct.cs25
-rw-r--r--Ryujinx.Graphics.Vic/Types/OutputConfig.cs23
-rw-r--r--Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs20
-rw-r--r--Ryujinx.Graphics.Vic/Types/PipeConfig.cs11
-rw-r--r--Ryujinx.Graphics.Vic/Types/PixelFormat.cs81
-rw-r--r--Ryujinx.Graphics.Vic/Types/SlotConfig.cs63
-rw-r--r--Ryujinx.Graphics.Vic/Types/SlotStruct.cs12
-rw-r--r--Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs21
-rw-r--r--Ryujinx.Graphics.Vic/VicDevice.cs97
-rw-r--r--Ryujinx.Graphics.Vic/VicRegisters.cs47
-rw-r--r--Ryujinx.Graphics.Video/H264PictureInfo.cs47
-rw-r--r--Ryujinx.Graphics.Video/IDecoder.cs11
-rw-r--r--Ryujinx.Graphics.Video/IH264Decoder.cs9
-rw-r--r--Ryujinx.Graphics.Video/ISurface.cs18
-rw-r--r--Ryujinx.Graphics.Video/IVp9Decoder.cs14
-rw-r--r--Ryujinx.Graphics.Video/Plane.cs42
-rw-r--r--Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj11
-rw-r--r--Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs32
-rw-r--r--Ryujinx.Graphics.Video/Vp9EntropyProbs.cs36
-rw-r--r--Ryujinx.Graphics.Video/Vp9Mv.cs8
-rw-r--r--Ryujinx.Graphics.Video/Vp9MvRef.cs11
-rw-r--r--Ryujinx.Graphics.Video/Vp9PictureInfo.cs39
-rw-r--r--Ryujinx.HLE/HOS/Horizon.cs2
-rw-r--r--Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs2
-rw-r--r--Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs2
-rw-r--r--Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs62
-rw-r--r--Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs29
-rw-r--r--Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs6
-rw-r--r--Ryujinx.HLE/Ryujinx.HLE.csproj3
-rw-r--r--Ryujinx.HLE/Switch.cs37
-rw-r--r--Ryujinx.Memory/MemoryBlock.cs20
-rw-r--r--Ryujinx.Memory/NativeMemoryManager.cs42
-rw-r--r--Ryujinx.sln60
-rw-r--r--Ryujinx/Ryujinx.csproj4
-rw-r--r--Ryujinx/THIRDPARTY.md203
202 files changed, 20563 insertions, 2567 deletions
diff --git a/README.md b/README.md
index 69041236..3400ced9 100644
--- a/README.md
+++ b/README.md
@@ -112,3 +112,9 @@ If you need help with setting up Ryujinx, you can ask questions in the #support
If you have contributions, need support, have suggestions, or just want to get in touch with the team, join our [Discord server](https://discord.gg/N2FmfVc)!
If you'd like to donate, please take a look at our [Patreon](https://www.patreon.com/ryujinx).
+
+## License
+
+This software is licensed under the terms of the MIT license.
+This project makes use of code authored by the libvpx project, licensed under BSD, and the FFmpeg project, licensed under LGPLv3.
+See [LICENSE.txt](LICENSE.txt) and [THIRDPARTY.md](Ryujinx/THIRDPARTY.md) for more details.
diff --git a/Ryujinx.Common/AsyncWorkQueue.cs b/Ryujinx.Common/AsyncWorkQueue.cs
new file mode 100644
index 00000000..80f8dcfe
--- /dev/null
+++ b/Ryujinx.Common/AsyncWorkQueue.cs
@@ -0,0 +1,100 @@
+using System;
+using System.Collections.Concurrent;
+using System.Threading;
+
+namespace Ryujinx.Common
+{
+    /// <summary>
+    /// Queue that forwards every added work item to a callback running on a single dedicated background thread.
+    /// </summary>
+    /// <typeparam name="T">Type of the work items</typeparam>
+    public sealed class AsyncWorkQueue<T> : IDisposable
+    {
+        private readonly Thread _workerThread;
+        private readonly CancellationTokenSource _cts;
+        private readonly Action<T> _workerAction;
+        private readonly BlockingCollection<T> _queue;
+
+        // 0 until Dispose runs for the first time, 1 afterwards. Makes repeated Dispose calls a no-op.
+        private int _disposed;
+
+        /// <summary>
+        /// True once cancellation of the worker has been requested, false otherwise.
+        /// </summary>
+        public bool IsCancellationRequested => _cts.IsCancellationRequested;
+
+        /// <summary>
+        /// Creates a work queue backed by a new <see cref="BlockingCollection{T}"/> and starts its worker thread.
+        /// </summary>
+        /// <param name="callback">Action invoked on the worker thread for every work item</param>
+        /// <param name="name">Optional name given to the worker thread</param>
+        /// <exception cref="ArgumentNullException"><paramref name="callback"/> is null</exception>
+        public AsyncWorkQueue(Action<T> callback, string name = null) : this(callback, name, new BlockingCollection<T>())
+        {
+        }
+
+        /// <summary>
+        /// Creates a work queue backed by the given collection and starts its worker thread.
+        /// </summary>
+        /// <remarks>
+        /// The queue takes ownership of <paramref name="collection"/>: it is disposed together with the queue.
+        /// </remarks>
+        /// <param name="callback">Action invoked on the worker thread for every work item</param>
+        /// <param name="name">Name given to the worker thread, may be null</param>
+        /// <param name="collection">Collection used to store the pending work items</param>
+        /// <exception cref="ArgumentNullException"><paramref name="callback"/> or <paramref name="collection"/> is null</exception>
+        public AsyncWorkQueue(Action<T> callback, string name, BlockingCollection<T> collection)
+        {
+            // Validate before any resource is created, otherwise a null argument would only
+            // surface later as an unhandled NullReferenceException on the worker thread.
+            _workerAction = callback ?? throw new ArgumentNullException(nameof(callback));
+            _queue = collection ?? throw new ArgumentNullException(nameof(collection));
+            _cts = new CancellationTokenSource();
+            _workerThread = new Thread(DoWork) { Name = name };
+
+            // Background thread: it must not keep the process alive on its own.
+            _workerThread.IsBackground = true;
+            _workerThread.Start();
+        }
+
+        /// <summary>
+        /// Worker thread entry point. Consumes items until adding is completed or cancellation is requested.
+        /// </summary>
+        private void DoWork()
+        {
+            try
+            {
+                foreach (var item in _queue.GetConsumingEnumerable(_cts.Token))
+                {
+                    _workerAction(item);
+                }
+            }
+            catch (OperationCanceledException)
+            {
+                // Cancellation is the expected way of stopping the worker, nothing to report.
+            }
+        }
+
+        /// <summary>
+        /// Requests cancellation of the worker.
+        /// </summary>
+        public void Cancel()
+        {
+            _cts.Cancel();
+        }
+
+        /// <summary>
+        /// Requests cancellation of the worker after the given delay.
+        /// </summary>
+        /// <param name="millisecondsDelay">Delay before cancellation, in milliseconds</param>
+        public void CancelAfter(int millisecondsDelay)
+        {
+            _cts.CancelAfter(millisecondsDelay);
+        }
+
+        /// <summary>
+        /// Requests cancellation of the worker after the given delay.
+        /// </summary>
+        /// <param name="delay">Delay before cancellation</param>
+        public void CancelAfter(TimeSpan delay)
+        {
+            _cts.CancelAfter(delay);
+        }
+
+        /// <summary>
+        /// Adds a work item to the queue.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        public void Add(T workItem)
+        {
+            _queue.Add(workItem);
+        }
+
+        /// <summary>
+        /// Adds a work item to the queue, observing a cancellation token while doing so.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        /// <param name="cancellationToken">Token that can cancel the add operation</param>
+        public void Add(T workItem, CancellationToken cancellationToken)
+        {
+            _queue.Add(workItem, cancellationToken);
+        }
+
+        /// <summary>
+        /// Tries to add a work item to the queue.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        /// <returns>True if the item was added, false otherwise</returns>
+        public bool TryAdd(T workItem)
+        {
+            return _queue.TryAdd(workItem);
+        }
+
+        /// <summary>
+        /// Tries to add a work item to the queue within the given time.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        /// <param name="millisecondsDelay">Maximum time to wait, in milliseconds</param>
+        /// <returns>True if the item was added, false otherwise</returns>
+        public bool TryAdd(T workItem, int millisecondsDelay)
+        {
+            return _queue.TryAdd(workItem, millisecondsDelay);
+        }
+
+        /// <summary>
+        /// Tries to add a work item to the queue within the given time, observing a cancellation token.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        /// <param name="millisecondsDelay">Maximum time to wait, in milliseconds</param>
+        /// <param name="cancellationToken">Token that can cancel the add operation</param>
+        /// <returns>True if the item was added, false otherwise</returns>
+        public bool TryAdd(T workItem, int millisecondsDelay, CancellationToken cancellationToken)
+        {
+            return _queue.TryAdd(workItem, millisecondsDelay, cancellationToken);
+        }
+
+        /// <summary>
+        /// Tries to add a work item to the queue within the given time.
+        /// </summary>
+        /// <param name="workItem">Item to be handed to the callback</param>
+        /// <param name="timeout">Maximum time to wait</param>
+        /// <returns>True if the item was added, false otherwise</returns>
+        public bool TryAdd(T workItem, TimeSpan timeout)
+        {
+            return _queue.TryAdd(workItem, timeout);
+        }
+
+        /// <summary>
+        /// Stops accepting items, cancels the worker, waits for it to exit and releases the queue resources.
+        /// </summary>
+        /// <remarks>
+        /// Safe to call more than once; only the first call has any effect.
+        /// Items still pending when cancellation is observed may be left unprocessed.
+        /// </remarks>
+        public void Dispose()
+        {
+            // A second call would otherwise touch already disposed objects and throw ObjectDisposedException.
+            if (Interlocked.Exchange(ref _disposed, 1) != 0)
+            {
+                return;
+            }
+
+            _queue.CompleteAdding();
+            _cts.Cancel();
+
+            // The worker must be gone before the objects it uses are disposed.
+            _workerThread.Join();
+
+            _queue.Dispose();
+            _cts.Dispose();
+        }
+    }
+}
diff --git a/Ryujinx.Common/Logging/LogClass.cs b/Ryujinx.Common/Logging/LogClass.cs
index aad04891..a35d01a5 100644
--- a/Ryujinx.Common/Logging/LogClass.cs
+++ b/Ryujinx.Common/Logging/LogClass.cs
@@ -9,12 +9,14 @@ namespace Ryujinx.Common.Logging
Emulation,
Gpu,
Hid,
+ Host1x,
Kernel,
KernelIpc,
KernelScheduler,
KernelSvc,
Loader,
ModLoader,
+ Nvdec,
Ptc,
Service,
ServiceAcc,
@@ -50,6 +52,7 @@ namespace Ryujinx.Common.Logging
ServiceSss,
ServiceTime,
ServiceVi,
- SurfaceFlinger
+ SurfaceFlinger,
+ Vic
}
} \ No newline at end of file
diff --git a/Ryujinx.Common/Memory/ArrayPtr.cs b/Ryujinx.Common/Memory/ArrayPtr.cs
new file mode 100644
index 00000000..7a145de2
--- /dev/null
+++ b/Ryujinx.Common/Memory/ArrayPtr.cs
@@ -0,0 +1,123 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Common.Memory
+{
+ /// <summary>
+ /// Represents an array of unmanaged resources.
+ /// </summary>
+ /// <typeparam name="T">Array element type</typeparam>
+    /// <summary>
+    /// Pointer to the first element of a sequence of unmanaged values, paired with an element count.
+    /// </summary>
+    /// <typeparam name="T">Type of the elements</typeparam>
+    public unsafe struct ArrayPtr<T> : IEquatable<ArrayPtr<T>>, IArray<T> where T : unmanaged
+    {
+        private IntPtr _ptr;
+
+        /// <summary>
+        /// Array with a null base pointer and a length of zero.
+        /// </summary>
+        public static ArrayPtr<T> Null
+        {
+            get { return default; }
+        }
+
+        /// <summary>
+        /// Indicates whether the base pointer is null.
+        /// </summary>
+        public bool IsNull
+        {
+            get { return IntPtr.Zero == _ptr; }
+        }
+
+        /// <summary>
+        /// Element count supplied when the array was created.
+        /// </summary>
+        public int Length { get; }
+
+        /// <summary>
+        /// Accesses an element by position, relative to the base pointer.
+        /// </summary>
+        /// <remarks>
+        /// The index is not validated against <see cref="Length"/>. Negative indices are
+        /// therefore possible, and an out of bounds index touches unrelated memory.
+        /// </remarks>
+        /// <param name="index">Position of the element</param>
+        /// <returns>Reference to the element at that position</returns>
+        public ref T this[int index]
+        {
+            get
+            {
+                T* element = (T*)_ptr + index;
+
+                return ref Unsafe.AsRef<T>(element);
+            }
+        }
+
+        /// <summary>
+        /// Builds an array that begins at the referenced value.
+        /// </summary>
+        /// <remarks>
+        /// The address of the reference is captured as a raw pointer. Data living on the
+        /// managed heap has to stay pinned for as long as the array is used, otherwise
+        /// memory corruption and crashes will follow.
+        /// </remarks>
+        /// <param name="value">Reference to the first element</param>
+        /// <param name="length">Element count</param>
+        public ArrayPtr(ref T value, int length)
+        {
+            Length = length;
+            _ptr = new IntPtr(Unsafe.AsPointer(ref value));
+        }
+
+        /// <summary>
+        /// Builds an array that begins at the given typed pointer.
+        /// </summary>
+        /// <param name="ptr">Pointer to the first element</param>
+        /// <param name="length">Element count</param>
+        public ArrayPtr(T* ptr, int length)
+        {
+            Length = length;
+            _ptr = new IntPtr(ptr);
+        }
+
+        /// <summary>
+        /// Builds an array that begins at the given untyped pointer.
+        /// </summary>
+        /// <param name="ptr">Pointer to the first element</param>
+        /// <param name="length">Element count</param>
+        public ArrayPtr(IntPtr ptr, int length)
+        {
+            Length = length;
+            _ptr = ptr;
+        }
+
+        /// <summary>
+        /// Produces the tail of the array that begins at the given position.
+        /// </summary>
+        /// <param name="start">Position of the first element of the new array</param>
+        /// <returns>Array whose base is moved forward by <paramref name="start"/> elements and whose length is reduced by the same amount</returns>
+        public ArrayPtr<T> Slice(int start)
+        {
+            T* newBase = (T*)_ptr + start;
+
+            return new ArrayPtr<T>(newBase, Length - start);
+        }
+
+        /// <summary>
+        /// Views the array as a span.
+        /// </summary>
+        /// <returns>Empty span when the length is zero, otherwise a span covering all the elements</returns>
+        public Span<T> ToSpan()
+        {
+            if (Length == 0)
+            {
+                return Span<T>.Empty;
+            }
+
+            return MemoryMarshal.CreateSpan(ref this[0], Length);
+        }
+
+        /// <summary>
+        /// Exposes the base pointer as a typed pointer.
+        /// </summary>
+        /// <returns>Pointer to the first element</returns>
+        public T* ToPointer()
+        {
+            return (T*)_ptr;
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (obj is ArrayPtr<T> candidate)
+            {
+                return Equals(candidate);
+            }
+
+            return false;
+        }
+
+        public bool Equals([AllowNull] ArrayPtr<T> other)
+        {
+            // Two arrays are equal only when both the base pointer and the length match.
+            return Length == other.Length && _ptr == other._ptr;
+        }
+
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_ptr, Length);
+        }
+
+        public static bool operator ==(ArrayPtr<T> left, ArrayPtr<T> right)
+        {
+            return left._ptr == right._ptr && left.Length == right.Length;
+        }
+
+        public static bool operator !=(ArrayPtr<T> left, ArrayPtr<T> right)
+        {
+            return !left.Equals(right);
+        }
+    }
+}
diff --git a/Ryujinx.Common/Memory/IArray.cs b/Ryujinx.Common/Memory/IArray.cs
new file mode 100644
index 00000000..8f17fade
--- /dev/null
+++ b/Ryujinx.Common/Memory/IArray.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Common.Memory
+{
+ /// <summary>
+ /// Array interface.
+ /// </summary>
+ /// <typeparam name="T">Element type</typeparam>
+    /// <summary>
+    /// Contract for array-like types: an element count plus by-reference element access.
+    /// </summary>
+    /// <typeparam name="T">Type of the elements</typeparam>
+    public interface IArray<T> where T : unmanaged
+    {
+        /// <summary>
+        /// Total number of elements held by the array.
+        /// </summary>
+        int Length { get; }
+
+        /// <summary>
+        /// Accesses an element by position.
+        /// </summary>
+        /// <param name="index">Position of the element</param>
+        /// <returns>Reference to the element stored at that position</returns>
+        ref T this[int index] { get; }
+    }
+}
diff --git a/Ryujinx.Common/Memory/Ptr.cs b/Ryujinx.Common/Memory/Ptr.cs
new file mode 100644
index 00000000..66bcf569
--- /dev/null
+++ b/Ryujinx.Common/Memory/Ptr.cs
@@ -0,0 +1,68 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Common.Memory
+{
+ /// <summary>
+ /// Represents a pointer to an unmanaged resource.
+ /// </summary>
+ /// <typeparam name="T">Type of the unmanaged resource</typeparam>
+    /// <summary>
+    /// Raw pointer to a single unmanaged value.
+    /// </summary>
+    /// <typeparam name="T">Type of the value pointed to</typeparam>
+    public unsafe struct Ptr<T> : IEquatable<Ptr<T>> where T : unmanaged
+    {
+        private IntPtr _ptr;
+
+        /// <summary>
+        /// Pointer holding the null address.
+        /// </summary>
+        public static Ptr<T> Null
+        {
+            get { return default; }
+        }
+
+        /// <summary>
+        /// Indicates whether the pointer holds the null address.
+        /// </summary>
+        public bool IsNull
+        {
+            get { return IntPtr.Zero == _ptr; }
+        }
+
+        /// <summary>
+        /// Reference to the value the pointer refers to.
+        /// </summary>
+        public ref T Value
+        {
+            get
+            {
+                void* address = (void*)_ptr;
+
+                return ref Unsafe.AsRef<T>(address);
+            }
+        }
+
+        /// <summary>
+        /// Builds a pointer from a reference.
+        /// </summary>
+        /// <remarks>
+        /// The address of the reference is captured as a raw pointer. Data living on the
+        /// managed heap has to stay pinned for as long as the pointer is used, otherwise
+        /// memory corruption and crashes will follow.
+        /// </remarks>
+        /// <param name="value">Reference to the value to point to</param>
+        public Ptr(ref T value)
+        {
+            _ptr = new IntPtr(Unsafe.AsPointer(ref value));
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (obj is Ptr<T> candidate)
+            {
+                return Equals(candidate);
+            }
+
+            return false;
+        }
+
+        public bool Equals([AllowNull] Ptr<T> other)
+        {
+            // Pointers are equal when they hold the same address.
+            return other._ptr == _ptr;
+        }
+
+        public override int GetHashCode()
+        {
+            return _ptr.GetHashCode();
+        }
+
+        public static bool operator ==(Ptr<T> left, Ptr<T> right)
+        {
+            return left._ptr == right._ptr;
+        }
+
+        public static bool operator !=(Ptr<T> left, Ptr<T> right)
+        {
+            return !left.Equals(right);
+        }
+    }
+}
diff --git a/Ryujinx.Common/Memory/StructArrayHelpers.cs b/Ryujinx.Common/Memory/StructArrayHelpers.cs
new file mode 100644
index 00000000..eb8d3ce1
--- /dev/null
+++ b/Ryujinx.Common/Memory/StructArrayHelpers.cs
@@ -0,0 +1,518 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Common.Memory
+{
+ public struct Array1<T> : IArray<T> where T : unmanaged // Inline array holding a single element of T, exposed as a span over its own field.
+ {
+ T _e0; // The only element; ToSpan() builds its span starting at this field.
+ public int Length => 1;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 1); // Span of 1 element beginning at _e0.
+ }
+ public struct Array2<T> : IArray<T> where T : unmanaged // Inline array of 2 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array1<T> _other; // Not referenced by name in this struct; presumably reserves the other slot after _e0 (assumes sequential field layout - confirm).
+ public int Length => 2;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 2); // Span of 2 elements beginning at _e0.
+ }
+ public struct Array3<T> : IArray<T> where T : unmanaged // Inline array of 3 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array2<T> _other; // Not referenced by name in this struct; presumably reserves the other 2 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 3;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 3); // Span of 3 elements beginning at _e0.
+ }
+ public struct Array4<T> : IArray<T> where T : unmanaged // Inline array of 4 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array3<T> _other; // Not referenced by name in this struct; presumably reserves the other 3 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 4;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 4); // Span of 4 elements beginning at _e0.
+ }
+ public struct Array5<T> : IArray<T> where T : unmanaged // Inline array of 5 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array4<T> _other; // Not referenced by name in this struct; presumably reserves the other 4 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 5;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 5); // Span of 5 elements beginning at _e0.
+ }
+ public struct Array6<T> : IArray<T> where T : unmanaged // Inline array of 6 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array5<T> _other; // Not referenced by name in this struct; presumably reserves the other 5 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 6;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 6); // Span of 6 elements beginning at _e0.
+ }
+ public struct Array7<T> : IArray<T> where T : unmanaged // Inline array of 7 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array6<T> _other; // Not referenced by name in this struct; presumably reserves the other 6 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 7;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 7); // Span of 7 elements beginning at _e0.
+ }
+ public struct Array8<T> : IArray<T> where T : unmanaged // Inline array of 8 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array7<T> _other; // Not referenced by name in this struct; presumably reserves the other 7 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 8;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 8); // Span of 8 elements beginning at _e0.
+ }
+ public struct Array9<T> : IArray<T> where T : unmanaged // Inline array of 9 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array8<T> _other; // Not referenced by name in this struct; presumably reserves the other 8 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 9;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 9); // Span of 9 elements beginning at _e0.
+ }
+ public struct Array10<T> : IArray<T> where T : unmanaged // Inline array of 10 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array9<T> _other; // Not referenced by name in this struct; presumably reserves the other 9 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 10;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 10); // Span of 10 elements beginning at _e0.
+ }
+ public struct Array11<T> : IArray<T> where T : unmanaged // Inline array of 11 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array10<T> _other; // Not referenced by name in this struct; presumably reserves the other 10 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 11;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 11); // Span of 11 elements beginning at _e0.
+ }
+ public struct Array12<T> : IArray<T> where T : unmanaged // Inline array of 12 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array11<T> _other; // Not referenced by name in this struct; presumably reserves the other 11 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 12;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 12); // Span of 12 elements beginning at _e0.
+ }
+ public struct Array13<T> : IArray<T> where T : unmanaged // Inline array of 13 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array12<T> _other; // Not referenced by name in this struct; presumably reserves the other 12 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 13;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 13); // Span of 13 elements beginning at _e0.
+ }
+ public struct Array14<T> : IArray<T> where T : unmanaged // Inline array of 14 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array13<T> _other; // Not referenced by name in this struct; presumably reserves the other 13 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 14;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 14); // Span of 14 elements beginning at _e0.
+ }
+ public struct Array15<T> : IArray<T> where T : unmanaged // Inline array of 15 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array14<T> _other; // Not referenced by name in this struct; presumably reserves the other 14 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 15;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 15); // Span of 15 elements beginning at _e0.
+ }
+ public struct Array16<T> : IArray<T> where T : unmanaged // Inline array of 16 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array15<T> _other; // Not referenced by name in this struct; presumably reserves the other 15 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 16;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 16); // Span of 16 elements beginning at _e0.
+ }
+ public struct Array17<T> : IArray<T> where T : unmanaged // Inline array of 17 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array16<T> _other; // Not referenced by name in this struct; presumably reserves the other 16 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 17;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 17); // Span of 17 elements beginning at _e0.
+ }
+ public struct Array18<T> : IArray<T> where T : unmanaged // Inline array of 18 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array17<T> _other; // Not referenced by name in this struct; presumably reserves the other 17 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 18;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 18); // Span of 18 elements beginning at _e0.
+ }
+ public struct Array19<T> : IArray<T> where T : unmanaged // Inline array of 19 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array18<T> _other; // Not referenced by name in this struct; presumably reserves the other 18 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 19;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 19); // Span of 19 elements beginning at _e0.
+ }
+ public struct Array20<T> : IArray<T> where T : unmanaged // Inline array of 20 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array19<T> _other; // Not referenced by name in this struct; presumably reserves the other 19 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 20;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 20); // Span of 20 elements beginning at _e0.
+ }
+ public struct Array21<T> : IArray<T> where T : unmanaged // Inline array of 21 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array20<T> _other; // Not referenced by name in this struct; presumably reserves the other 20 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 21;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 21); // Span of 21 elements beginning at _e0.
+ }
+ public struct Array22<T> : IArray<T> where T : unmanaged // Inline array of 22 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array21<T> _other; // Not referenced by name in this struct; presumably reserves the other 21 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 22;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 22); // Span of 22 elements beginning at _e0.
+ }
+ public struct Array23<T> : IArray<T> where T : unmanaged // Inline array of 23 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array22<T> _other; // Not referenced by name in this struct; presumably reserves the other 22 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 23;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 23); // Span of 23 elements beginning at _e0.
+ }
+ public struct Array24<T> : IArray<T> where T : unmanaged // Inline array of 24 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array23<T> _other; // Not referenced by name in this struct; presumably reserves the other 23 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 24;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 24); // Span of 24 elements beginning at _e0.
+ }
+ public struct Array25<T> : IArray<T> where T : unmanaged // Inline array of 25 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array24<T> _other; // Not referenced by name in this struct; presumably reserves the other 24 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 25;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 25); // Span of 25 elements beginning at _e0.
+ }
+ public struct Array26<T> : IArray<T> where T : unmanaged // Inline array of 26 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array25<T> _other; // Not referenced by name in this struct; presumably reserves the other 25 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 26;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 26); // Span of 26 elements beginning at _e0.
+ }
+ public struct Array27<T> : IArray<T> where T : unmanaged // Inline array of 27 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array26<T> _other; // Not referenced by name in this struct; presumably reserves the other 26 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 27;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 27); // Span of 27 elements beginning at _e0.
+ }
+ public struct Array28<T> : IArray<T> where T : unmanaged // Inline array of 28 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array27<T> _other; // Not referenced by name in this struct; presumably reserves the other 27 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 28;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 28); // Span of 28 elements beginning at _e0.
+ }
+ public struct Array29<T> : IArray<T> where T : unmanaged // Inline array of 29 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array28<T> _other; // Not referenced by name in this struct; presumably reserves the other 28 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 29;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 29); // Span of 29 elements beginning at _e0.
+ }
+ public struct Array30<T> : IArray<T> where T : unmanaged // Inline array of 30 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array29<T> _other; // Not referenced by name in this struct; presumably reserves the other 29 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 30;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 30); // Span of 30 elements beginning at _e0.
+ }
+ public struct Array31<T> : IArray<T> where T : unmanaged // Inline array of 31 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array30<T> _other; // Not referenced by name in this struct; presumably reserves the other 30 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 31;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 31); // Span of 31 elements beginning at _e0.
+ }
+ public struct Array32<T> : IArray<T> where T : unmanaged // Inline array of 32 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array31<T> _other; // Not referenced by name in this struct; presumably reserves the other 31 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 32;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 32); // Span of 32 elements beginning at _e0.
+ }
+ public struct Array33<T> : IArray<T> where T : unmanaged // Inline array of 33 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array32<T> _other; // Not referenced by name in this struct; presumably reserves the other 32 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 33;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 33); // Span of 33 elements beginning at _e0.
+ }
+ public struct Array34<T> : IArray<T> where T : unmanaged // Inline array of 34 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array33<T> _other; // Not referenced by name in this struct; presumably reserves the other 33 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 34;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 34); // Span of 34 elements beginning at _e0.
+ }
+ public struct Array35<T> : IArray<T> where T : unmanaged // Inline array of 35 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array34<T> _other; // Not referenced by name in this struct; presumably reserves the other 34 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 35;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 35); // Span of 35 elements beginning at _e0.
+ }
+ public struct Array36<T> : IArray<T> where T : unmanaged // Inline array of 36 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array35<T> _other; // Not referenced by name in this struct; presumably reserves the other 35 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 36;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 36); // Span of 36 elements beginning at _e0.
+ }
+ public struct Array37<T> : IArray<T> where T : unmanaged // Inline array of 37 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array36<T> _other; // Not referenced by name in this struct; presumably reserves the other 36 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 37;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 37); // Span of 37 elements beginning at _e0.
+ }
+ public struct Array38<T> : IArray<T> where T : unmanaged // Inline array of 38 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array37<T> _other; // Not referenced by name in this struct; presumably reserves the other 37 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 38;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 38); // Span of 38 elements beginning at _e0.
+ }
+ public struct Array39<T> : IArray<T> where T : unmanaged // Inline array of 39 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array38<T> _other; // Not referenced by name in this struct; presumably reserves the other 38 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 39;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 39); // Span of 39 elements beginning at _e0.
+ }
+ public struct Array40<T> : IArray<T> where T : unmanaged // Inline array of 40 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array39<T> _other; // Not referenced by name in this struct; presumably reserves the other 39 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 40;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 40); // Span of 40 elements beginning at _e0.
+ }
+ public struct Array41<T> : IArray<T> where T : unmanaged // Inline array of 41 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array40<T> _other; // Not referenced by name in this struct; presumably reserves the other 40 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 41;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 41); // Span of 41 elements beginning at _e0.
+ }
+ public struct Array42<T> : IArray<T> where T : unmanaged // Inline array of 42 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array41<T> _other; // Not referenced by name in this struct; presumably reserves the other 41 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 42;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 42); // Span of 42 elements beginning at _e0.
+ }
+ public struct Array43<T> : IArray<T> where T : unmanaged // Inline array of 43 elements of T, exposed as a span over its own fields.
+ {
+ T _e0; // First element; ToSpan() builds its span starting at this field.
+ Array42<T> _other; // Not referenced by name in this struct; presumably reserves the other 42 slots after _e0 (assumes sequential field layout - confirm).
+ public int Length => 43;
+ public ref T this[int index] => ref ToSpan()[index]; // Reference to the element at 'index' of the ToSpan() span.
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 43); // Span of 43 elements beginning at _e0.
+ }
+ /// <summary>Inline array of 44 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array43.</summary>
+ public struct Array44<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array43<T> _other;
+ public int Length => 44;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 44 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 44);
+ }
+ /// <summary>Inline array of 45 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array44.</summary>
+ public struct Array45<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array44<T> _other;
+ public int Length => 45;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 45 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 45);
+ }
+ /// <summary>Inline array of 46 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array45.</summary>
+ public struct Array46<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array45<T> _other;
+ public int Length => 46;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 46 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 46);
+ }
+ /// <summary>Inline array of 47 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array46.</summary>
+ public struct Array47<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array46<T> _other;
+ public int Length => 47;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 47 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 47);
+ }
+ /// <summary>Inline array of 48 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array47.</summary>
+ public struct Array48<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array47<T> _other;
+ public int Length => 48;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 48 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 48);
+ }
+ /// <summary>Inline array of 49 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array48.</summary>
+ public struct Array49<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array48<T> _other;
+ public int Length => 49;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 49 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 49);
+ }
+ /// <summary>Inline array of 50 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array49.</summary>
+ public struct Array50<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array49<T> _other;
+ public int Length => 50;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 50 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 50);
+ }
+ /// <summary>Inline array of 51 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array50.</summary>
+ public struct Array51<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array50<T> _other;
+ public int Length => 51;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 51 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 51);
+ }
+ /// <summary>Inline array of 52 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array51.</summary>
+ public struct Array52<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array51<T> _other;
+ public int Length => 52;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 52 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 52);
+ }
+ /// <summary>Inline array of 53 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array52.</summary>
+ public struct Array53<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array52<T> _other;
+ public int Length => 53;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 53 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 53);
+ }
+ /// <summary>Inline array of 54 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array53.</summary>
+ public struct Array54<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array53<T> _other;
+ public int Length => 54;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 54 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 54);
+ }
+ /// <summary>Inline array of 55 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array54.</summary>
+ public struct Array55<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array54<T> _other;
+ public int Length => 55;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 55 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 55);
+ }
+ /// <summary>Inline array of 56 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array55.</summary>
+ public struct Array56<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array55<T> _other;
+ public int Length => 56;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 56 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 56);
+ }
+ /// <summary>Inline array of 57 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array56.</summary>
+ public struct Array57<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array56<T> _other;
+ public int Length => 57;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 57 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 57);
+ }
+ /// <summary>Inline array of 58 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array57.</summary>
+ public struct Array58<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array57<T> _other;
+ public int Length => 58;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 58 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 58);
+ }
+ /// <summary>Inline array of 59 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array58.</summary>
+ public struct Array59<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array58<T> _other;
+ public int Length => 59;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 59 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 59);
+ }
+ /// <summary>Inline array of 60 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array59.</summary>
+ public struct Array60<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array59<T> _other;
+ public int Length => 60;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 60 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 60);
+ }
+ /// <summary>Inline array of 61 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array60.</summary>
+ public struct Array61<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array60<T> _other;
+ public int Length => 61;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 61 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 61);
+ }
+ /// <summary>Inline array of 62 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array61.</summary>
+ public struct Array62<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array61<T> _other;
+ public int Length => 62;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 62 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 62);
+ }
+ /// <summary>Inline array of 63 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array62.</summary>
+ public struct Array63<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array62<T> _other;
+ public int Length => 63;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 63 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 63);
+ }
+ /// <summary>Inline array of 64 <typeparamref name="T"/> elements, stored as one leading element followed by a nested Array63.</summary>
+ public struct Array64<T> : IArray<T> where T : unmanaged
+ {
+ T _e0;
+ Array63<T> _other;
+ public int Length => 64;
+ public ref T this[int index] => ref ToSpan()[index];
+ // The span starts at _e0 and covers 64 elements, so it reaches into _other (assumes the two fields are contiguous - TODO confirm layout).
+ public Span<T> ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 64);
+ }
+
+}
diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs
index 001c41f8..211a8c0d 100644
--- a/Ryujinx.Cpu/MemoryManager.cs
+++ b/Ryujinx.Cpu/MemoryManager.cs
@@ -194,6 +194,38 @@ namespace Ryujinx.Cpu
}
/// <summary>
+ /// Gets a region of memory that can be written to.
+ /// </summary>
+ /// <remarks>
+ /// If the requested region is not contiguous in physical memory,
+ /// this will perform an allocation, and flush the data (writing it
+ /// back to guest memory) on disposal.
+ /// </remarks>
+ /// <param name="va">Virtual address of the data</param>
+ /// <param name="size">Size of the data</param>
+ /// <returns>A writable region of memory containing the data</returns>
+ public WritableRegion GetWritableRegion(ulong va, int size)
+ {
+ // An empty request gets an empty region that never writes anything back.
+ if (size == 0)
+ {
+ return new WritableRegion(null, va, Memory<byte>.Empty);
+ }
+
+ if (!IsContiguous(va, size))
+ {
+ // Non-contiguous range: work on a private copy. Passing "this" makes the
+ // region write the copy back through this manager when it is disposed.
+ Memory<byte> copy = new byte[size];
+
+ GetSpan(va, size).CopyTo(copy.Span);
+
+ return new WritableRegion(this, va, copy);
+ }
+
+ // Contiguous range: expose the backing memory directly, no write-back needed.
+ return new WritableRegion(null, va, _backingMemory.GetMemory(GetPhysicalAddressInternal(va), size));
+ }
+
+ /// <summary>
/// Gets a reference for the given type at the specified virtual memory address.
/// </summary>
/// <remarks>
diff --git a/Ryujinx.Cpu/WritableRegion.cs b/Ryujinx.Cpu/WritableRegion.cs
new file mode 100644
index 00000000..5ea0a2d8
--- /dev/null
+++ b/Ryujinx.Cpu/WritableRegion.cs
@@ -0,0 +1,29 @@
+using System;
+
+namespace Ryujinx.Cpu
+{
+ /// <summary>
+ /// Writable view over a range of guest memory. When created with a memory manager,
+ /// the contents are written back to the virtual address on disposal.
+ /// </summary>
+ public sealed class WritableRegion : IDisposable
+ {
+ private readonly MemoryManager _mm;
+ private readonly ulong _va;
+
+ // A null manager means nothing has to be written back on disposal.
+ private bool NeedsWriteback
+ {
+ get { return _mm != null; }
+ }
+
+ /// <summary>
+ /// Memory that the owner of the region may modify.
+ /// </summary>
+ public Memory<byte> Memory { get; }
+
+ internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
+ {
+ Memory = memory;
+ _va = va;
+ _mm = mm;
+ }
+
+ /// <summary>
+ /// Writes the region back through the memory manager, if one was supplied.
+ /// </summary>
+ public void Dispose()
+ {
+ if (!NeedsWriteback)
+ {
+ return;
+ }
+
+ _mm.Write(_va, Memory.Span);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Device/AccessControl.cs b/Ryujinx.Graphics.Device/AccessControl.cs
new file mode 100644
index 00000000..02203783
--- /dev/null
+++ b/Ryujinx.Graphics.Device/AccessControl.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Access permissions of a device register. The values are bit flags and can be combined.
+ /// </summary>
+ [System.Flags]
+ public enum AccessControl
+ {
+ /// <summary>No access.</summary>
+ None = 0,
+ /// <summary>The register can be read.</summary>
+ ReadOnly = 1 << 0,
+ /// <summary>The register can be written.</summary>
+ WriteOnly = 1 << 1,
+ /// <summary>The register can be both read and written.</summary>
+ ReadWrite = ReadOnly | WriteOnly
+ }
+}
diff --git a/Ryujinx.Graphics.Device/DeviceState.cs b/Ryujinx.Graphics.Device/DeviceState.cs
new file mode 100644
index 00000000..ea6942ec
--- /dev/null
+++ b/Ryujinx.Graphics.Device/DeviceState.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Register block stored in a <typeparamref name="TState"/> structure and accessed
+ /// as 32-bit registers addressed by byte offset.
+ /// </summary>
+ /// <typeparam name="TState">Unmanaged structure whose public fields describe the registers</typeparam>
+ public class DeviceState<TState> : IDeviceState where TState : unmanaged
+ {
+ private const int RegisterSize = sizeof(int);
+
+ public TState State;
+
+ // One bit per 32-bit register: whether Read/Write may touch it.
+ private readonly BitArray _readableRegisters;
+ private readonly BitArray _writableRegisters;
+
+ // Callbacks keyed by the byte offset at which the owning field starts.
+ private readonly Dictionary<int, Func<int>> _readCallbacks;
+ private readonly Dictionary<int, Action<int>> _writeCallbacks;
+
+ /// <summary>
+ /// Builds the access masks and the callback tables from the public fields of <typeparamref name="TState"/>.
+ /// </summary>
+ /// <param name="callbacks">Optional read/write callbacks, keyed by field name</param>
+ public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
+ {
+ // Number of 32-bit registers needed to cover the state, rounded up.
+ int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;
+
+ _readableRegisters = new BitArray(size);
+ _writableRegisters = new BitArray(size);
+
+ _readCallbacks = new Dictionary<int, Func<int>>();
+ _writeCallbacks = new Dictionary<int, Action<int>>();
+
+ // NOTE(review): offsets are accumulated in the order GetFields() returns the fields;
+ // this assumes that order matches the memory layout of TState (checked by the assert below in debug builds).
+ var fields = typeof(TState).GetFields();
+ int offset = 0;
+
+ for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
+ {
+ var field = fields[fieldIndex];
+ var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();
+
+ int sizeOfField = SizeCalculator.SizeOf(field.FieldType);
+
+ // Mark every register covered by the field. Fields without a RegisterAttribute
+ // are treated as both readable and writable.
+ for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
+ {
+ _readableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
+ _writableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;
+ }
+
+ if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
+ {
+ if (cb.Read != null)
+ {
+ _readCallbacks.Add(offset, cb.Read);
+ }
+
+ if (cb.Write != null)
+ {
+ _writeCallbacks.Add(offset, cb.Write);
+ }
+ }
+
+ offset += sizeOfField;
+ }
+
+ Debug.Assert(offset == Unsafe.SizeOf<TState>());
+ }
+
+ /// <summary>
+ /// Reads the register that contains the given byte offset.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register; it is aligned down to a register boundary</param>
+ /// <returns>The callback result or the stored value; 0 if the offset is out of range or the register is not readable</returns>
+ public virtual int Read(int offset)
+ {
+ if (Check(offset) && _readableRegisters[offset / RegisterSize])
+ {
+ int alignedOffset = Align(offset);
+
+ if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
+ {
+ return read();
+ }
+ else
+ {
+ return GetRef<int>(alignedOffset);
+ }
+ }
+
+ return 0;
+ }
+
+ /// <summary>
+ /// Writes the register that contains the given byte offset.
+ /// The write is ignored if the offset is out of range or the register is not writable.
+ /// </summary>
+ /// <param name="offset">Byte offset of the register; it is aligned down to a register boundary</param>
+ /// <param name="data">Value to write; passed to the write callback instead of being stored when one is registered</param>
+ public virtual void Write(int offset, int data)
+ {
+ if (Check(offset) && _writableRegisters[offset / RegisterSize])
+ {
+ int alignedOffset = Align(offset);
+
+ if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
+ {
+ write(data);
+ }
+ else
+ {
+ GetRef<int>(alignedOffset) = data;
+ }
+ }
+ }
+
+ // True when the aligned offset lies inside the state; the unsigned cast also rejects negative offsets.
+ private bool Check(int offset)
+ {
+ return (uint)Align(offset) < Unsafe.SizeOf<TState>();
+ }
+
+ /// <summary>
+ /// Gets a reference to a value of type <typeparamref name="T"/> located at a byte offset inside the state.
+ /// </summary>
+ /// <typeparam name="T">Type of the value</typeparam>
+ /// <param name="offset">Byte offset into the state</param>
+ /// <returns>Reference to the value at the offset</returns>
+ /// <exception cref="ArgumentOutOfRangeException">The value would start before or end after the state</exception>
+ public ref T GetRef<T>(int offset) where T : unmanaged
+ {
+ // Reject negative offsets explicitly and do the end-of-range check in 64 bits.
+ // Casting (offset + size) to uint let small negative offsets (for example -4 for an int)
+ // wrap to an in-range value and reach memory located before State.
+ if (offset < 0 || (long)offset + Unsafe.SizeOf<T>() > Unsafe.SizeOf<TState>())
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+
+ return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
+ }
+
+ // Rounds the offset down to a multiple of the register size.
+ private static int Align(int offset)
+ {
+ return offset & ~(RegisterSize - 1);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Device/IDeviceState.cs b/Ryujinx.Graphics.Device/IDeviceState.cs
new file mode 100644
index 00000000..077d69f2
--- /dev/null
+++ b/Ryujinx.Graphics.Device/IDeviceState.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Register access interface of a device: 32-bit values are read and written at an offset.
+ /// </summary>
+ public interface IDeviceState
+ {
+ /// <summary>Reads the register at the given offset.</summary>
+ /// <param name="offset">Register offset. NOTE(review): the implementation and caller added in this change use byte offsets - confirm for other implementers</param>
+ /// <returns>Value of the register</returns>
+ int Read(int offset);
+ /// <summary>Writes a value to the register at the given offset.</summary>
+ /// <param name="offset">Register offset, in the same unit as for <see cref="Read"/></param>
+ /// <param name="data">Value to write</param>
+ void Write(int offset, int data);
+ }
+}
diff --git a/Ryujinx.Graphics.Device/RegisterAttribute.cs b/Ryujinx.Graphics.Device/RegisterAttribute.cs
new file mode 100644
index 00000000..6e198963
--- /dev/null
+++ b/Ryujinx.Graphics.Device/RegisterAttribute.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Declares the access permissions of the register represented by a field.
+ /// </summary>
+ [AttributeUsage(AttributeTargets.Field)]
+ public sealed class RegisterAttribute : Attribute
+ {
+ /// <summary>
+ /// Access permissions given when the attribute was applied.
+ /// </summary>
+ public AccessControl AccessControl { get; }
+
+ /// <summary>
+ /// Creates the attribute with the given access permissions.
+ /// </summary>
+ /// <param name="ac">Access permissions of the register</param>
+ public RegisterAttribute(AccessControl ac) => AccessControl = ac;
+ }
+}
diff --git a/Ryujinx.Graphics.Device/RwCallback.cs b/Ryujinx.Graphics.Device/RwCallback.cs
new file mode 100644
index 00000000..6f1c8898
--- /dev/null
+++ b/Ryujinx.Graphics.Device/RwCallback.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Pair of optional callbacks that handle writes to and reads from a register.
+ /// </summary>
+ public struct RwCallback
+ {
+ /// <summary>Callback that receives a written value; may be null.</summary>
+ public Action<int> Write { get; }
+ /// <summary>Callback that produces a read value; may be null.</summary>
+ public Func<int> Read { get; }
+
+ /// <summary>
+ /// Creates a callback pair.
+ /// </summary>
+ /// <param name="write">Write callback, or null</param>
+ /// <param name="read">Read callback, or null</param>
+ public RwCallback(Action<int> write, Func<int> read) => (Write, Read) = (write, read);
+ }
+}
diff --git a/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj b/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
new file mode 100644
index 00000000..7c4ae4ca
--- /dev/null
+++ b/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
@@ -0,0 +1,7 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ </PropertyGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Device/SizeCalculator.cs b/Ryujinx.Graphics.Device/SizeCalculator.cs
new file mode 100644
index 00000000..7cc48915
--- /dev/null
+++ b/Ryujinx.Graphics.Device/SizeCalculator.cs
@@ -0,0 +1,63 @@
+using System;
+using System.Reflection;
+
+namespace Ryujinx.Graphics.Device
+{
+ /// <summary>
+ /// Computes the size in bytes of a type through reflection.
+ /// </summary>
+ static class SizeCalculator
+ {
+ /// <summary>
+ /// Gets the size in bytes of an enum, pointer, struct or primitive type.
+ /// </summary>
+ /// <param name="type">Type to measure</param>
+ /// <returns>Size of the type in bytes</returns>
+ /// <exception cref="ArgumentException">The type is none of the supported kinds</exception>
+ public static int SizeOf(Type type)
+ {
+ // Enums are measured through their underlying integral type.
+ Type measured = type.IsEnum ? type.GetEnumUnderlyingType() : type;
+
+ // Pointers and native-sized integers have the size of IntPtr.
+ if (measured.IsPointer || measured == typeof(IntPtr) || measured == typeof(UIntPtr))
+ {
+ return IntPtr.Size;
+ }
+
+ // Value types that are not primitives are handled as structs.
+ if (measured.IsValueType && !measured.IsPrimitive)
+ {
+ return SizeOfStruct(measured);
+ }
+
+ switch (Type.GetTypeCode(measured))
+ {
+ case TypeCode.SByte: return sizeof(sbyte);
+ case TypeCode.Byte: return sizeof(byte);
+ case TypeCode.Int16: return sizeof(short);
+ case TypeCode.UInt16: return sizeof(ushort);
+ case TypeCode.Int32: return sizeof(int);
+ case TypeCode.UInt32: return sizeof(uint);
+ case TypeCode.Int64: return sizeof(long);
+ case TypeCode.UInt64: return sizeof(ulong);
+ case TypeCode.Char: return sizeof(char);
+ case TypeCode.Single: return sizeof(float);
+ case TypeCode.Double: return sizeof(double);
+ case TypeCode.Decimal: return sizeof(decimal);
+ case TypeCode.Boolean: return sizeof(bool);
+ default: throw new ArgumentException($"Length for type \"{measured.Name}\" is unknown.");
+ }
+ }
+
+ // Size of a struct: its explicit layout size when one is declared, otherwise the plain
+ // sum of the sizes of its instance fields (no alignment padding is added).
+ private static int SizeOfStruct(Type structType)
+ {
+ int explicitSize = structType.StructLayoutAttribute.Size;
+
+ if (explicitSize != 0)
+ {
+ return explicitSize;
+ }
+
+ int total = 0;
+
+ foreach (FieldInfo member in structType.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
+ {
+ total += SizeOf(member.FieldType);
+ }
+
+ return total;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index e40984af..60fba006 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -67,7 +67,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
TextureManager.SetComputeTextureBufferIndex(state.Get<int>(MethodOffset.TextureBufferIndex));
- ShaderProgramInfo info = cs.Shaders[0].Program.Info;
+ ShaderProgramInfo info = cs.Shaders[0].Program.Info;
for (int index = 0; index < info.CBuffers.Count; index++)
{
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
index c8d47b9f..225c732e 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs
@@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
else
{
evt.Flush();
- return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
+ return (_context.MemoryAccessor.Read<ulong>(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}
}
@@ -87,11 +87,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
if (evt != null && evt2 == null)
{
- useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual);
+ useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.Read<ulong>(gpuVa + 16), isEqual);
}
else if (evt == null && evt2 != null)
{
- useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual);
+ useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.Read<ulong>(gpuVa), isEqual);
}
else
{
@@ -107,8 +107,8 @@ namespace Ryujinx.Graphics.Gpu.Engine
evt?.Flush();
evt2?.Flush();
- ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa);
- ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16);
+ ulong x = _context.MemoryAccessor.Read<ulong>(gpuVa);
+ ulong y = _context.MemoryAccessor.Read<ulong>(gpuVa + 16);
return (isEqual ? x == y : x != y) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False;
}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
index 06298cdf..d5b11c2c 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
@@ -466,7 +466,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
bool flipY = yControl.HasFlag(YControl.NegateY);
Origin origin = yControl.HasFlag(YControl.TriangleRastFlip) ? Origin.LowerLeft : Origin.UpperLeft;
-
+
_context.Renderer.Pipeline.SetOrigin(origin);
// The triangle rast flip flag only affects rasterization, the viewport is not flipped.
diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs
index 5e117831..b07694b9 100644
--- a/Ryujinx.Graphics.Gpu/GpuContext.cs
+++ b/Ryujinx.Graphics.Gpu/GpuContext.cs
@@ -77,7 +77,7 @@ namespace Ryujinx.Graphics.Gpu
{
Renderer = renderer;
- MemoryManager = new MemoryManager();
+ MemoryManager = new MemoryManager(this);
MemoryAccessor = new MemoryAccessor(this);
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
index ccd56ae2..69bee541 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
@@ -643,6 +643,8 @@ namespace Ryujinx.Graphics.Gpu.Image
overlap.ChangeSize(info.Width, info.Height, info.DepthOrLayers);
}
+ overlap.SynchronizeMemory();
+
return overlap;
}
}
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs
index 38f448d9..5cc8ec24 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs
@@ -59,42 +59,6 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
- /// Reads a 32-bits signed integer from GPU mapped memory.
- /// </summary>
- /// <param name="gpuVa">GPU virtual address where the value is located</param>
- /// <returns>The value at the specified memory location</returns>
- public int ReadInt32(ulong gpuVa)
- {
- ulong processVa = _context.MemoryManager.Translate(gpuVa);
-
- return _context.PhysicalMemory.Read<int>(processVa);
- }
-
- /// <summary>
- /// Reads a 64-bits unsigned integer from GPU mapped memory.
- /// </summary>
- /// <param name="gpuVa">GPU virtual address where the value is located</param>
- /// <returns>The value at the specified memory location</returns>
- public ulong ReadUInt64(ulong gpuVa)
- {
- ulong processVa = _context.MemoryManager.Translate(gpuVa);
-
- return _context.PhysicalMemory.Read<ulong>(processVa);
- }
-
- /// <summary>
- /// Reads a 8-bits unsigned integer from GPU mapped memory.
- /// </summary>
- /// <param name="gpuVa">GPU virtual address where the value is located</param>
- /// <param name="value">The value to be written</param>
- public void WriteByte(ulong gpuVa, byte value)
- {
- ulong processVa = _context.MemoryManager.Translate(gpuVa);
-
- _context.PhysicalMemory.Write(processVa, MemoryMarshal.CreateSpan(ref value, 1));
- }
-
- /// <summary>
/// Writes a 32-bits signed integer to GPU mapped memory.
/// </summary>
/// <param name="gpuVa">GPU virtual address to write the value into</param>
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index a9a8fbac..2d988f8d 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -1,4 +1,7 @@
+using Ryujinx.Cpu;
using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Memory
{
@@ -33,15 +36,70 @@ namespace Ryujinx.Graphics.Gpu.Memory
public event EventHandler<UnmapEventArgs> MemoryUnmapped;
+ private GpuContext _context;
+
/// <summary>
/// Creates a new instance of the GPU memory manager.
/// </summary>
- public MemoryManager()
+ public MemoryManager(GpuContext context)
{
+ _context = context;
_pageTable = new ulong[PtLvl0Size][];
}
/// <summary>
+ /// Reads data from GPU mapped memory.
+ /// </summary>
+ /// <typeparam name="T">Type of the data</typeparam>
+ /// <param name="gpuVa">GPU virtual address where the data is located</param>
+ /// <returns>The data at the specified memory location</returns>
+ public T Read<T>(ulong gpuVa) where T : unmanaged
+ {
+ // Translate the GPU address, fetch exactly sizeof(T) bytes and reinterpret them as T.
+ ReadOnlySpan<byte> raw = _context.PhysicalMemory.GetSpan(Translate(gpuVa), Unsafe.SizeOf<T>());
+
+ return MemoryMarshal.Cast<byte, T>(raw)[0];
+ }
+
+ /// <summary>
+ /// Gets a read-only span of data from GPU mapped memory.
+ /// This reads as much data as possible, up to the specified maximum size.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the data is located</param>
+ /// <param name="size">Size of the data</param>
+ /// <returns>The span of the data at the specified memory location</returns>
+ public ReadOnlySpan<byte> GetSpan(ulong gpuVa, int size)
+ {
+ // The GPU address is translated first; the span itself comes from physical memory.
+ return _context.PhysicalMemory.GetSpan(Translate(gpuVa), size);
+ }
+
+ /// <summary>
+ /// Gets a writable region from GPU mapped memory.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address where the region starts</param>
+ /// <param name="size">Size of the region in bytes</param>
+ /// <returns>A writable region with the data at the specified memory location</returns>
+ public WritableRegion GetWritableRegion(ulong gpuVa, int size)
+ {
+ // The GPU address is translated first; the region itself comes from physical memory.
+ return _context.PhysicalMemory.GetWritableRegion(Translate(gpuVa), size);
+ }
+
+ /// <summary>
+ /// Writes data to GPU mapped memory.
+ /// </summary>
+ /// <param name="gpuVa">GPU virtual address to write the data into</param>
+ /// <param name="data">The data to be written</param>
+ public void Write(ulong gpuVa, ReadOnlySpan<byte> data)
+ {
+ // The GPU address is translated first; the data is then written to physical memory.
+ _context.PhysicalMemory.Write(Translate(gpuVa), data);
+ }
+
+ /// <summary>
/// Maps a given range of pages to the specified CPU virtual address.
/// </summary>
/// <remarks>
diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs
index 4a80aa1a..88beab8f 100644
--- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Cpu;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@@ -35,6 +36,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
+ /// Gets a writable region from the application process.
+ /// </summary>
+ /// <param name="address">Start address of the range</param>
+ /// <param name="size">Size of the range in bytes</param>
+ /// <returns>A writable region with the data at the specified memory location</returns>
+ public WritableRegion GetWritableRegion(ulong address, int size) => _cpuMemory.GetWritableRegion(address, size);
+
+ /// <summary>
/// Reads data from the application process.
/// </summary>
/// <typeparam name="T">Type of the structure</typeparam>
diff --git a/Ryujinx.Graphics.Host1x/ClassId.cs b/Ryujinx.Graphics.Host1x/ClassId.cs
new file mode 100644
index 00000000..dfeadd4c
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/ClassId.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Class identifiers used to select a device; members are listed in ascending numeric order.
+ /// </summary>
+ public enum ClassId
+ {
+ Host1x = 0x01,
+ Mpeg = 0x20,
+ Nvenc = 0x21,
+ Vi = 0x30,
+ Isp = 0x32,
+ Ispb = 0x34,
+ Vii2c = 0x36,
+ Vic = 0x5D,
+ Gr3d = 0x60,
+ Gpu = 0x61,
+ Nvjpg = 0xC0,
+ Tsec = 0xE0,
+ Tsecb = 0xE1,
+ Nvdec = 0xF0
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/Devices.cs b/Ryujinx.Graphics.Host1x/Devices.cs
new file mode 100644
index 00000000..5b3bed6b
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/Devices.cs
@@ -0,0 +1,32 @@
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Table of devices indexed by class identifier.
+ /// </summary>
+ class Devices : IDisposable
+ {
+ private readonly Dictionary<ClassId, IDeviceState> _devices = new Dictionary<ClassId, IDeviceState>();
+
+ /// <summary>
+ /// Registers a device for a class, replacing any device already registered for it.
+ /// </summary>
+ /// <param name="classId">Class identifier of the device</param>
+ /// <param name="device">Device to register</param>
+ public void RegisterDevice(ClassId classId, IDeviceState device)
+ {
+ _devices[classId] = device;
+ }
+
+ /// <summary>
+ /// Looks up the device registered for a class.
+ /// </summary>
+ /// <param name="classId">Class identifier of the device</param>
+ /// <returns>The registered device, or null if the class has none</returns>
+ public IDeviceState GetDevice(ClassId classId)
+ {
+ if (_devices.TryGetValue(classId, out IDeviceState found))
+ {
+ return found;
+ }
+
+ return null;
+ }
+
+ /// <summary>
+ /// Disposes the registered devices that are <see cref="ThiDevice"/> instances.
+ /// </summary>
+ public void Dispose()
+ {
+ foreach (IDeviceState registered in _devices.Values)
+ {
+ // Only ThiDevice entries are disposed; other devices are left alone.
+ if (!(registered is ThiDevice thi))
+ {
+ continue;
+ }
+
+ thi.Dispose();
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/Host1xClass.cs b/Ryujinx.Graphics.Host1x/Host1xClass.cs
new file mode 100644
index 00000000..1a1297f9
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/Host1xClass.cs
@@ -0,0 +1,33 @@
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Synchronization;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Host1x class device: a register block in which writes to WaitSyncpt32 wait on a syncpoint.
+ /// </summary>
+ public class Host1xClass : IDeviceState
+ {
+ private readonly SynchronizationManager _syncMgr;
+ private readonly DeviceState<Host1xClassRegisters> _state;
+
+ /// <summary>
+ /// Creates the class device.
+ /// </summary>
+ /// <param name="syncMgr">Synchronization manager used to wait on syncpoints</param>
+ public Host1xClass(SynchronizationManager syncMgr)
+ {
+ _syncMgr = syncMgr;
+
+ // Writes to WaitSyncpt32 go to the handler below; the register has no read callback.
+ var callbacks = new Dictionary<string, RwCallback>
+ {
+ [nameof(Host1xClassRegisters.WaitSyncpt32)] = new RwCallback(WaitSyncpt32, null)
+ };
+
+ _state = new DeviceState<Host1xClassRegisters>(callbacks);
+ }
+
+ /// <summary>Reads a register through the underlying device state.</summary>
+ public int Read(int offset)
+ {
+ return _state.Read(offset);
+ }
+
+ /// <summary>Writes a register through the underlying device state.</summary>
+ public void Write(int offset, int data)
+ {
+ _state.Write(offset, data);
+ }
+
+ // Waits, without timeout, until the syncpoint selected by the low 8 bits of the written
+ // value reaches the threshold currently held in LoadSyncptPayload32.
+ private void WaitSyncpt32(int data)
+ {
+ uint payload = _state.State.LoadSyncptPayload32;
+ uint syncptIndex = (uint)(data & 0xFF);
+
+ _syncMgr.WaitOnSyncpoint(syncptIndex, payload, Timeout.InfiniteTimeSpan);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs b/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs
new file mode 100644
index 00000000..e476bdfa
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs
@@ -0,0 +1,41 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Register layout of the Host1x class. Field order defines the register offsets, so fields must not be reordered.
+ /// </summary>
+ /// <remarks>
+ /// Every field is a uint or an inline array of uint. Counting 4 bytes per element, the Reserved* fields
+ /// start at the byte offsets in their names (0xC, 0x50, 0x134) - assumes the fields are laid out
+ /// sequentially without padding.
+ /// </remarks>
+ struct Host1xClassRegisters
+ {
+ public uint IncrSyncpt;
+ public uint IncrSyncptCntrl;
+ public uint IncrSyncptError;
+ // 5 unused registers, byte offsets 0xC to 0x1F.
+ public Array5<uint> ReservedC;
+ public uint WaitSyncpt;
+ public uint WaitSyncptBase;
+ public uint WaitSyncptIncr;
+ public uint LoadSyncptBase;
+ public uint IncrSyncptBase;
+ public uint Clear;
+ public uint Wait;
+ public uint WaitWithIntr;
+ public uint DelayUsec;
+ public uint TickcountHi;
+ public uint TickcountLo;
+ public uint Tickctrl;
+ // 23 unused registers, byte offsets 0x50 to 0xAB.
+ public Array23<uint> Reserved50;
+ public uint Indctrl;
+ public uint Indoff2;
+ public uint Indoff;
+ // 31 registers, byte offsets 0xB8 to 0x133.
+ public Array31<uint> Inddata;
+ public uint Reserved134;
+ // Read by Host1xClass as the threshold when WaitSyncpt32 is written.
+ public uint LoadSyncptPayload32;
+ public uint Stallctrl;
+ // Host1xClass installs a write callback for this register.
+ public uint WaitSyncpt32;
+ public uint WaitSyncptBase32;
+ public uint LoadSyncptBase32;
+ public uint IncrSyncptBase32;
+ public uint StallcountHi;
+ public uint StallcountLo;
+ public uint Xrefctrl;
+ public uint ChannelXrefHi;
+ public uint ChannelXrefLo;
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/Host1xDevice.cs b/Ryujinx.Graphics.Host1x/Host1xDevice.cs
new file mode 100644
index 00000000..6406378f
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/Host1xDevice.cs
@@ -0,0 +1,123 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Synchronization;
+using System;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Decodes submitted command buffers word by word and forwards the resulting register
+ /// writes to the device selected by the most recent SetClass command.
+ /// </summary>
+ public sealed class Host1xDevice : IDisposable
+ {
+ private readonly SyncptIncrManager _syncptIncrMgr;
+ private readonly AsyncWorkQueue<int[]> _commandQueue;
+
+ private readonly Devices _devices = new Devices();
+
+ public Host1xClass Class { get; }
+
+ // Device selected by the last SetClass command; null when that class has no registered device.
+ private IDeviceState _device;
+
+ // Decoder state carried between words:
+ // _count - data words still expected by an Incr/NonIncr command
+ // _offset - register offset, in 32-bit words, of the next write
+ // _mask - one bit per register still to be written by a SetClass/Mask command
+ // _incrementing - whether _offset advances after each data word
+ private int _count;
+ private int _offset;
+ private int _mask;
+ private bool _incrementing;
+
+ /// <summary>
+ /// Creates the device and registers the Host1x class itself.
+ /// </summary>
+ /// <param name="syncMgr">Synchronization manager shared with the Host1x class and the syncpoint increment manager</param>
+ public Host1xDevice(SynchronizationManager syncMgr)
+ {
+ _syncptIncrMgr = new SyncptIncrManager(syncMgr);
+ // Submitted buffers are handed to Process through this queue (presumably on its own worker - AsyncWorkQueue is not visible here).
+ _commandQueue = new AsyncWorkQueue<int[]>(Process, "Ryujinx.Host1xProcessor");
+
+ Class = new Host1xClass(syncMgr);
+
+ _devices.RegisterDevice(ClassId.Host1x, Class);
+ }
+
+ /// <summary>
+ /// Registers a device for a class, wrapped in a <see cref="ThiDevice"/>.
+ /// </summary>
+ /// <param name="classId">Class identifier of the device</param>
+ /// <param name="device">Device to register; must not be null</param>
+ /// <exception cref="ArgumentNullException"><paramref name="device"/> is null</exception>
+ public void RegisterDevice(ClassId classId, IDeviceState device)
+ {
+ var thi = new ThiDevice(classId, device ?? throw new ArgumentNullException(nameof(device)), _syncptIncrMgr);
+ _devices.RegisterDevice(classId, thi);
+ }
+
+ /// <summary>
+ /// Queues a copy of the command buffer for processing.
+ /// </summary>
+ /// <param name="commandBuffer">Command words to process</param>
+ public void Submit(ReadOnlySpan<int> commandBuffer)
+ {
+ _commandQueue.Add(commandBuffer.ToArray());
+ }
+
+ // Feeds every word of the buffer to the decoder, in order.
+ private void Process(int[] commandBuffer)
+ {
+ for (int index = 0; index < commandBuffer.Length; index++)
+ {
+ Step(commandBuffer[index]);
+ }
+ }
+
+ // Consumes one word: either a data word owed to a previous command, or a new command.
+ private void Step(int value)
+ {
+ if (_mask != 0)
+ {
+ // Data word of a masked write: it goes to the register selected by the lowest set bit, which is then cleared.
+ int lbs = BitOperations.TrailingZeroCount(_mask);
+
+ _mask &= ~(1 << lbs);
+
+ DeviceWrite(_offset + lbs, value);
+
+ return;
+ }
+ else if (_count != 0)
+ {
+ // Data word of an Incr/NonIncr command.
+ _count--;
+
+ DeviceWrite(_offset, value);
+
+ if (_incrementing)
+ {
+ _offset++;
+ }
+
+ return;
+ }
+
+ // No data pending: the top 4 bits of the word select the opcode.
+ OpCode opCode = (OpCode)((value >> 28) & 0xf);
+
+ switch (opCode)
+ {
+ case OpCode.SetClass:
+ _mask = value & 0x3f;
+ ClassId classId = (ClassId)((value >> 6) & 0x3ff);
+ _offset = (value >> 16) & 0xfff;
+ _device = _devices.GetDevice(classId);
+ break;
+ case OpCode.Incr:
+ case OpCode.NonIncr:
+ _count = value & 0xffff;
+ _offset = (value >> 16) & 0xfff;
+ _incrementing = opCode == OpCode.Incr;
+ break;
+ case OpCode.Mask:
+ _mask = value & 0xffff;
+ _offset = (value >> 16) & 0xfff;
+ break;
+ case OpCode.Imm:
+ // NOTE(review): only the low 12 bits are used as immediate data, while Incr/Mask use the low 16 bits - confirm the field width against the Host1x documentation.
+ int data = value & 0xfff;
+ _offset = (value >> 16) & 0xfff;
+ DeviceWrite(_offset, data);
+ break;
+ default:
+ Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\".");
+ break;
+ }
+ }
+
+ // Converts the word offset to a byte offset; the write is dropped when no device is selected.
+ private void DeviceWrite(int offset, int data)
+ {
+ _device?.Write(offset * 4, data);
+ }
+
+ /// <summary>
+ /// Disposes the command queue and then the registered devices.
+ /// </summary>
+ public void Dispose()
+ {
+ _commandQueue.Dispose();
+ _devices.Dispose();
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/OpCode.cs b/Ryujinx.Graphics.Host1x/OpCode.cs
new file mode 100644
index 00000000..2ec6034b
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/OpCode.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Command opcodes. Host1xDevice casts the top 4 bits of a command word to this enum, so the
+ /// implicit values (0 for SetClass up to 14 for Extend) depend on declaration order - do not reorder.
+ /// </summary>
+ enum OpCode
+ {
+ SetClass,
+ Incr,
+ NonIncr,
+ Mask,
+ Imm,
+ // Host1xDevice.Step handles only the opcodes above; the ones below are logged as unsupported.
+ Restart,
+ Gather,
+ SetStrmId,
+ SetAppId,
+ SetPyld,
+ IncrW,
+ NonIncrW,
+ GatherW,
+ RestartW,
+ Extend
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj b/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj
new file mode 100644
index 00000000..4c0736cf
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj
@@ -0,0 +1,20 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+ <AllowUnsafeBlocks>false</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs b/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs
new file mode 100644
index 00000000..82ac5e7d
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs
@@ -0,0 +1,99 @@
+using Ryujinx.Graphics.Gpu.Synchronization;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Keeps an ordered list of syncpoint increments and applies them, front to back,
+ /// once every increment queued before them has been marked as done.
+ /// </summary>
+ class SyncptIncrManager
+ {
+     /// <summary>
+     /// One queued increment. Instances are immutable; marking an entry as done
+     /// replaces it with a copy.
+     /// </summary>
+     private struct SyncptIncr
+     {
+         public uint Id { get; }
+         public ClassId ClassId { get; }
+         public uint SyncptId { get; }
+         public bool Done { get; }
+
+         public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false)
+         {
+             Id = id;
+             ClassId = classId;
+             SyncptId = syncptId;
+             Done = done;
+         }
+     }
+
+     private readonly SynchronizationManager _syncMgr;
+
+     // Also used as the lock object guarding the list and _currentId.
+     private readonly List<SyncptIncr> _incrs = new List<SyncptIncr>();
+
+     private uint _currentId;
+
+     public SyncptIncrManager(SynchronizationManager syncMgr)
+     {
+         _syncMgr = syncMgr;
+     }
+
+     /// <summary>
+     /// Queues an increment that is already done and flushes whatever is ready.
+     /// </summary>
+     /// <param name="id">Syncpoint to increment</param>
+     public void Increment(uint id)
+     {
+         lock (_incrs)
+         {
+             SyncptIncr immediate = new SyncptIncr(id: 0, classId: 0, syncptId: id, done: true);
+
+             _incrs.Add(immediate);
+
+             IncrementAllDone();
+         }
+     }
+
+     /// <summary>
+     /// Queues an increment that stays pending until <see cref="SignalDone"/> is called
+     /// with the returned handle.
+     /// </summary>
+     /// <param name="classId">Class stored alongside the pending increment</param>
+     /// <param name="id">Syncpoint to increment</param>
+     /// <returns>Handle identifying the pending increment</returns>
+     public uint IncrementWhenDone(ClassId classId, uint id)
+     {
+         lock (_incrs)
+         {
+             uint handle = _currentId;
+
+             _currentId++;
+
+             _incrs.Add(new SyncptIncr(handle, classId, id));
+
+             return handle;
+         }
+     }
+
+     /// <summary>
+     /// Marks the first queued increment whose id equals <paramref name="handle"/> as done,
+     /// then flushes whatever is ready.
+     /// </summary>
+     /// <param name="handle">Handle returned by <see cref="IncrementWhenDone"/></param>
+     public void SignalDone(uint handle)
+     {
+         lock (_incrs)
+         {
+             int index = _incrs.FindIndex(entry => entry.Id == handle);
+
+             if (index >= 0)
+             {
+                 SyncptIncr pending = _incrs[index];
+
+                 _incrs[index] = new SyncptIncr(pending.Id, pending.ClassId, pending.SyncptId, true);
+             }
+
+             IncrementAllDone();
+         }
+     }
+
+     /// <summary>
+     /// Applies and removes the leading run of done increments, stopping at the first
+     /// entry that is still pending.
+     /// </summary>
+     private void IncrementAllDone()
+     {
+         lock (_incrs)
+         {
+             int flushed = 0;
+
+             while (flushed < _incrs.Count && _incrs[flushed].Done)
+             {
+                 _syncMgr.IncrementSyncpoint(_incrs[flushed].SyncptId);
+
+                 flushed++;
+             }
+
+             _incrs.RemoveRange(0, flushed);
+         }
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/ThiDevice.cs b/Ryujinx.Graphics.Host1x/ThiDevice.cs
new file mode 100644
index 00000000..8e3e11b0
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/ThiDevice.cs
@@ -0,0 +1,96 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ /// <summary>
+ /// Register front-end placed in front of another device. Writes to the IncrSyncpt and
+ /// Method1 registers are turned into actions that a work queue processes in order.
+ /// </summary>
+ class ThiDevice : IDeviceState, IDisposable
+ {
+     /// <summary>
+     /// Base type of the items placed on the command queue.
+     /// </summary>
+     private class CommandAction
+     {
+         public int Data { get; }
+
+         public CommandAction(int data)
+         {
+             Data = data;
+         }
+     }
+
+     /// <summary>
+     /// Queued write of <see cref="CommandAction.Data"/> to the wrapped device at <see cref="Method"/>.
+     /// </summary>
+     private class MethodCallAction : CommandAction
+     {
+         public int Method { get; }
+
+         public MethodCallAction(int method, int data) : base(data)
+         {
+             Method = method;
+         }
+     }
+
+     /// <summary>
+     /// Queued completion signal; <see cref="CommandAction.Data"/> carries the increment handle.
+     /// </summary>
+     private class SyncptIncrAction : CommandAction
+     {
+         public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle)
+         {
+         }
+     }
+
+     private readonly ClassId _classId;
+     private readonly IDeviceState _device;
+     private readonly SyncptIncrManager _syncptIncrMgr;
+     private readonly AsyncWorkQueue<CommandAction> _commandQueue;
+     private readonly DeviceState<ThiRegisters> _state;
+
+     public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr)
+     {
+         _classId = classId;
+         _device = device;
+         _syncptIncrMgr = syncptIncrMgr;
+
+         string workerName = $"Ryujinx.{classId}Processor";
+
+         _commandQueue = new AsyncWorkQueue<CommandAction>(Process, workerName);
+
+         var callbacks = new Dictionary<string, RwCallback>();
+
+         callbacks.Add(nameof(ThiRegisters.IncrSyncpt), new RwCallback(IncrSyncpt, null));
+         callbacks.Add(nameof(ThiRegisters.Method1), new RwCallback(Method1, null));
+
+         _state = new DeviceState<ThiRegisters>(callbacks);
+     }
+
+     public int Read(int offset)
+     {
+         return _state.Read(offset);
+     }
+
+     public void Write(int offset, int data)
+     {
+         _state.Write(offset, data);
+     }
+
+     // Bits 0-7 of data select the syncpoint; bits 8-15 hold the condition
+     // (0 = Immediate, 1 = Done).
+     private void IncrSyncpt(int data)
+     {
+         uint syncpointId = (uint)(data & 0xFF);
+         bool immediate = (uint)((data >> 8) & 0xFF) == 0;
+
+         if (immediate)
+         {
+             _syncptIncrMgr.Increment(syncpointId);
+
+             return;
+         }
+
+         uint handle = _syncptIncrMgr.IncrementWhenDone(_classId, syncpointId);
+
+         _commandQueue.Add(new SyncptIncrAction(handle));
+     }
+
+     // Queues a write of data to the wrapped device, addressed by the current Method0 value times 4.
+     private void Method1(int data)
+     {
+         int method = (int)_state.State.Method0 * 4;
+
+         _commandQueue.Add(new MethodCallAction(method, data));
+     }
+
+     // Work queue callback: handles one queued action.
+     private void Process(CommandAction cmdAction)
+     {
+         switch (cmdAction)
+         {
+             case SyncptIncrAction syncptIncrAction:
+                 _syncptIncrMgr.SignalDone((uint)syncptIncrAction.Data);
+                 break;
+             case MethodCallAction methodCallAction:
+                 _device.Write(methodCallAction.Method, methodCallAction.Data);
+                 break;
+         }
+     }
+
+     public void Dispose()
+     {
+         _commandQueue.Dispose();
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Host1x/ThiRegisters.cs b/Ryujinx.Graphics.Host1x/ThiRegisters.cs
new file mode 100644
index 00000000..00c93182
--- /dev/null
+++ b/Ryujinx.Graphics.Host1x/ThiRegisters.cs
@@ -0,0 +1,22 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Host1x
+{
+ // Register block used as the state type of DeviceState<ThiRegisters> in ThiDevice.
+ // Field order defines the layout, so fields must not be reordered or removed.
+ // NOTE(review): the hexadecimal suffixes of the Reserved* fields look like byte offsets,
+ // assuming each ArrayN<uint> occupies N consecutive 32-bit words - confirm against
+ // the ArrayN definitions.
+ struct ThiRegisters
+ {
+ public uint IncrSyncpt; // Writes are routed to ThiDevice.IncrSyncpt.
+ public uint Reserved4;
+ public uint IncrSyncptErr;
+ public uint CtxswIncrSyncpt;
+ public Array4<uint> Reserved10;
+ public uint Ctxsw;
+ public uint Reserved24;
+ public uint ContSyncptEof;
+ public Array5<uint> Reserved2C;
+ public uint Method0; // Read by ThiDevice.Method1 to address the wrapped device.
+ public uint Method1; // Writes are routed to ThiDevice.Method1.
+ public Array12<uint> Reserved48;
+ public uint IntStatus;
+ public uint IntMask;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.H264/Decoder.cs b/Ryujinx.Graphics.Nvdec.H264/Decoder.cs
new file mode 100644
index 00000000..7a7e184a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/Decoder.cs
@@ -0,0 +1,40 @@
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.H264
+{
+ /// <summary>
+ /// H264 decoder that hands frames to an <see cref="FFmpegContext"/>.
+ /// </summary>
+ public class Decoder : IH264Decoder
+ {
+     private const int WorkBufferSize = 0x200;
+
+     // Scratch space receiving the parameter sets rebuilt for every frame.
+     private readonly byte[] _workBuffer = new byte[WorkBufferSize];
+
+     private readonly FFmpegContext _context = new FFmpegContext();
+
+     public bool IsHardwareAccelerated
+     {
+         get { return false; }
+     }
+
+     /// <summary>
+     /// Creates a new output surface. The requested dimensions are not used.
+     /// </summary>
+     public ISurface CreateSurface(int width, int height)
+     {
+         return new Surface();
+     }
+
+     /// <summary>
+     /// Decodes one frame: the parameter sets rebuilt from <paramref name="pictureInfo"/> are
+     /// placed in front of <paramref name="bitstream"/> and the result is passed to the decoder context.
+     /// </summary>
+     /// <returns>True when the decoder context reports 0, false otherwise</returns>
+     public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
+     {
+         ReadOnlySpan<byte> parameterSets = SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer);
+
+         byte[] fullStream = Prepend(bitstream, parameterSets);
+
+         int result = _context.DecodeFrame((Surface)output, fullStream);
+
+         return result == 0;
+     }
+
+     // Returns a new array holding prep followed by data.
+     private static byte[] Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
+     {
+         byte[] combined = new byte[data.Length + prep.Length];
+
+         prep.CopyTo(combined);
+         data.CopyTo(new Span<byte>(combined, prep.Length, data.Length));
+
+         return combined;
+     }
+
+     public void Dispose()
+     {
+         _context.Dispose();
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs
new file mode 100644
index 00000000..b4f9206b
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs
@@ -0,0 +1,51 @@
+using FFmpeg.AutoGen;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.H264
+{
+ /// <summary>
+ /// Owns an FFmpeg H264 codec context and feeds packets to it.
+ /// </summary>
+ unsafe class FFmpegContext : IDisposable
+ {
+     private readonly AVCodec* _codec;
+     private AVCodecContext* _context;
+
+     public FFmpegContext()
+     {
+         AVCodec* decoder = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
+
+         _codec = decoder;
+         _context = ffmpeg.avcodec_alloc_context3(decoder);
+
+         // The result of opening the codec is not checked.
+         ffmpeg.avcodec_open2(_context, decoder, null);
+     }
+
+     /// <summary>
+     /// Sends <paramref name="bitstream"/> to the decoder as a single packet and then asks for a frame.
+     /// </summary>
+     /// <param name="output">Surface whose frame receives the decoded picture</param>
+     /// <param name="bitstream">Data to decode</param>
+     /// <returns>The non-zero code from sending the packet, otherwise the code from receiving the frame</returns>
+     public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
+     {
+         AVPacket packet;
+
+         ffmpeg.av_init_packet(&packet);
+
+         int sendResult;
+
+         // The packet borrows the pinned span, so it is only used inside the fixed block.
+         fixed (byte* ptr = bitstream)
+         {
+             packet.data = ptr;
+             packet.size = bitstream.Length;
+
+             sendResult = ffmpeg.avcodec_send_packet(_context, &packet);
+         }
+
+         if (sendResult != 0)
+         {
+             return sendResult;
+         }
+
+         return ffmpeg.avcodec_receive_frame(_context, output.Frame);
+     }
+
+     public void Dispose()
+     {
+         ffmpeg.avcodec_close(_context);
+
+         fixed (AVCodecContext** contextAddress = &_context)
+         {
+             ffmpeg.avcodec_free_context(contextAddress);
+         }
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs
new file mode 100644
index 00000000..c0e2357d
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs
@@ -0,0 +1,121 @@
+using System;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Nvdec.H264
+{
+ /// <summary>
+ /// Writes values most significant bit first into a byte array, one byte at a time,
+ /// including unsigned and signed Exp-Golomb codes.
+ /// </summary>
+ struct H264BitStreamWriter
+ {
+     // Number of bits accumulated before a byte is emitted.
+     private const int BufferSize = 8;
+
+     private readonly byte[] _workBuffer;
+
+     private int _offset;
+     private int _buffer;
+     private int _bufferPos;
+
+     public H264BitStreamWriter(byte[] workBuffer)
+     {
+         _workBuffer = workBuffer;
+         _offset = 0;
+         _buffer = 0;
+         _bufferPos = 0;
+     }
+
+     public void WriteBit(bool value)
+     {
+         int bit = value ? 1 : 0;
+
+         WriteBits(bit, 1);
+     }
+
+     /// <summary>
+     /// Appends the low <paramref name="valueSize"/> bits of <paramref name="value"/>, highest bit first.
+     /// </summary>
+     public void WriteBits(int value, int valueSize)
+     {
+         for (int written = 0; written < valueSize;)
+         {
+             // Asking for the free space also emits the pending byte when it is full.
+             int chunk = Math.Min(valueSize - written, GetFreeBufferBits());
+             int chunkMask = (1 << chunk) - 1;
+
+             int srcShift = (valueSize - written) - chunk;
+             int dstShift = (BufferSize - _bufferPos) - chunk;
+
+             _buffer |= ((value >> srcShift) & chunkMask) << dstShift;
+
+             written += chunk;
+             _bufferPos += chunk;
+         }
+     }
+
+     private int GetFreeBufferBits()
+     {
+         if (_bufferPos == BufferSize)
+         {
+             Flush();
+         }
+
+         return BufferSize - _bufferPos;
+     }
+
+     /// <summary>
+     /// Emits the pending byte (unused low bits are zero) when at least one bit is pending.
+     /// </summary>
+     public void Flush()
+     {
+         if (_bufferPos == 0)
+         {
+             return;
+         }
+
+         _workBuffer[_offset] = (byte)_buffer;
+         _offset++;
+
+         _buffer = 0;
+         _bufferPos = 0;
+     }
+
+     /// <summary>
+     /// Writes a single 1 bit and flushes the pending byte.
+     /// </summary>
+     public void End()
+     {
+         WriteBit(true);
+
+         Flush();
+     }
+
+     /// <summary>
+     /// Returns the bytes emitted so far.
+     /// </summary>
+     public Span<byte> AsSpan()
+     {
+         return new Span<byte>(_workBuffer, 0, _offset);
+     }
+
+     public void WriteU(uint value, int valueSize)
+     {
+         WriteBits((int)value, valueSize);
+     }
+
+     public void WriteSe(int value)
+     {
+         WriteExpGolombCodedInt(value);
+     }
+
+     public void WriteUe(uint value)
+     {
+         WriteExpGolombCodedUInt(value);
+     }
+
+     // Maps a signed value to its unsigned code number: positive v becomes 2v - 1,
+     // zero and negative v become 2|v|.
+     private void WriteExpGolombCodedInt(int value)
+     {
+         int codeNum = value > 0 ? (value << 1) - 1 : (-value) << 1;
+
+         WriteExpGolombCodedUInt((uint)codeNum);
+     }
+
+     // Writes (bitLength - 1) zero bits and a 1 bit, followed by a (bitLength - 1) bit suffix.
+     private void WriteExpGolombCodedUInt(uint value)
+     {
+         int bitLength = 32 - BitOperations.LeadingZeroCount(value + 1);
+
+         WriteBits(1, bitLength);
+
+         uint suffix = value - ((1u << (bitLength - 1)) - 1);
+
+         WriteBits((int)suffix, bitLength - 1);
+     }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj b/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj
new file mode 100644
index 00000000..cda0d933
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj
@@ -0,0 +1,23 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs
new file mode 100644
index 00000000..e75c555e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs
@@ -0,0 +1,159 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.H264
+{
+ /// <summary>
+ /// Rebuilds H264 sequence and picture parameter sets from picture information.
+ /// </summary>
+ static class SpsAndPpsReconstruction
+ {
+     /// <summary>
+     /// Writes a sequence parameter set followed by a picture parameter set, each preceded by a
+     /// 0x000001 start code, into <paramref name="workBuffer"/>.
+     /// </summary>
+     /// <param name="pictureInfo">Source of the variable syntax element values</param>
+     /// <param name="workBuffer">Buffer receiving the generated bytes</param>
+     /// <returns>The written portion of <paramref name="workBuffer"/></returns>
+     public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer)
+     {
+         H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer);
+
+         // Sequence Parameter Set.
+         writer.WriteU(1, 24); // Start code
+         writer.WriteU(0, 1);
+         writer.WriteU(3, 2);
+         writer.WriteU(7, 5);
+         writer.WriteU(100, 8); // Profile idc
+         writer.WriteU(0, 8); // Reserved
+         writer.WriteU(31, 8); // Level idc
+         writer.WriteUe(0); // Seq parameter set id
+         writer.WriteUe(pictureInfo.ChromaFormatIdc);
+
+         if (pictureInfo.ChromaFormatIdc == 3)
+         {
+             writer.WriteBit(false); // Separate colour plane flag
+         }
+
+         writer.WriteUe(0); // Bit depth luma minus 8
+         writer.WriteUe(0); // Bit depth chroma minus 8
+         writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag);
+         writer.WriteBit(false); // Scaling matrix present flag
+
+         writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4);
+         writer.WriteUe(pictureInfo.PicOrderCntType);
+
+         if (pictureInfo.PicOrderCntType == 0)
+         {
+             writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4);
+         }
+         else if (pictureInfo.PicOrderCntType == 1)
+         {
+             writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag);
+
+             writer.WriteSe(0); // Offset for non-ref pic
+             writer.WriteSe(0); // Offset for top to bottom field
+             writer.WriteUe(0); // Num ref frames in pic order cnt cycle
+         }
+
+         writer.WriteUe(16); // Max num ref frames
+         writer.WriteBit(false); // Gaps in frame num value allowed flag
+         writer.WriteUe(pictureInfo.PicWidthInMbsMinus1);
+         writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1);
+         writer.WriteBit(pictureInfo.FrameMbsOnlyFlag);
+
+         if (!pictureInfo.FrameMbsOnlyFlag)
+         {
+             writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag);
+         }
+
+         writer.WriteBit(pictureInfo.Direct8x8InferenceFlag);
+         writer.WriteBit(false); // Frame cropping flag
+         writer.WriteBit(false); // VUI parameter present flag
+
+         writer.End();
+
+         // Picture Parameter Set.
+         writer.WriteU(1, 24); // Start code
+         writer.WriteU(0, 1);
+         writer.WriteU(3, 2);
+         writer.WriteU(8, 5);
+
+         writer.WriteUe(0); // Pic parameter set id
+         writer.WriteUe(0); // Seq parameter set id
+
+         writer.WriteBit(pictureInfo.EntropyCodingModeFlag);
+         writer.WriteBit(false); // Bottom field pic order in frame present flag
+         writer.WriteUe(0); // Num slice groups minus 1
+         writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1);
+         writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1);
+         writer.WriteBit(pictureInfo.WeightedPredFlag);
+         writer.WriteU(pictureInfo.WeightedBipredIdc, 2);
+         writer.WriteSe(pictureInfo.PicInitQpMinus26);
+         writer.WriteSe(0); // Pic init qs minus 26
+         writer.WriteSe(pictureInfo.ChromaQpIndexOffset);
+         writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag);
+         writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag);
+         writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag);
+         writer.WriteBit(pictureInfo.Transform8x8ModeFlag);
+
+         writer.WriteBit(pictureInfo.ScalingMatrixPresent);
+
+         if (pictureInfo.ScalingMatrixPresent)
+         {
+             // Every list is flagged as present and written explicitly.
+             for (int index = 0; index < 6; index++)
+             {
+                 writer.WriteBit(true);
+
+                 WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]);
+             }
+
+             if (pictureInfo.Transform8x8ModeFlag)
+             {
+                 for (int index = 0; index < 2; index++)
+                 {
+                     writer.WriteBit(true);
+
+                     WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]);
+                 }
+             }
+         }
+
+         writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset);
+
+         writer.End();
+
+         return writer.AsSpan();
+     }
+
+     // ZigZag LUTs from libavcodec.
+     private static readonly byte[] ZigZagDirect = new byte[]
+     {
+         0,  1,  8,  16, 9,  2,  3,  10,
+         17, 24, 32, 25, 18, 11, 4,  5,
+         12, 19, 26, 33, 40, 48, 41, 34,
+         27, 20, 13, 6,  7,  14, 21, 28,
+         35, 42, 49, 56, 57, 50, 43, 36,
+         29, 22, 15, 23, 30, 37, 44, 51,
+         58, 59, 52, 45, 38, 31, 39, 46,
+         53, 60, 61, 54, 47, 55, 62, 63
+     };
+
+     private static readonly byte[] ZigZagScan = new byte[]
+     {
+         0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
+         1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+         1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
+         3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
+     };
+
+     /// <summary>
+     /// Writes one scaling list as a sequence of signed deltas, visiting the entries in
+     /// zig-zag order (4x4 table for 16-entry lists, 8x8 table otherwise).
+     /// </summary>
+     /// <param name="writer">Bit stream writer to append to</param>
+     /// <param name="list">Scaling list values</param>
+     private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list)
+     {
+         byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect;
+
+         int lastScale = 8;
+
+         for (int index = 0; index < list.Length; index++)
+         {
+             byte value = list[scan[index]];
+
+             // delta_scale must stay within [-128, 127]. The decoder reconstructs each value as
+             // (lastScale + delta_scale + 256) % 256, so wrapping the raw difference to 8 bits
+             // produces the same value while keeping the coded delta in range.
+             int deltaScale = unchecked((sbyte)(value - lastScale));
+
+             writer.WriteSe(deltaScale);
+
+             lastScale = value;
+         }
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.H264/Surface.cs b/Ryujinx.Graphics.Nvdec.H264/Surface.cs
new file mode 100644
index 00000000..a6c16ba3
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.H264/Surface.cs
@@ -0,0 +1,33 @@
+using FFmpeg.AutoGen;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.H264
+{
+ /// <summary>
+ /// Surface backed by an FFmpeg frame; all dimensions and planes are read from that frame.
+ /// </summary>
+ unsafe class Surface : ISurface
+ {
+     public AVFrame* Frame { get; }
+
+     public int Width
+     {
+         get { return Frame->width; }
+     }
+
+     public int Height
+     {
+         get { return Frame->height; }
+     }
+
+     public int Stride
+     {
+         get { return Frame->linesize[0]; }
+     }
+
+     // Chroma dimensions are half the frame dimensions, rounded up.
+     public int UvWidth
+     {
+         get { return (Frame->width + 1) >> 1; }
+     }
+
+     public int UvHeight
+     {
+         get { return (Frame->height + 1) >> 1; }
+     }
+
+     public int UvStride
+     {
+         get { return Frame->linesize[1]; }
+     }
+
+     public Plane YPlane
+     {
+         get { return new Plane((IntPtr)Frame->data[0], Stride * Height); }
+     }
+
+     public Plane UPlane
+     {
+         get { return new Plane((IntPtr)Frame->data[1], UvStride * UvHeight); }
+     }
+
+     public Plane VPlane
+     {
+         get { return new Plane((IntPtr)Frame->data[2], UvStride * UvHeight); }
+     }
+
+     public Surface()
+     {
+         Frame = ffmpeg.av_frame_alloc();
+     }
+
+     public void Dispose()
+     {
+         AVFrame* frame = Frame;
+
+         ffmpeg.av_frame_unref(frame);
+         ffmpeg.av_free(frame);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs b/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs
new file mode 100644
index 00000000..b7b70953
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Bit depths named by the decoder; each member's numeric value equals its bit count.
+ internal enum BitDepth
+ {
+ Bits8 = 8, // 8 bits
+ Bits10 = 10, // 10 bits
+ Bits12 = 12, // 12 bits
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs b/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs
new file mode 100644
index 00000000..b695fed5
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs
@@ -0,0 +1,56 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ /// <summary>
+ /// Codec result codes. Members have no explicit values, so they are numbered from 0
+ /// (<see cref="CodecOk"/>) in declaration order.
+ /// </summary>
+ internal enum CodecErr
+ {
+ /// <summary>Operation completed without error.</summary>
+ CodecOk,
+
+ /// <summary>Unspecified error.</summary>
+ CodecError,
+
+ /// <summary>Memory operation failed.</summary>
+ CodecMemError,
+
+ /// <summary>ABI version mismatch.</summary>
+ CodecAbiMismatch,
+
+ /// <summary>Algorithm does not have the required capability.</summary>
+ CodecIncapable,
+
+ /// <summary>
+ /// The given bitstream is not supported.
+ /// The bitstream could not be parsed at the highest level and the decoder is unable to
+ /// proceed. This error should be treated as fatal to the stream.
+ /// </summary>
+ CodecUnsupBitstream,
+
+ /// <summary>
+ /// Encoded bitstream uses an unsupported feature.
+ /// The decoder does not implement a feature required by the encoder. This code should only
+ /// be used for features that prevent future pictures from being properly decoded. It may be
+ /// treated as fatal to the stream or as fatal to the current GOP.
+ /// </summary>
+ CodecUnsupFeature,
+
+ /// <summary>
+ /// The coded data for this stream is corrupt or incomplete.
+ /// There was a problem decoding the current frame. This code should only be used for
+ /// failures that prevent future pictures from being properly decoded. It may be treated as
+ /// fatal to the stream or as fatal to the current GOP. If decoding continues for the
+ /// current GOP, artifacts may be present.
+ /// </summary>
+ CodecCorruptFrame,
+
+ /// <summary>An application-supplied parameter is not valid.</summary>
+ CodecInvalidParam,
+
+ /// <summary>An iterator reached the end of the list.</summary>
+ CodecListEnd
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
new file mode 100644
index 00000000..a7c6d148
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs
@@ -0,0 +1,59 @@
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Common
+{
+ /// <summary>
+ /// Small integer helpers: pixel clamping, power-of-two rounding and alignment, bit counting.
+ /// </summary>
+ internal static class BitUtils
+ {
+     // FIXME: Enable inlining here after AVX2 gather bug is fixed.
+     // [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     /// <summary>
+     /// Clamps <paramref name="val"/> to the range [0, 255].
+     /// </summary>
+     public static byte ClipPixel(int val)
+     {
+         if (val > 255)
+         {
+             return 255;
+         }
+
+         if (val < 0)
+         {
+             return 0;
+         }
+
+         return (byte)val;
+     }
+
+     /// <summary>
+     /// Clamps <paramref name="val"/> to [0, 1023] for a bit depth of 10, to [0, 4095] for 12,
+     /// and to [0, 255] for every other bit depth.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static ushort ClipPixelHighbd(int val, int bd)
+     {
+         int max;
+
+         if (bd == 10)
+         {
+             max = 1023;
+         }
+         else if (bd == 12)
+         {
+             max = 4095;
+         }
+         else
+         {
+             max = 255;
+         }
+
+         return (ushort)Math.Clamp(val, 0, max);
+     }
+
+     /// <summary>
+     /// Adds half of 2^n to <paramref name="value"/> and shifts the sum right by <paramref name="n"/>.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int RoundPowerOfTwo(int value, int n)
+     {
+         int half = 1 << (n - 1);
+
+         return (value + half) >> n;
+     }
+
+     /// <summary>
+     /// 64-bit variant of <see cref="RoundPowerOfTwo(int, int)"/>.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static long RoundPowerOfTwo(long value, int n)
+     {
+         long half = 1L << (n - 1);
+
+         return (value + half) >> n;
+     }
+
+     /// <summary>
+     /// Rounds <paramref name="value"/> up to the next multiple of 2^n.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int AlignPowerOfTwo(int value, int n)
+     {
+         int lowBits = (1 << n) - 1;
+
+         return (value + lowBits) & ~lowBits;
+     }
+
+     // Index of the highest set bit; n must not be zero.
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     private static int GetMsb(uint n)
+     {
+         Debug.Assert(n != 0);
+
+         int leadingZeros = BitOperations.LeadingZeroCount(n);
+
+         return 31 ^ leadingZeros;
+     }
+
+     /// <summary>
+     /// Returns one plus the index of the highest set bit of <paramref name="numValues"/>,
+     /// or 0 when it is zero.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int GetUnsignedBits(uint numValues)
+     {
+         if (numValues == 0)
+         {
+             return 0;
+         }
+
+         return GetMsb(numValues) + 1;
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs
new file mode 100644
index 00000000..473dd904
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs
@@ -0,0 +1,94 @@
+using Ryujinx.Common.Memory;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Common
+{
+ /// <summary>
+ /// Unmanaged memory allocator that keeps up to <see cref="PoolEntries"/> blocks around so
+ /// that a later request for the same byte size can reuse a released block.
+ /// </summary>
+ internal class MemoryAllocator : IDisposable
+ {
+     private const int PoolEntries = 10;
+
+     private struct PoolItem
+     {
+         public IntPtr Pointer;
+         public int Length;
+         public bool InUse;
+     }
+
+     private PoolItem[] _pool = new PoolItem[PoolEntries];
+
+     // Blocks handed out while every pool slot was in use. They are tracked here so that
+     // Free and Dispose can still release them instead of leaking them.
+     // (Fully qualified so that the file's using directives do not need to change.)
+     private readonly System.Collections.Generic.HashSet<IntPtr> _overflow =
+         new System.Collections.Generic.HashSet<IntPtr>();
+
+     /// <summary>
+     /// Returns a block large enough for <paramref name="length"/> elements of <typeparamref name="T"/>.
+     /// A released pool block of exactly the same byte size is reused when available;
+     /// otherwise new memory is allocated. The contents of the block are not initialized.
+     /// </summary>
+     /// <typeparam name="T">Element type</typeparam>
+     /// <param name="length">Number of elements</param>
+     /// <returns>Pointer and length of the block</returns>
+     public ArrayPtr<T> Allocate<T>(int length) where T : unmanaged
+     {
+         int lengthInBytes = Unsafe.SizeOf<T>() * length;
+
+         for (int i = 0; i < PoolEntries; i++)
+         {
+             ref PoolItem item = ref _pool[i];
+
+             // Only slots that actually hold memory can be reused; an empty slot has
+             // Length == 0 and would otherwise match a zero-byte request and get stuck in use.
+             if (!item.InUse && item.Pointer != IntPtr.Zero && item.Length == lengthInBytes)
+             {
+                 item.InUse = true;
+
+                 return new ArrayPtr<T>(item.Pointer, length);
+             }
+         }
+
+         IntPtr ptr = Marshal.AllocHGlobal(lengthInBytes);
+
+         bool pooled = false;
+
+         for (int i = 0; i < PoolEntries; i++)
+         {
+             ref PoolItem item = ref _pool[i];
+
+             if (!item.InUse)
+             {
+                 // Take over the first idle slot, releasing whatever block it still holds.
+                 if (item.Pointer != IntPtr.Zero)
+                 {
+                     Marshal.FreeHGlobal(item.Pointer);
+                 }
+
+                 item.InUse = true;
+                 item.Pointer = ptr;
+                 item.Length = lengthInBytes;
+
+                 pooled = true;
+
+                 break;
+             }
+         }
+
+         if (!pooled)
+         {
+             _overflow.Add(ptr);
+         }
+
+         return new ArrayPtr<T>(ptr, length);
+     }
+
+     /// <summary>
+     /// Releases a block obtained from <see cref="Allocate"/>. Pool blocks are kept for reuse;
+     /// blocks allocated while the pool was full are freed immediately. Pointers that are not
+     /// known to this allocator are ignored.
+     /// </summary>
+     /// <typeparam name="T">Element type</typeparam>
+     /// <param name="arr">Block to release</param>
+     public unsafe void Free<T>(ArrayPtr<T> arr) where T : unmanaged
+     {
+         IntPtr ptr = (IntPtr)arr.ToPointer();
+
+         if (ptr == IntPtr.Zero)
+         {
+             return;
+         }
+
+         for (int i = 0; i < PoolEntries; i++)
+         {
+             ref PoolItem item = ref _pool[i];
+
+             if (item.Pointer == ptr)
+             {
+                 item.InUse = false;
+
+                 return;
+             }
+         }
+
+         if (_overflow.Remove(ptr))
+         {
+             Marshal.FreeHGlobal(ptr);
+         }
+     }
+
+     /// <summary>
+     /// Frees every block still held by the allocator, whether or not it is marked as in use.
+     /// </summary>
+     public void Dispose()
+     {
+         for (int i = 0; i < PoolEntries; i++)
+         {
+             ref PoolItem item = ref _pool[i];
+
+             if (item.Pointer != IntPtr.Zero)
+             {
+                 Marshal.FreeHGlobal(item.Pointer);
+             }
+
+             // Reset the whole slot so that no stale length or in-use flag survives.
+             item = default;
+         }
+
+         foreach (IntPtr ptr in _overflow)
+         {
+             Marshal.FreeHGlobal(ptr);
+         }
+
+         _overflow.Clear();
+     }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs
new file mode 100644
index 00000000..e53ec9bd
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs
@@ -0,0 +1,25 @@
+using Ryujinx.Common.Memory;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Common
+{
+ /// <summary>
+ /// Span-based copy and fill helpers for unmanaged data.
+ /// </summary>
+ internal static class MemoryUtil
+ {
+     /// <summary>
+     /// Copies <paramref name="length"/> elements from <paramref name="source"/> to <paramref name="dest"/>.
+     /// </summary>
+     public static unsafe void Copy<T>(T* dest, T* source, int length) where T : unmanaged
+     {
+         Span<T> from = new Span<T>(source, length);
+         Span<T> to = new Span<T>(dest, length);
+
+         from.CopyTo(to);
+     }
+
+     /// <summary>
+     /// Copies the single value referenced by <paramref name="source"/> over the one referenced by <paramref name="dest"/>.
+     /// </summary>
+     public static void Copy<T>(ref T dest, ref T source) where T : unmanaged
+     {
+         Span<T> from = MemoryMarshal.CreateSpan(ref source, 1);
+         Span<T> to = MemoryMarshal.CreateSpan(ref dest, 1);
+
+         from.CopyTo(to);
+     }
+
+     /// <summary>
+     /// Sets <paramref name="length"/> elements starting at <paramref name="ptr"/> to <paramref name="value"/>.
+     /// </summary>
+     public static unsafe void Fill<T>(T* ptr, T value, int length) where T : unmanaged
+     {
+         Span<T> target = new Span<T>(ptr, length);
+
+         target.Fill(value);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs b/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs
new file mode 100644
index 00000000..407e6f42
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs
@@ -0,0 +1,71 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Compile-time constants shared across the Ryujinx.Graphics.Nvdec.Vp9 namespace.
+ // Several values are derived from others (for example MiSize from MiSizeLog2), so the
+ // log2 values are the ones to change if a size ever needs adjusting.
+ internal static class Constants
+ {
+ public const int Vp9InterpExtend = 4;
+
+ public const int MaxMbPlane = 3;
+
+ // Reference frame identifiers; None (-1) sits below the first valid identifier.
+ public const int None = -1;
+ public const int IntraFrame = 0;
+ public const int LastFrame = 1;
+ public const int GoldenFrame = 2;
+ public const int AltRefFrame = 3;
+ public const int MaxRefFrames = 4;
+
+ public const int MiSizeLog2 = 3;
+ public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6
+
+ public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit
+ public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block
+ public const int MiMask = MiBlockSize - 1;
+
+ public const int PartitionPloffset = 4; // number of probability models per block size
+
+ /* Segment Feature Masks */
+ public const int MaxMvRefCandidates = 2;
+
+ public const int CompInterContexts = 5;
+ public const int RefContexts = 5;
+
+ // Interpolation filter identifiers. Bilinear shares the value 3 with SwitchableFilters.
+ public const int EightTap = 0;
+ public const int EightTapSmooth = 1;
+ public const int EightTapSharp = 2;
+ public const int SwitchableFilters = 3; /* Number of switchable filters */
+ public const int Bilinear = 3;
+ public const int Switchable = 4; /* should be the last one */
+
+ // Frame
+ public const int RefsPerFrame = 3;
+
+ public const int NumPingPongBuffers = 2;
+
+ public const int Class0Bits = 1; /* bits at integer precision for class 0 */
+ public const int Class0Size = 1 << Class0Bits;
+
+ // MvUpp and MvLow evaluate to 16383 and -16384.
+ public const int MvInUseBits = 14;
+ public const int MvUpp = (1 << MvInUseBits) - 1;
+ public const int MvLow = -(1 << MvInUseBits);
+
+ // Coefficient token alphabet
+ public const int ZeroToken = 0; // 0 Extra Bits 0+0
+ public const int OneToken = 1; // 1 Extra Bits 0+1
+ public const int TwoToken = 2; // 2 Extra Bits 0+1
+
+ public const int PivotNode = 2;
+
+ public const int Cat1MinVal = 5;
+ public const int Cat2MinVal = 7;
+ public const int Cat3MinVal = 11;
+ public const int Cat4MinVal = 19;
+ public const int Cat5MinVal = 35;
+ public const int Cat6MinVal = 67;
+
+ public const int EobModelToken = 3;
+
+ public const int SegmentAbsData = 1;
+ public const int MaxSegments = 8;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
new file mode 100644
index 00000000..81c187e1
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs
@@ -0,0 +1,1190 @@
+using Ryujinx.Common.Memory;
+using System;
+using System.Buffers.Binary;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+using Mv = Ryujinx.Graphics.Nvdec.Vp9.Types.Mv;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ static class DecodeFrame
+ {
+ // Returns true when len is strictly positive and does not exceed the number of bytes
+ // available in start. The length is a signed int here, so negative values have to be
+ // rejected explicitly; a plain "not zero" test would let them through.
+ private static bool ReadIsValid(ArrayPtr<byte> start, int len)
+ {
+     return len > 0 && len <= start.Length;
+ }
+
+ // Runs the inverse transform for one block of the given plane, adding the result into dst,
+ // and then zeroes the dequantized coefficients that were consumed.
+ // xd: macroblock state; its plane entry supplies the dequantized coefficients.
+ // txSize: selects the transform routine; ignored when xd.Lossless is set (the 4x4 WHT is used).
+ // dst/stride: destination samples; reinterpreted as 16-bit samples when xd.CurBuf.HighBd is set.
+ // eob: value returned by token decoding for this block; must be greater than zero.
+ private static void InverseTransformBlockInter(ref MacroBlockD xd, int plane, TxSize txSize, Span<byte> dst, int stride, int eob)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ ArrayPtr<int> dqcoeff = pd.DqCoeff;
+ Debug.Assert(eob > 0);
+ if (xd.CurBuf.HighBd)
+ {
+ Span<ushort> dst16 = MemoryMarshal.Cast<byte, ushort>(dst);
+ if (xd.Lossless)
+ {
+ Idct.HighbdIwht4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ }
+ else
+ {
+ switch (txSize)
+ {
+ case TxSize.Tx4x4:
+ Idct.HighbdIdct4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx8x8:
+ Idct.HighbdIdct8x8Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx16x16:
+ Idct.HighbdIdct16x16Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx32x32:
+ Idct.HighbdIdct32x32Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ // Unlike the 8-bit path below, an invalid size falls through to the coefficient clearing.
+ default: Debug.Assert(false, "Invalid transform size"); break;
+ }
+ }
+ }
+ else
+ {
+ if (xd.Lossless)
+ {
+ Idct.Iwht4x4Add(dqcoeff.ToSpan(), dst, stride, eob);
+ }
+ else
+ {
+ switch (txSize)
+ {
+ case TxSize.Tx4x4: Idct.Idct4x4Add(dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx8x8: Idct.Idct8x8Add(dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx16x16: Idct.Idct16x16Add(dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx32x32: Idct.Idct32x32Add(dqcoeff.ToSpan(), dst, stride, eob); break;
+ default: Debug.Assert(false, "Invalid transform size"); return;
+ }
+ }
+ }
+
+ // Clear only as many coefficients as the eob value requires.
+ if (eob == 1)
+ {
+ dqcoeff.ToSpan()[0] = 0;
+ }
+ else
+ {
+ if (txSize <= TxSize.Tx16x16 && eob <= 10)
+ {
+ // Clears 4 * (4 << txSize) entries.
+ dqcoeff.ToSpan().Slice(0, 4 * (4 << (int)txSize)).Fill(0);
+ }
+ else if (txSize == TxSize.Tx32x32 && eob <= 34)
+ {
+ dqcoeff.ToSpan().Slice(0, 256).Fill(0);
+ }
+ else
+ {
+ // Clears 16 << (2 * txSize) entries.
+ dqcoeff.ToSpan().Slice(0, 16 << ((int)txSize << 1)).Fill(0);
+ }
+ }
+ }
+
+ // Runs the inverse transform for one intra block of the given plane, adding the result into
+ // dst, and then zeroes the dequantized coefficients that were consumed.
+ // txType: passed to the 4x4, 8x8 and 16x16 routines; the 32x32 and lossless paths do not take it.
+ // txSize: selects the transform routine; ignored when xd.Lossless is set (the 4x4 WHT is used).
+ // dst/stride: destination samples; reinterpreted as 16-bit samples when xd.CurBuf.HighBd is set.
+ // eob: value returned by token decoding for this block; must be greater than zero.
+ private static void InverseTransformBlockIntra(
+ ref MacroBlockD xd,
+ int plane,
+ TxType txType,
+ TxSize txSize,
+ Span<byte> dst,
+ int stride,
+ int eob)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ ArrayPtr<int> dqcoeff = pd.DqCoeff;
+ Debug.Assert(eob > 0);
+ if (xd.CurBuf.HighBd)
+ {
+ Span<ushort> dst16 = MemoryMarshal.Cast<byte, ushort>(dst);
+ if (xd.Lossless)
+ {
+ Idct.HighbdIwht4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ }
+ else
+ {
+ switch (txSize)
+ {
+ case TxSize.Tx4x4:
+ Idct.HighbdIht4x4Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx8x8:
+ Idct.HighbdIht8x8Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx16x16:
+ Idct.HighbdIht16x16Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ case TxSize.Tx32x32:
+ Idct.HighbdIdct32x32Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+ break;
+ // Unlike the 8-bit path below, an invalid size falls through to the coefficient clearing.
+ default: Debug.Assert(false, "Invalid transform size"); break;
+ }
+ }
+ }
+ else
+ {
+ if (xd.Lossless)
+ {
+ Idct.Iwht4x4Add(dqcoeff.ToSpan(), dst, stride, eob);
+ }
+ else
+ {
+ switch (txSize)
+ {
+ case TxSize.Tx4x4: Idct.Iht4x4Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx8x8: Idct.Iht8x8Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx16x16: Idct.Iht16x16Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break;
+ case TxSize.Tx32x32: Idct.Idct32x32Add(dqcoeff.ToSpan(), dst, stride, eob); break;
+ default: Debug.Assert(false, "Invalid transform size"); return;
+ }
+ }
+ }
+
+ // Clear only as many coefficients as the eob value requires.
+ if (eob == 1)
+ {
+ dqcoeff.ToSpan()[0] = 0;
+ }
+ else
+ {
+ // The reduced clear is only taken for the DctDct transform type.
+ if (txType == TxType.DctDct && txSize <= TxSize.Tx16x16 && eob <= 10)
+ {
+ // Clears 4 * (4 << txSize) entries.
+ dqcoeff.ToSpan().Slice(0, 4 * (4 << (int)txSize)).Fill(0);
+ }
+ else if (txSize == TxSize.Tx32x32 && eob <= 34)
+ {
+ dqcoeff.ToSpan().Slice(0, 256).Fill(0);
+ }
+ else
+ {
+ // Clears 16 << (2 * txSize) entries.
+ dqcoeff.ToSpan().Slice(0, 16 << ((int)txSize << 1)).Fill(0);
+ }
+ }
+ }
+
+ // Predicts one intra transform block in place and, unless the block is flagged as skipped,
+ // decodes its tokens and adds the inverse transform on top of the prediction.
+ // row/col are multiplied by 4 to locate the block inside the plane's destination buffer.
+ private static unsafe void PredictAndReconstructIntraBlock(
+ ref TileWorkerData twd,
+ ref ModeInfo mi,
+ int plane,
+ int row,
+ int col,
+ TxSize txSize)
+ {
+ ref MacroBlockD xd = ref twd.Xd;
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ // Plane 0 uses the luma mode, every other plane the chroma mode.
+ PredictionMode mode = (plane == 0) ? mi.Mode : mi.UvMode;
+ int dstOffset = 4 * row * pd.Dst.Stride + 4 * col;
+ // dst and dstSpan address the same location; the pointer feeds the predictor, the span the transform.
+ byte* dst = &pd.Dst.Buf.ToPointer()[dstOffset];
+ Span<byte> dstSpan = pd.Dst.Buf.ToSpan().Slice(dstOffset);
+
+ if (mi.SbType < BlockSize.Block8x8)
+ {
+ if (plane == 0)
+ {
+ // Blocks below 8x8 carry a separate mode per sub-block, indexed by (row << 1) + col.
+ mode = xd.Mi[0].Value.Bmi[(row << 1) + col].Mode;
+ }
+ }
+
+ // The same pointer and stride are passed for both buffer arguments.
+ ReconIntra.PredictIntraBlock(ref xd, pd.N4Wl, txSize, mode, dst, pd.Dst.Stride, dst, pd.Dst.Stride, col, row, plane);
+
+ if (mi.Skip == 0)
+ {
+ // Non-zero planes and lossless mode always use DctDct with the default scan order;
+ // otherwise the transform type is looked up from the prediction mode.
+ TxType txType =
+ (plane != 0 || xd.Lossless) ? TxType.DctDct : ReconIntra.IntraModeToTxTypeLookup[(int)mode];
+ var sc = (plane != 0 || xd.Lossless)
+ ? Luts.Vp9DefaultScanOrders[(int)txSize]
+ : Luts.Vp9ScanOrders[(int)txSize][(int)txType];
+ int eob = Detokenize.DecodeBlockTokens(ref twd, plane, sc, col, row, txSize, mi.SegmentId);
+ if (eob > 0)
+ {
+ InverseTransformBlockIntra(ref xd, plane, txType, txSize, dstSpan, pd.Dst.Stride, eob);
+ }
+ }
+ }
+
+ // Decodes the tokens of one inter transform block using the default scan order and, when
+ // the decoded eob is greater than zero, adds the inverse transform into the plane's
+ // destination buffer. Returns the eob value.
+ private static int ReconstructInterBlock(
+     ref TileWorkerData twd,
+     ref ModeInfo mi,
+     int plane,
+     int row,
+     int col,
+     TxSize txSize)
+ {
+     ref MacroBlockD xd = ref twd.Xd;
+     ref MacroBlockDPlane planeData = ref xd.Plane[plane];
+
+     int eob = Detokenize.DecodeBlockTokens(
+         ref twd,
+         plane,
+         Luts.Vp9DefaultScanOrders[(int)txSize],
+         col,
+         row,
+         txSize,
+         mi.SegmentId);
+
+     // row and col are multiplied by 4 to locate the block in the destination buffer.
+     int dstOffset = 4 * row * planeData.Dst.Stride + 4 * col;
+     Span<byte> dst = planeData.Dst.Buf.ToSpan().Slice(dstOffset);
+
+     if (eob <= 0)
+     {
+         return eob;
+     }
+
+     InverseTransformBlockInter(ref xd, plane, txSize, dst, planeData.Dst.Stride, eob);
+
+     return eob;
+ }
+
+ // Copies a bW x bH block whose top-left corner is at (x, y) of a w x h reference
+ // plane into 'dst', replicating the nearest edge sample for every position that
+ // falls outside the plane. 'src' points at the (possibly out of range) sample (x, y).
+ private static unsafe void BuildMcBorder(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     int x,
+     int y,
+     int bW,
+     int bH,
+     int w,
+     int h)
+ {
+     // Rewind to column 0 of row 0, then move to the first row that is actually read:
+     // the last row when starting below the plane, row y when starting inside it.
+     byte* rowPtr = src - x - y * srcStride;
+
+     if (y >= h)
+     {
+         rowPtr += (h - 1) * srcStride;
+     }
+     else if (y > 0)
+     {
+         rowPtr += y * srcStride;
+     }
+
+     // The horizontal split is the same for every row: columns left of the plane,
+     // columns right of it, and the part in between that is copied verbatim.
+     int leftPad = Math.Min(x < 0 ? -x : 0, bW);
+     int rightPad = Math.Min(x + bW > w ? x + bW - w : 0, bW);
+     int copyCount = bW - leftPad - rightPad;
+
+     do
+     {
+         if (leftPad != 0)
+         {
+             MemoryUtil.Fill(dst, rowPtr[0], leftPad);
+         }
+
+         if (copyCount != 0)
+         {
+             MemoryUtil.Copy(dst + leftPad, rowPtr + x + leftPad, copyCount);
+         }
+
+         if (rightPad != 0)
+         {
+             MemoryUtil.Fill(dst + leftPad + copyCount, rowPtr[w - 1], rightPad);
+         }
+
+         dst += dstStride;
+         ++y;
+
+         // Only advance the source row while inside the plane; outside it the
+         // first/last row keeps being repeated.
+         if (y > 0 && y < h)
+         {
+             rowPtr += srcStride;
+         }
+     }
+     while (--bH != 0);
+ }
+
+ // 16-bit sample variant of BuildMcBorder: copies a bW x bH block whose top-left
+ // corner is at (x, y) of a w x h reference plane into 'dst', replicating the nearest
+ // edge sample for positions outside the plane. 'src8' is reinterpreted as ushort
+ // samples and 'srcStride' is applied in ushort elements.
+ private static unsafe void HighBuildMcBorder(
+     byte* src8,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     int x,
+     int y,
+     int bW,
+     int bH,
+     int w,
+     int h)
+ {
+     // Rewind to column 0 of row 0, then move to the first row that is actually read:
+     // the last row when starting below the plane, row y when starting inside it.
+     ushort* rowPtr = (ushort*)src8 - x - y * srcStride;
+
+     if (y >= h)
+     {
+         rowPtr += (h - 1) * srcStride;
+     }
+     else if (y > 0)
+     {
+         rowPtr += y * srcStride;
+     }
+
+     // The horizontal split is the same for every row: columns left of the plane,
+     // columns right of it, and the part in between that is copied verbatim.
+     int leftPad = Math.Min(x < 0 ? -x : 0, bW);
+     int rightPad = Math.Min(x + bW > w ? x + bW - w : 0, bW);
+     int copyCount = bW - leftPad - rightPad;
+
+     do
+     {
+         if (leftPad != 0)
+         {
+             MemoryUtil.Fill(dst, rowPtr[0], leftPad);
+         }
+
+         if (copyCount != 0)
+         {
+             MemoryUtil.Copy(dst + leftPad, rowPtr + x + leftPad, copyCount);
+         }
+
+         if (rightPad != 0)
+         {
+             MemoryUtil.Fill(dst + leftPad + copyCount, rowPtr[w - 1], rightPad);
+         }
+
+         dst += dstStride;
+         ++y;
+
+         // Only advance the source row while inside the plane; outside it the
+         // first/last row keeps being repeated.
+         if (y > 0 && y < h)
+         {
+             rowPtr += srcStride;
+         }
+     }
+     while (--bH != 0);
+ }
+
+ // Opaque stack storage for the motion compensation scratch buffer. The struct has
+ // no fields; only its size matters. It must hold (80 * 2) x (80 * 2) samples, and
+ // because the high bit depth path stores 16-bit samples in it, the size is given
+ // in ushort elements rather than bytes (a byte-sized buffer would be overrun).
+ [StructLayout(LayoutKind.Sequential, Size = 80 * 2 * 80 * 2 * sizeof(ushort))]
+ struct McBufHigh
+ {
+ }
+
+ // Builds a border-extended copy of the reference block in a local scratch buffer
+ // and runs the inter predictor from that copy instead of the frame buffer.
+ // bufPtr1/preBufStride/x0/y0 locate the block in the reference plane, bW x bH is
+ // the size of the region to copy, and frameWidth/frameHeight bound the plane.
+ // borderOffset is the offset into the scratch buffer that is handed to the
+ // predictor as its source. The remaining arguments are forwarded unchanged.
+ private static unsafe void ExtendAndPredict(
+ byte* bufPtr1,
+ int preBufStride,
+ int x0,
+ int y0,
+ int bW,
+ int bH,
+ int frameWidth,
+ int frameHeight,
+ int borderOffset,
+ byte* dst,
+ int dstBufStride,
+ int subpelX,
+ int subpelY,
+ Array8<short>[] kernel,
+ ref ScaleFactors sf,
+ ref MacroBlockD xd,
+ int w,
+ int h,
+ int refr,
+ int xs,
+ int ys)
+ {
+ // The scratch buffer is a field-less struct local whose size comes from its
+ // StructLayout attribute; it is only ever accessed through the raw pointer.
+ McBufHigh mcBufHighStruct;
+ ushort* mcBufHigh = (ushort*)Unsafe.AsPointer(ref mcBufHighStruct); // Avoid zero initialization.
+ if (xd.CurBuf.HighBd)
+ {
+ // High bit depth: the scratch buffer holds 16-bit samples, packed with a stride of bW.
+ HighBuildMcBorder(bufPtr1, preBufStride, mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight);
+ ReconInter.HighbdInterPredictor(
+ mcBufHigh + borderOffset,
+ bW,
+ (ushort*)dst,
+ dstBufStride,
+ subpelX,
+ subpelY,
+ ref sf,
+ w,
+ h,
+ refr,
+ kernel,
+ xs,
+ ys,
+ xd.Bd);
+ }
+ else
+ {
+ // 8-bit: the same storage is reused as a byte buffer, packed with a stride of bW.
+ BuildMcBorder(bufPtr1, preBufStride, (byte*)mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight);
+ ReconInter.InterPredictor(
+ (byte*)mcBufHigh + borderOffset,
+ bW,
+ dst,
+ dstBufStride,
+ subpelX,
+ subpelY,
+ ref sf,
+ w,
+ h,
+ refr,
+ kernel,
+ xs,
+ ys);
+ }
+ }
+
+ // Builds the inter prediction of a w x h region at (x, y) inside one plane of the
+ // current block, reading from the given reference frame.
+ // The method locates the matching block in the reference plane (scaling the
+ // position and motion vector when 'isScaled' is set), and then either predicts
+ // directly from the reference plane or, when the required source region reaches
+ // outside the plane, goes through ExtendAndPredict to replicate the border first.
+ // bw/bh are only used when clamping the motion vector on the scaled path, and
+ // miX/miY only when scaling it.
+ private static unsafe void DecBuildInterPredictors(
+ ref MacroBlockD xd,
+ int plane,
+ int bw,
+ int bh,
+ int x,
+ int y,
+ int w,
+ int h,
+ int miX,
+ int miY,
+ Array8<short>[] kernel,
+ ref ScaleFactors sf,
+ ref Buf2D preBuf,
+ ref Buf2D dstBuf,
+ ref Mv mv,
+ ref Surface refFrameBuf,
+ bool isScaled,
+ int refr)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ byte* dst = dstBuf.Buf.ToPointer() + dstBuf.Stride * y + x;
+ Mv32 scaledMv;
+ int xs, ys, x0, y0, x0_16, y0_16, frameWidth, frameHeight, bufStride, subpelX, subpelY;
+ byte* refFrame;
+ byte* bufPtr;
+
+ // Get reference frame pointer, width and height.
+ if (plane == 0)
+ {
+ frameWidth = refFrameBuf.Width;
+ frameHeight = refFrameBuf.Height;
+ refFrame = refFrameBuf.YBuffer.ToPointer();
+ }
+ else
+ {
+ frameWidth = refFrameBuf.UvWidth;
+ frameHeight = refFrameBuf.UvHeight;
+ refFrame = plane == 1 ? refFrameBuf.UBuffer.ToPointer() : refFrameBuf.VBuffer.ToPointer();
+ }
+
+ if (isScaled)
+ {
+ Mv mvQ4 = ReconInter.ClampMvToUmvBorderSb(ref xd, ref mv, bw, bh, pd.SubsamplingX, pd.SubsamplingY);
+ // Co-ordinate of containing block to pixel precision.
+ int xStart = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX));
+ int yStart = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY));
+ // Co-ordinate of the block to 1/16th pixel precision.
+ x0_16 = (xStart + x) << Filter.SubpelBits;
+ y0_16 = (yStart + y) << Filter.SubpelBits;
+
+ // Co-ordinate of current block in reference frame
+ // to 1/16th pixel precision.
+ x0_16 = sf.ScaleValueX(x0_16);
+ y0_16 = sf.ScaleValueY(y0_16);
+
+ // Map the top left corner of the block into the reference frame.
+ x0 = sf.ScaleValueX(xStart + x);
+ y0 = sf.ScaleValueY(yStart + y);
+
+ // Scale the MV and incorporate the sub-pixel offset of the block
+ // in the reference frame.
+ scaledMv = sf.ScaleMv(ref mvQ4, miX + x, miY + y);
+ xs = sf.XStepQ4;
+ ys = sf.YStepQ4;
+ }
+ else
+ {
+ // Co-ordinate of containing block to pixel precision.
+ x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
+ y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
+
+ // Co-ordinate of the block to 1/16th pixel precision.
+ x0_16 = x0 << Filter.SubpelBits;
+ y0_16 = y0 << Filter.SubpelBits;
+
+ // Bring the motion vector to 1/16th pixel units for this plane: it is doubled
+ // for a non-subsampled plane and left as is for a subsampled one.
+ scaledMv.Row = mv.Row * (1 << (1 - pd.SubsamplingY));
+ scaledMv.Col = mv.Col * (1 << (1 - pd.SubsamplingX));
+ // A step of 16 in 1/16th pixel units, i.e. one full sample per output sample.
+ xs = ys = 16;
+ }
+ // Fractional (sub-pixel) part of the motion vector.
+ subpelX = scaledMv.Col & Filter.SubpelMask;
+ subpelY = scaledMv.Row & Filter.SubpelMask;
+
+ // Calculate the top left corner of the best matching block in the
+ // reference frame.
+ x0 += scaledMv.Col >> Filter.SubpelBits;
+ y0 += scaledMv.Row >> Filter.SubpelBits;
+ x0_16 += scaledMv.Col;
+ y0_16 += scaledMv.Row;
+
+ // Get reference block pointer.
+ bufPtr = refFrame + y0 * preBuf.Stride + x0;
+ bufStride = preBuf.Stride;
+
+ // Do border extension if there is motion or the
+ // width/height is not a multiple of 8 pixels.
+ if (isScaled || scaledMv.Col != 0 || scaledMv.Row != 0 || (frameWidth & 0x7) != 0 || (frameHeight & 0x7) != 0)
+ {
+ // Get reference block bottom right vertical coordinate.
+ int y1 = ((y0_16 + (h - 1) * ys) >> Filter.SubpelBits) + 1;
+
+ // Get reference block bottom right horizontal coordinate.
+ int x1 = ((x0_16 + (w - 1) * xs) >> Filter.SubpelBits) + 1;
+ int xPad = 0, yPad = 0;
+
+ // When filtering is needed along an axis, widen the source region by the
+ // interpolation filter extent on both sides of that axis.
+ if (subpelX != 0 || (sf.XStepQ4 != Filter.SubpelShifts))
+ {
+ x0 -= Constants.Vp9InterpExtend - 1;
+ x1 += Constants.Vp9InterpExtend;
+ xPad = 1;
+ }
+
+ if (subpelY != 0 || (sf.YStepQ4 != Filter.SubpelShifts))
+ {
+ y0 -= Constants.Vp9InterpExtend - 1;
+ y1 += Constants.Vp9InterpExtend;
+ yPad = 1;
+ }
+
+ // Skip border extension if block is inside the frame.
+ if (x0 < 0 || x0 > frameWidth - 1 || x1 < 0 || x1 > frameWidth - 1 ||
+ y0 < 0 || y0 > frameHeight - 1 || y1 < 0 || y1 > frameHeight - 1)
+ {
+ // Extend the border.
+ byte* bufPtr1 = refFrame + y0 * bufStride + x0;
+ int bW = x1 - x0 + 1;
+ int bH = y1 - y0 + 1;
+ // Offset, inside the extended copy, that skips the rows/columns added
+ // before the block on each padded axis.
+ int borderOffset = yPad * 3 * bW + xPad * 3;
+
+ ExtendAndPredict(
+ bufPtr1,
+ bufStride,
+ x0,
+ y0,
+ bW,
+ bH,
+ frameWidth,
+ frameHeight,
+ borderOffset,
+ dst,
+ dstBuf.Stride,
+ subpelX,
+ subpelY,
+ kernel,
+ ref sf,
+ ref xd,
+ w,
+ h,
+ refr,
+ xs,
+ ys);
+ return;
+ }
+ }
+ // No border extension needed: predict straight from the reference plane.
+ if (xd.CurBuf.HighBd)
+ {
+ ReconInter.HighbdInterPredictor(
+ (ushort*)bufPtr,
+ bufStride,
+ (ushort*)dst,
+ dstBuf.Stride,
+ subpelX,
+ subpelY,
+ ref sf,
+ w,
+ h,
+ refr,
+ kernel,
+ xs,
+ ys,
+ xd.Bd);
+ }
+ else
+ {
+ ReconInter.InterPredictor(
+ bufPtr,
+ bufStride,
+ dst,
+ dstBuf.Stride,
+ subpelX,
+ subpelY,
+ ref sf,
+ w,
+ h,
+ refr,
+ kernel,
+ xs,
+ ys);
+ }
+ }
+
+ // Builds the inter prediction for every plane of the current block, once per
+ // reference frame the block uses (twice when it has a second reference).
+ // Blocks smaller than 8x8 are predicted one 4x4 sub-block at a time, each with
+ // its own averaged motion vector; larger blocks are predicted in one call per plane.
+ private static void DecBuildInterPredictorsSb(ref Vp9Common cm, ref MacroBlockD xd, int miRow, int miCol)
+ {
+ int plane;
+ int miX = miCol * Constants.MiSize;
+ int miY = miRow * Constants.MiSize;
+ ref ModeInfo mi = ref xd.Mi[0].Value;
+ Array8<short>[] kernel = Luts.Vp9FilterKernels[mi.InterpFilter];
+ BlockSize sbType = mi.SbType;
+ int isCompound = mi.HasSecondRef() ? 1 : 0;
+ int refr;
+ bool isScaled;
+
+ for (refr = 0; refr < 1 + isCompound; ++refr)
+ {
+ int frame = mi.RefFrame[refr];
+ // FrameRefs is indexed relative to LastFrame.
+ ref RefBuffer refBuf = ref cm.FrameRefs[frame - Constants.LastFrame];
+ ref ScaleFactors sf = ref refBuf.Sf;
+ ref Surface refFrameBuf = ref refBuf.Buf;
+
+ if (!sf.IsValidScale())
+ {
+ xd.ErrorInfo.Value.InternalError(CodecErr.CodecUnsupBitstream, "Reference frame has invalid dimensions");
+ }
+
+ isScaled = sf.IsScaled();
+ // The scale factors are only handed to the plane setup when scaling is in effect.
+ ReconInter.SetupPrePlanes(ref xd, refr, ref refFrameBuf, miRow, miCol, isScaled ? new Ptr<ScaleFactors>(ref sf) : Ptr<ScaleFactors>.Null);
+ xd.BlockRefs[refr] = new Ptr<RefBuffer>(ref refBuf);
+
+ if (sbType < BlockSize.Block8x8)
+ {
+ for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ ref Buf2D dstBuf = ref pd.Dst;
+ int num4x4W = pd.N4W;
+ int num4x4H = pd.N4H;
+ int n4Wx4 = 4 * num4x4W;
+ int n4Hx4 = 4 * num4x4H;
+ ref Buf2D preBuf = ref pd.Pre[refr];
+ // 'i' is the running sub-block index, in raster order.
+ int i = 0, x, y;
+ for (y = 0; y < num4x4H; ++y)
+ {
+ for (x = 0; x < num4x4W; ++x)
+ {
+ Mv mv = ReconInter.AverageSplitMvs(ref pd, ref mi, refr, i++);
+ DecBuildInterPredictors(
+ ref xd,
+ plane,
+ n4Wx4,
+ n4Hx4,
+ 4 * x,
+ 4 * y,
+ 4,
+ 4,
+ miX,
+ miY,
+ kernel,
+ ref sf,
+ ref preBuf,
+ ref dstBuf,
+ ref mv,
+ ref refFrameBuf,
+ isScaled,
+ refr);
+ }
+ }
+ }
+ }
+ else
+ {
+ // One motion vector for the whole block; each plane is predicted in a single call.
+ Mv mv = mi.Mv[refr];
+ for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ ref Buf2D dstBuf = ref pd.Dst;
+ int num4x4W = pd.N4W;
+ int num4x4H = pd.N4H;
+ int n4Wx4 = 4 * num4x4W;
+ int n4Hx4 = 4 * num4x4H;
+ ref Buf2D preBuf = ref pd.Pre[refr];
+ DecBuildInterPredictors(
+ ref xd,
+ plane,
+ n4Wx4,
+ n4Hx4,
+ 0,
+ 0,
+ n4Wx4,
+ n4Hx4,
+ miX,
+ miY,
+ kernel,
+ ref sf,
+ ref preBuf,
+ ref dstBuf,
+ ref mv,
+ ref refFrameBuf,
+ isScaled,
+ refr);
+ }
+ }
+ }
+ }
+
+ // Zeroes the above and left entropy context entries covered by the current
+ // block (N4W above entries, N4H left entries) in every plane.
+ private static unsafe void DecResetSkipContext(ref MacroBlockD xd)
+ {
+     for (int planeIndex = 0; planeIndex < Constants.MaxMbPlane; planeIndex++)
+     {
+         ref MacroBlockDPlane planeData = ref xd.Plane[planeIndex];
+
+         MemoryUtil.Fill(planeData.AboveContext.ToPointer(), (sbyte)0, planeData.N4W);
+         MemoryUtil.Fill(planeData.LeftContext.ToPointer(), (sbyte)0, planeData.N4H);
+     }
+ }
+
+ // Stores, for every plane, the block size in 4x4 units (N4W/N4H) and its log2
+ // (N4Wl/N4Hl), both adjusted by the plane's subsampling.
+ private static void SetPlaneN4(ref MacroBlockD xd, int bw, int bh, int bwl, int bhl)
+ {
+     for (int planeIndex = 0; planeIndex < Constants.MaxMbPlane; planeIndex++)
+     {
+         ref MacroBlockDPlane planeData = ref xd.Plane[planeIndex];
+
+         // bw/bh are doubled to go to 4x4 units before the subsampling shift.
+         planeData.N4W = (ushort)((bw << 1) >> planeData.SubsamplingX);
+         planeData.N4H = (ushort)((bh << 1) >> planeData.SubsamplingY);
+         planeData.N4Wl = (byte)(bwl - planeData.SubsamplingX);
+         planeData.N4Hl = (byte)(bhl - planeData.SubsamplingY);
+     }
+ }
+
+ // Points 'xd' at the mode info entry of the block at (miRow, miCol), makes every
+ // visible grid cell covered by the block (xMis x yMis cells) refer to that entry,
+ // and refreshes the per-plane sizes, skip context, edge distances and
+ // destination planes. Returns a reference to the block's mode info.
+ private static ref ModeInfo SetOffsets(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     BlockSize bsize,
+     int miRow,
+     int miCol,
+     int bw,
+     int bh,
+     int xMis,
+     int yMis,
+     int bwl,
+     int bhl)
+ {
+     int gridIndex = miRow * cm.MiStride + miCol;
+
+     xd.Mi = cm.MiGridVisible.Slice(gridIndex);
+     xd.Mi[0] = new Ptr<ModeInfo>(ref cm.Mi[gridIndex]);
+     xd.Mi[0].Value.SbType = bsize;
+
+     // Cell (0, 0) was set above, so the first row starts at column 1.
+     for (int gridRow = 0; gridRow < yMis; ++gridRow)
+     {
+         int firstColumn = gridRow == 0 ? 1 : 0;
+
+         for (int gridCol = firstColumn; gridCol < xMis; ++gridCol)
+         {
+             xd.Mi[gridRow * cm.MiStride + gridCol] = xd.Mi[0];
+         }
+     }
+
+     SetPlaneN4(ref xd, bw, bh, bwl, bhl);
+
+     xd.SetSkipContext(miRow, miCol);
+
+     // Distance of Mb to the various image edges. These are specified to 8th pel
+     // as they are always compared to values that are in 1/8th pel units.
+     xd.SetMiRowCol(ref xd.Tile, miRow, bh, miCol, bw, cm.MiRows, cm.MiCols);
+
+     ReconInter.SetupDstPlanes(ref xd.Plane, ref xd.CurBuf, miRow, miCol);
+
+     return ref xd.Mi[0].Value;
+ }
+
+ // Decodes one block at (miRow, miCol): sets up the per-block state, reads the
+ // mode info, then either predicts and reconstructs it as an intra block or builds
+ // the inter prediction and adds the residual. Finally records any bit reader
+ // error and, when loop filtering is enabled, builds the loop filter mask.
+ // bwl/bhl are the log2 block dimensions; bw/bh are derived from them below.
+ private static void DecodeBlock(
+ ref TileWorkerData twd,
+ ref Vp9Common cm,
+ int miRow,
+ int miCol,
+ BlockSize bsize,
+ int bwl,
+ int bhl)
+ {
+ bool less8x8 = bsize < BlockSize.Block8x8;
+ int bw = 1 << (bwl - 1);
+ int bh = 1 << (bhl - 1);
+ // Part of the block that lies inside the frame, in the same units as MiCols/MiRows.
+ int xMis = Math.Min(bw, cm.MiCols - miCol);
+ int yMis = Math.Min(bh, cm.MiRows - miRow);
+ ref Reader r = ref twd.BitReader;
+ ref MacroBlockD xd = ref twd.Xd;
+
+ ref ModeInfo mi = ref SetOffsets(ref cm, ref xd, bsize, miRow, miCol, bw, bh, xMis, yMis, bwl, bhl);
+
+ // With chroma subsampling, the block size must map to a valid chroma block size.
+ if (bsize >= BlockSize.Block8x8 && (cm.SubsamplingX != 0 || cm.SubsamplingY != 0))
+ {
+ BlockSize uvSubsize = Luts.SsSizeLookup[(int)bsize][cm.SubsamplingX][cm.SubsamplingY];
+ if (uvSubsize == BlockSize.BlockInvalid)
+ {
+ xd.ErrorInfo.Value.InternalError(CodecErr.CodecCorruptFrame, "Invalid block size.");
+ }
+ }
+
+ DecodeMv.ReadModeInfo(ref twd, ref cm, miRow, miCol, xMis, yMis);
+
+ if (mi.Skip != 0)
+ {
+ DecResetSkipContext(ref xd);
+ }
+
+ if (!mi.IsInterBlock())
+ {
+ int plane;
+ for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize;
+ int num4x4W = pd.N4W;
+ int num4x4H = pd.N4H;
+ // Transform blocks are visited in steps of their own size, in 4x4 units.
+ int step = 1 << (int)txSize;
+ int row, col;
+ // A negative edge distance means the block extends past the frame edge; the
+ // shift turns that distance into a (negative) count of 4x4 blocks for this plane.
+ int maxBlocksWide = num4x4W + (xd.MbToRightEdge >= 0 ? 0 : xd.MbToRightEdge >> (5 + pd.SubsamplingX));
+ int maxBlocksHigh = num4x4H + (xd.MbToBottomEdge >= 0 ? 0 : xd.MbToBottomEdge >> (5 + pd.SubsamplingY));
+
+ // Zero is stored when the block does not cross the corresponding frame edge.
+ xd.MaxBlocksWide = (uint)(xd.MbToRightEdge >= 0 ? 0 : maxBlocksWide);
+ xd.MaxBlocksHigh = (uint)(xd.MbToBottomEdge >= 0 ? 0 : maxBlocksHigh);
+
+ for (row = 0; row < maxBlocksHigh; row += step)
+ {
+ for (col = 0; col < maxBlocksWide; col += step)
+ {
+ PredictAndReconstructIntraBlock(ref twd, ref mi, plane, row, col, txSize);
+ }
+ }
+ }
+ }
+ else
+ {
+ // Prediction
+ DecBuildInterPredictorsSb(ref cm, ref xd, miRow, miCol);
+
+ // Reconstruction
+ if (mi.Skip == 0)
+ {
+ // Sum of the end-of-block values of all transform blocks of this block.
+ int eobtotal = 0;
+ int plane;
+
+ for (plane = 0; plane < Constants.MaxMbPlane; ++plane)
+ {
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize;
+ int num4x4W = pd.N4W;
+ int num4x4H = pd.N4H;
+ int step = 1 << (int)txSize;
+ int row, col;
+ // Same frame edge clipping as in the intra path above.
+ int maxBlocksWide = num4x4W + (xd.MbToRightEdge >= 0 ? 0 : xd.MbToRightEdge >> (5 + pd.SubsamplingX));
+ int maxBlocksHigh = num4x4H + (xd.MbToBottomEdge >= 0 ? 0 : xd.MbToBottomEdge >> (5 + pd.SubsamplingY));
+
+ xd.MaxBlocksWide = (uint)(xd.MbToRightEdge >= 0 ? 0 : maxBlocksWide);
+ xd.MaxBlocksHigh = (uint)(xd.MbToBottomEdge >= 0 ? 0 : maxBlocksHigh);
+
+ for (row = 0; row < maxBlocksHigh; row += step)
+ {
+ for (col = 0; col < maxBlocksWide; col += step)
+ {
+ eobtotal += ReconstructInterBlock(ref twd, ref mi, plane, row, col, txSize);
+ }
+ }
+ }
+
+ // No coefficients were decoded at all: flag the block as skipped after the fact.
+ // This is not done for blocks smaller than 8x8.
+ if (!less8x8 && eobtotal == 0)
+ {
+ mi.Skip = 1; // Skip loopfilter
+ }
+ }
+ }
+
+ xd.Corrupted |= r.HasError();
+
+ if (cm.Lf.FilterLevel != 0)
+ {
+ LoopFilter.BuildMask(ref cm, ref mi, miRow, miCol, bw, bh);
+ }
+ }
+
+ // Derives the partition probability context from bit 'bsl' of the above and left
+ // partition context entries at (miRow, miCol).
+ private static int DecPartitionPlaneContext(ref TileWorkerData twd, int miRow, int miCol, int bsl)
+ {
+     int aboveBit = (twd.Xd.AboveSegContext[miCol] >> bsl) & 1;
+     int leftBit = (twd.Xd.LeftSegContext[miRow & Constants.MiMask] >> bsl) & 1;
+     int neighbourContext = leftBit * 2 + aboveBit;
+
+     return neighbourContext + bsl * Constants.PartitionPloffset;
+ }
+
+ // Writes the partition context values of 'subsize' into 'bw' consecutive above
+ // entries starting at miCol and 'bw' consecutive left entries starting at the
+ // row's position inside the left context array.
+ private static void DecUpdatePartitionContext(
+     ref TileWorkerData twd,
+     int miRow,
+     int miCol,
+     BlockSize subsize,
+     int bw)
+ {
+     int leftStart = miRow & Constants.MiMask;
+
+     Span<sbyte> aboveCtx = twd.Xd.AboveSegContext.Slice(miCol).ToSpan();
+     // The left span covers what remains of the 8-entry left context from 'leftStart' on.
+     Span<sbyte> leftCtx = MemoryMarshal.CreateSpan(ref twd.Xd.LeftSegContext[leftStart], 8 - leftStart);
+
+     // Update the partition context at the end nodes. Partition bits of block sizes
+     // larger than the current one are set to one, and partition bits of smaller
+     // block sizes are set to zero.
+     Span<sbyte> aboveRun = aboveCtx.Slice(0, bw);
+     aboveRun.Fill(Luts.PartitionContextLookup[(int)subsize].Above);
+
+     Span<sbyte> leftRun = leftCtx.Slice(0, bw);
+     leftRun.Fill(Luts.PartitionContextLookup[(int)subsize].Left);
+ }
+
+ // Reads the partition type of the block at (miRow, miCol). When the block's
+ // bottom or right half lies outside the frame (hasRows/hasCols == 0) only a
+ // single bit is read, or nothing at all when both are zero, in which case the
+ // partition is a split. The matching counter is updated when counts are collected.
+ private static PartitionType ReadPartition(
+     ref TileWorkerData twd,
+     int miRow,
+     int miCol,
+     int hasRows,
+     int hasCols,
+     int bsl)
+ {
+     int ctx = DecPartitionPlaneContext(ref twd, miRow, miCol, bsl);
+     ReadOnlySpan<byte> probs = MemoryMarshal.CreateReadOnlySpan(ref twd.Xd.PartitionProbs[ctx][0], 3);
+     ref Reader r = ref twd.BitReader;
+     PartitionType result;
+
+     if (hasRows != 0)
+     {
+         if (hasCols != 0)
+         {
+             result = (PartitionType)r.ReadTree(Luts.Vp9PartitionTree, probs);
+         }
+         else
+         {
+             result = r.Read(probs[2]) != 0 ? PartitionType.PartitionSplit : PartitionType.PartitionVert;
+         }
+     }
+     else if (hasCols != 0)
+     {
+         result = r.Read(probs[1]) != 0 ? PartitionType.PartitionSplit : PartitionType.PartitionHorz;
+     }
+     else
+     {
+         result = PartitionType.PartitionSplit;
+     }
+
+     if (!twd.Xd.Counts.IsNull)
+     {
+         twd.Xd.Counts.Value.Partition[ctx][(int)result]++;
+     }
+
+     return result;
+ }
+
+ // Recursively decodes the partition tree rooted at (miRow, miCol).
+ // 'bsize' is the size of the square being partitioned and 'n4x4L2' its log2 size
+ // in 4x4 units (the caller in this file starts with Block64x64 and 4).
+ // Squares that start outside the frame are ignored.
+ private static void DecodePartition(
+ ref TileWorkerData twd,
+ ref Vp9Common cm,
+ int miRow,
+ int miCol,
+ BlockSize bsize,
+ int n4x4L2)
+ {
+ int n8x8L2 = n4x4L2 - 1;
+ int num8x8Wh = 1 << n8x8L2;
+ // Half of the square's size; zero once the square is a single 8x8 unit.
+ int hbs = num8x8Wh >> 1;
+ PartitionType partition;
+ BlockSize subsize;
+ // Whether the bottom / right half of the square starts inside the frame.
+ bool hasRows = (miRow + hbs) < cm.MiRows;
+ bool hasCols = (miCol + hbs) < cm.MiCols;
+ ref MacroBlockD xd = ref twd.Xd;
+
+ if (miRow >= cm.MiRows || miCol >= cm.MiCols)
+ {
+ return;
+ }
+
+ partition = ReadPartition(ref twd, miRow, miCol, hasRows ? 1 : 0, hasCols ? 1 : 0, n8x8L2);
+ subsize = Luts.SubsizeLookup[(int)partition][(int)bsize];
+ if (hbs == 0)
+ {
+ // Calculate bmode block dimensions (log 2)
+ xd.BmodeBlocksWl = (byte)(1 >> ((partition & PartitionType.PartitionVert) != 0 ? 1 : 0));
+ xd.BmodeBlocksHl = (byte)(1 >> ((partition & PartitionType.PartitionHorz) != 0 ? 1 : 0));
+ DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, 1, 1);
+ }
+ else
+ {
+ switch (partition)
+ {
+ case PartitionType.PartitionNone:
+ DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n4x4L2, n4x4L2);
+ break;
+ case PartitionType.PartitionHorz:
+ // Top half, then the bottom half if it starts inside the frame.
+ DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n4x4L2, n8x8L2);
+ if (hasRows)
+ {
+ DecodeBlock(ref twd, ref cm, miRow + hbs, miCol, subsize, n4x4L2, n8x8L2);
+ }
+
+ break;
+ case PartitionType.PartitionVert:
+ // Left half, then the right half if it starts inside the frame.
+ DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n8x8L2, n4x4L2);
+ if (hasCols)
+ {
+ DecodeBlock(ref twd, ref cm, miRow, miCol + hbs, subsize, n8x8L2, n4x4L2);
+ }
+
+ break;
+ case PartitionType.PartitionSplit:
+ // Four quadrants in raster order; out-of-frame quadrants return immediately.
+ DecodePartition(ref twd, ref cm, miRow, miCol, subsize, n8x8L2);
+ DecodePartition(ref twd, ref cm, miRow, miCol + hbs, subsize, n8x8L2);
+ DecodePartition(ref twd, ref cm, miRow + hbs, miCol, subsize, n8x8L2);
+ DecodePartition(ref twd, ref cm, miRow + hbs, miCol + hbs, subsize, n8x8L2);
+ break;
+ default: Debug.Assert(false, "Invalid partition type"); break;
+ }
+ }
+
+ // Update partition context
+ // (skipped for a split above 8x8, where the recursive calls have already done it).
+ if (bsize >= BlockSize.Block8x8 && (bsize == BlockSize.Block8x8 || partition != PartitionType.PartitionSplit))
+ {
+ DecUpdatePartitionContext(ref twd, miRow, miCol, subsize, num8x8Wh);
+ }
+ }
+
+ // Initializes the bit reader 'r' over 'readSize' bytes of 'data', reporting an
+ // error through 'errorInfo' when the range cannot be read or the reader fails
+ // to initialize.
+ private static void SetupTokenDecoder(
+     ArrayPtr<byte> data,
+     int readSize,
+     ref InternalErrorInfo errorInfo,
+     ref Reader r)
+ {
+     // The whole partition must be readable; a shorter buffer means the packet was
+     // truncated or the tile length is corrupt.
+     bool fullyReadable = ReadIsValid(data, readSize);
+     if (!fullyReadable)
+     {
+         errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile length");
+     }
+
+     // Init reports failure by returning true.
+     bool initFailed = r.Init(data, readSize);
+     if (initFailed)
+     {
+         errorInfo.InternalError(CodecErr.CodecMemError, "Failed to allocate bool decoder 1");
+     }
+ }
+
+ // Reads the next tile from 'data'. Every tile except the last one is prefixed
+ // with a 4-byte big-endian size; the last tile takes whatever remains.
+ // On return 'buf' describes the tile (start and size) and 'data' has been
+ // advanced past it. Problems are reported through 'errorInfo'.
+ private static void GetTileBuffer(
+     bool isLast,
+     ref InternalErrorInfo errorInfo,
+     ref ArrayPtr<byte> data,
+     ref TileBuffer buf)
+ {
+     int size;
+
+     if (!isLast)
+     {
+         if (!ReadIsValid(data, 4))
+         {
+             errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile length");
+         }
+
+         size = BinaryPrimitives.ReadInt32BigEndian(data.ToSpan());
+         data = data.Slice(4);
+
+         // The size field is unsigned in the bitstream. A corrupt value with the top
+         // bit set shows up as a negative Int32 here and must be rejected too,
+         // otherwise it would pass the upper bound check and be used to slice 'data'.
+         if (size < 0 || size > data.Length)
+         {
+             errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile size");
+         }
+     }
+     else
+     {
+         size = data.Length;
+     }
+
+     buf.Data = data;
+     buf.Size = size;
+
+     data = data.Slice(size);
+ }
+
+ // Walks the tiles in row-major order and records the data window of each one in
+ // 'tileBuffers', consuming the local copy of 'data' as it goes.
+ private static void GetTileBuffers(
+     ref Vp9Common cm,
+     ArrayPtr<byte> data,
+     int tileCols,
+     int tileRows,
+     ref Array4<Array64<TileBuffer>> tileBuffers)
+ {
+     int lastRow = tileRows - 1;
+     int lastCol = tileCols - 1;
+
+     for (int row = 0; row < tileRows; ++row)
+     {
+         for (int column = 0; column < tileCols; ++column)
+         {
+             // Only the very last tile of the frame is read without a size prefix.
+             bool isFinalTile = row == lastRow && column == lastCol;
+
+             GetTileBuffer(isFinalTile, ref cm.Error, ref data, ref tileBuffers[row][column]);
+         }
+     }
+ }
+
+ // Decodes all tiles of a frame from 'data'.
+ // The tile buffers are located first, then one TileWorkerData per tile is
+ // initialized, and finally the tiles are decoded one 64x64 superblock row at a
+ // time, visiting every tile column for that row before moving down.
+ // Returns the position reported by the last tile's bit reader (FindEnd).
+ public static unsafe ArrayPtr<byte> DecodeTiles(ref Vp9Common cm, ArrayPtr<byte> data)
+ {
+ int alignedCols = TileInfo.MiColsAlignedToSb(cm.MiCols);
+ int tileCols = 1 << cm.Log2TileCols;
+ int tileRows = 1 << cm.Log2TileRows;
+ Array4<Array64<TileBuffer>> tileBuffers = new Array4<Array64<TileBuffer>>();
+ int tileRow, tileCol;
+ int miRow, miCol;
+
+ // These limits match the capacity of 'tileBuffers' (4 rows of 64 entries).
+ Debug.Assert(tileRows <= 4);
+ Debug.Assert(tileCols <= (1 << 6));
+
+ // Note: this memset assumes above_context[0], [1] and [2]
+ // are allocated as part of the same buffer.
+ MemoryUtil.Fill(cm.AboveContext.ToPointer(), (sbyte)0, Constants.MaxMbPlane * 2 * alignedCols);
+ MemoryUtil.Fill(cm.AboveSegContext.ToPointer(), (sbyte)0, alignedCols);
+
+ LoopFilter.ResetLfm(ref cm);
+
+ GetTileBuffers(ref cm, data, tileCols, tileRows, ref tileBuffers);
+ // Load all tile information into tile_data.
+ for (tileRow = 0; tileRow < tileRows; ++tileRow)
+ {
+ for (tileCol = 0; tileCol < tileCols; ++tileCol)
+ {
+ ref TileBuffer buf = ref tileBuffers[tileRow][tileCol];
+ ref TileWorkerData tileData = ref cm.TileWorkerData[tileCols * tileRow + tileCol];
+ // Each tile works on its own copy of the common macroblock state.
+ tileData.Xd = cm.Mb;
+ tileData.Xd.Corrupted = false;
+ // No counts are gathered in frame parallel decoding mode.
+ tileData.Xd.Counts = cm.FrameParallelDecodingMode ? Ptr<Vp9BackwardUpdates>.Null : cm.Counts;
+ tileData.Dqcoeff = new Array32<Array32<int>>();
+ tileData.Xd.Tile.Init(ref cm, tileRow, tileCol);
+ SetupTokenDecoder(buf.Data, buf.Size, ref cm.Error, ref tileData.BitReader);
+ // The tile's Dqcoeff storage is handed over as a flat 32 * 32 int buffer.
+ cm.InitMacroBlockD(ref tileData.Xd, new ArrayPtr<int>(ref tileData.Dqcoeff[0][0], 32 * 32));
+ }
+ }
+
+ for (tileRow = 0; tileRow < tileRows; ++tileRow)
+ {
+ TileInfo tile = new TileInfo();
+ tile.SetRow(ref cm, tileRow);
+ for (miRow = tile.MiRowStart; miRow < tile.MiRowEnd; miRow += Constants.MiBlockSize)
+ {
+ for (tileCol = 0; tileCol < tileCols; ++tileCol)
+ {
+ int col = tileCol;
+ ref TileWorkerData tileData = ref cm.TileWorkerData[tileCols * tileRow + col];
+ tile.SetCol(ref cm, col);
+ // The left contexts are cleared at the start of every superblock row of a tile.
+ tileData.Xd.LeftContext = new Array3<Array16<sbyte>>();
+ tileData.Xd.LeftSegContext = new Array8<sbyte>();
+ for (miCol = tile.MiColStart; miCol < tile.MiColEnd; miCol += Constants.MiBlockSize)
+ {
+ DecodePartition(ref tileData, ref cm, miRow, miCol, BlockSize.Block64x64, 4);
+ }
+ // Propagate the tile's corruption flag to the common state and report it.
+ cm.Mb.Corrupted |= tileData.Xd.Corrupted;
+ if (cm.Mb.Corrupted)
+ {
+ cm.Error.InternalError(CodecErr.CodecCorruptFrame, "Failed to decode tile data");
+ };
+ }
+ }
+ }
+
+ // Get last tile data.
+ return cm.TileWorkerData[tileCols * tileRows - 1].BitReader.FindEnd();
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs
new file mode 100644
index 00000000..96cdd574
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs
@@ -0,0 +1,1159 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using Mv = Ryujinx.Graphics.Nvdec.Vp9.Types.Mv;
+using MvRef = Ryujinx.Graphics.Nvdec.Vp9.Types.MvRef;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class DecodeMv
+ {
+ // NOTE(review): presumably the number of neighbouring positions examined when
+ // collecting motion vector reference candidates; its use is not visible here - confirm.
+ private const int MvrefNeighbours = 8;
+
+ // Reads an intra prediction mode from the intra mode tree using probabilities 'p'.
+ private static PredictionMode ReadIntraMode(ref Reader r, ReadOnlySpan<byte> p)
+ {
+     int treeLeaf = r.ReadTree(Luts.Vp9IntraModeTree, p);
+
+     return (PredictionMode)treeLeaf;
+ }
+
+ // Reads a luma intra mode with the probabilities of 'sizeGroup' and, when counts
+ // are being collected, bumps the matching YMode counter.
+ private static PredictionMode ReadIntraModeY(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, int sizeGroup)
+ {
+     PredictionMode mode = ReadIntraMode(ref r, cm.Fc.Value.YModeProb[sizeGroup].ToSpan());
+
+     if (xd.Counts.IsNull)
+     {
+         return mode;
+     }
+
+     xd.Counts.Value.YMode[sizeGroup][(int)mode]++;
+     return mode;
+ }
+
+ // Reads a chroma intra mode with the probabilities selected by the luma mode
+ // 'yMode' and, when counts are being collected, bumps the matching UvMode counter.
+ private static PredictionMode ReadIntraModeUv(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, byte yMode)
+ {
+     PredictionMode mode = ReadIntraMode(ref r, cm.Fc.Value.UvModeProb[yMode].ToSpan());
+
+     if (xd.Counts.IsNull)
+     {
+         return mode;
+     }
+
+     xd.Counts.Value.UvMode[yMode][(int)mode]++;
+     return mode;
+ }
+
+ // Reads an inter prediction mode for context 'ctx'. The tree yields an index
+ // relative to NearestMv; the matching InterMode counter is bumped when counts
+ // are being collected.
+ private static PredictionMode ReadInterMode(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, int ctx)
+ {
+     int modeIndex = r.ReadTree(Luts.Vp9InterModeTree, cm.Fc.Value.InterModeProb[ctx].ToSpan());
+
+     if (!xd.Counts.IsNull)
+     {
+         xd.Counts.Value.InterMode[ctx][modeIndex]++;
+     }
+
+     PredictionMode result = PredictionMode.NearestMv + modeIndex;
+     return result;
+ }
+
+ // Reads a segment id from the segment tree using the given tree probabilities.
+ private static int ReadSegmentId(ref Reader r, ref Array7<byte> segTreeProbs)
+ {
+     int segmentId = r.ReadTree(Luts.Vp9SegmentTree, segTreeProbs.ToSpan());
+
+     return segmentId;
+ }
+
+ // Selects the transform size probabilities of context 'ctx' that belong to the
+ // largest allowed transform size. Only 8x8, 16x16 and 32x32 are valid; anything
+ // else trips a debug assertion and yields an empty span.
+ private static ReadOnlySpan<byte> GetTxProbs(ref Vp9EntropyProbs fc, TxSize maxTxSize, int ctx)
+ {
+     if (maxTxSize == TxSize.Tx8x8)
+     {
+         return fc.Tx8x8Prob[ctx].ToSpan();
+     }
+
+     if (maxTxSize == TxSize.Tx16x16)
+     {
+         return fc.Tx16x16Prob[ctx].ToSpan();
+     }
+
+     if (maxTxSize == TxSize.Tx32x32)
+     {
+         return fc.Tx32x32Prob[ctx].ToSpan();
+     }
+
+     Debug.Assert(false, "Invalid maxTxSize.");
+     return ReadOnlySpan<byte>.Empty;
+ }
+
+ // Selects the transform size counters of context 'ctx' that belong to the
+ // largest allowed transform size. Only 8x8, 16x16 and 32x32 are valid; anything
+ // else trips a debug assertion and yields an empty span.
+ private static Span<uint> GetTxCounts(ref Vp9BackwardUpdates counts, TxSize maxTxSize, int ctx)
+ {
+     if (maxTxSize == TxSize.Tx8x8)
+     {
+         return counts.Tx8x8[ctx].ToSpan();
+     }
+
+     if (maxTxSize == TxSize.Tx16x16)
+     {
+         return counts.Tx16x16[ctx].ToSpan();
+     }
+
+     if (maxTxSize == TxSize.Tx32x32)
+     {
+         return counts.Tx32x32[ctx].ToSpan();
+     }
+
+     Debug.Assert(false, "Invalid maxTxSize.");
+     return Span<uint>.Empty;
+ }
+
+ // Reads an explicitly signalled transform size, never larger than 'maxTxSize'.
+ // The size is coded as up to three bits: each set bit moves to the next larger
+ // size, and reading stops at the first clear bit or once 'maxTxSize' rules out
+ // anything larger. The matching counter is bumped when counts are collected.
+ private static TxSize ReadSelectedTxSize(ref Vp9Common cm, ref MacroBlockD xd, TxSize maxTxSize, ref Reader r)
+ {
+     int ctx = xd.GetTxSizeContext();
+     ReadOnlySpan<byte> probs = GetTxProbs(ref cm.Fc.Value, maxTxSize, ctx);
+
+     int size = r.Read(probs[0]);
+     bool canReach16x16 = size != (int)TxSize.Tx4x4 && maxTxSize >= TxSize.Tx16x16;
+     if (canReach16x16)
+     {
+         size += r.Read(probs[1]);
+
+         bool canReach32x32 = size != (int)TxSize.Tx8x8 && maxTxSize >= TxSize.Tx32x32;
+         if (canReach32x32)
+         {
+             size += r.Read(probs[2]);
+         }
+     }
+
+     if (!xd.Counts.IsNull)
+     {
+         GetTxCounts(ref xd.Counts.Value, maxTxSize, ctx)[size]++;
+     }
+
+     return (TxSize)size;
+ }
+
+ // Determines the transform size of the current block: it is read from the
+ // bitstream when selection is allowed, the frame uses per-block selection and the
+ // block is at least 8x8; otherwise it is the smaller of the largest size for the
+ // block and the largest size allowed by the frame's transform mode.
+ private static TxSize ReadTxSize(ref Vp9Common cm, ref MacroBlockD xd, bool allowSelect, ref Reader r)
+ {
+     TxMode frameTxMode = cm.TxMode;
+     BlockSize blockSize = xd.Mi[0].Value.SbType;
+     TxSize largestForBlock = Luts.MaxTxSizeLookup[(int)blockSize];
+
+     bool isSignalled = allowSelect && frameTxMode == TxMode.TxModeSelect && blockSize >= BlockSize.Block8x8;
+     if (isSignalled)
+     {
+         return ReadSelectedTxSize(ref cm, ref xd, largestForBlock, ref r);
+     }
+
+     int largestForMode = (int)Luts.TxModeToBiggestTxSize[(int)frameTxMode];
+     return (TxSize)Math.Min((int)largestForBlock, largestForMode);
+ }
+
+ // Returns the smallest segment id found in the xMis x yMis region of
+ // 'segmentIds' that starts at 'miOffset' (rows are cm.MiCols entries apart).
+ private static int DecGetSegmentId(ref Vp9Common cm, ArrayPtr<byte> segmentIds, int miOffset, int xMis, int yMis)
+ {
+     int lowestId = int.MaxValue;
+
+     for (int rowIndex = 0; rowIndex < yMis; rowIndex++)
+     {
+         for (int colIndex = 0; colIndex < xMis; colIndex++)
+         {
+             int candidate = segmentIds[miOffset + rowIndex * cm.MiCols + colIndex];
+             lowestId = Math.Min(lowestId, candidate);
+         }
+     }
+
+     Debug.Assert(lowestId >= 0 && lowestId < Constants.MaxSegments);
+     return lowestId;
+ }
+
+ // Writes 'segmentId' to every entry of the xMis x yMis region of the current
+ // frame's segment map that starts at 'miOffset' (rows are cm.MiCols entries apart).
+ private static void SetSegmentId(ref Vp9Common cm, int miOffset, int xMis, int yMis, int segmentId)
+ {
+     Debug.Assert(segmentId >= 0 && segmentId < Constants.MaxSegments);
+
+     for (int rowIndex = 0; rowIndex < yMis; rowIndex++)
+     {
+         for (int colIndex = 0; colIndex < xMis; colIndex++)
+         {
+             int mapIndex = miOffset + rowIndex * cm.MiCols + colIndex;
+             cm.CurrentFrameSegMap[mapIndex] = (byte)segmentId;
+         }
+     }
+ }
+
+ // Copies the xMis x yMis region starting at 'miOffset' from the previous segment
+ // map into the current one. When there is no previous map the region is zeroed.
+ private static void CopySegmentId(
+     ref Vp9Common cm,
+     ArrayPtr<byte> lastSegmentIds,
+     ArrayPtr<byte> currentSegmentIds,
+     int miOffset,
+     int xMis,
+     int yMis)
+ {
+     for (int rowIndex = 0; rowIndex < yMis; rowIndex++)
+     {
+         for (int colIndex = 0; colIndex < xMis; colIndex++)
+         {
+             byte value;
+             if (!lastSegmentIds.IsNull)
+             {
+                 value = (byte)lastSegmentIds[miOffset + rowIndex * cm.MiCols + colIndex];
+             }
+             else
+             {
+                 value = (byte)0;
+             }
+
+             currentSegmentIds[miOffset + rowIndex * cm.MiCols + colIndex] = value;
+         }
+     }
+ }
+
+ // Determines the segment id of a block in an intra-only context.
+ // Returns 0 when segmentation is disabled. When the map is not being updated the
+ // previous map is copied over for the block and 0 is returned. Otherwise the id
+ // is read from the bitstream and stored in the current map.
+ private static int ReadIntraSegmentId(ref Vp9Common cm, int miOffset, int xMis, int yMis, ref Reader r)
+ {
+     ref Segmentation segmentation = ref cm.Seg;
+
+     if (!segmentation.Enabled)
+     {
+         // Default for disabled segmentation.
+         return 0;
+     }
+
+     if (segmentation.UpdateMap)
+     {
+         int decodedId = ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb);
+         SetSegmentId(ref cm, miOffset, xMis, yMis, decodedId);
+         return decodedId;
+     }
+
+     CopySegmentId(ref cm, cm.LastFrameSegMap, cm.CurrentFrameSegMap, miOffset, xMis, yMis);
+     return 0;
+ }
+
+ // Determines the segment id of a block in an inter frame.
+ // Returns 0 when segmentation is disabled. The predicted id is the smallest id
+ // the previous map holds for the block (0 without a previous map). When the map
+ // is not being updated the previous map is copied over and the predicted id is
+ // returned. Otherwise the id is either taken from the prediction (temporal
+ // update with the prediction flag set) or read from the bitstream, and stored
+ // in the current map.
+ private static int ReadInterSegmentId(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     int miRow,
+     int miCol,
+     ref Reader r,
+     int xMis,
+     int yMis)
+ {
+     ref Segmentation segmentation = ref cm.Seg;
+     ref ModeInfo mi = ref xd.Mi[0].Value;
+     int miOffset = miRow * cm.MiCols + miCol;
+
+     if (!segmentation.Enabled)
+     {
+         // Default for disabled segmentation.
+         return 0;
+     }
+
+     int predictedId = 0;
+     if (!cm.LastFrameSegMap.IsNull)
+     {
+         predictedId = DecGetSegmentId(ref cm, cm.LastFrameSegMap, miOffset, xMis, yMis);
+     }
+
+     if (!segmentation.UpdateMap)
+     {
+         CopySegmentId(ref cm, cm.LastFrameSegMap, cm.CurrentFrameSegMap, miOffset, xMis, yMis);
+         return predictedId;
+     }
+
+     bool usePrediction = false;
+     if (segmentation.TemporalUpdate)
+     {
+         byte predProb = Segmentation.GetPredProbSegId(ref cm.Fc.Value.SegPredProb, ref xd);
+         mi.SegIdPredicted = (sbyte)r.Read(predProb);
+         usePrediction = mi.SegIdPredicted != 0;
+     }
+
+     int resultId = usePrediction ? predictedId : ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb);
+
+     SetSegmentId(ref cm, miOffset, xMis, yMis, resultId);
+     return resultId;
+ }
+
+ // Returns the skip flag of the current block: always 1 when the segment has the
+ // skip feature active, otherwise the bit read from the bitstream (the matching
+ // Skip counter is bumped when counts are being collected).
+ private static int ReadSkip(ref Vp9Common cm, ref MacroBlockD xd, int segmentId, ref Reader r)
+ {
+     bool forcedBySegment = cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlSkip) != 0;
+     if (forcedBySegment)
+     {
+         return 1;
+     }
+
+     int ctx = xd.GetSkipContext();
+     int skipBit = r.Read(cm.Fc.Value.SkipProb[ctx]);
+
+     if (!xd.Counts.IsNull)
+     {
+         xd.Counts.Value.Skip[ctx][skipBit]++;
+     }
+
+     return skipBit;
+ }
+
+ // Reads one component of a motion vector difference and returns it as a signed value.
+ // 'mvcomp' selects the per-component probability tables in 'fc' (the caller in
+ // this file passes 0 for the row and 1 for the column). 'usehp' tells whether the
+ // high precision bit is present in the bitstream.
+ // The reads below must stay in this order: sign, class, integer bits,
+ // fractional part, high precision bit.
+ private static int ReadMvComponent(ref Reader r, ref Vp9EntropyProbs fc, int mvcomp, bool usehp)
+ {
+ int mag, d, fr, hp;
+ bool sign = r.Read(fc.Sign[mvcomp]) != 0;
+ MvClassType mvClass = (MvClassType)r.ReadTree(Luts.Vp9MvClassTree, fc.Classes[mvcomp].ToSpan());
+ bool class0 = mvClass == MvClassType.MvClass0;
+
+ // Integer part
+ if (class0)
+ {
+ // Class 0 codes its integer part with a single bit and has no base offset.
+ d = r.Read(fc.Class0[mvcomp][0]);
+ mag = 0;
+ }
+ else
+ {
+ int i;
+ int n = (int)mvClass + Constants.Class0Bits - 1; // Number of bits
+
+ // The integer bits are read least significant bit first.
+ d = 0;
+ for (i = 0; i < n; ++i)
+ {
+ d |= r.Read(fc.Bits[mvcomp][i]) << i;
+ }
+
+ // Base offset of the class.
+ mag = Constants.Class0Size << ((int)mvClass + 2);
+ }
+
+ // Fractional part
+ fr = r.ReadTree(Luts.Vp9MvFPTree, class0 ? fc.Class0Fp[mvcomp][d].ToSpan() : fc.Fp[mvcomp].ToSpan());
+
+ // High precision part (if hp is not used, the default value of the hp is 1)
+ hp = usehp ? r.Read(class0 ? fc.Class0Hp[mvcomp] : fc.Hp[mvcomp]) : 1;
+
+ // Result
+ // Integer bits, two fractional bits and the high precision bit are packed
+ // together; the + 1 makes the smallest coded magnitude 1.
+ mag += ((d << 3) | (fr << 1) | hp) + 1;
+ return sign ? -mag : mag;
+ }
+
+ // Reads a motion vector difference and stores reference + difference in 'mv'.
+ // The joint type says which components are coded; components that are not coded
+ // keep a difference of zero. High precision is only used when it is allowed and
+ // the reference vector qualifies for it. The difference is passed to IncMv along
+ // with 'counts'.
+ private static void ReadMv(
+     ref Reader r,
+     ref Mv mv,
+     ref Mv refr,
+     ref Vp9EntropyProbs fc,
+     Ptr<Vp9BackwardUpdates> counts,
+     bool allowHP)
+ {
+     int jointLeaf = r.ReadTree(Luts.Vp9MvJointTree, fc.Joints.ToSpan());
+     MvJointType joint = (MvJointType)jointLeaf;
+     bool highPrecision = allowHP && refr.UseMvHp();
+     Mv delta = new Mv();
+
+     // Row (vertical) component first, then column (horizontal).
+     if (Mv.MvJointVertical(joint))
+     {
+         int rowDelta = ReadMvComponent(ref r, ref fc, 0, highPrecision);
+         delta.Row = (short)rowDelta;
+     }
+
+     if (Mv.MvJointHorizontal(joint))
+     {
+         int colDelta = ReadMvComponent(ref r, ref fc, 1, highPrecision);
+         delta.Col = (short)colDelta;
+     }
+
+     delta.IncMv(counts);
+
+     mv.Row = (short)(refr.Row + delta.Row);
+     mv.Col = (short)(refr.Col + delta.Col);
+ }
+
+ /// <summary>
+ /// Gets the reference mode of the current block.
+ /// </summary>
+ /// <param name="cm">Common decoder state (frame reference mode and entropy probabilities)</param>
+ /// <param name="xd">Block decoding state (context and optional symbol counters)</param>
+ /// <param name="r">Bit reader the mode is read from</param>
+ /// <returns>The frame reference mode, or the per-block mode read when the frame mode is ReferenceModeSelect</returns>
+ private static ReferenceMode ReadBlockReferenceMode(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r)
+ {
+     // Unless the frame selects the mode per block, the frame level mode applies as is.
+     if (cm.ReferenceMode != ReferenceMode.ReferenceModeSelect)
+     {
+         return cm.ReferenceMode;
+     }
+
+     int modeCtx = PredCommon.GetReferenceModeContext(ref cm, ref xd);
+     ReferenceMode blockMode = (ReferenceMode)r.Read(cm.Fc.Value.CompInterProb[modeCtx]);
+
+     if (!xd.Counts.IsNull)
+     {
+         xd.Counts.Value.CompInter[modeCtx][(int)blockMode]++;
+     }
+
+     // SingleReference or CompoundReference
+     return blockMode;
+ }
+
+ // Read the reference frame(s) of the current block into refFrame.
+ // When the segment has the reference frame feature active, the frame comes from the
+ // segment data and nothing is read from the bitstream.
+ private static void ReadRefFrames(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     ref Reader r,
+     int segmentId,
+     ref Array2<sbyte> refFrame)
+ {
+     ref Vp9EntropyProbs fc = ref cm.Fc.Value;
+
+     if (cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlRefFrame) != 0)
+     {
+         refFrame[0] = (sbyte)cm.Seg.GetSegData(segmentId, SegLvlFeatures.SegLvlRefFrame);
+         refFrame[1] = Constants.None;
+         return;
+     }
+
+     switch (ReadBlockReferenceMode(ref cm, ref xd, ref r))
+     {
+         case ReferenceMode.CompoundReference:
+         {
+             // The sign bias of the fixed reference selects which slot it occupies.
+             int fixedSlot = cm.RefFrameSignBias[cm.CompFixedRef];
+             int compCtx = PredCommon.GetPredContextCompRefP(ref cm, ref xd);
+             int varBit = r.Read(fc.CompRefProb[compCtx]);
+             if (!xd.Counts.IsNull)
+             {
+                 xd.Counts.Value.CompRef[compCtx][varBit]++;
+             }
+
+             refFrame[fixedSlot] = cm.CompFixedRef;
+             refFrame[fixedSlot == 0 ? 1 : 0] = cm.CompVarRef[varBit];
+             break;
+         }
+         case ReferenceMode.SingleReference:
+         {
+             int firstCtx = PredCommon.GetPredContextSingleRefP1(ref xd);
+             int firstBit = r.Read(fc.SingleRefProb[firstCtx][0]);
+             if (!xd.Counts.IsNull)
+             {
+                 xd.Counts.Value.SingleRef[firstCtx][0][firstBit]++;
+             }
+
+             if (firstBit == 0)
+             {
+                 refFrame[0] = Constants.LastFrame;
+             }
+             else
+             {
+                 // A second bit picks between the golden and the alternate reference.
+                 int secondCtx = PredCommon.GetPredContextSingleRefP2(ref xd);
+                 int secondBit = r.Read(fc.SingleRefProb[secondCtx][1]);
+                 if (!xd.Counts.IsNull)
+                 {
+                     xd.Counts.Value.SingleRef[secondCtx][1][secondBit]++;
+                 }
+
+                 refFrame[0] = (sbyte)(secondBit != 0 ? Constants.AltRefFrame : Constants.GoldenFrame);
+             }
+
+             refFrame[1] = Constants.None;
+             break;
+         }
+         default:
+             Debug.Assert(false, "Invalid prediction mode.");
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Reads the interpolation filter of the current block from the bitstream.
+ /// </summary>
+ /// <param name="cm">Common decoder state (entropy probabilities)</param>
+ /// <param name="xd">Block decoding state (context and optional symbol counters)</param>
+ /// <param name="r">Bit reader the filter is read from</param>
+ /// <returns>The value decoded with the switchable interpolation filter tree</returns>
+ private static byte ReadSwitchableInterpFilter(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r)
+ {
+     int filterCtx = xd.GetPredContextSwitchableInterp();
+     int decoded = r.ReadTree(Luts.Vp9SwitchableInterpTree, cm.Fc.Value.SwitchableInterpProb[filterCtx].ToSpan());
+     byte filter = (byte)decoded;
+
+     if (!xd.Counts.IsNull)
+     {
+         xd.Counts.Value.SwitchableInterp[filterCtx][filter]++;
+     }
+
+     return filter;
+ }
+
+ /// <summary>
+ /// Reads the luma and chroma prediction modes of an intra block and resets its inter fields.
+ /// </summary>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state</param>
+ /// <param name="mi">Mode info of the block, updated with the modes that are read</param>
+ /// <param name="r">Bit reader the modes are read from</param>
+ private static void ReadIntraBlockModeInfo(ref Vp9Common cm, ref MacroBlockD xd, ref ModeInfo mi, ref Reader r)
+ {
+     BlockSize blockSize = mi.SbType;
+
+     switch (blockSize)
+     {
+         case BlockSize.Block4x4:
+             // One mode per sub-block, the block mode is the one of the last sub-block.
+             for (int blk = 0; blk < 4; blk++)
+             {
+                 mi.Bmi[blk].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
+             }
+
+             mi.Mode = mi.Bmi[3].Mode;
+             break;
+         case BlockSize.Block4x8:
+         {
+             // Two modes are read, each one shared by sub-blocks (0, 2) and (1, 3).
+             PredictionMode firstMode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
+             mi.Bmi[2].Mode = firstMode;
+             mi.Bmi[0].Mode = firstMode;
+
+             PredictionMode secondMode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
+             mi.Mode = secondMode;
+             mi.Bmi[3].Mode = secondMode;
+             mi.Bmi[1].Mode = secondMode;
+             break;
+         }
+         case BlockSize.Block8x4:
+         {
+             // Two modes are read, each one shared by sub-blocks (0, 1) and (2, 3).
+             PredictionMode firstMode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
+             mi.Bmi[1].Mode = firstMode;
+             mi.Bmi[0].Mode = firstMode;
+
+             PredictionMode secondMode = ReadIntraModeY(ref cm, ref xd, ref r, 0);
+             mi.Mode = secondMode;
+             mi.Bmi[3].Mode = secondMode;
+             mi.Bmi[2].Mode = secondMode;
+             break;
+         }
+         default:
+             mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, Luts.SizeGroupLookup[(int)blockSize]);
+             break;
+     }
+
+     mi.UvMode = ReadIntraModeUv(ref cm, ref xd, ref r, (byte)mi.Mode);
+
+     // Initialize interp_filter here so we do not have to check for inter block
+     // modes in GetPredContextSwitchableInterp()
+     mi.InterpFilter = Constants.SwitchableFilters;
+
+     mi.RefFrame[0] = Constants.IntraFrame;
+     mi.RefFrame[1] = Constants.None;
+ }
+
+ /// <summary>
+ /// Checks that both components of a motion vector are strictly between
+ /// <c>Constants.MvLow</c> and <c>Constants.MvUpp</c>.
+ /// </summary>
+ /// <param name="mv">Motion vector to check</param>
+ /// <returns>True if both the row and the column are inside the open range</returns>
+ private static bool IsMvValid(ref Mv mv)
+ {
+     bool rowInRange = mv.Row > Constants.MvLow && mv.Row < Constants.MvUpp;
+     bool colInRange = mv.Col > Constants.MvLow && mv.Col < Constants.MvUpp;
+
+     return rowInRange && colInRange;
+ }
+
+ /// <summary>
+ /// Copies both motion vectors of <paramref name="src"/> into <paramref name="dst"/>.
+ /// </summary>
+ /// <param name="dst">Destination pair</param>
+ /// <param name="src">Source pair</param>
+ private static void CopyMvPair(ref Array2<Mv> dst, ref Array2<Mv> src)
+ {
+     for (int slot = 0; slot < 2; slot++)
+     {
+         dst[slot] = src[slot];
+     }
+ }
+
+ /// <summary>
+ /// Resets both motion vectors of a pair to the default (zero) vector.
+ /// </summary>
+ /// <param name="dst">Pair to reset</param>
+ private static void ZeroMvPair(ref Array2<Mv> dst)
+ {
+     Mv zero = new Mv();
+
+     dst[0] = zero;
+     dst[1] = zero;
+ }
+
+ /// <summary>
+ /// Fills the motion vectors of a block (or sub-block) according to its inter prediction mode.
+ /// </summary>
+ /// <param name="cm">Common decoder state (entropy probabilities)</param>
+ /// <param name="xd">Block decoding state (symbol counters)</param>
+ /// <param name="mode">Inter prediction mode of the block</param>
+ /// <param name="mv">Receives the resulting motion vectors</param>
+ /// <param name="refMv">Reference vectors the NewMv differences are added to</param>
+ /// <param name="nearNearestMv">Vectors copied as is for the NearMv and NearestMv modes</param>
+ /// <param name="isCompound">1 if two vectors have to be read for NewMv, 0 for a single one</param>
+ /// <param name="allowHP">True if high precision vectors are allowed</param>
+ /// <param name="r">Bit reader the NewMv differences are read from</param>
+ /// <returns>False for an unexpected mode or if a vector read for NewMv is out of range, true otherwise</returns>
+ private static bool AssignMv(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     PredictionMode mode,
+     ref Array2<Mv> mv,
+     ref Array2<Mv> refMv,
+     ref Array2<Mv> nearNearestMv,
+     int isCompound,
+     bool allowHP,
+     ref Reader r)
+ {
+     switch (mode)
+     {
+         case PredictionMode.NewMv:
+         {
+             bool allValid = true;
+             int mvCount = 1 + isCompound;
+
+             // Every vector is read, even after an invalid one was found.
+             for (int slot = 0; slot < mvCount; slot++)
+             {
+                 ReadMv(ref r, ref mv[slot], ref refMv[slot], ref cm.Fc.Value, xd.Counts, allowHP);
+                 allValid &= IsMvValid(ref mv[slot]);
+             }
+
+             return allValid;
+         }
+         case PredictionMode.NearMv:
+         case PredictionMode.NearestMv:
+             CopyMvPair(ref mv, ref nearNearestMv);
+             return true;
+         case PredictionMode.ZeroMv:
+             ZeroMvPair(ref mv);
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ /// <summary>
+ /// Determines whether the current block is inter predicted.
+ /// </summary>
+ /// <param name="cm">Common decoder state (segmentation info and entropy probabilities)</param>
+ /// <param name="xd">Block decoding state (context and optional symbol counters)</param>
+ /// <param name="segmentId">Segment the block belongs to</param>
+ /// <param name="r">Bit reader the flag is read from</param>
+ /// <returns>True for an inter block, false for an intra block</returns>
+ private static bool ReadIsInterBlock(ref Vp9Common cm, ref MacroBlockD xd, int segmentId, ref Reader r)
+ {
+     // A segment level reference frame decides the block type without reading anything.
+     if (cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlRefFrame) != 0)
+     {
+         return cm.Seg.GetSegData(segmentId, SegLvlFeatures.SegLvlRefFrame) != Constants.IntraFrame;
+     }
+
+     int interCtx = xd.GetIntraInterContext();
+     bool inter = r.Read(cm.Fc.Value.IntraInterProb[interCtx]) != 0;
+
+     if (!xd.Counts.IsNull)
+     {
+         xd.Counts.Value.IntraInter[interCtx][inter ? 1 : 0]++;
+     }
+
+     return inter;
+ }
+
+ /// <summary>
+ /// Lowers the precision of the first <paramref name="refmvCount"/> candidates in place and
+ /// stores the last of them in <paramref name="bestMv"/>.
+ /// </summary>
+ /// <param name="allowHP">True if high precision vectors are allowed</param>
+ /// <param name="mvlist">Candidate list, modified in place</param>
+ /// <param name="bestMv">Receives the last processed candidate, left untouched if there is none</param>
+ /// <param name="refmvCount">Number of candidates to process</param>
+ private static void DecFindBestRefMvs(bool allowHP, Span<Mv> mvlist, ref Mv bestMv, int refmvCount)
+ {
+     // Make sure all the candidates are properly clamped etc
+     for (int idx = 0; idx < refmvCount; idx++)
+     {
+         ref Mv candidate = ref mvlist[idx];
+
+         candidate.LowerMvPrecision(allowHP);
+         bestMv = candidate;
+     }
+ }
+
+ /// <summary>
+ /// Adds a candidate to the reference vector list, and tells the caller if the search is over.
+ /// </summary>
+ /// <param name="mv">Candidate to add</param>
+ /// <param name="refMvCount">Number of entries on the list, incremented when the candidate is added</param>
+ /// <param name="mvRefList">List the candidate is added to</param>
+ /// <param name="earlyBreak">True if the search should stop once the first candidate was added</param>
+ /// <returns>
+ /// True if the caller should stop searching: either a second, different candidate was added,
+ /// or the first one was added and <paramref name="earlyBreak"/> is set
+ /// </returns>
+ private static bool AddMvRefListEb(Mv mv, ref int refMvCount, Span<Mv> mvRefList, bool earlyBreak)
+ {
+     if (refMvCount == 0)
+     {
+         // First candidate, always added.
+         mvRefList[refMvCount++] = mv;
+
+         return earlyBreak;
+     }
+
+     // Candidates equal to the first entry (compared as raw 32-bit values) are dropped.
+     bool sameAsFirst = Unsafe.As<Mv, int>(ref mv) == Unsafe.As<Mv, int>(ref mvRefList[0]);
+     if (sameAsFirst)
+     {
+         return false;
+     }
+
+     mvRefList[refMvCount] = mv;
+     refMvCount++;
+
+     return true;
+ }
+
+ // Returns a copy of the requested motion vector of mi, with the sign of both components
+ // inverted when the sign bias of its reference frame differs from the one of thisRefFrame.
+ private static Mv ScaleMv(ref ModeInfo mi, int refr, sbyte thisRefFrame, ref Array4<sbyte> refSignBias)
+ {
+     Mv scaled = mi.Mv[refr];
+
+     sbyte candidateBias = refSignBias[mi.RefFrame[refr]];
+     sbyte targetBias = refSignBias[thisRefFrame];
+
+     if (candidateBias != targetBias)
+     {
+         scaled.Row *= -1;
+         scaled.Col *= -1;
+     }
+
+     return scaled;
+ }
+
+ /// <summary>
+ /// Adds the motion vectors of an inter block that use a reference frame other than
+ /// <paramref name="refFrame"/> to the reference vector list, inverting their sign if needed.
+ /// </summary>
+ /// <param name="mbmi">Mode info of the candidate block</param>
+ /// <param name="refFrame">Reference frame the list is being built for</param>
+ /// <param name="refSignBias">Sign bias of each reference frame</param>
+ /// <param name="refmvCount">Number of entries on the list, updated as candidates are added</param>
+ /// <param name="mvRefList">List the candidates are added to</param>
+ /// <param name="earlyBreak">True if the search should stop once the first candidate was added</param>
+ /// <returns>True if the caller should stop searching, false otherwise</returns>
+ private static bool IsDiffRefFrameAddMvEb(
+     ref ModeInfo mbmi,
+     sbyte refFrame,
+     ref Array4<sbyte> refSignBias,
+     ref int refmvCount,
+     Span<Mv> mvRefList,
+     bool earlyBreak)
+ {
+     // Intra candidates have no motion vector to offer.
+     if (!mbmi.IsInterBlock())
+     {
+         return false;
+     }
+
+     if (mbmi.RefFrame[0] != refFrame &&
+         AddMvRefListEb(ScaleMv(ref mbmi, 0, refFrame, ref refSignBias), ref refmvCount, mvRefList, earlyBreak))
+     {
+         return true;
+     }
+
+     // The second vector is only considered if it differs from the first one.
+     bool useSecond = mbmi.HasSecondRef() &&
+                      mbmi.RefFrame[1] != refFrame &&
+                      Unsafe.As<Mv, int>(ref mbmi.Mv[1]) != Unsafe.As<Mv, int>(ref mbmi.Mv[0]);
+
+     return useSecond &&
+            AddMvRefListEb(ScaleMv(ref mbmi, 1, refFrame, ref refSignBias), ref refmvCount, mvRefList, earlyBreak);
+ }
+
+ // This function searches the neighborhood of a given MB/SB
+ // to try and find candidate reference vectors.
+ //
+ // Candidates are gathered in the order below, and the search jumps to the Done label
+ // as soon as AddMvRefListEb reports that it can stop:
+ //  1. Neighbors from mvRefSearch that use refFrame. When isSub8X8 is non-zero, the first
+ //     two neighbors provide their sub-block vectors (GetSubBlockMv) instead.
+ //  2. The previous frame entry at (miRow, miCol), if cm.UsePrevFrameMvs is set and it uses refFrame.
+ //  3. Neighbors using another reference frame (sign inverted as indicated by refSignBias).
+ //  4. The previous frame entry using another, non-intra reference frame (sign inverted likewise).
+ //
+ // If none of the steps ends the search, the returned count is forced to
+ // Constants.MaxMvRefCandidates for NearMv, and to 1 for every other mode.
+ // The first "count" entries of mvRefList are clamped with ClampMvRef before returning.
+ //
+ // Returns the number of entries of mvRefList that the caller should use.
+ private static unsafe int DecFindMvRefs(
+ ref Vp9Common cm,
+ ref MacroBlockD xd,
+ PredictionMode mode,
+ sbyte refFrame,
+ Span<Position> mvRefSearch,
+ Span<Mv> mvRefList,
+ int miRow,
+ int miCol,
+ int block,
+ int isSub8X8)
+ {
+ ref Array4<sbyte> refSignBias = ref cm.RefFrameSignBias;
+ int i, refmvCount = 0;
+ // Set as soon as any neighbor is inside the tile, which enables the different reference pass.
+ bool differentRefFound = false;
+ Ptr<MvRef> prevFrameMvs = cm.UsePrevFrameMvs ? new Ptr<MvRef>(ref cm.PrevFrameMvs[miRow * cm.MiCols + miCol]) : Ptr<MvRef>.Null;
+ ref TileInfo tile = ref xd.Tile;
+ // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop
+ // searching after the first mv is found.
+ bool earlyBreak = mode != PredictionMode.NearMv;
+
+ // Blank the reference vector list
+ mvRefList.Slice(0, Constants.MaxMvRefCandidates).Fill(new Mv());
+
+ i = 0;
+ if (isSub8X8 != 0)
+ {
+ // If the size < 8x8 we get the mv from the bmi substructure for the
+ // nearest two blocks.
+ for (i = 0; i < 2; ++i)
+ {
+ ref Position mvRef = ref mvRefSearch[i];
+ if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef))
+ {
+ ref ModeInfo candidateMi = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value;
+ differentRefFound = true;
+
+ if (candidateMi.RefFrame[0] == refFrame)
+ {
+ if (AddMvRefListEb(candidateMi.GetSubBlockMv(0, mvRef.Col, block), ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ else if (candidateMi.RefFrame[1] == refFrame)
+ {
+ if (AddMvRefListEb(candidateMi.GetSubBlockMv(1, mvRef.Col, block), ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ }
+ }
+ }
+
+ // Check the rest of the neighbors in much the same way
+ // as before except we don't need to keep track of sub blocks or
+ // mode counts.
+ // NOTE: i is 2 here if the sub 8x8 loop above ran, and 0 otherwise.
+ for (; i < MvrefNeighbours; ++i)
+ {
+ ref Position mvRef = ref mvRefSearch[i];
+ if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef))
+ {
+ ref ModeInfo candidate = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value;
+ differentRefFound = true;
+
+ if (candidate.RefFrame[0] == refFrame)
+ {
+ if (AddMvRefListEb(candidate.Mv[0], ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ else if (candidate.RefFrame[1] == refFrame)
+ {
+ if (AddMvRefListEb(candidate.Mv[1], ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ }
+ }
+
+ // Check the last frame's mode and mv info.
+ if (!prevFrameMvs.IsNull)
+ {
+ if (prevFrameMvs.Value.RefFrame[0] == refFrame)
+ {
+ if (AddMvRefListEb(prevFrameMvs.Value.Mv[0], ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ else if (prevFrameMvs.Value.RefFrame[1] == refFrame)
+ {
+ if (AddMvRefListEb(prevFrameMvs.Value.Mv[1], ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ }
+
+ // Since we couldn't find 2 mvs from the same reference frame
+ // go back through the neighbors and find motion vectors from
+ // different reference frames.
+ if (differentRefFound)
+ {
+ for (i = 0; i < MvrefNeighbours; ++i)
+ {
+ ref Position mvRef = ref mvRefSearch[i];
+ if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef))
+ {
+ ref ModeInfo candidate = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value;
+
+ // If the candidate is Intra we don't want to consider its mv.
+ if (IsDiffRefFrameAddMvEb(ref candidate, refFrame, ref refSignBias, ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ }
+ }
+
+ // Since we still don't have a candidate we'll try the last frame.
+ if (!prevFrameMvs.IsNull)
+ {
+ if (prevFrameMvs.Value.RefFrame[0] != refFrame && prevFrameMvs.Value.RefFrame[0] > Constants.IntraFrame)
+ {
+ // Work on a copy, the stored previous frame vector must not be modified.
+ Mv mv = prevFrameMvs.Value.Mv[0];
+ if (refSignBias[prevFrameMvs.Value.RefFrame[0]] != refSignBias[refFrame])
+ {
+ mv.Row *= -1;
+ mv.Col *= -1;
+ }
+ if (AddMvRefListEb(mv, ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+
+ // The second vector is skipped when it is equal to the first one (compared as raw 32-bit values).
+ if (prevFrameMvs.Value.RefFrame[1] > Constants.IntraFrame &&
+ prevFrameMvs.Value.RefFrame[1] != refFrame &&
+ Unsafe.As<Mv, int>(ref prevFrameMvs.Value.Mv[1]) != Unsafe.As<Mv, int>(ref prevFrameMvs.Value.Mv[0]))
+ {
+ Mv mv = prevFrameMvs.Value.Mv[1];
+ if (refSignBias[prevFrameMvs.Value.RefFrame[1]] != refSignBias[refFrame])
+ {
+ mv.Row *= -1;
+ mv.Col *= -1;
+ }
+ if (AddMvRefListEb(mv, ref refmvCount, mvRefList, earlyBreak))
+ {
+ goto Done;
+ }
+ }
+ }
+
+ // The search was not ended by a candidate: entries that were not filled are still zero
+ // from the blanking above, and are counted as valid candidates.
+ if (mode == PredictionMode.NearMv)
+ {
+ refmvCount = Constants.MaxMvRefCandidates;
+ }
+ else
+ {
+ // We only care about the nearestmv for the remaining modes
+ refmvCount = 1;
+ }
+
+ Done:
+ // Clamp vectors
+ for (i = 0; i < refmvCount; ++i)
+ {
+ mvRefList[i].ClampMvRef(ref xd);
+ }
+
+ return refmvCount;
+ }
+
+ /// <summary>
+ /// Finds the NearestMv/NearMv reference vector of one sub-block of a block smaller than 8x8.
+ /// </summary>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state, <c>xd.Mi[0]</c> is the block being decoded</param>
+ /// <param name="mvRefSearch">Neighbor positions to search for candidates</param>
+ /// <param name="bMode">Prediction mode of the sub-block</param>
+ /// <param name="block">Sub-block index, from 0 to 3</param>
+ /// <param name="refr">Index of the reference (0 or 1) the vector is for</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <param name="bestSub8x8">Receives the selected vector</param>
+ private static void AppendSub8x8MvsForIdx(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     Span<Position> mvRefSearch,
+     PredictionMode bMode,
+     int block,
+     int refr,
+     int miRow,
+     int miCol,
+     ref Mv bestSub8x8)
+ {
+     Span<Mv> found = stackalloc Mv[Constants.MaxMvRefCandidates];
+     ref ModeInfo mi = ref xd.Mi[0].Value;
+     ref Array4<BModeInfo> bmi = ref mi.Bmi;
+
+     Debug.Assert(Constants.MaxMvRefCandidates == 2);
+
+     int foundCount = DecFindMvRefs(ref cm, ref xd, bMode, mi.RefFrame[refr], mvRefSearch, found, miRow, miCol, block, 1);
+
+     switch (block)
+     {
+         case 0:
+             // The first sub-block takes the last candidate that was found.
+             bestSub8x8 = found[foundCount - 1];
+             break;
+         case 1:
+         case 2:
+             if (bMode == PredictionMode.NearestMv)
+             {
+                 bestSub8x8 = bmi[0].Mv[refr];
+                 break;
+             }
+
+             // First candidate that differs from the vector of sub-block 0, or zero if there is none.
+             bestSub8x8 = new Mv();
+             for (int idx = 0; idx < foundCount; idx++)
+             {
+                 if (Unsafe.As<Mv, int>(ref bmi[0].Mv[refr]) != Unsafe.As<Mv, int>(ref found[idx]))
+                 {
+                     bestSub8x8 = found[idx];
+                     break;
+                 }
+             }
+
+             break;
+         case 3:
+             if (bMode == PredictionMode.NearestMv)
+             {
+                 bestSub8x8 = bmi[2].Mv[refr];
+                 break;
+             }
+
+             // The vectors of sub-blocks 1 and 0 are tried before the candidates that were found.
+             Span<Mv> candidates = stackalloc Mv[2 + Constants.MaxMvRefCandidates];
+             candidates[0] = bmi[1].Mv[refr];
+             candidates[1] = bmi[0].Mv[refr];
+             candidates[2] = found[0];
+             candidates[3] = found[1];
+
+             // First candidate that differs from the vector of sub-block 2, or zero if there is none.
+             bestSub8x8 = new Mv();
+             for (int idx = 0; idx < 2 + Constants.MaxMvRefCandidates; idx++)
+             {
+                 if (Unsafe.As<Mv, int>(ref bmi[2].Mv[refr]) != Unsafe.As<Mv, int>(ref candidates[idx]))
+                 {
+                     bestSub8x8 = candidates[idx];
+                     break;
+                 }
+             }
+
+             break;
+         default:
+             Debug.Assert(false, "Invalid block index.");
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Computes the inter mode context of a block from the modes of its two nearest neighbors.
+ /// </summary>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state (tile bounds and mode info grid)</param>
+ /// <param name="mvRefSearch">Neighbor positions, only the first two are used</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <returns>The context looked up on <c>Luts.CounterToContext</c></returns>
+ private static byte GetModeContext(ref Vp9Common cm, ref MacroBlockD xd, Span<Position> mvRefSearch, int miRow, int miCol)
+ {
+     ref TileInfo tile = ref xd.Tile;
+     int counter = 0;
+
+     // Get mode count from nearest 2 blocks
+     for (int idx = 0; idx < 2; idx++)
+     {
+         ref Position offset = ref mvRefSearch[idx];
+
+         // Neighbors outside of the tile do not contribute.
+         if (!tile.IsInside(miCol, miRow, cm.MiRows, ref offset))
+         {
+             continue;
+         }
+
+         ref ModeInfo neighbor = ref xd.Mi[offset.Col + offset.Row * xd.MiStride].Value;
+
+         // Keep counts for entropy encoding.
+         counter += Luts.Mode2Counter[(int)neighbor.Mode];
+     }
+
+     return (byte)Luts.CounterToContext[counter];
+ }
+
+ /// <summary>
+ /// Reads the reference frames, prediction mode(s), interpolation filter and motion vectors
+ /// of an inter block.
+ /// </summary>
+ /// <remarks>
+ /// Sets <c>xd.Corrupted</c> when AssignMv fails, and reports an internal error through
+ /// <c>xd.ErrorInfo</c> when the segment skip feature is active on a block smaller than 8x8.
+ /// </remarks>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state</param>
+ /// <param name="mi">Mode info of the block, updated with everything that is read</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <param name="r">Bit reader the information is read from</param>
+ private static void ReadInterBlockModeInfo(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     ref ModeInfo mi,
+     int miRow,
+     int miCol,
+     ref Reader r)
+ {
+     BlockSize bsize = mi.SbType;
+     bool allowHP = cm.AllowHighPrecisionMv;
+     Array2<Mv> bestRefMvs = new Array2<Mv>();
+     int refr, isCompound;
+     byte interModeCtx;
+     Span<Position> mvRefSearch = Luts.MvRefBlocks[(int)bsize];
+
+     ReadRefFrames(ref cm, ref xd, ref r, mi.SegmentId, ref mi.RefFrame);
+     isCompound = mi.HasSecondRef() ? 1 : 0;
+     interModeCtx = GetModeContext(ref cm, ref xd, mvRefSearch, miRow, miCol);
+
+     if (cm.Seg.IsSegFeatureActive(mi.SegmentId, SegLvlFeatures.SegLvlSkip) != 0)
+     {
+         mi.Mode = PredictionMode.ZeroMv;
+         if (bsize < BlockSize.Block8x8)
+         {
+             xd.ErrorInfo.Value.InternalError(CodecErr.CodecUnsupBitstream, "Invalid usage of segement feature on small blocks");
+             return;
+         }
+     }
+     else
+     {
+         if (bsize >= BlockSize.Block8x8)
+         {
+             mi.Mode = ReadInterMode(ref cm, ref xd, ref r, interModeCtx);
+         }
+         else
+         {
+             // Sub 8x8 blocks use the nearestmv as a ref_mv if the bMode is NewMv.
+             // Setting mode to NearestMv forces the search to stop after the nearestmv
+             // has been found. After bModes have been read, mode will be overwritten
+             // by the last bMode.
+             mi.Mode = PredictionMode.NearestMv;
+         }
+
+         if (mi.Mode != PredictionMode.ZeroMv)
+         {
+             // Allocated once outside of the loop (stackalloc memory is only released when the
+             // method returns). DecFindMvRefs blanks the list before filling it, so it can be
+             // safely shared by both references.
+             Span<Mv> tmpMvs = stackalloc Mv[Constants.MaxMvRefCandidates];
+
+             for (refr = 0; refr < 1 + isCompound; ++refr)
+             {
+                 sbyte frame = mi.RefFrame[refr];
+                 int refmvCount;
+
+                 refmvCount = DecFindMvRefs(ref cm, ref xd, mi.Mode, frame, mvRefSearch, tmpMvs, miRow, miCol, -1, 0);
+
+                 DecFindBestRefMvs(allowHP, tmpMvs, ref bestRefMvs[refr], refmvCount);
+             }
+         }
+     }
+
+     mi.InterpFilter = (cm.InterpFilter == Constants.Switchable) ? ReadSwitchableInterpFilter(ref cm, ref xd, ref r) : cm.InterpFilter;
+
+     if (bsize < BlockSize.Block8x8)
+     {
+         int num4X4W = 1 << xd.BmodeBlocksWl;
+         int num4X4H = 1 << xd.BmodeBlocksHl;
+         int idx, idy;
+         PredictionMode bMode = 0;
+         Array2<Mv> bestSub8x8 = new Array2<Mv>();
+         const uint invalidMv = 0x80008000;
+         // Initialize the 2nd element with an invalid vector. It is only replaced by a real
+         // candidate when isCompound is set, and is not used meaningfully otherwise.
+         Unsafe.As<Mv, uint>(ref bestSub8x8[1]) = invalidMv;
+         for (idy = 0; idy < 2; idy += num4X4H)
+         {
+             for (idx = 0; idx < 2; idx += num4X4W)
+             {
+                 int j = idy * 2 + idx;
+                 bMode = ReadInterMode(ref cm, ref xd, ref r, interModeCtx);
+
+                 if (bMode == PredictionMode.NearestMv || bMode == PredictionMode.NearMv)
+                 {
+                     for (refr = 0; refr < 1 + isCompound; ++refr)
+                     {
+                         AppendSub8x8MvsForIdx(ref cm, ref xd, mvRefSearch, bMode, j, refr, miRow, miCol, ref bestSub8x8[refr]);
+                     }
+                 }
+
+                 if (!AssignMv(ref cm, ref xd, bMode, ref mi.Bmi[j].Mv, ref bestRefMvs, ref bestSub8x8, isCompound, allowHP, ref r))
+                 {
+                     // Only the inner loop is left, the next row of sub-blocks is still read.
+                     xd.Corrupted |= true;
+                     break;
+                 }
+
+                 // Replicate the sub-block info on the sub-blocks covered by the same partition.
+                 if (num4X4H == 2)
+                 {
+                     mi.Bmi[j + 2] = mi.Bmi[j];
+                 }
+
+                 if (num4X4W == 2)
+                 {
+                     mi.Bmi[j + 1] = mi.Bmi[j];
+                 }
+             }
+         }
+
+         // The block takes the mode of the last sub-block that was read, and the vectors of sub-block 3.
+         mi.Mode = bMode;
+
+         CopyMvPair(ref mi.Mv, ref mi.Bmi[3].Mv);
+     }
+     else
+     {
+         xd.Corrupted |= !AssignMv(ref cm, ref xd, mi.Mode, ref mi.Mv, ref bestRefMvs, ref bestRefMvs, isCompound, allowHP, ref r);
+     }
+ }
+
+ /// <summary>
+ /// Reads the mode info of a block that belongs to a frame that is not intra only.
+ /// </summary>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state, <c>xd.Mi[0]</c> is the block being decoded</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <param name="r">Bit reader the information is read from</param>
+ /// <param name="xMis">Forwarded to the segment id reader</param>
+ /// <param name="yMis">Forwarded to the segment id reader</param>
+ private static void ReadInterFrameModeInfo(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     int miRow,
+     int miCol,
+     ref Reader r,
+     int xMis,
+     int yMis)
+ {
+     ref ModeInfo mi = ref xd.Mi[0].Value;
+
+     // The order of the reads below follows the bitstream layout and must not change.
+     mi.SegmentId = (sbyte)ReadInterSegmentId(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis);
+     mi.Skip = (sbyte)ReadSkip(ref cm, ref xd, mi.SegmentId, ref r);
+
+     bool isInter = ReadIsInterBlock(ref cm, ref xd, mi.SegmentId, ref r);
+
+     // False only for inter blocks that are skipped.
+     bool notSkippedInter = mi.Skip == 0 || !isInter;
+     mi.TxSize = ReadTxSize(ref cm, ref xd, notSkippedInter, ref r);
+
+     if (!isInter)
+     {
+         ReadIntraBlockModeInfo(ref cm, ref xd, ref mi, ref r);
+         return;
+     }
+
+     ReadInterBlockModeInfo(ref cm, ref xd, ref mi, miRow, miCol, ref r);
+ }
+
+ /// <summary>
+ /// Gets the luma prediction mode found to the left of a sub-block.
+ /// </summary>
+ /// <param name="curMi">Mode info of the current block</param>
+ /// <param name="leftMi">Mode info of the block on the left, can be null</param>
+ /// <param name="b">Sub-block index, from 0 to 3</param>
+ /// <returns>
+ /// For sub-blocks 1 and 3, the mode of the previous sub-block of the current block.
+ /// For sub-blocks 0 and 2, the mode taken from the left block, or DcPred if it is null or inter
+ /// </returns>
+ private static PredictionMode LeftBlockMode(Ptr<ModeInfo> curMi, Ptr<ModeInfo> leftMi, int b)
+ {
+     bool usesLeftBlock = b == 0 || b == 2;
+     if (!usesLeftBlock)
+     {
+         Debug.Assert(b == 1 || b == 3);
+         return curMi.Value.Bmi[b - 1].Mode;
+     }
+
+     if (leftMi.IsNull || leftMi.Value.IsInterBlock())
+     {
+         return PredictionMode.DcPred;
+     }
+
+     return leftMi.Value.GetYMode(b + 1);
+ }
+
+ /// <summary>
+ /// Gets the luma prediction mode found above a sub-block.
+ /// </summary>
+ /// <param name="curMi">Mode info of the current block</param>
+ /// <param name="aboveMi">Mode info of the block above, can be null</param>
+ /// <param name="b">Sub-block index, from 0 to 3</param>
+ /// <returns>
+ /// For sub-blocks 2 and 3, the mode of the sub-block two positions before on the current block.
+ /// For sub-blocks 0 and 1, the mode taken from the above block, or DcPred if it is null or inter
+ /// </returns>
+ private static PredictionMode AboveBlockMode(Ptr<ModeInfo> curMi, Ptr<ModeInfo> aboveMi, int b)
+ {
+     bool usesAboveBlock = b == 0 || b == 1;
+     if (!usesAboveBlock)
+     {
+         Debug.Assert(b == 2 || b == 3);
+         return curMi.Value.Bmi[b - 2].Mode;
+     }
+
+     if (aboveMi.IsNull || aboveMi.Value.IsInterBlock())
+     {
+         return PredictionMode.DcPred;
+     }
+
+     return aboveMi.Value.GetYMode(b + 2);
+ }
+
+ /// <summary>
+ /// Selects the luma mode probabilities of a sub-block from the modes above and to the left of it.
+ /// </summary>
+ /// <param name="fc">Entropy probabilities holding the <c>KfYModeProb</c> table</param>
+ /// <param name="mi">Mode info of the current block</param>
+ /// <param name="aboveMi">Mode info of the block above, can be null</param>
+ /// <param name="leftMi">Mode info of the block on the left, can be null</param>
+ /// <param name="block">Sub-block index, from 0 to 3</param>
+ /// <returns>The probabilities at <c>KfYModeProb[above][left]</c></returns>
+ private static ReadOnlySpan<byte> GetYModeProbs(
+     ref Vp9EntropyProbs fc,
+     Ptr<ModeInfo> mi,
+     Ptr<ModeInfo> aboveMi,
+     Ptr<ModeInfo> leftMi,
+     int block)
+ {
+     int aboveIndex = (int)AboveBlockMode(mi, aboveMi, block);
+     int leftIndex = (int)LeftBlockMode(mi, leftMi, block);
+
+     return fc.KfYModeProb[aboveIndex][leftIndex].ToSpan();
+ }
+
+ /// <summary>
+ /// Reads the mode info of a block that belongs to an intra only frame.
+ /// </summary>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="xd">Block decoding state, <c>xd.Mi[0]</c> is the block being decoded</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <param name="r">Bit reader the information is read from</param>
+ /// <param name="xMis">Forwarded to the segment id reader</param>
+ /// <param name="yMis">Forwarded to the segment id reader</param>
+ private static void ReadIntraFrameModeInfo(
+     ref Vp9Common cm,
+     ref MacroBlockD xd,
+     int miRow,
+     int miCol,
+     ref Reader r,
+     int xMis,
+     int yMis)
+ {
+     Ptr<ModeInfo> mi = xd.Mi[0];
+     Ptr<ModeInfo> aboveMi = xd.AboveMi;
+     Ptr<ModeInfo> leftMi = xd.LeftMi;
+     ref ModeInfo cur = ref mi.Value;
+     BlockSize blockSize = cur.SbType;
+     int miOffset = miRow * cm.MiCols + miCol;
+
+     cur.SegmentId = (sbyte)ReadIntraSegmentId(ref cm, miOffset, xMis, yMis, ref r);
+     cur.Skip = (sbyte)ReadSkip(ref cm, ref xd, cur.SegmentId, ref r);
+     cur.TxSize = ReadTxSize(ref cm, ref xd, true, ref r);
+     cur.RefFrame[0] = Constants.IntraFrame;
+     cur.RefFrame[1] = Constants.None;
+
+     switch (blockSize)
+     {
+         case BlockSize.Block4x4:
+             // The probabilities of a sub-block can depend on the modes of the previous ones,
+             // so they have to be read in order.
+             for (int blk = 0; blk < 4; blk++)
+             {
+                 cur.Bmi[blk].Mode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, blk));
+             }
+
+             cur.Mode = cur.Bmi[3].Mode;
+             break;
+         case BlockSize.Block4x8:
+         {
+             // Two modes are read, each one shared by sub-blocks (0, 2) and (1, 3).
+             PredictionMode firstMode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0));
+             cur.Bmi[2].Mode = firstMode;
+             cur.Bmi[0].Mode = firstMode;
+
+             PredictionMode secondMode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 1));
+             cur.Mode = secondMode;
+             cur.Bmi[3].Mode = secondMode;
+             cur.Bmi[1].Mode = secondMode;
+             break;
+         }
+         case BlockSize.Block8x4:
+         {
+             // Two modes are read, each one shared by sub-blocks (0, 1) and (2, 3).
+             PredictionMode firstMode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0));
+             cur.Bmi[1].Mode = firstMode;
+             cur.Bmi[0].Mode = firstMode;
+
+             PredictionMode secondMode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 2));
+             cur.Mode = secondMode;
+             cur.Bmi[3].Mode = secondMode;
+             cur.Bmi[2].Mode = secondMode;
+             break;
+         }
+         default:
+             cur.Mode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0));
+             break;
+     }
+
+     cur.UvMode = ReadIntraMode(ref r, cm.Fc.Value.KfUvModeProb[(int)cur.Mode].ToSpan());
+ }
+
+ /// <summary>
+ /// Copies both reference frames of <paramref name="src"/> into <paramref name="dst"/>.
+ /// </summary>
+ /// <param name="dst">Destination pair</param>
+ /// <param name="src">Source pair</param>
+ private static void CopyRefFramePair(ref Array2<sbyte> dst, ref Array2<sbyte> src)
+ {
+     for (int slot = 0; slot < 2; slot++)
+     {
+         dst[slot] = src[slot];
+     }
+ }
+
+ /// <summary>
+ /// Reads the mode info of the current block. For frames that are not intra only, the
+ /// reference frames and motion vectors that were read are also stored on the current
+ /// frame motion vector buffer, for every position covered by the block.
+ /// </summary>
+ /// <param name="twd">Tile worker data (bit reader and block decoding state)</param>
+ /// <param name="cm">Common decoder state</param>
+ /// <param name="miRow">Row of the block, in mode info units</param>
+ /// <param name="miCol">Column of the block, in mode info units</param>
+ /// <param name="xMis">Number of motion vector buffer columns to fill</param>
+ /// <param name="yMis">Number of motion vector buffer rows to fill</param>
+ public static void ReadModeInfo(
+     ref TileWorkerData twd,
+     ref Vp9Common cm,
+     int miRow,
+     int miCol,
+     int xMis,
+     int yMis)
+ {
+     ref Reader r = ref twd.BitReader;
+     ref MacroBlockD xd = ref twd.Xd;
+     ref ModeInfo mi = ref xd.Mi[0].Value;
+     ArrayPtr<MvRef> rowMvs = cm.CurFrameMvs.Slice(miRow * cm.MiCols + miCol);
+
+     if (cm.FrameIsIntraOnly())
+     {
+         ReadIntraFrameModeInfo(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis);
+         return;
+     }
+
+     ReadInterFrameModeInfo(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis);
+
+     for (int row = 0; row < yMis; row++)
+     {
+         for (int col = 0; col < xMis; col++)
+         {
+             ref MvRef target = ref rowMvs[col];
+
+             CopyRefFramePair(ref target.RefFrame, ref mi.RefFrame);
+             CopyMvPair(ref target.Mv, ref mi.Mv);
+         }
+
+         // Advance to the same column on the next row of the buffer.
+         rowMvs = rowMvs.Slice(cm.MiCols);
+     }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs b/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs
new file mode 100644
index 00000000..df3199cf
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs
@@ -0,0 +1,164 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+using System;
+using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ /// <summary>
+ /// Software VP9 frame decoder.
+ /// </summary>
+ public class Decoder : IVp9Decoder
+ {
+     public bool IsHardwareAccelerated => false;
+
+     // Allocator used for the per-frame tile worker data and context buffers.
+     private readonly MemoryAllocator _allocator = new MemoryAllocator();
+
+     public ISurface CreateSurface(int width, int height) => new Surface(width, height);
+
+     // Maps the interpolation filter value from the picture info to the decoder filter constants.
+     private static readonly byte[] LiteralToFilter = new byte[]
+     {
+         Constants.EightTapSmooth,
+         Constants.EightTap,
+         Constants.EightTapSharp,
+         Constants.Bilinear
+     };
+
+     /// <summary>
+     /// Decodes a single frame.
+     /// </summary>
+     /// <param name="pictureInfo">Frame header information, entropy probabilities and reference surfaces</param>
+     /// <param name="output">Surface the frame is decoded into, must be a <see cref="Surface"/></param>
+     /// <param name="bitstream">Compressed frame data</param>
+     /// <param name="mvsIn">Motion vectors of the previous frame</param>
+     /// <param name="mvsOut">Receives the motion vectors of the decoded frame</param>
+     /// <returns>True on success, false if the tile decoder reported an internal error</returns>
+     /// <exception cref="ArgumentException">Thrown when a motion vector span is larger than the decoder buffer</exception>
+     public unsafe bool Decode(
+         ref Vp9PictureInfo pictureInfo,
+         ISurface output,
+         ReadOnlySpan<byte> bitstream,
+         ReadOnlySpan<Vp9MvRef> mvsIn,
+         Span<Vp9MvRef> mvsOut)
+     {
+         Vp9Common cm = new Vp9Common();
+
+         cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
+         cm.IntraOnly = pictureInfo.IntraOnly;
+
+         cm.Width = output.Width;
+         cm.Height = output.Height;
+
+         cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;
+
+         cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;
+
+         cm.BaseQindex = pictureInfo.BaseQIndex;
+         cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
+         cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
+         cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;
+
+         cm.Mb.Lossless = pictureInfo.Lossless;
+
+         cm.TxMode = (TxMode)pictureInfo.TransformMode;
+
+         cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;
+
+         cm.InterpFilter = (byte)pictureInfo.InterpFilter;
+
+         // The switchable value is kept as is, everything else goes through the lookup table.
+         if (cm.InterpFilter != Constants.Switchable)
+         {
+             cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
+         }
+
+         cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;
+
+         cm.CompFixedRef = pictureInfo.CompFixedRef;
+         cm.CompVarRef = pictureInfo.CompVarRef;
+
+         cm.Log2TileCols = pictureInfo.Log2TileCols;
+         cm.Log2TileRows = pictureInfo.Log2TileRows;
+
+         cm.Seg.Enabled = pictureInfo.SegmentEnabled;
+         cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
+         cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
+         cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
+         cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
+         cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;
+
+         cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
+         cm.Lf.RefDeltas = pictureInfo.RefDeltas;
+         cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;
+
+         cm.Fc = new Ptr<Vp9EntropyProbs>(ref pictureInfo.Entropy);
+         cm.Counts = new Ptr<Vp9BackwardUpdates>(ref pictureInfo.BackwardUpdateCounts);
+
+         cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
+         cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
+         cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
+         cm.Mb.CurBuf = (Surface)output;
+
+         cm.Mb.SetupBlockPlanes(1, 1);
+
+         cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows);
+         cm.AllocContextBuffers(_allocator, output.Width, output.Height);
+
+         // Everything allocated above must be handed back to the allocator on every exit path,
+         // including decode errors and exceptions, otherwise the buffers are never released.
+         try
+         {
+             cm.InitContextBuffers();
+             cm.SetupSegmentationDequant();
+             cm.SetupScaleFactors();
+
+             SetMvs(ref cm, mvsIn);
+
+             fixed (byte* dataPtr = bitstream)
+             {
+                 try
+                 {
+                     DecodeFrame.DecodeTiles(ref cm, new ArrayPtr<byte>(dataPtr, bitstream.Length));
+                 }
+                 catch (InternalErrorException)
+                 {
+                     return false;
+                 }
+             }
+
+             GetMvs(ref cm, mvsOut);
+
+             return true;
+         }
+         finally
+         {
+             cm.FreeTileWorkerData(_allocator);
+             cm.FreeContextBuffers(_allocator);
+         }
+     }
+
+     /// <summary>
+     /// Copies the previous frame motion vectors into the decoder state.
+     /// </summary>
+     /// <param name="cm">Common decoder state, <c>PrevFrameMvs</c> is written</param>
+     /// <param name="mvs">Motion vectors to copy</param>
+     /// <exception cref="ArgumentException">Thrown when <paramref name="mvs"/> has more entries than <c>PrevFrameMvs</c></exception>
+     private static void SetMvs(ref Vp9Common cm, ReadOnlySpan<Vp9MvRef> mvs)
+     {
+         if (mvs.Length > cm.PrevFrameMvs.Length)
+         {
+             throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
+         }
+
+         for (int i = 0; i < mvs.Length; i++)
+         {
+             ref var mv = ref cm.PrevFrameMvs[i];
+
+             mv.Mv[0].Row = mvs[i].Mvs[0].Row;
+             mv.Mv[0].Col = mvs[i].Mvs[0].Col;
+             mv.Mv[1].Row = mvs[i].Mvs[1].Row;
+             mv.Mv[1].Col = mvs[i].Mvs[1].Col;
+
+             mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
+             mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
+         }
+     }
+
+     /// <summary>
+     /// Copies the motion vectors of the decoded frame out of the decoder state.
+     /// </summary>
+     /// <param name="cm">Common decoder state, <c>CurFrameMvs</c> is read</param>
+     /// <param name="mvs">Receives the motion vectors</param>
+     /// <exception cref="ArgumentException">Thrown when <paramref name="mvs"/> has more entries than <c>CurFrameMvs</c></exception>
+     private static void GetMvs(ref Vp9Common cm, Span<Vp9MvRef> mvs)
+     {
+         if (mvs.Length > cm.CurFrameMvs.Length)
+         {
+             throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
+         }
+
+         for (int i = 0; i < mvs.Length; i++)
+         {
+             ref var mv = ref cm.CurFrameMvs[i];
+
+             mvs[i].Mvs[0].Row = mv.Mv[0].Row;
+             mvs[i].Mvs[0].Col = mv.Mv[0].Col;
+             mvs[i].Mvs[1].Row = mv.Mv[1].Row;
+             mvs[i].Mvs[1].Col = mv.Mv[1].Col;
+
+             mvs[i].RefFrames[0] = mv.RefFrame[0];
+             mvs[i].RefFrames[1] = mv.RefFrame[1];
+         }
+     }
+
+     public void Dispose() => _allocator.Dispose();
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
new file mode 100644
index 00000000..7ede6d34
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs
@@ -0,0 +1,325 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Coefficient token decoding: reads the quantized transform coefficients of a block from
+ // the bitstream and stores them dequantized.
+ internal static class Detokenize
+ {
+ // Indices of the probabilities used for the first three decisions of a coefficient token.
+ private const int EobContextNode = 0;
+ private const int ZeroContextNode = 1;
+ private const int OneContextNode = 2;
+
+ // Computes the context of the coefficient at scan position c as the rounded up average of
+ // the cached token values of its two neighbors: (1 + t0 + t1) >> 1.
+ // neighbors holds two entries per scan position.
+ private static int GetCoefContext(ReadOnlySpan<short> neighbors, ReadOnlySpan<byte> tokenCache, int c)
+ {
+ const int maxNeighbors = 2;
+
+ return (1 + tokenCache[neighbors[maxNeighbors * c + 0]] + tokenCache[neighbors[maxNeighbors * c + 1]]) >> 1;
+ }
+
+ // Reads n bits, most significant first, each one with its own probability from probs.
+ // value, count and range are the bool decoder state that the caller keeps in locals.
+ private static int ReadCoeff(
+ ref Reader r,
+ ReadOnlySpan<byte> probs,
+ int n,
+ ref ulong value,
+ ref int count,
+ ref uint range)
+ {
+ int i, val = 0;
+ for (i = 0; i < n; ++i)
+ {
+ val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range);
+ }
+
+ return val;
+ }
+
+ // Decodes the coefficient tokens of one transform block.
+ // Each non-zero coefficient is multiplied by the dequantization factor (dq[0] for the first
+ // position, dq[1] for the others), shifted right by one for 32x32 transforms, and written to
+ // dqcoeff at the position given by scan. Symbol counters are updated when xd.Counts is not null.
+ // Returns the scan position where decoding stopped, which is at most 16 << (txSize * 2).
+ private static int DecodeCoefs(
+ ref MacroBlockD xd,
+ PlaneType type,
+ Span<int> dqcoeff,
+ TxSize txSize,
+ ref Array2<short> dq,
+ int ctx,
+ ReadOnlySpan<short> scan,
+ ReadOnlySpan<short> nb,
+ ref Reader r)
+ {
+ // NOTE: This reference is only used after checking that xd.Counts is not null.
+ ref Vp9BackwardUpdates counts = ref xd.Counts.Value;
+ int maxEob = 16 << ((int)txSize << 1);
+ ref Vp9EntropyProbs fc = ref xd.Fc.Value;
+ int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0;
+ int band, c = 0;
+ ref Array6<Array6<Array3<byte>>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr];
+ // Token value of every position decoded so far, used to compute the context of the next ones.
+ Span<byte> tokenCache = stackalloc byte[32 * 32];
+ ReadOnlySpan<byte> bandTranslate = Luts.get_band_translate(txSize);
+ int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0;
+ int v;
+ short dqv = dq[0];
+ // The category 6 probabilities and bit count depend on the bit depth.
+ ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
+ ? Luts.Vp9Cat6ProbHigh12
+ : (xd.Bd == 10) ? new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob;
+ int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
+ // Keep value, range, and count as locals. The compiler produces better
+ // results with the locals than using r directly.
+ ulong value = r.Value;
+ uint range = r.Range;
+ int count = r.Count;
+
+ while (c < maxEob)
+ {
+ int val = -1;
+ // bandTranslate is consumed one entry per scan position.
+ band = bandTranslate[0];
+ bandTranslate = bandTranslate.Slice(1);
+ ref Array3<byte> prob = ref coefProbs[band][ctx];
+ if (!xd.Counts.IsNull)
+ {
+ ++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx];
+ }
+
+ // End of block: no more coefficients to read.
+ if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0)
+ {
+ if (!xd.Counts.IsNull)
+ {
+ ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken];
+ }
+
+ break;
+ }
+
+ // Skip over zero coefficients. No end of block check is done between them.
+ while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0)
+ {
+ if (!xd.Counts.IsNull)
+ {
+ ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken];
+ }
+
+ dqv = dq[1];
+ tokenCache[scan[c]] = 0;
+ ++c;
+ if (c >= maxEob)
+ {
+ // Write the decoder state back to the reader before leaving.
+ r.Value = value;
+ r.Range = range;
+ r.Count = count;
+ return c; // Zero tokens at the end (no eob token)
+ }
+ ctx = GetCoefContext(nb, tokenCache, c);
+ band = bandTranslate[0];
+ bandTranslate = bandTranslate.Slice(1);
+ prob = ref coefProbs[band][ctx];
+ }
+
+ if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0)
+ {
+ // Coefficient magnitude is 2 or more, the remaining decisions use the pareto table
+ // row selected by the pivot probability.
+ ReadOnlySpan<byte> p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1];
+ if (!xd.Counts.IsNull)
+ {
+ ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken];
+ }
+
+ if (r.ReadBool(p[0], ref value, ref count, ref range) != 0)
+ {
+ // Category tokens: a base value plus extra bits read by ReadCoeff.
+ if (r.ReadBool(p[3], ref value, ref count, ref range) != 0)
+ {
+ tokenCache[scan[c]] = 5;
+ if (r.ReadBool(p[5], ref value, ref count, ref range) != 0)
+ {
+ if (r.ReadBool(p[7], ref value, ref count, ref range) != 0)
+ {
+ val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, cat6Bits, ref value, ref count, ref range);
+ }
+ else
+ {
+ val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range);
+ }
+ }
+ else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0)
+ {
+ val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range);
+ }
+ else
+ {
+ val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range);
+ }
+ }
+ else
+ {
+ tokenCache[scan[c]] = 4;
+ if (r.ReadBool(p[4], ref value, ref count, ref range) != 0)
+ {
+ val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range);
+ }
+ else
+ {
+ val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range);
+ }
+ }
+ // Val may use 18-bits
+ v = (int)(((long)val * dqv) >> dqShift);
+ }
+ else
+ {
+ // Magnitude is 2, 3 or 4.
+ if (r.ReadBool(p[1], ref value, ref count, ref range) != 0)
+ {
+ tokenCache[scan[c]] = 3;
+ v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift;
+ }
+ else
+ {
+ tokenCache[scan[c]] = 2;
+ v = (2 * dqv) >> dqShift;
+ }
+ }
+ }
+ else
+ {
+ // Magnitude is 1.
+ if (!xd.Counts.IsNull)
+ {
+ ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken];
+ }
+
+ tokenCache[scan[c]] = 1;
+ v = dqv >> dqShift;
+ }
+ // The sign is read as a bool with probability 128, a set bit negates the value.
+ dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? -v : v, xd.Bd);
+ ++c;
+ ctx = GetCoefContext(nb, tokenCache, c);
+ dqv = dq[1];
+ }
+
+ // Write the decoder state back to the reader.
+ r.Value = value;
+ r.Range = range;
+ r.Count = count;
+ return c;
+ }
+
+ // Computes by how many bits the above (ctxShiftA) and left (ctxShiftL) context values have to
+ // be shifted when a transform at (x, y) extends past xd.MaxBlocksWide / xd.MaxBlocksHigh:
+ // 8 bits for every block that is past the limit. The shifts are left untouched when the
+ // limit is zero or the transform does not extend past it.
+ private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks)
+ {
+ if (xd.MaxBlocksWide != 0)
+ {
+ if (txSizeInBlocks + x > xd.MaxBlocksWide)
+ {
+ ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8;
+ }
+ }
+ if (xd.MaxBlocksHigh != 0)
+ {
+ if (txSizeInBlocks + y > xd.MaxBlocksHigh)
+ {
+ ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8;
+ }
+ }
+ }
+
+ // Plane 0 maps to plane type 0, every other plane maps to plane type 1.
+ private static PlaneType GetPlaneType(int plane)
+ {
+ return (PlaneType)(plane > 0 ? 1 : 0);
+ }
+
+ // Decodes the coefficients of the transform block at (x, y) of the given plane into the
+ // plane DqCoeff buffer, and updates the above/left entropy contexts.
+ // The initial context is the number of non-zero contexts among above and left (0 to 2).
+ // After decoding, the context entries covered by the transform (1, 2, 4 or 8 bytes, accessed
+ // as a single integer) are set to 1 if any coefficient was decoded and to 0 otherwise, with
+ // the value shifted right by the amount computed by GetCtxShift.
+ // Returns the value returned by DecodeCoefs (0 for an invalid transform size).
+ public static int DecodeBlockTokens(
+ ref TileWorkerData twd,
+ int plane,
+ Luts.ScanOrder sc,
+ int x,
+ int y,
+ TxSize txSize,
+ int segId)
+ {
+ ref Reader r = ref twd.BitReader;
+ ref MacroBlockD xd = ref twd.Xd;
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ ref Array2<short> dequant = ref pd.SegDequant[segId];
+ int eob;
+ Span<sbyte> a = pd.AboveContext.ToSpan().Slice(x);
+ Span<sbyte> l = pd.LeftContext.ToSpan().Slice(y);
+ int ctx;
+ int ctxShiftA = 0;
+ int ctxShiftL = 0;
+
+ switch (txSize)
+ {
+ case TxSize.Tx4x4:
+ ctx = a[0] != 0 ? 1 : 0;
+ ctx += l[0] != 0 ? 1 : 0;
+ eob = DecodeCoefs(
+ ref xd,
+ GetPlaneType(plane),
+ pd.DqCoeff.ToSpan(),
+ txSize,
+ ref dequant,
+ ctx,
+ sc.Scan,
+ sc.Neighbors,
+ ref r);
+ a[0] = l[0] = (sbyte)(eob > 0 ? 1 : 0);
+ break;
+ case TxSize.Tx8x8:
+ GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx8x8);
+ ctx = MemoryMarshal.Cast<sbyte, ushort>(a)[0] != 0 ? 1 : 0;
+ ctx += MemoryMarshal.Cast<sbyte, ushort>(l)[0] != 0 ? 1 : 0;
+ eob = DecodeCoefs(
+ ref xd,
+ GetPlaneType(plane),
+ pd.DqCoeff.ToSpan(),
+ txSize,
+ ref dequant,
+ ctx,
+ sc.Scan,
+ sc.Neighbors,
+ ref r);
+ MemoryMarshal.Cast<sbyte, ushort>(a)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftA);
+ MemoryMarshal.Cast<sbyte, ushort>(l)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftL);
+ break;
+ case TxSize.Tx16x16:
+ GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx16x16);
+ ctx = MemoryMarshal.Cast<sbyte, uint>(a)[0] != 0 ? 1 : 0;
+ ctx += MemoryMarshal.Cast<sbyte, uint>(l)[0] != 0 ? 1 : 0;
+ eob = DecodeCoefs(
+ ref xd,
+ GetPlaneType(plane),
+ pd.DqCoeff.ToSpan(),
+ txSize,
+ ref dequant,
+ ctx,
+ sc.Scan,
+ sc.Neighbors,
+ ref r);
+ MemoryMarshal.Cast<sbyte, uint>(a)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftA);
+ MemoryMarshal.Cast<sbyte, uint>(l)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftL);
+ break;
+ case TxSize.Tx32x32:
+ GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx32x32);
+ // NOTE: Casting to ulong here is safe because the default memory
+ // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte
+ // boundaries.
+ ctx = MemoryMarshal.Cast<sbyte, ulong>(a)[0] != 0 ? 1 : 0;
+ ctx += MemoryMarshal.Cast<sbyte, ulong>(l)[0] != 0 ? 1 : 0;
+ eob = DecodeCoefs(
+ ref xd,
+ GetPlaneType(plane),
+ pd.DqCoeff.ToSpan(),
+ txSize,
+ ref dequant,
+ ctx,
+ sc.Scan,
+ sc.Neighbors,
+ ref r);
+ MemoryMarshal.Cast<sbyte, ulong>(a)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftA;
+ MemoryMarshal.Cast<sbyte, ulong>(l)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftL;
+ break;
+ default:
+ Debug.Assert(false, "Invalid transform size.");
+ eob = 0;
+ break;
+ }
+
+ return eob;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
new file mode 100644
index 00000000..b74c33dc
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs
@@ -0,0 +1,949 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ internal static class Convolve
+ {
+ // Master switch for the hand-written SIMD paths: ConvolveHoriz and ConvolveVert only
+ // dispatch to their SSE4.1 / AVX2 variants when this is true.
+ private const bool UseIntrinsics = true;
+
+ /// <summary>
+ /// Computes four 8-tap dot products at once: lane i (i = 0..3) of the result is the sum of
+ /// the eight 16-bit products of <paramref name="vsrc0"/>..<paramref name="vsrc3"/> with
+ /// <paramref name="vfilter"/>.
+ /// </summary>
+ /// <remarks>
+ /// The lane diagrams in the comments below list lanes from highest to lowest.
+ /// </remarks>
+ /// <param name="vsrc0">Eight 16-bit samples feeding output lane 0.</param>
+ /// <param name="vsrc1">Eight 16-bit samples feeding output lane 1.</param>
+ /// <param name="vsrc2">Eight 16-bit samples feeding output lane 2.</param>
+ /// <param name="vsrc3">Eight 16-bit samples feeding output lane 3.</param>
+ /// <param name="vfilter">The eight 16-bit filter taps.</param>
+ /// <param name="zero">Second operand of the horizontal adds; callers pass an all-zero vector.</param>
+ /// <returns>The four 32-bit sums, sum0 in the lowest lane.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<int> MultiplyAddAdjacent(
+ Vector128<short> vsrc0,
+ Vector128<short> vsrc1,
+ Vector128<short> vsrc2,
+ Vector128<short> vsrc3,
+ Vector128<short> vfilter,
+ Vector128<int> zero)
+ {
+ // Multiply the 16-bit pairs and add neighbours: four partial sums per source vector.
+ // < sumN, sumN, sumN, sumN >
+ Vector128<int> sum0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter);
+ Vector128<int> sum1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter);
+ Vector128<int> sum2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter);
+ Vector128<int> sum3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter);
+
+ // First horizontal add folds four partial sums into two.
+ // < 0, 0, sumN, sumN >
+ sum0 = Ssse3.HorizontalAdd(sum0, zero);
+ sum1 = Ssse3.HorizontalAdd(sum1, zero);
+ sum2 = Ssse3.HorizontalAdd(sum2, zero);
+ sum3 = Ssse3.HorizontalAdd(sum3, zero);
+
+ // Second horizontal add leaves the complete dot product in the lowest lane.
+ // < 0, 0, 0, sumN >
+ sum0 = Ssse3.HorizontalAdd(sum0, zero);
+ sum1 = Ssse3.HorizontalAdd(sum1, zero);
+ sum2 = Ssse3.HorizontalAdd(sum2, zero);
+ sum3 = Ssse3.HorizontalAdd(sum3, zero);
+
+ // < 0, 0, sum1, sum0 >
+ Vector128<int> sum01 = Sse2.UnpackLow(sum0, sum1);
+
+ // < 0, 0, sum3, sum2 >
+ Vector128<int> sum23 = Sse2.UnpackLow(sum2, sum3);
+
+ // Glue the two low halves together (the float reinterpretation only selects the instruction).
+ // < sum3, sum2, sum1, sum0 >
+ return Sse.MoveLowToHigh(sum01.AsSingle(), sum23.AsSingle()).AsInt32();
+ }
+
+ /// <summary>
+ /// Adds a rounding bias to every lane, then shifts each lane right (arithmetically) by FilterBits.
+ /// </summary>
+ /// <param name="value">Four 32-bit filter sums.</param>
+ /// <param name="const64">Bias added before the shift; callers pass 64 in every lane.</param>
+ /// <returns>The biased and shifted lanes.</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<int> RoundShift(Vector128<int> value, Vector128<int> const64)
+ {
+     Vector128<int> biased = Sse2.Add(value, const64);
+
+     return Sse2.ShiftRightArithmetic(biased, FilterBits);
+ }
+
+ /// <summary>
+ /// Narrows four 32-bit lanes to bytes with unsigned saturation; the four results end up in
+ /// the lowest four bytes of the returned vector.
+ /// </summary>
+ /// <param name="value">The 32-bit values to narrow.</param>
+ /// <param name="zero">Filler for the upper lanes of both packs; callers pass an all-zero vector.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<byte> PackUnsignedSaturate(Vector128<int> value, Vector128<int> zero)
+ {
+     // 32-bit -> 16-bit, saturating to the unsigned 16-bit range.
+     Vector128<short> words = Sse41.PackUnsignedSaturate(value, zero).AsInt16();
+
+     // 16-bit -> 8-bit, saturating to the unsigned 8-bit range.
+     return Sse2.PackUnsignedSaturate(words, zero.AsInt16());
+ }
+
+ /// <summary>
+ /// SSE4.1 version of the horizontal 8-tap filter. It has no step parameter: a single kernel,
+ /// selected by the fractional part of <paramref name="x0Q4"/>, is applied to every output pixel.
+ /// </summary>
+ /// <remarks>
+ /// Output is produced four pixels per iteration and each iteration stores four bytes at
+ /// dst[x], so the last store of a row can reach past <paramref name="w"/> when w is not a
+ /// multiple of four.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static unsafe void ConvolveHorizSse41(
+ byte* src,
+ int srcStride,
+ byte* dst,
+ int dstStride,
+ Array8<short>[] xFilters,
+ int x0Q4,
+ int w,
+ int h)
+ {
+ Vector128<int> zero = Vector128<int>.Zero;
+ Vector128<int> const64 = Vector128.Create(64);
+
+ ulong x, y;
+ // Step back so that tap 0 lines up with the first sample the kernel needs.
+ src -= SubpelTaps / 2 - 1;
+
+ fixed (Array8<short>* xFilter = xFilters)
+ {
+ // Load the eight taps of the kernel picked by the sub-pixel phase (8 shorts per kernel).
+ Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);
+
+ for (y = 0; y < (uint)h; ++y)
+ {
+ // Whole-pixel part of the start position; identical for every row.
+ ulong srcOffset = (uint)x0Q4 >> SubpelBits;
+ for (x = 0; x < (uint)w; x += 4)
+ {
+ // Four overlapping windows, one per output pixel, widened from bytes to 16-bit lanes.
+ Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
+ Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
+ Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 2]);
+ Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 3]);
+
+ Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
+
+ // Round, clamp to bytes and write the low 32 bits (four pixels) with a scalar store.
+ Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
+ }
+ src += srcStride;
+ dst += dstStride;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Horizontal 8-tap sub-pixel filter over a <paramref name="w"/> x <paramref name="h"/> block.
+ /// </summary>
+ /// <remarks>
+ /// The horizontal position is tracked in 1/16 pixel units: its integer part selects the first
+ /// source sample and its fractional part selects the kernel in <paramref name="xFilters"/>.
+ /// When the step is exactly one pixel and SSE4.1 is usable the work is delegated to
+ /// ConvolveHorizSse41.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static unsafe void ConvolveHoriz(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] xFilters,
+     int x0Q4,
+     int xStepQ4,
+     int w,
+     int h)
+ {
+     if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == 1 << SubpelBits)
+     {
+         ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
+     }
+     else
+     {
+         // Step back so that tap 0 lines up with the first sample the kernel needs.
+         src -= SubpelTaps / 2 - 1;
+
+         for (int row = 0; row < h; ++row)
+         {
+             int posQ4 = x0Q4;
+
+             for (int col = 0; col < w; ++col)
+             {
+                 byte* window = src + (posQ4 >> SubpelBits);
+                 ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];
+
+                 int acc = 0;
+                 for (int tap = 0; tap < SubpelTaps; ++tap)
+                 {
+                     acc += window[tap] * kernel[tap];
+                 }
+
+                 dst[col] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
+                 posQ4 += xStepQ4;
+             }
+
+             src += srcStride;
+             dst += dstStride;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Horizontal 8-tap sub-pixel filter whose clipped result is combined with the pixel already
+ /// stored in <paramref name="dst"/> via BitUtils.RoundPowerOfTwo(existing + filtered, 1).
+ /// </summary>
+ private static unsafe void ConvolveAvgHoriz(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] xFilters,
+     int x0Q4,
+     int xStepQ4,
+     int w,
+     int h)
+ {
+     // Step back so that tap 0 lines up with the first sample the kernel needs.
+     src -= SubpelTaps / 2 - 1;
+
+     for (int row = 0; row < h; ++row)
+     {
+         int posQ4 = x0Q4;
+
+         for (int col = 0; col < w; ++col)
+         {
+             byte* window = src + (posQ4 >> SubpelBits);
+             ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap] * kernel[tap];
+             }
+
+             byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
+             dst[col] = (byte)BitUtils.RoundPowerOfTwo(dst[col] + filtered, 1);
+             posQ4 += xStepQ4;
+         }
+
+         src += srcStride;
+         dst += dstStride;
+     }
+ }
+
+ /// <summary>
+ /// AVX2 version of the vertical 8-tap filter. It has no step parameter: a single kernel,
+ /// selected by the fractional part of <paramref name="y0Q4"/>, is applied to every output row.
+ /// </summary>
+ /// <remarks>
+ /// Each iteration gathers one 32-bit word (four horizontally adjacent pixels) from eight
+ /// consecutive source rows, transposes the bytes so that every column's eight vertical
+ /// samples are contiguous, and produces four output pixels. Four bytes are stored per
+ /// iteration, so the last store of a row can reach past <paramref name="w"/> when w is not a
+ /// multiple of four.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static unsafe void ConvolveVertAvx2(
+ byte* src,
+ int srcStride,
+ byte* dst,
+ int dstStride,
+ Array8<short>[] yFilters,
+ int y0Q4,
+ int w,
+ int h)
+ {
+ Vector128<int> zero = Vector128<int>.Zero;
+ Vector128<int> const64 = Vector128.Create(64);
+ // Byte offsets of the eight rows covered by the filter (the gather below uses scale 1).
+ Vector256<int> indices = Vector256.Create(
+ 0,
+ srcStride,
+ srcStride * 2,
+ srcStride * 3,
+ srcStride * 4,
+ srcStride * 5,
+ srcStride * 6,
+ srcStride * 7);
+
+ ulong x, y;
+ // Step back so that tap 0 lines up with the first row the kernel needs.
+ src -= srcStride * (SubpelTaps / 2 - 1);
+
+ fixed (Array8<short>* yFilter = yFilters)
+ {
+ // Load the eight taps of the kernel picked by the sub-pixel phase (8 shorts per kernel).
+ Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);
+
+ // Whole-pixel part of the vertical start position.
+ ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
+ for (y = 0; y < (uint)h; ++y)
+ {
+ ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
+ for (x = 0; x < (uint)w; x += 4)
+ {
+ // Lane r holds the four pixels of row r (r = 0..7).
+ Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();
+
+ Vector128<int> vsrcL = vsrc.GetLower();
+ Vector128<int> vsrcH = vsrc.GetUpper();
+
+ // Three rounds of byte interleaving transpose the 8x4 tile into 4 columns of 8 samples.
+ Vector128<byte> vsrcUnpck11 = Sse2.UnpackLow(vsrcL.AsByte(), vsrcH.AsByte());
+ Vector128<byte> vsrcUnpck12 = Sse2.UnpackHigh(vsrcL.AsByte(), vsrcH.AsByte());
+
+ Vector128<byte> vsrcUnpck21 = Sse2.UnpackLow(vsrcUnpck11, vsrcUnpck12);
+ Vector128<byte> vsrcUnpck22 = Sse2.UnpackHigh(vsrcUnpck11, vsrcUnpck12);
+
+ // vsrc01 = column 0 (low half) and column 1 (high half); vsrc23 likewise for columns 2 and 3.
+ Vector128<byte> vsrc01 = Sse2.UnpackLow(vsrcUnpck21, vsrcUnpck22);
+ Vector128<byte> vsrc23 = Sse2.UnpackHigh(vsrcUnpck21, vsrcUnpck22);
+
+ // Bring columns 1 and 3 down to the low half so they can be widened as well.
+ Vector128<byte> vsrc11 = Sse.MoveHighToLow(vsrc01.AsSingle(), vsrc01.AsSingle()).AsByte();
+ Vector128<byte> vsrc33 = Sse.MoveHighToLow(vsrc23.AsSingle(), vsrc23.AsSingle()).AsByte();
+
+ // Widen the low eight bytes of each vector to 16-bit lanes.
+ Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(vsrc01);
+ Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(vsrc11);
+ Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(vsrc23);
+ Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(vsrc33);
+
+ Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);
+
+ // Round, clamp to bytes and write the low 32 bits (four pixels) with a scalar store.
+ Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
+ }
+ dst += dstStride;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Vertical 8-tap sub-pixel filter over a <paramref name="w"/> x <paramref name="h"/> block.
+ /// </summary>
+ /// <remarks>
+ /// The vertical position is tracked in 1/16 pixel units: its integer part selects the first
+ /// source row and its fractional part selects the kernel in <paramref name="yFilters"/>.
+ /// When the step is exactly one pixel and AVX2 is usable the work is delegated to
+ /// ConvolveVertAvx2. The scalar path walks the block column by column.
+ /// </remarks>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static unsafe void ConvolveVert(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] yFilters,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h)
+ {
+     if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == 1 << SubpelBits)
+     {
+         ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
+     }
+     else
+     {
+         // Step back so that tap 0 lines up with the first row the kernel needs.
+         src -= srcStride * (SubpelTaps / 2 - 1);
+
+         for (int col = 0; col < w; ++col)
+         {
+             int posQ4 = y0Q4;
+
+             for (int row = 0; row < h; ++row)
+             {
+                 byte* window = src + (posQ4 >> SubpelBits) * srcStride;
+                 ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];
+
+                 int acc = 0;
+                 for (int tap = 0; tap < SubpelTaps; ++tap)
+                 {
+                     acc += window[tap * srcStride] * kernel[tap];
+                 }
+
+                 dst[row * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
+                 posQ4 += yStepQ4;
+             }
+
+             ++src;
+             ++dst;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Vertical 8-tap sub-pixel filter whose clipped result is combined with the pixel already
+ /// stored in <paramref name="dst"/> via BitUtils.RoundPowerOfTwo(existing + filtered, 1).
+ /// The block is processed column by column.
+ /// </summary>
+ private static unsafe void ConvolveAvgVert(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] yFilters,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h)
+ {
+     // Step back so that tap 0 lines up with the first row the kernel needs.
+     src -= srcStride * (SubpelTaps / 2 - 1);
+
+     for (int col = 0; col < w; ++col)
+     {
+         int posQ4 = y0Q4;
+
+         for (int row = 0; row < h; ++row)
+         {
+             byte* window = src + (posQ4 >> SubpelBits) * srcStride;
+             ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap * srcStride] * kernel[tap];
+             }
+
+             byte filtered = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(acc, FilterBits));
+             dst[row * dstStride] = (byte)BitUtils.RoundPowerOfTwo(dst[row * dstStride] + filtered, 1);
+             posQ4 += yStepQ4;
+         }
+
+         ++src;
+         ++dst;
+     }
+ }
+
+ /// <summary>
+ /// Public entry point for the horizontal 8-tap filter. <paramref name="y0Q4"/> and
+ /// <paramref name="yStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void Convolve8Horiz(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => ConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
+
+ /// <summary>
+ /// Public entry point for the averaging horizontal 8-tap filter. <paramref name="y0Q4"/> and
+ /// <paramref name="yStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void Convolve8AvgHoriz(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => ConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
+
+ /// <summary>
+ /// Public entry point for the vertical 8-tap filter. <paramref name="x0Q4"/> and
+ /// <paramref name="xStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void Convolve8Vert(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => ConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
+
+ /// <summary>
+ /// Public entry point for the averaging vertical 8-tap filter. <paramref name="x0Q4"/> and
+ /// <paramref name="xStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void Convolve8AvgVert(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
+
+ // Field-less struct whose explicit size (64 * 135 bytes) reserves room for the intermediate
+ // buffer of Convolve8, which takes the address of a local of this type instead of using
+ // stackalloc in order to avoid zero initialization.
+ [StructLayout(LayoutKind.Sequential, Size = 64 * 135)]
+ struct Temp
+ {
+ }
+
+ /// <summary>
+ /// Two-dimensional 8-tap sub-pixel filter: a horizontal pass into a stack buffer with a
+ /// stride of 64 bytes, followed by a vertical pass from that buffer into
+ /// <paramref name="dst"/>.
+ /// </summary>
+ /// <remarks>
+ /// The debug assertions below spell out the parameter limits imposed by the fixed-size buffer.
+ /// </remarks>
+ public static unsafe void Convolve8(
+ byte* src,
+ int srcStride,
+ byte* dst,
+ int dstStride,
+ Array8<short>[] filter,
+ int x0Q4,
+ int xStepQ4,
+ int y0Q4,
+ int yStepQ4,
+ int w,
+ int h)
+ {
+ // Note: Fixed size intermediate buffer, temp, places limits on parameters.
+ // 2d filtering proceeds in 2 steps:
+ // (1) Interpolate horizontally into an intermediate buffer, temp.
+ // (2) Interpolate temp vertically to derive the sub-pixel result.
+ // Deriving the maximum number of rows in the temp buffer (135):
+ // --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative).
+ // --Largest block size is 64x64 pixels.
+ // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
+ // original frame (in 1/16th pixel units).
+ // --Must round-up because block may be located at sub-pixel position.
+ // --Require an additional SubpelTaps rows for the 8-tap filter tails.
+ // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+ // When calling in frame scaling function, the smallest scaling factor is x1/4
+ // ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still
+ // big enough.
+ Temp tempStruct;
+ byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct); // Avoid zero initialization.
+ // Source rows the vertical pass will touch: the span of h output rows plus the filter taps.
+ int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
+
+ Debug.Assert(w <= 64);
+ Debug.Assert(h <= 64);
+ Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32));
+ Debug.Assert(xStepQ4 <= 64);
+
+ // The horizontal pass starts (SubpelTaps / 2 - 1) rows above the block so that the vertical
+ // pass, which starts the same number of rows into temp, has its leading taps available.
+ ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight);
+ ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
+ }
+
+ /// <summary>
+ /// Two-dimensional 8-tap filter whose output is averaged into <paramref name="dst"/>:
+ /// the block is first filtered into a 64x64 stack buffer by Convolve8 and then merged into
+ /// the destination by ConvolveAvg.
+ /// </summary>
+ public static unsafe void Convolve8Avg(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h)
+ {
+     // The scratch block is fixed at 64x64, which bounds the block dimensions.
+     const int ScratchStride = 64;
+     byte* scratch = stackalloc byte[ScratchStride * 64];
+
+     Debug.Assert(w <= 64);
+     Debug.Assert(h <= 64);
+
+     Convolve8(src, srcStride, scratch, ScratchStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+     // ConvolveAvg does not read the filter or position arguments.
+     ConvolveAvg(scratch, ScratchStride, dst, dstStride, null, 0, 0, 0, 0, w, h);
+ }
+
+ /// <summary>
+ /// Copies a <paramref name="w"/> x <paramref name="h"/> block row by row. The filter and
+ /// position arguments are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void ConvolveCopy(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h)
+ {
+     for (int row = 0; row < h; ++row)
+     {
+         MemoryUtil.Copy(dst, src, w);
+
+         dst += dstStride;
+         src += srcStride;
+     }
+ }
+
+ /// <summary>
+ /// Merges a <paramref name="w"/> x <paramref name="h"/> block into <paramref name="dst"/>:
+ /// every destination pixel becomes BitUtils.RoundPowerOfTwo(dst + src, 1). The filter and
+ /// position arguments are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void ConvolveAvg(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h)
+ {
+     for (int row = 0; row < h; ++row)
+     {
+         for (int col = 0; col < w; ++col)
+         {
+             int pairSum = dst[col] + src[col];
+             dst[col] = (byte)BitUtils.RoundPowerOfTwo(pairSum, 1);
+         }
+
+         src += srcStride;
+         dst += dstStride;
+     }
+ }
+
+ /// <summary>Horizontal filter entry point for scaled prediction; forwards unchanged to Convolve8Horiz.</summary>
+ public static unsafe void ScaledHoriz(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8Horiz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>Vertical filter entry point for scaled prediction; forwards unchanged to Convolve8Vert.</summary>
+ public static unsafe void ScaledVert(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8Vert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>Two-dimensional filter entry point for scaled prediction; forwards unchanged to Convolve8.</summary>
+ public static unsafe void Scaled2D(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>Averaging horizontal filter entry point for scaled prediction; forwards unchanged to Convolve8AvgHoriz.</summary>
+ public static unsafe void ScaledAvgHoriz(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8AvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>Averaging vertical filter entry point for scaled prediction; forwards unchanged to Convolve8AvgVert.</summary>
+ public static unsafe void ScaledAvgVert(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8AvgVert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>Averaging two-dimensional filter entry point for scaled prediction; forwards unchanged to Convolve8Avg.</summary>
+ public static unsafe void ScaledAvg2D(
+     byte* src, int srcStride, byte* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h)
+     => Convolve8Avg(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h);
+
+ /// <summary>
+ /// Horizontal 8-tap sub-pixel filter for 16-bit samples; results are clipped by
+ /// BitUtils.ClipPixelHighbd using bit depth <paramref name="bd"/>.
+ /// </summary>
+ private static unsafe void HighbdConvolveHoriz(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] xFilters,
+     int x0Q4,
+     int xStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // Step back so that tap 0 lines up with the first sample the kernel needs.
+     src -= SubpelTaps / 2 - 1;
+
+     for (int row = 0; row < h; ++row)
+     {
+         int posQ4 = x0Q4;
+
+         for (int col = 0; col < w; ++col)
+         {
+             ushort* window = src + (posQ4 >> SubpelBits);
+             ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap] * kernel[tap];
+             }
+
+             dst[col] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
+             posQ4 += xStepQ4;
+         }
+
+         src += srcStride;
+         dst += dstStride;
+     }
+ }
+
+ /// <summary>
+ /// Horizontal 8-tap sub-pixel filter for 16-bit samples whose clipped result is combined
+ /// with the sample already stored in <paramref name="dst"/> via
+ /// BitUtils.RoundPowerOfTwo(existing + filtered, 1).
+ /// </summary>
+ private static unsafe void HighbdConvolveAvgHoriz(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] xFilters,
+     int x0Q4,
+     int xStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // Step back so that tap 0 lines up with the first sample the kernel needs.
+     src -= SubpelTaps / 2 - 1;
+
+     for (int row = 0; row < h; ++row)
+     {
+         int posQ4 = x0Q4;
+
+         for (int col = 0; col < w; ++col)
+         {
+             ushort* window = src + (posQ4 >> SubpelBits);
+             ref Array8<short> kernel = ref xFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap] * kernel[tap];
+             }
+
+             ushort filtered = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
+             dst[col] = (ushort)BitUtils.RoundPowerOfTwo(dst[col] + filtered, 1);
+             posQ4 += xStepQ4;
+         }
+
+         src += srcStride;
+         dst += dstStride;
+     }
+ }
+
+ /// <summary>
+ /// Vertical 8-tap sub-pixel filter for 16-bit samples, processed column by column; results
+ /// are clipped by BitUtils.ClipPixelHighbd using bit depth <paramref name="bd"/>.
+ /// </summary>
+ private static unsafe void HighbdConvolveVert(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] yFilters,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // Step back so that tap 0 lines up with the first row the kernel needs.
+     src -= srcStride * (SubpelTaps / 2 - 1);
+
+     for (int col = 0; col < w; ++col)
+     {
+         int posQ4 = y0Q4;
+
+         for (int row = 0; row < h; ++row)
+         {
+             ushort* window = src + (posQ4 >> SubpelBits) * srcStride;
+             ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap * srcStride] * kernel[tap];
+             }
+
+             dst[row * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
+             posQ4 += yStepQ4;
+         }
+
+         ++src;
+         ++dst;
+     }
+ }
+
+ /// <summary>
+ /// Vertical 8-tap sub-pixel filter for 16-bit samples whose clipped result is combined with
+ /// the sample already stored in <paramref name="dst"/> via
+ /// BitUtils.RoundPowerOfTwo(existing + filtered, 1). Processed column by column.
+ /// </summary>
+ private static unsafe void HighConvolveAvgVert(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] yFilters,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // Step back so that tap 0 lines up with the first row the kernel needs.
+     src -= srcStride * (SubpelTaps / 2 - 1);
+
+     for (int col = 0; col < w; ++col)
+     {
+         int posQ4 = y0Q4;
+
+         for (int row = 0; row < h; ++row)
+         {
+             ushort* window = src + (posQ4 >> SubpelBits) * srcStride;
+             ref Array8<short> kernel = ref yFilters[posQ4 & SubpelMask];
+
+             int acc = 0;
+             for (int tap = 0; tap < SubpelTaps; ++tap)
+             {
+                 acc += window[tap * srcStride] * kernel[tap];
+             }
+
+             ushort filtered = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(acc, FilterBits), bd);
+             dst[row * dstStride] = (ushort)BitUtils.RoundPowerOfTwo(dst[row * dstStride] + filtered, 1);
+             posQ4 += yStepQ4;
+         }
+
+         ++src;
+         ++dst;
+     }
+ }
+
+ /// <summary>
+ /// Two-dimensional 8-tap sub-pixel filter for 16-bit samples: a horizontal pass into a stack
+ /// buffer (stride 64, 135 rows) followed by a vertical pass into <paramref name="dst"/>.
+ /// </summary>
+ private static unsafe void HighbdConvolve(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // The intermediate buffer has a fixed size, which limits the accepted parameters.
+     // Why 135 rows are enough:
+     //   - the smallest (normative) scaling factor is x1/2, i.e. yStepQ4 = 32;
+     //   - the largest block is 64x64, so 64 output rows span (64 - 1) * 32 sixteenths of a
+     //     pixel in the source frame;
+     //   - the span is rounded up because the block may start at a sub-pixel position;
+     //   - SubpelTaps additional rows are needed for the tails of the 8-tap filter;
+     //   - ((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+     const int ScratchStride = 64;
+     const int LeadRows = SubpelTaps / 2 - 1;
+
+     ushort* scratch = stackalloc ushort[ScratchStride * 135];
+     int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps;
+
+     Debug.Assert(w <= 64);
+     Debug.Assert(h <= 64);
+     Debug.Assert(yStepQ4 <= 32);
+     Debug.Assert(xStepQ4 <= 32);
+
+     // Pass 1: filter rows, starting LeadRows above the block so pass 2 has its leading taps.
+     HighbdConvolveHoriz(src - srcStride * LeadRows, srcStride, scratch, ScratchStride, filter, x0Q4, xStepQ4, w, intermediateHeight, bd);
+
+     // Pass 2: filter columns of the intermediate buffer.
+     HighbdConvolveVert(scratch + ScratchStride * LeadRows, ScratchStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
+ }
+
+ /// <summary>
+ /// Public entry point for the 16-bit horizontal 8-tap filter. <paramref name="y0Q4"/> and
+ /// <paramref name="yStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolve8Horiz(
+     ushort* src, int srcStride, ushort* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h, int bd)
+     => HighbdConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
+
+ /// <summary>
+ /// Public entry point for the 16-bit averaging horizontal 8-tap filter. <paramref name="y0Q4"/>
+ /// and <paramref name="yStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolve8AvgHoriz(
+     ushort* src, int srcStride, ushort* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h, int bd)
+     => HighbdConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd);
+
+ /// <summary>
+ /// Public entry point for the 16-bit vertical 8-tap filter. <paramref name="x0Q4"/> and
+ /// <paramref name="xStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolve8Vert(
+     ushort* src, int srcStride, ushort* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h, int bd)
+     => HighbdConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
+
+ /// <summary>
+ /// Public entry point for the 16-bit averaging vertical 8-tap filter. <paramref name="x0Q4"/>
+ /// and <paramref name="xStepQ4"/> are accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolve8AvgVert(
+     ushort* src, int srcStride, ushort* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h, int bd)
+     => HighConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd);
+
+ /// <summary>Public entry point for the 16-bit two-dimensional 8-tap filter; forwards unchanged to HighbdConvolve.</summary>
+ public static unsafe void HighbdConvolve8(
+     ushort* src, int srcStride, ushort* dst, int dstStride, Array8<short>[] filter,
+     int x0Q4, int xStepQ4, int y0Q4, int yStepQ4, int w, int h, int bd)
+     => HighbdConvolve(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
+
+ /// <summary>
+ /// 16-bit two-dimensional 8-tap filter whose output is averaged into <paramref name="dst"/>:
+ /// the block is first filtered into a 64x64 stack buffer by HighbdConvolve8 and then merged
+ /// into the destination by HighbdConvolveAvg.
+ /// </summary>
+ public static unsafe void HighbdConvolve8Avg(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     // The scratch block is fixed at 64x64, which bounds the block dimensions.
+     const int ScratchStride = 64;
+     ushort* scratch = stackalloc ushort[ScratchStride * 64];
+
+     Debug.Assert(w <= 64);
+     Debug.Assert(h <= 64);
+
+     HighbdConvolve8(src, srcStride, scratch, ScratchStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd);
+
+     // HighbdConvolveAvg does not read the filter or position arguments.
+     HighbdConvolveAvg(scratch, ScratchStride, dst, dstStride, null, 0, 0, 0, 0, w, h, bd);
+ }
+
+ /// <summary>
+ /// Copies a <paramref name="w"/> x <paramref name="h"/> block of 16-bit samples row by row.
+ /// The filter, position and bit-depth arguments are accepted for signature uniformity and
+ /// ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolveCopy(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     for (int row = 0; row < h; ++row)
+     {
+         MemoryUtil.Copy(dst, src, w);
+
+         dst += dstStride;
+         src += srcStride;
+     }
+ }
+
+ /// <summary>
+ /// Merges a <paramref name="w"/> x <paramref name="h"/> block of 16-bit samples into
+ /// <paramref name="dst"/>: every destination sample becomes
+ /// BitUtils.RoundPowerOfTwo(dst + src, 1). The filter, position and bit-depth arguments are
+ /// accepted for signature uniformity and ignored.
+ /// </summary>
+ public static unsafe void HighbdConvolveAvg(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     Array8<short>[] filter,
+     int x0Q4,
+     int xStepQ4,
+     int y0Q4,
+     int yStepQ4,
+     int w,
+     int h,
+     int bd)
+ {
+     for (int row = 0; row < h; ++row)
+     {
+         for (int col = 0; col < w; ++col)
+         {
+             int pairSum = dst[col] + src[col];
+             dst[col] = (ushort)BitUtils.RoundPowerOfTwo(pairSum, 1);
+         }
+
+         src += srcStride;
+         dst += dstStride;
+     }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs
new file mode 100644
index 00000000..16962897
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ /// <summary>
+ /// Fixed-point constants shared by the sub-pixel interpolation filters.
+ /// </summary>
+ internal static class Filter
+ {
+     /// <summary>Number of bits a filter sum is shifted right by after accumulation.</summary>
+     public const int FilterBits = 7;
+
+     /// <summary>Number of fractional bits in a sub-pixel position.</summary>
+     public const int SubpelBits = 4;
+
+     /// <summary>Number of distinct sub-pixel phases (16).</summary>
+     public const int SubpelShifts = 1 << SubpelBits;
+
+     /// <summary>Mask extracting the fractional part of a sub-pixel position (15).</summary>
+     public const int SubpelMask = SubpelShifts - 1;
+
+     /// <summary>Number of taps in each interpolation kernel.</summary>
+     public const int SubpelTaps = 8;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
new file mode 100644
index 00000000..62b3a9b1
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
@@ -0,0 +1,1379 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ internal static class IntraPred
+ {
+ /// <summary>Returns a reference to the byte at column <paramref name="x"/>, row <paramref name="y"/> of a strided buffer.</summary>
+ private static unsafe ref byte Dst(byte* dst, int stride, int x, int y)
+ {
+     int offset = x + y * stride;
+
+     return ref dst[offset];
+ }
+
+ /// <summary>Returns a reference to the 16-bit sample at column <paramref name="x"/>, row <paramref name="y"/> of a strided buffer.</summary>
+ private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y)
+ {
+     int offset = x + y * stride;
+
+     return ref dst[offset];
+ }
+
+ /// <summary>Rounded three-tap average with weights 1, 2, 1: (a + 2b + c + 2) >> 2.</summary>
+ private static byte Avg3(byte a, byte b, byte c)
+ {
+     int weighted = a + 2 * b + c + 2;
+
+     return (byte)(weighted >> 2);
+ }
+
+ /// <summary>Rounded three-tap average with weights 1, 2, 1 for 16-bit samples: (a + 2b + c + 2) >> 2.</summary>
+ private static ushort Avg3(ushort a, ushort b, ushort c)
+ {
+     int weighted = a + 2 * b + c + 2;
+
+     return (ushort)(weighted >> 2);
+ }
+
+ /// <summary>Rounded two-tap average: (a + b + 1) >> 1.</summary>
+ private static byte Avg2(byte a, byte b)
+ {
+     int total = a + b + 1;
+
+     return (byte)(total >> 1);
+ }
+
+ /// <summary>Rounded two-tap average for 16-bit samples: (a + b + 1) >> 1.</summary>
+ private static ushort Avg2(ushort a, ushort b)
+ {
+     int total = a + b + 1;
+
+     return (ushort)(total >> 1);
+ }
+
+ /// <summary>D207 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D207Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D207Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D207 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D207Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D207Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D207 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D207Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D207Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D207 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// Only the left neighbours are read; <paramref name="above"/> is unused.
+ /// </summary>
+ private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // Column 0: two-tap averages running down the left edge.
+     for (int row = 0; row < bs - 1; ++row)
+     {
+         dst[row * stride] = Avg2(left[row], left[row + 1]);
+     }
+
+     dst[(bs - 1) * stride] = left[bs - 1];
+     ++dst;
+
+     // Column 1: three-tap averages running down the left edge.
+     for (int row = 0; row < bs - 2; ++row)
+     {
+         dst[row * stride] = Avg3(left[row], left[row + 1], left[row + 2]);
+     }
+
+     dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[bs - 1], left[bs - 1]);
+     dst[(bs - 1) * stride] = left[bs - 1];
+     ++dst;
+
+     // The remainder of the bottom row repeats the last left sample.
+     for (int col = 0; col < bs - 2; ++col)
+     {
+         dst[(bs - 1) * stride + col] = left[bs - 1];
+     }
+
+     // Working upwards, each row copies the row below it shifted by two columns
+     // (dst now points at column 2, so col - 2 reaches back into columns 0 and 1).
+     for (int row = bs - 2; row >= 0; --row)
+     {
+         for (int col = 0; col < bs - 2; ++col)
+         {
+             dst[row * stride + col] = dst[(row + 1) * stride + col - 2];
+         }
+     }
+ }
+
+ /// <summary>D63 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D63Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D63Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D63 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D63Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D63Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D63 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D63Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D63Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D63 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// Only the above neighbours are read (up to index bs + 1); <paramref name="left"/> is unused.
+ /// </summary>
+ private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // Rows 0 and 1 hold the two-tap and three-tap averages of the above row.
+     for (int col = 0; col < bs; ++col)
+     {
+         dst[col] = Avg2(above[col], above[col + 1]);
+         dst[stride + col] = Avg3(above[col], above[col + 1], above[col + 2]);
+     }
+
+     // Every later pair of rows repeats rows 0 and 1 shifted left by one more column;
+     // the vacated tail is padded with above[bs - 1].
+     int size = bs - 2;
+     for (int row = 2; row < bs; row += 2)
+     {
+         byte* evenRow = dst + (row + 0) * stride;
+         byte* oddRow = dst + (row + 1) * stride;
+         int shift = row >> 1;
+
+         MemoryUtil.Copy(evenRow, dst + shift, size);
+         MemoryUtil.Fill(evenRow + size, above[bs - 1], bs - size);
+         MemoryUtil.Copy(oddRow, dst + stride + shift, size);
+         MemoryUtil.Fill(oddRow + size, above[bs - 1], bs - size);
+
+         --size;
+     }
+ }
+
+ /// <summary>D45 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D45Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D45Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D45 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D45Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D45Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D45 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D45Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D45Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D45 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// Only the above neighbours are read (up to index bs); <paramref name="left"/> is unused.
+ /// </summary>
+ private static unsafe void D45Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     byte padding = above[bs - 1];
+     byte* firstRow = dst;
+
+     // Row 0: three-tap averages of the above row, ending with the padding sample.
+     for (int col = 0; col < bs - 1; ++col)
+     {
+         dst[col] = Avg3(above[col], above[col + 1], above[col + 2]);
+     }
+
+     dst[bs - 1] = padding;
+     dst += stride;
+
+     // Row n repeats row 0 shifted left by n columns; the vacated tail is filled with padding.
+     for (int shift = 1; shift < bs; ++shift)
+     {
+         int size = bs - 1 - shift;
+
+         MemoryUtil.Copy(dst, firstRow + shift, size);
+         MemoryUtil.Fill(dst + size, padding, shift + 1);
+         dst += stride;
+     }
+ }
+
+ /// <summary>D117 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D117Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D117Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D117 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D117Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D117Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D117 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D117Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D117Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D117 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// Reads the above row (including the top-left sample at above[-1]) and the left column.
+ /// </summary>
+ private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // Row 0: two-tap averages along the above row.
+     for (int col = 0; col < bs; ++col)
+     {
+         dst[col] = Avg2(above[col - 1], above[col]);
+     }
+
+     dst += stride;
+
+     // Row 1: three-tap averages; the first one wraps around the top-left corner.
+     dst[0] = Avg3(left[0], above[-1], above[0]);
+     for (int col = 1; col < bs; ++col)
+     {
+         dst[col] = Avg3(above[col - 2], above[col - 1], above[col]);
+     }
+
+     dst += stride;
+
+     // Column 0 from row 2 downwards: three-tap averages along the left column.
+     dst[0] = Avg3(above[-1], left[0], left[1]);
+     for (int row = 3; row < bs; ++row)
+     {
+         dst[(row - 2) * stride] = Avg3(left[row - 3], left[row - 2], left[row - 1]);
+     }
+
+     // Everything else copies the sample two rows up and one column to the left.
+     for (int row = 2; row < bs; ++row)
+     {
+         for (int col = 1; col < bs; ++col)
+         {
+             dst[col] = dst[-2 * stride + col - 1];
+         }
+
+         dst += stride;
+     }
+ }
+
+ /// <summary>D135 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D135Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D135Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D135 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D135Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D135Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D135 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D135Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D135Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D135 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// A filtered border running from the bottom-left neighbour to the top-right neighbour is
+ /// built first; each output row is then a bs-long window of that border.
+ /// </summary>
+ private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // Outer border from bottom-left to top-right, sized for the largest block (32).
+     byte* border = stackalloc byte[32 + 32 - 1];
+
+     // Left column, walked from the bottom upwards.
+     for (int n = 0; n < bs - 2; ++n)
+     {
+         border[n] = Avg3(left[bs - 3 - n], left[bs - 2 - n], left[bs - 1 - n]);
+     }
+
+     // The three samples that wrap around the top-left corner.
+     border[bs - 2] = Avg3(above[-1], left[0], left[1]);
+     border[bs - 1] = Avg3(left[0], above[-1], above[0]);
+     border[bs - 0] = Avg3(above[-1], above[0], above[1]);
+
+     // Remaining above row, walked from left to right.
+     for (int n = 0; n < bs - 2; ++n)
+     {
+         border[bs + 1 + n] = Avg3(above[n], above[n + 1], above[n + 2]);
+     }
+
+     // Row n starts one border position earlier than row n - 1.
+     for (int row = 0; row < bs; ++row)
+     {
+         MemoryUtil.Copy(dst + row * stride, border + bs - 1 - row, bs);
+     }
+ }
+
+ /// <summary>D153 intra predictor for an 8x8 block.</summary>
+ public static unsafe void D153Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => D153Predictor(dst, stride, 8, above, left);
+
+ /// <summary>D153 intra predictor for a 16x16 block.</summary>
+ public static unsafe void D153Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => D153Predictor(dst, stride, 16, above, left);
+
+ /// <summary>D153 intra predictor for a 32x32 block.</summary>
+ public static unsafe void D153Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => D153Predictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// D153 directional predictor for a <paramref name="bs"/> x <paramref name="bs"/> block.
+ /// Reads the above row (including the top-left sample at above[-1]) and the left column.
+ /// </summary>
+ private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // Column 0: two-tap averages, starting across the top-left corner.
+     dst[0] = Avg2(above[-1], left[0]);
+     for (int row = 1; row < bs; ++row)
+     {
+         dst[row * stride] = Avg2(left[row - 1], left[row]);
+     }
+
+     ++dst;
+
+     // Column 1: three-tap averages, the first two wrapping around the corner.
+     dst[0] = Avg3(left[0], above[-1], above[0]);
+     dst[stride] = Avg3(above[-1], left[0], left[1]);
+     for (int row = 2; row < bs; ++row)
+     {
+         dst[row * stride] = Avg3(left[row - 2], left[row - 1], left[row]);
+     }
+
+     ++dst;
+
+     // Rest of row 0: three-tap averages along the above row.
+     for (int col = 0; col < bs - 2; ++col)
+     {
+         dst[col] = Avg3(above[col - 1], above[col], above[col + 1]);
+     }
+
+     dst += stride;
+
+     // Everything else copies the sample one row up and two columns to the left.
+     for (int row = 1; row < bs; ++row)
+     {
+         for (int col = 0; col < bs - 2; ++col)
+         {
+             dst[col] = dst[-stride + col - 2];
+         }
+
+         dst += stride;
+     }
+ }
+
+ /// <summary>Vertical intra predictor for a 4x4 block.</summary>
+ public static unsafe void VPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => VPredictor(dst, stride, 4, above, left);
+
+ /// <summary>Vertical intra predictor for an 8x8 block.</summary>
+ public static unsafe void VPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => VPredictor(dst, stride, 8, above, left);
+
+ /// <summary>Vertical intra predictor for a 16x16 block.</summary>
+ public static unsafe void VPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => VPredictor(dst, stride, 16, above, left);
+
+ /// <summary>Vertical intra predictor for a 32x32 block.</summary>
+ public static unsafe void VPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => VPredictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// Vertical predictor: every row of the <paramref name="bs"/> x <paramref name="bs"/> block
+ /// is a copy of the above row. <paramref name="left"/> is unused.
+ /// </summary>
+ private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     byte* rowPtr = dst;
+
+     for (int row = 0; row < bs; ++row)
+     {
+         MemoryUtil.Copy(rowPtr, above, bs);
+         rowPtr += stride;
+     }
+ }
+
+ /// <summary>Horizontal intra predictor for a 4x4 block.</summary>
+ public static unsafe void HPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => HPredictor(dst, stride, 4, above, left);
+
+ /// <summary>Horizontal intra predictor for an 8x8 block.</summary>
+ public static unsafe void HPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => HPredictor(dst, stride, 8, above, left);
+
+ /// <summary>Horizontal intra predictor for a 16x16 block.</summary>
+ public static unsafe void HPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => HPredictor(dst, stride, 16, above, left);
+
+ /// <summary>Horizontal intra predictor for a 32x32 block.</summary>
+ public static unsafe void HPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => HPredictor(dst, stride, 32, above, left);
+
+ /// <summary>
+ /// Horizontal predictor: row r of the <paramref name="bs"/> x <paramref name="bs"/> block is
+ /// filled with left[r]. <paramref name="above"/> is unused.
+ /// </summary>
+ private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     byte* rowPtr = dst;
+
+     for (int row = 0; row < bs; ++row)
+     {
+         MemoryUtil.Fill(rowPtr, left[row], bs);
+         rowPtr += stride;
+     }
+ }
+
+ /// <summary>TM intra predictor for a 4x4 block.</summary>
+ public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => TMPredictor(dst, stride, 4, above, left);
+
+ // 8x8 entry point: delegates to TMPredictor with a block size of 8.
+ public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => TMPredictor(dst, stride, 8, above, left);
+
+ // 16x16 entry point: delegates to TMPredictor with a block size of 16.
+ public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => TMPredictor(dst, stride, 16, above, left);
+
+ // 32x32 entry point: delegates to TMPredictor with a block size of 32.
+ public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => TMPredictor(dst, stride, 32, above, left);
+
+ // Writes left[y] + above[x] - above[-1] for every position of the bs x bs block at dst,
+ // passing each sum through BitUtils.ClipPixel (defined elsewhere) before storing it.
+ private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     // The sample one element before `above`, subtracted from every sum.
+     int corner = above[-1];
+     byte* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         for (int x = 0; x < bs; x++)
+         {
+             row[x] = BitUtils.ClipPixel(left[y] + above[x] - corner);
+         }
+
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to Dc128Predictor with a block size of 4.
+ public static unsafe void Dc128Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => Dc128Predictor(dst, stride, 4, above, left);
+
+ // 8x8 entry point: delegates to Dc128Predictor with a block size of 8.
+ public static unsafe void Dc128Predictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => Dc128Predictor(dst, stride, 8, above, left);
+
+ // 16x16 entry point: delegates to Dc128Predictor with a block size of 16.
+ public static unsafe void Dc128Predictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => Dc128Predictor(dst, stride, 16, above, left);
+
+ // 32x32 entry point: delegates to Dc128Predictor with a block size of 32.
+ public static unsafe void Dc128Predictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => Dc128Predictor(dst, stride, 32, above, left);
+
+ // Fills the bs x bs block at dst with the constant 128.
+ // Neither `above` nor `left` is read.
+ private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     byte* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, (byte)128, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to DcLeftPredictor with a block size of 4.
+ public static unsafe void DcLeftPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => DcLeftPredictor(dst, stride, 4, above, left);
+
+ // 8x8 entry point: delegates to DcLeftPredictor with a block size of 8.
+ public static unsafe void DcLeftPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => DcLeftPredictor(dst, stride, 8, above, left);
+
+ // 16x16 entry point: delegates to DcLeftPredictor with a block size of 16.
+ public static unsafe void DcLeftPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => DcLeftPredictor(dst, stride, 16, above, left);
+
+ // 32x32 entry point: delegates to DcLeftPredictor with a block size of 32.
+ public static unsafe void DcLeftPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => DcLeftPredictor(dst, stride, 32, above, left);
+
+ // Fills the bs x bs block at dst with the rounded mean of left[0 .. bs - 1].
+ // `above` is not read.
+ private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += left[n];
+     }
+
+     // Adding bs / 2 before the integer division rounds to nearest.
+     byte dc = (byte)((total + (bs >> 1)) / bs);
+     byte* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to DcTopPredictor with a block size of 4.
+ public static unsafe void DcTopPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => DcTopPredictor(dst, stride, 4, above, left);
+
+ // 8x8 entry point: delegates to DcTopPredictor with a block size of 8.
+ public static unsafe void DcTopPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => DcTopPredictor(dst, stride, 8, above, left);
+
+ // 16x16 entry point: delegates to DcTopPredictor with a block size of 16.
+ public static unsafe void DcTopPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => DcTopPredictor(dst, stride, 16, above, left);
+
+ // 32x32 entry point: delegates to DcTopPredictor with a block size of 32.
+ public static unsafe void DcTopPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => DcTopPredictor(dst, stride, 32, above, left);
+
+ // Fills the bs x bs block at dst with the rounded mean of above[0 .. bs - 1].
+ // `left` is not read.
+ private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += above[n];
+     }
+
+     // Adding bs / 2 before the integer division rounds to nearest.
+     byte dc = (byte)((total + (bs >> 1)) / bs);
+     byte* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to DcPredictor with a block size of 4.
+ public static unsafe void DcPredictor4x4(byte* dst, int stride, byte* above, byte* left)
+     => DcPredictor(dst, stride, 4, above, left);
+
+ // 8x8 entry point: delegates to DcPredictor with a block size of 8.
+ public static unsafe void DcPredictor8x8(byte* dst, int stride, byte* above, byte* left)
+     => DcPredictor(dst, stride, 8, above, left);
+
+ // 16x16 entry point: delegates to DcPredictor with a block size of 16.
+ public static unsafe void DcPredictor16x16(byte* dst, int stride, byte* above, byte* left)
+     => DcPredictor(dst, stride, 16, above, left);
+
+ // 32x32 entry point: delegates to DcPredictor with a block size of 32.
+ public static unsafe void DcPredictor32x32(byte* dst, int stride, byte* above, byte* left)
+     => DcPredictor(dst, stride, 32, above, left);
+
+ // Fills the bs x bs block at dst with the rounded mean of the 2 * bs samples
+ // above[0 .. bs - 1] and left[0 .. bs - 1].
+ private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
+ {
+     int samples = 2 * bs;
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += above[n] + left[n];
+     }
+
+     // Adding half the sample count before the integer division rounds to nearest.
+     byte dc = (byte)((total + (samples >> 1)) / samples);
+     byte* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 predictor: each row is filled with one Avg3 of neighbouring left-edge samples.
+ // Row 0 also uses above[-1]; the last row repeats left[3].
+ public static unsafe void HePredictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte tl = above[-1];
+     byte l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+
+     MemoryUtil.Fill(dst, Avg3(tl, l0, l1), 4);
+     MemoryUtil.Fill(dst + stride, Avg3(l0, l1, l2), 4);
+     MemoryUtil.Fill(dst + stride * 2, Avg3(l1, l2, l3), 4);
+     MemoryUtil.Fill(dst + stride * 3, Avg3(l2, l3, l3), 4);
+ }
+
+ // 4x4 predictor: row 0 is Avg3 over above[-1 .. 4], and rows 1..3 are copies of row 0.
+ // `left` is not read.
+ public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte tl = above[-1];
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3], t4 = above[4];
+
+     // Build row 0 from the top edge.
+     dst[0] = Avg3(tl, t0, t1);
+     dst[1] = Avg3(t0, t1, t2);
+     dst[2] = Avg3(t1, t2, t3);
+     dst[3] = Avg3(t2, t3, t4);
+
+     // Replicate row 0 into the three rows below it.
+     for (int y = 1; y <= 3; y++)
+     {
+         MemoryUtil.Copy(dst + stride * y, dst, 4);
+     }
+ }
+
+ // 4x4 predictor built only from left[0 .. 3]; `above` is not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+
+     // Two-tap averages.
+     Dst(dst, stride, 0, 0) = Avg2(l0, l1);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 0, 1) = Avg2(l1, l2);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 0, 2) = Avg2(l2, l3);
+
+     // Three-tap averages.
+     Dst(dst, stride, 1, 0) = Avg3(l0, l1, l2);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(l1, l2, l3);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(l2, l3, l3);
+
+     // Every remaining position repeats the last left sample.
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l3;
+ }
+
+ // 4x4 predictor built only from above[0 .. 6]; `left` is not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     byte t4 = above[4], t5 = above[5], t6 = above[6];
+
+     // Two-tap averages (second index 0 and 2).
+     Dst(dst, stride, 0, 0) = Avg2(t0, t1);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 2) = Avg2(t1, t2);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 2) = Avg2(t2, t3);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 2) = Avg2(t3, t4);
+     Dst(dst, stride, 3, 2) = Avg2(t4, t5); // Differs from vp8
+
+     // Three-tap averages (second index 1 and 3).
+     Dst(dst, stride, 0, 1) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 3) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 3) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 3) = Avg3(t4, t5, t6); // Differs from vp8
+ }
+
+ // 4x4 predictor built only from above[0 .. 7]; `left` is not read.
+ // Same layout as D63Predictor4x4 except for positions (3, 2) and (3, 3), which use wider taps here.
+ public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     byte t4 = above[4], t5 = above[5], t6 = above[6], t7 = above[7];
+
+     // Second index 0 and 2.
+     Dst(dst, stride, 0, 0) = Avg2(t0, t1);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 2) = Avg2(t1, t2);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 2) = Avg2(t2, t3);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 2) = Avg2(t3, t4);
+     Dst(dst, stride, 3, 2) = Avg3(t4, t5, t6);
+
+     // Second index 1 and 3.
+     Dst(dst, stride, 0, 1) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 3) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 3) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 3) = Avg3(t5, t6, t7);
+ }
+
+ // 4x4 predictor built only from above[0 .. 7]; `left` is not read.
+ // Positions whose two Dst indices sum to the same value share one Avg3 result;
+ // position (3, 3) takes above[7] unfiltered.
+ public static unsafe void D45Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     byte t4 = above[4], t5 = above[5], t6 = above[6], t7 = above[7];
+
+     Dst(dst, stride, 0, 0) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(t4, t5, t6);
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(t5, t6, t7);
+     Dst(dst, stride, 3, 3) = t7; // differs from vp8
+ }
+
+ // 4x4 predictor built only from above[0 .. 7]; `left` is not read.
+ // Same layout as D45Predictor4x4 except position (3, 3), which is Avg3 with above[7] repeated.
+ public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     byte t4 = above[4], t5 = above[5], t6 = above[6], t7 = above[7];
+
+     Dst(dst, stride, 0, 0) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(t4, t5, t6);
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(t5, t6, t7);
+     Dst(dst, stride, 3, 3) = Avg3(t6, t7, t7);
+ }
+
+ // 4x4 predictor built from left[0 .. 2], above[-1] and above[0 .. 3].
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void D117Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte l0 = left[0], l1 = left[1], l2 = left[2];
+     byte tl = above[-1];
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+
+     // Two-tap averages along the top edge.
+     Dst(dst, stride, 0, 0) = Dst(dst, stride, 1, 2) = Avg2(tl, t0);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 2, 2) = Avg2(t0, t1);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 3, 2) = Avg2(t1, t2);
+     Dst(dst, stride, 3, 0) = Avg2(t2, t3);
+
+     // Three-tap averages running from the left edge, around the corner, onto the top edge.
+     Dst(dst, stride, 0, 3) = Avg3(l2, l1, l0);
+     Dst(dst, stride, 0, 2) = Avg3(l1, l0, tl);
+     Dst(dst, stride, 0, 1) = Dst(dst, stride, 1, 3) = Avg3(l0, tl, t0);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 2, 3) = Avg3(tl, t0, t1);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 3, 3) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 3, 1) = Avg3(t1, t2, t3);
+ }
+
+ // 4x4 predictor built from left[0 .. 3], above[-1] and above[0 .. 3].
+ // Positions whose two Dst indices differ by the same amount share one Avg3 result.
+ public static unsafe void D135Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+     byte tl = above[-1];
+     byte t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+
+     Dst(dst, stride, 0, 3) = Avg3(l1, l2, l3);
+     Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(l0, l1, l2);
+     Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(tl, l0, l1);
+     Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(t0, tl, l0);
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(t1, t0, tl);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(t2, t1, t0);
+     Dst(dst, stride, 3, 0) = Avg3(t3, t2, t1);
+ }
+
+ // 4x4 predictor built from left[0 .. 3], above[-1] and above[0 .. 2].
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void D153Predictor4x4(byte* dst, int stride, byte* above, byte* left)
+ {
+     byte l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+     byte tl = above[-1];
+     byte t0 = above[0], t1 = above[1], t2 = above[2];
+
+     // Two-tap averages along the left edge.
+     Dst(dst, stride, 0, 0) = Dst(dst, stride, 2, 1) = Avg2(l0, tl);
+     Dst(dst, stride, 0, 1) = Dst(dst, stride, 2, 2) = Avg2(l1, l0);
+     Dst(dst, stride, 0, 2) = Dst(dst, stride, 2, 3) = Avg2(l2, l1);
+     Dst(dst, stride, 0, 3) = Avg2(l3, l2);
+
+     // Three-tap averages running from the top edge, around the corner, down the left edge.
+     Dst(dst, stride, 3, 0) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 2, 0) = Avg3(tl, t0, t1);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 3, 1) = Avg3(l0, tl, t0);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 3, 2) = Avg3(l1, l0, tl);
+     Dst(dst, stride, 1, 2) = Dst(dst, stride, 3, 3) = Avg3(l2, l1, l0);
+     Dst(dst, stride, 1, 3) = Avg3(l3, l2, l1);
+ }
+
+ // 8x8 entry point: delegates to HighbdD207Predictor with a block size of 8.
+ public static unsafe void HighbdD207Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD207Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD207Predictor with a block size of 16.
+ public static unsafe void HighbdD207Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD207Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD207Predictor with a block size of 32.
+ public static unsafe void HighbdD207Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD207Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst using only the `left` column;
+ // `above` and `bd` are not read. The statement order matters: the final pass reads
+ // samples written by the earlier passes.
+ private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     int lastRow = bs - 1;
+     ushort* col = dst;
+
+     // Column 0: two-tap averages of adjacent left samples; the bottom entry is left[bs - 1] itself.
+     for (int y = 0; y < lastRow; ++y)
+     {
+         col[y * stride] = Avg2(left[y], left[y + 1]);
+     }
+     col[lastRow * stride] = left[lastRow];
+     col++;
+
+     // Column 1: three-tap averages, with the final left sample repeated at the bottom.
+     for (int y = 0; y < bs - 2; ++y)
+     {
+         col[y * stride] = Avg3(left[y], left[y + 1], left[y + 2]);
+     }
+     col[(bs - 2) * stride] = Avg3(left[bs - 2], left[lastRow], left[lastRow]);
+     col[lastRow * stride] = left[lastRow];
+     col++;
+
+     // Bottom row, columns 2 and up: all left[bs - 1].
+     for (int x = 0; x < bs - 2; ++x)
+     {
+         col[lastRow * stride + x] = left[lastRow];
+     }
+
+     // Remaining rows, bottom-up: each sample copies the one a row below and two columns to the left.
+     for (int y = bs - 2; y >= 0; --y)
+     {
+         for (int x = 0; x < bs - 2; ++x)
+         {
+             col[y * stride + x] = col[(y + 1) * stride + x - 2];
+         }
+     }
+ }
+
+ // 8x8 entry point: delegates to HighbdD63Predictor with a block size of 8.
+ public static unsafe void HighbdD63Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD63Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD63Predictor with a block size of 16.
+ public static unsafe void HighbdD63Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD63Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD63Predictor with a block size of 32.
+ public static unsafe void HighbdD63Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD63Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst using only `above` (indices 0 .. bs + 1);
+ // `left` and `bd` are not read. Rows 2 and up are derived from rows 0 and 1,
+ // so the first loop must run before the second.
+ private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     // Row 0 holds two-tap averages, row 1 three-tap averages.
+     for (int x = 0; x < bs; ++x)
+     {
+         dst[x] = Avg2(above[x], above[x + 1]);
+         dst[stride + x] = Avg3(above[x], above[x + 1], above[x + 2]);
+     }
+
+     // Each later row pair copies a shifted prefix of rows 0 / 1 and pads the tail with above[bs - 1].
+     int keep = bs - 2;
+
+     for (int y = 2; y < bs; y += 2)
+     {
+         int shift = y >> 1;
+         int pad = bs - keep;
+         ushort* even = dst + (y + 0) * stride;
+         ushort* odd = dst + (y + 1) * stride;
+
+         MemoryUtil.Copy(even, dst + shift, keep);
+         MemoryUtil.Fill(even + keep, above[bs - 1], pad);
+         MemoryUtil.Copy(odd, dst + stride + shift, keep);
+         MemoryUtil.Fill(odd + keep, above[bs - 1], pad);
+
+         --keep;
+     }
+ }
+
+ // 8x8 entry point: delegates to HighbdD45Predictor with a block size of 8.
+ public static unsafe void HighbdD45Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD45Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD45Predictor with a block size of 16.
+ public static unsafe void HighbdD45Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD45Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD45Predictor with a block size of 32.
+ public static unsafe void HighbdD45Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD45Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst using only `above` (indices 0 .. bs);
+ // `left` and `bd` are not read. Row 0 is computed first; every later row is a shifted
+ // copy of row 0 padded with above[bs - 1].
+ private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     ushort edge = above[bs - 1];
+     ushort* firstRow = dst;
+
+     // Row 0: three-tap averages, with the last sample taken directly from the edge value.
+     for (int x = 0; x < bs - 1; ++x)
+     {
+         firstRow[x] = Avg3(above[x], above[x + 1], above[x + 2]);
+     }
+     firstRow[bs - 1] = edge;
+
+     // Row `shift` copies row 0 starting at offset `shift`, then pads shift + 1 samples with the edge value.
+     ushort* row = dst + stride;
+     int copied = bs - 2;
+
+     for (int shift = 1; shift < bs; ++shift)
+     {
+         MemoryUtil.Copy(row, firstRow + shift, copied);
+         MemoryUtil.Fill(row + copied, edge, shift + 1);
+         row += stride;
+         --copied;
+     }
+ }
+
+ // 8x8 entry point: delegates to HighbdD117Predictor with a block size of 8.
+ public static unsafe void HighbdD117Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD117Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD117Predictor with a block size of 16.
+ public static unsafe void HighbdD117Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD117Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD117Predictor with a block size of 32.
+ public static unsafe void HighbdD117Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD117Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst from above[-1 .. bs - 1] and left[0 .. bs - 2];
+ // `bd` is not read. The final pass reads samples written by the earlier passes,
+ // so the statement order must be kept.
+ private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     ushort* row = dst;
+
+     // Row 0: two-tap averages along the top edge.
+     for (int x = 0; x < bs; x++)
+     {
+         row[x] = Avg2(above[x - 1], above[x]);
+     }
+
+     row += stride;
+
+     // Row 1: three-tap averages; the first one wraps around the corner onto left[0].
+     row[0] = Avg3(left[0], above[-1], above[0]);
+     for (int x = 1; x < bs; x++)
+     {
+         row[x] = Avg3(above[x - 2], above[x - 1], above[x]);
+     }
+
+     row += stride;
+
+     // Column 0 from row 2 downwards (row now points at row 2).
+     row[0] = Avg3(above[-1], left[0], left[1]);
+     for (int y = 3; y < bs; ++y)
+     {
+         row[(y - 2) * stride] = Avg3(left[y - 3], left[y - 2], left[y - 1]);
+     }
+
+     // Everything else copies the sample two rows up and one column to the left.
+     for (int y = 2; y < bs; ++y)
+     {
+         for (int x = 1; x < bs; x++)
+         {
+             row[x] = row[-2 * stride + x - 1];
+         }
+
+         row += stride;
+     }
+ }
+
+ // 8x8 entry point: delegates to HighbdD135Predictor with a block size of 8.
+ public static unsafe void HighbdD135Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD135Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD135Predictor with a block size of 16.
+ public static unsafe void HighbdD135Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD135Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD135Predictor with a block size of 32.
+ public static unsafe void HighbdD135Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD135Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst from left[0 .. bs - 1] and above[-1 .. bs - 1];
+ // `bd` is not read. A filtered border of 2 * bs - 1 samples is built first, then each row
+ // is copied out of it as a sliding window.
+ private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     // Outer border from bottom-left to top-right; the buffer is sized for bs = 32.
+     ushort* edge = stackalloc ushort[32 + 32 - 1];
+
+     // Left column, walked from the bottom sample upwards.
+     for (int n = 0; n < bs - 2; ++n)
+     {
+         edge[n] = Avg3(left[bs - 3 - n], left[bs - 2 - n], left[bs - 1 - n]);
+     }
+
+     // The three entries that straddle the top-left corner.
+     edge[bs - 2] = Avg3(above[-1], left[0], left[1]);
+     edge[bs - 1] = Avg3(left[0], above[-1], above[0]);
+     edge[bs] = Avg3(above[-1], above[0], above[1]);
+
+     // Remaining top border, left to right.
+     for (int n = 0; n < bs - 2; ++n)
+     {
+         edge[bs + 1 + n] = Avg3(above[n], above[n + 1], above[n + 2]);
+     }
+
+     // Row y is the bs-sample window that starts bs - 1 - y entries into the border.
+     for (int y = 0; y < bs; ++y)
+     {
+         MemoryUtil.Copy(dst + y * stride, edge + bs - 1 - y, bs);
+     }
+ }
+
+ // 8x8 entry point: delegates to HighbdD153Predictor with a block size of 8.
+ public static unsafe void HighbdD153Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD153Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdD153Predictor with a block size of 16.
+ public static unsafe void HighbdD153Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD153Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdD153Predictor with a block size of 32.
+ public static unsafe void HighbdD153Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdD153Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills a bs x bs block of 16-bit samples at dst from left[0 .. bs - 1] and above[-1 .. bs - 2];
+ // `bd` is not read. The final pass reads samples written by the earlier passes,
+ // so the statement order must be kept.
+ private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     ushort* p = dst;
+
+     // Column 0: two-tap averages down the left edge, starting with the corner sample.
+     p[0] = Avg2(above[-1], left[0]);
+     for (int y = 1; y < bs; y++)
+     {
+         p[y * stride] = Avg2(left[y - 1], left[y]);
+     }
+
+     p++;
+
+     // Column 1: three-tap averages; the first two wrap around the corner.
+     p[0] = Avg3(left[0], above[-1], above[0]);
+     p[stride] = Avg3(above[-1], left[0], left[1]);
+     for (int y = 2; y < bs; y++)
+     {
+         p[y * stride] = Avg3(left[y - 2], left[y - 1], left[y]);
+     }
+
+     p++;
+
+     // Row 0, columns 2 and up: three-tap averages along the top edge.
+     for (int x = 0; x < bs - 2; x++)
+     {
+         p[x] = Avg3(above[x - 1], above[x], above[x + 1]);
+     }
+
+     p += stride;
+
+     // Everything else copies the sample one row up and two columns to the left.
+     for (int y = 1; y < bs; ++y)
+     {
+         for (int x = 0; x < bs - 2; x++)
+         {
+             p[x] = p[-stride + x - 2];
+         }
+
+         p += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdVPredictor with a block size of 4.
+ public static unsafe void HighbdVPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdVPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdVPredictor with a block size of 8.
+ public static unsafe void HighbdVPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdVPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdVPredictor with a block size of 16.
+ public static unsafe void HighbdVPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdVPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdVPredictor with a block size of 32.
+ public static unsafe void HighbdVPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdVPredictor(dst, stride, 32, above, left, bd);
+
+ // Copies the first bs samples of `above` into each of the bs rows of the 16-bit block at dst.
+ // `left` and `bd` are not read.
+ private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Copy(row, above, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdHPredictor with a block size of 4.
+ public static unsafe void HighbdHPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdHPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdHPredictor with a block size of 8.
+ public static unsafe void HighbdHPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdHPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdHPredictor with a block size of 16.
+ public static unsafe void HighbdHPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdHPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdHPredictor with a block size of 32.
+ public static unsafe void HighbdHPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdHPredictor(dst, stride, 32, above, left, bd);
+
+ // Fills row y of the bs x bs 16-bit block at dst with the single value left[y].
+ // `above` and `bd` are not read.
+ private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, left[y], bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdTMPredictor with a block size of 4.
+ public static unsafe void HighbdTMPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdTMPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdTMPredictor with a block size of 8.
+ public static unsafe void HighbdTMPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdTMPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdTMPredictor with a block size of 16.
+ public static unsafe void HighbdTMPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdTMPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdTMPredictor with a block size of 32.
+ public static unsafe void HighbdTMPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdTMPredictor(dst, stride, 32, above, left, bd);
+
+ // Writes left[y] + above[x] - above[-1] for every position of the bs x bs 16-bit block at dst,
+ // passing each sum and `bd` through BitUtils.ClipPixelHighbd (defined elsewhere) before storing it.
+ private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     // The sample one element before `above`, subtracted from every sum.
+     int corner = above[-1];
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         for (int x = 0; x < bs; x++)
+         {
+             row[x] = BitUtils.ClipPixelHighbd(left[y] + above[x] - corner, bd);
+         }
+
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdDc128Predictor with a block size of 4.
+ public static unsafe void HighbdDc128Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDc128Predictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdDc128Predictor with a block size of 8.
+ public static unsafe void HighbdDc128Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDc128Predictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdDc128Predictor with a block size of 16.
+ public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDc128Predictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdDc128Predictor with a block size of 32.
+ public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDc128Predictor(dst, stride, 32, above, left, bd);
+
+ // Fills the bs x bs 16-bit block at dst with 128 << (bd - 8).
+ // Neither `above` nor `left` is read.
+ private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     // The fill value depends only on bd, so compute it once.
+     ushort mid = (ushort)(128 << (bd - 8));
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, mid, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdDcLeftPredictor with a block size of 4.
+ public static unsafe void HighbdDcLeftPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcLeftPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdDcLeftPredictor with a block size of 8.
+ public static unsafe void HighbdDcLeftPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcLeftPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdDcLeftPredictor with a block size of 16.
+ public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcLeftPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdDcLeftPredictor with a block size of 32.
+ public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcLeftPredictor(dst, stride, 32, above, left, bd);
+
+ // Fills the bs x bs 16-bit block at dst with the rounded mean of left[0 .. bs - 1].
+ // `above` and `bd` are not read.
+ private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += left[n];
+     }
+
+     // Adding bs / 2 before the integer division rounds to nearest.
+     ushort dc = (ushort)((total + (bs >> 1)) / bs);
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdDcTopPredictor with a block size of 4.
+ public static unsafe void HighbdDcTopPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcTopPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdDcTopPredictor with a block size of 8.
+ public static unsafe void HighbdDcTopPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcTopPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdDcTopPredictor with a block size of 16.
+ public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcTopPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdDcTopPredictor with a block size of 32.
+ public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcTopPredictor(dst, stride, 32, above, left, bd);
+
+ // Fills the bs x bs 16-bit block at dst with the rounded mean of above[0 .. bs - 1].
+ // `left` and `bd` are not read.
+ private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += above[n];
+     }
+
+     // Adding bs / 2 before the integer division rounds to nearest.
+     ushort dc = (ushort)((total + (bs >> 1)) / bs);
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 4x4 entry point: delegates to HighbdDcPredictor with a block size of 4.
+ public static unsafe void HighbdDcPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcPredictor(dst, stride, 4, above, left, bd);
+
+ // 8x8 entry point: delegates to HighbdDcPredictor with a block size of 8.
+ public static unsafe void HighbdDcPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcPredictor(dst, stride, 8, above, left, bd);
+
+ // 16x16 entry point: delegates to HighbdDcPredictor with a block size of 16.
+ public static unsafe void HighbdDcPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcPredictor(dst, stride, 16, above, left, bd);
+
+ // 32x32 entry point: delegates to HighbdDcPredictor with a block size of 32.
+ public static unsafe void HighbdDcPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+     => HighbdDcPredictor(dst, stride, 32, above, left, bd);
+
+ // Fills the bs x bs 16-bit block at dst with the rounded mean of the 2 * bs samples
+ // above[0 .. bs - 1] and left[0 .. bs - 1]. `bd` is not read.
+ private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
+ {
+     int samples = 2 * bs;
+     int total = 0;
+
+     for (int n = 0; n < bs; n++)
+     {
+         total += above[n] + left[n];
+     }
+
+     // Adding half the sample count before the integer division rounds to nearest.
+     ushort dc = (ushort)((total + (samples >> 1)) / samples);
+     ushort* row = dst;
+
+     for (int y = 0; y < bs; y++)
+     {
+         MemoryUtil.Fill(row, dc, bs);
+         row += stride;
+     }
+ }
+
+ // 16-bit 4x4 predictor built only from left[0 .. 3]; `above` and `bd` are not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void HighbdD207Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+
+     // Two-tap averages.
+     Dst(dst, stride, 0, 0) = Avg2(l0, l1);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 0, 1) = Avg2(l1, l2);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 0, 2) = Avg2(l2, l3);
+
+     // Three-tap averages.
+     Dst(dst, stride, 1, 0) = Avg3(l0, l1, l2);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(l1, l2, l3);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(l2, l3, l3);
+
+     // Every remaining position repeats the last left sample.
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l3;
+ }
+
+ // 16-bit 4x4 predictor built only from above[0 .. 6]; `left` and `bd` are not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     ushort t4 = above[4], t5 = above[5], t6 = above[6];
+
+     // Two-tap averages (second index 0 and 2).
+     Dst(dst, stride, 0, 0) = Avg2(t0, t1);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 2) = Avg2(t1, t2);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 2) = Avg2(t2, t3);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 2) = Avg2(t3, t4);
+     Dst(dst, stride, 3, 2) = Avg2(t4, t5); // Differs from vp8
+
+     // Three-tap averages (second index 1 and 3).
+     Dst(dst, stride, 0, 1) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 3) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 3) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 3) = Avg3(t4, t5, t6); // Differs from vp8
+ }
+
+ // 16-bit 4x4 predictor built only from above[0 .. 7]; `left` and `bd` are not read.
+ // Positions whose two Dst indices sum to the same value share one Avg3 result;
+ // position (3, 3) takes above[7] unfiltered.
+ public static unsafe void HighbdD45Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+     ushort t4 = above[4], t5 = above[5], t6 = above[6], t7 = above[7];
+
+     Dst(dst, stride, 0, 0) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) = Avg3(t1, t2, t3);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(t2, t3, t4);
+     Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(t3, t4, t5);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(t4, t5, t6);
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(t5, t6, t7);
+     Dst(dst, stride, 3, 3) = t7; // Differs from vp8
+ }
+
+ // 16-bit 4x4 predictor built from left[0 .. 2], above[-1] and above[0 .. 3]; `bd` is not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void HighbdD117Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort l0 = left[0], l1 = left[1], l2 = left[2];
+     ushort tl = above[-1];
+     ushort t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+
+     // Two-tap averages along the top edge.
+     Dst(dst, stride, 0, 0) = Dst(dst, stride, 1, 2) = Avg2(tl, t0);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 2, 2) = Avg2(t0, t1);
+     Dst(dst, stride, 2, 0) = Dst(dst, stride, 3, 2) = Avg2(t1, t2);
+     Dst(dst, stride, 3, 0) = Avg2(t2, t3);
+
+     // Three-tap averages running from the left edge, around the corner, onto the top edge.
+     Dst(dst, stride, 0, 3) = Avg3(l2, l1, l0);
+     Dst(dst, stride, 0, 2) = Avg3(l1, l0, tl);
+     Dst(dst, stride, 0, 1) = Dst(dst, stride, 1, 3) = Avg3(l0, tl, t0);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 2, 3) = Avg3(tl, t0, t1);
+     Dst(dst, stride, 2, 1) = Dst(dst, stride, 3, 3) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 3, 1) = Avg3(t1, t2, t3);
+ }
+
+ // 16-bit 4x4 predictor built from left[0 .. 3], above[-1] and above[0 .. 3]; `bd` is not read.
+ // Positions whose two Dst indices differ by the same amount share one Avg3 result.
+ public static unsafe void HighbdD135Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+     ushort tl = above[-1];
+     ushort t0 = above[0], t1 = above[1], t2 = above[2], t3 = above[3];
+
+     Dst(dst, stride, 0, 3) = Avg3(l1, l2, l3);
+     Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(l0, l1, l2);
+     Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(tl, l0, l1);
+     Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(t0, tl, l0);
+     Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 0) = Avg3(t1, t0, tl);
+     Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(t2, t1, t0);
+     Dst(dst, stride, 3, 0) = Avg3(t3, t2, t1);
+ }
+
+ // 16-bit 4x4 predictor built from left[0 .. 3], above[-1] and above[0 .. 2]; `bd` is not read.
+ // Positions are addressed through the Dst(dst, stride, a, b) accessor defined elsewhere in this class.
+ public static unsafe void HighbdD153Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd)
+ {
+     ushort l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+     ushort tl = above[-1];
+     ushort t0 = above[0], t1 = above[1], t2 = above[2];
+
+     // Two-tap averages along the left edge.
+     Dst(dst, stride, 0, 0) = Dst(dst, stride, 2, 1) = Avg2(l0, tl);
+     Dst(dst, stride, 0, 1) = Dst(dst, stride, 2, 2) = Avg2(l1, l0);
+     Dst(dst, stride, 0, 2) = Dst(dst, stride, 2, 3) = Avg2(l2, l1);
+     Dst(dst, stride, 0, 3) = Avg2(l3, l2);
+
+     // Three-tap averages running from the top edge, around the corner, down the left edge.
+     Dst(dst, stride, 3, 0) = Avg3(t0, t1, t2);
+     Dst(dst, stride, 2, 0) = Avg3(tl, t0, t1);
+     Dst(dst, stride, 1, 0) = Dst(dst, stride, 3, 1) = Avg3(l0, tl, t0);
+     Dst(dst, stride, 1, 1) = Dst(dst, stride, 3, 2) = Avg3(l1, l0, tl);
+     Dst(dst, stride, 1, 2) = Dst(dst, stride, 3, 3) = Avg3(l2, l1, l0);
+     Dst(dst, stride, 1, 3) = Avg3(l3, l2, l1);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs
new file mode 100644
index 00000000..b4ad4344
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs
@@ -0,0 +1,2868 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.TxfmCommon;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ internal static class InvTxfm
+ {
+ // 12 signal input bits + 7 2D forward transform amplify bits + 5 1D inverse
+ // transform amplify bits + 1 bit for contingency in rounding and quantizing
+ private const int HighbdValidTxfmMagnitudeRange = (1 << 25);
+
+ // Scans the first `size` coefficients of `input` and returns 1 as soon as one has a
+ // magnitude of HighbdValidTxfmMagnitudeRange or more; returns 0 when all are below it.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int DetectInvalidHighbdInput(ReadOnlySpan<int> input, int size)
+ {
+     for (int i = 0; i < size; ++i)
+     {
+         // Widen before taking the magnitude: Math.Abs(int) throws OverflowException for
+         // int.MinValue, which would turn a corrupt coefficient into a crash instead of
+         // the "invalid" result this check exists to produce.
+         if (Math.Abs((long)input[i]) >= HighbdValidTxfmMagnitudeRange)
+         {
+             return 1;
+         }
+     }
+
+     return 0;
+ }
+
+ // Debug-only sanity check for intermediate transform coefficients; returns the value unchanged.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static long CheckRange(long input)
+ {
+     // Valid VP9 streams keep every intermediate stage coefficient inside the signed
+     // 16-bit range; only invalid/corrupt streams can produce values outside of it.
+     Debug.Assert(input >= short.MinValue);
+     Debug.Assert(short.MaxValue >= input);
+
+     return input;
+ }
+
+ // Debug-only sanity check for high bit depth intermediate coefficients; returns the value unchanged.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static long HighbdCheckRange(long input, int bd)
+ {
+     // Valid high bit depth VP9 streams keep intermediate stage coefficients within a
+     // signed (bd + 8)-bit integer:
+     // - 8 bit: signed 16 bit integer
+     // - 10 bit: signed 18 bit integer
+     // - 12 bit: signed 20 bit integer
+     int upperBound = (1 << (bd + 7)) - 1;
+     int lowerBound = ~upperBound; // same value as -upperBound - 1
+     Debug.Assert(input >= lowerBound);
+     Debug.Assert(upperBound >= input);
+
+     return input;
+ }
+
+ // Range-checks x (debug builds only) and then wraps it to 16 bits by truncating to short;
+ // the result is sign-extended back to int.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int WrapLow(long x)
+ {
+     long validated = CheckRange(x);
+     short wrapped = (short)validated;
+
+     return wrapped;
+ }
+
+ // Range-checks x (debug builds only), truncates it to int and keeps only its low (bd + 8)
+ // bits, sign-extended, via a left shift followed by an arithmetic right shift.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int HighbdWrapLow(long x, int bd)
+ {
+     int shift = 24 - bd;
+     int truncated = (int)HighbdCheckRange(x, bd);
+
+     return (truncated << shift) >> shift;
+ }
+
+ // Adds the 16-bit-wrapped residual `trans` to the pixel `dest` and clips the sum with
+ // BitUtils.ClipPixel.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static byte ClipPixelAdd(byte dest, long trans)
+ {
+     int residual = WrapLow(trans);
+     int sum = dest + residual;
+
+     return BitUtils.ClipPixel(sum);
+ }
+
+ // High bit depth counterpart of ClipPixelAdd: wraps `trans` with HighbdWrapLow, adds it to
+ // `dest` and clips the sum with BitUtils.ClipPixelHighbd for bit depth `bd`.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static ushort HighbdClipPixelAdd(ushort dest, long trans, int bd)
+ {
+     int residual = HighbdWrapLow(trans, bd);
+     int sum = dest + residual;
+
+     return BitUtils.ClipPixelHighbd(sum, bd);
+ }
+
+ // Rounds `input` down by DctConstBits bits using BitUtils.RoundPowerOfTwo.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static long DctConstRoundShift(long input)
+ {
+     return BitUtils.RoundPowerOfTwo(input, DctConstBits);
+ }
+
+ public static void Iwht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+        0.5 shifts per pixel. */
+     Span<int> rowPass = stackalloc int[16];
+
+     // First pass: transform each group of four input coefficients into rowPass.
+     for (int row = 0; row < 4; row++)
+     {
+         int offset = row * 4;
+         long a = input[offset + 0] >> UnitQuantShift;
+         long c = input[offset + 1] >> UnitQuantShift;
+         long d = input[offset + 2] >> UnitQuantShift;
+         long b = input[offset + 3] >> UnitQuantShift;
+
+         a += c;
+         d -= b;
+         long e = (a - d) >> 1;
+         b = e - b;
+         c = e - c;
+         a -= b;
+         d += c;
+
+         rowPass[offset + 0] = WrapLow(a);
+         rowPass[offset + 1] = WrapLow(b);
+         rowPass[offset + 2] = WrapLow(c);
+         rowPass[offset + 3] = WrapLow(d);
+     }
+
+     // Second pass: transform with a step of 4 through rowPass and add the result to dest,
+     // one dest column per iteration.
+     for (int col = 0; col < 4; col++)
+     {
+         long a = rowPass[4 * 0 + col];
+         long c = rowPass[4 * 1 + col];
+         long d = rowPass[4 * 2 + col];
+         long b = rowPass[4 * 3 + col];
+
+         a += c;
+         d -= b;
+         long e = (a - d) >> 1;
+         b = e - b;
+         c = e - c;
+         a -= b;
+         d += c;
+
+         dest[stride * 0 + col] = ClipPixelAdd(dest[stride * 0 + col], WrapLow(a));
+         dest[stride * 1 + col] = ClipPixelAdd(dest[stride * 1 + col], WrapLow(b));
+         dest[stride * 2 + col] = ClipPixelAdd(dest[stride * 2 + col], WrapLow(c));
+         dest[stride * 3 + col] = ClipPixelAdd(dest[stride * 3 + col], WrapLow(d));
+     }
+ }
+
+ public static void Iwht4x41Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     // Only input[0] is read. The first pass splits it into four values kept in firstPass.
+     Span<int> firstPass = stackalloc int[4];
+
+     long head = input[0] >> UnitQuantShift;
+     long half = head >> 1;
+     head -= half;
+
+     firstPass[0] = WrapLow(head);
+     int tail = WrapLow(half);
+     firstPass[3] = tail;
+     firstPass[2] = tail;
+     firstPass[1] = tail;
+
+     // Second pass: split each first-pass value again and add it down one dest column.
+     for (int col = 0; col < 4; col++)
+     {
+         long lower = firstPass[col] >> 1;
+         long upper = firstPass[col] - lower;
+
+         dest[stride * 0 + col] = ClipPixelAdd(dest[stride * 0 + col], upper);
+         dest[stride * 1 + col] = ClipPixelAdd(dest[stride * 1 + col], lower);
+         dest[stride * 2 + col] = ClipPixelAdd(dest[stride * 2 + col], lower);
+         dest[stride * 3 + col] = ClipPixelAdd(dest[stride * 3 + col], lower);
+     }
+ }
+
+ public static void Iadst4(ReadOnlySpan<int> input, Span<int> output)
+ {
+     int in0 = input[0];
+     int in1 = input[1];
+     int in2 = input[2];
+     int in3 = input[3];
+
+     // All-zero input produces all-zero output; skip the arithmetic.
+     if ((in0 | in1 | in2 | in3) == 0)
+     {
+         output.Slice(0, 4).Clear();
+         return;
+     }
+
+     // 32-bit result is enough for the following multiplications.
+     long prod0 = SinPi1_9 * in0;
+     long prod1 = SinPi2_9 * in0;
+     long prod2 = SinPi3_9 * in1;
+     long prod3 = SinPi4_9 * in2;
+     long prod4 = SinPi1_9 * in2;
+     long prod5 = SinPi2_9 * in3;
+     long prod6 = SinPi4_9 * in3;
+     long mixed = WrapLow(in0 - in2 + in3);
+
+     long sumA = prod0 + prod3 + prod5;
+     long sumB = prod1 - prod4 - prod6;
+     long sumC = SinPi3_9 * mixed;
+
+     // 1-D transform scaling factor is sqrt(2).
+     // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+     // + 1b (addition) = 29b.
+     // Hence the output bit depth is 15b.
+     output[0] = WrapLow(DctConstRoundShift(sumA + prod2));
+     output[1] = WrapLow(DctConstRoundShift(sumB + prod2));
+     output[2] = WrapLow(DctConstRoundShift(sumC));
+     output[3] = WrapLow(DctConstRoundShift(sumA + sumB - prod2));
+ }
+
+ public static void Idct4(ReadOnlySpan<int> input, Span<int> output)
+ {
+     // Inputs are truncated to 16 bits before use.
+     short in0 = (short)input[0];
+     short in2 = (short)input[2];
+     short in1 = (short)input[1];
+     short in3 = (short)input[3];
+
+     // stage 1: even pair (in0, in2), then odd pair (in1, in3).
+     long evenSum = (in0 + in2) * CosPi16_64;
+     long evenDiff = (in0 - in2) * CosPi16_64;
+     short step0 = (short)WrapLow(DctConstRoundShift(evenSum));
+     short step1 = (short)WrapLow(DctConstRoundShift(evenDiff));
+
+     long oddLow = in1 * CosPi24_64 - in3 * CosPi8_64;
+     long oddHigh = in1 * CosPi8_64 + in3 * CosPi24_64;
+     short step2 = (short)WrapLow(DctConstRoundShift(oddLow));
+     short step3 = (short)WrapLow(DctConstRoundShift(oddHigh));
+
+     // stage 2: final butterfly.
+     output[0] = WrapLow(step0 + step3);
+     output[1] = WrapLow(step1 + step2);
+     output[2] = WrapLow(step1 - step2);
+     output[3] = WrapLow(step0 - step3);
+ }
+
+ public static void Idct4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     const int Size = 4;
+
+     Span<int> rowPass = stackalloc int[Size * Size];
+     Span<int> column = stackalloc int[Size];
+     Span<int> result = stackalloc int[Size];
+
+     // Rows: transform each group of four coefficients into rowPass.
+     for (int row = 0; row < Size; ++row)
+     {
+         Idct4(input.Slice(row * Size), rowPass.Slice(row * Size));
+     }
+
+     // Columns: gather one column of rowPass, transform it, then round by 4 bits and add to dest.
+     for (int col = 0; col < Size; ++col)
+     {
+         for (int k = 0; k < Size; ++k)
+         {
+             column[k] = rowPass[k * Size + col];
+         }
+
+         Idct4(column, result);
+
+         for (int k = 0; k < Size; ++k)
+         {
+             int pos = k * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(result[k], 4));
+         }
+     }
+ }
+
+ // DC-only 4x4 inverse DCT: derives a single residual from input[0] and adds it to every
+ // pixel of the 4x4 block at the start of dest (rows are `stride` bytes apart).
+ public static void Idct4x41Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     // Apply the CosPi16_64 scaling twice (once per transform dimension), wrapping each time.
+     int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64));
+     output = WrapLow(DctConstRoundShift(output * CosPi16_64));
+
+     long a1 = BitUtils.RoundPowerOfTwo(output, 4);
+
+     // Rows are addressed by offset instead of re-slicing dest after each row: slicing by
+     // `stride` after the final row throws when fewer than `stride` bytes remain past the
+     // last row start, even though every pixel that is written lies inside dest.
+     int rowStart = 0;
+     for (int row = 0; row < 4; row++)
+     {
+         for (int col = 0; col < 4; col++)
+         {
+             dest[rowStart + col] = ClipPixelAdd(dest[rowStart + col], a1);
+         }
+
+         rowStart += stride;
+     }
+ }
+
+ // 8-point 1-D inverse ADST (per the method name). Reads input[0..7] and writes output[0..7].
+ // Every intermediate value is passed through WrapLow, i.e. wrapped to 16 bits.
+ public static void Iadst8(ReadOnlySpan<int> input, Span<int> output)
+ {
+ // The s* products are deliberately held as int: the long products below are truncated
+ // with (int) casts before being summed.
+ int s0, s1, s2, s3, s4, s5, s6, s7;
+ // Inputs are loaded in a permuted order.
+ long x0 = input[7];
+ long x1 = input[0];
+ long x2 = input[5];
+ long x3 = input[2];
+ long x4 = input[3];
+ long x5 = input[4];
+ long x6 = input[1];
+ long x7 = input[6];
+
+ // All-zero input: the output is all zero, so skip the arithmetic.
+ if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7) == 0)
+ {
+ output.Slice(0, 8).Fill(0);
+ return;
+ }
+
+ // stage 1
+ s0 = (int)(CosPi2_64 * x0 + CosPi30_64 * x1);
+ s1 = (int)(CosPi30_64 * x0 - CosPi2_64 * x1);
+ s2 = (int)(CosPi10_64 * x2 + CosPi22_64 * x3);
+ s3 = (int)(CosPi22_64 * x2 - CosPi10_64 * x3);
+ s4 = (int)(CosPi18_64 * x4 + CosPi14_64 * x5);
+ s5 = (int)(CosPi14_64 * x4 - CosPi18_64 * x5);
+ s6 = (int)(CosPi26_64 * x6 + CosPi6_64 * x7);
+ s7 = (int)(CosPi6_64 * x6 - CosPi26_64 * x7);
+
+ // The sums/differences below are computed in int before being rounded and wrapped.
+ x0 = WrapLow(DctConstRoundShift(s0 + s4));
+ x1 = WrapLow(DctConstRoundShift(s1 + s5));
+ x2 = WrapLow(DctConstRoundShift(s2 + s6));
+ x3 = WrapLow(DctConstRoundShift(s3 + s7));
+ x4 = WrapLow(DctConstRoundShift(s0 - s4));
+ x5 = WrapLow(DctConstRoundShift(s1 - s5));
+ x6 = WrapLow(DctConstRoundShift(s2 - s6));
+ x7 = WrapLow(DctConstRoundShift(s3 - s7));
+
+ // stage 2
+ // x0..x3 pass through unscaled; x4..x7 are rotated.
+ s0 = (int)x0;
+ s1 = (int)x1;
+ s2 = (int)x2;
+ s3 = (int)x3;
+ s4 = (int)(CosPi8_64 * x4 + CosPi24_64 * x5);
+ s5 = (int)(CosPi24_64 * x4 - CosPi8_64 * x5);
+ s6 = (int)(-CosPi24_64 * x6 + CosPi8_64 * x7);
+ s7 = (int)(CosPi8_64 * x6 + CosPi24_64 * x7);
+
+ x0 = WrapLow(s0 + s2);
+ x1 = WrapLow(s1 + s3);
+ x2 = WrapLow(s0 - s2);
+ x3 = WrapLow(s1 - s3);
+ x4 = WrapLow(DctConstRoundShift(s4 + s6));
+ x5 = WrapLow(DctConstRoundShift(s5 + s7));
+ x6 = WrapLow(DctConstRoundShift(s4 - s6));
+ x7 = WrapLow(DctConstRoundShift(s5 - s7));
+
+ // stage 3
+ // Only x2, x3, x6 and x7 are modified in this stage.
+ s2 = (int)(CosPi16_64 * (x2 + x3));
+ s3 = (int)(CosPi16_64 * (x2 - x3));
+ s6 = (int)(CosPi16_64 * (x6 + x7));
+ s7 = (int)(CosPi16_64 * (x6 - x7));
+
+ x2 = WrapLow(DctConstRoundShift(s2));
+ x3 = WrapLow(DctConstRoundShift(s3));
+ x6 = WrapLow(DctConstRoundShift(s6));
+ x7 = WrapLow(DctConstRoundShift(s7));
+
+ // Outputs are written in a permuted order, with every odd-indexed output negated.
+ output[0] = WrapLow(x0);
+ output[1] = WrapLow(-x4);
+ output[2] = WrapLow(x6);
+ output[3] = WrapLow(-x2);
+ output[4] = WrapLow(x3);
+ output[5] = WrapLow(-x7);
+ output[6] = WrapLow(x5);
+ output[7] = WrapLow(-x1);
+ }
+
+ // 8-point 1-D inverse DCT (per the method name). Reads input[0..7] and writes output[0..7].
+ // Works in four stages that alternate between the step1 and step2 scratch buffers; every
+ // intermediate value is wrapped to 16 bits through WrapLow and stored as short.
+ public static void Idct8(ReadOnlySpan<int> input, Span<int> output)
+ {
+ Span<short> step1 = stackalloc short[8];
+ Span<short> step2 = stackalloc short[8];
+ long temp1, temp2;
+
+ // stage 1
+ // Even-indexed inputs are copied (truncated to short) in reordered positions;
+ // odd-indexed inputs are rotated in pairs into step1[4..7].
+ step1[0] = (short)input[0];
+ step1[2] = (short)input[4];
+ step1[1] = (short)input[2];
+ step1[3] = (short)input[6];
+ temp1 = (short)input[1] * CosPi28_64 - (short)input[7] * CosPi4_64;
+ temp2 = (short)input[1] * CosPi4_64 + (short)input[7] * CosPi28_64;
+ step1[4] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[7] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (short)input[5] * CosPi12_64 - (short)input[3] * CosPi20_64;
+ temp2 = (short)input[5] * CosPi20_64 + (short)input[3] * CosPi12_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ // stage 2
+ // step1[0..3] go through the same arithmetic as stage 1 of Idct4; step1[4..7] are
+ // combined by plain add/subtract.
+ temp1 = (step1[0] + step1[2]) * CosPi16_64;
+ temp2 = (step1[0] - step1[2]) * CosPi16_64;
+ step2[0] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[1] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = step1[1] * CosPi24_64 - step1[3] * CosPi8_64;
+ temp2 = step1[1] * CosPi8_64 + step1[3] * CosPi24_64;
+ step2[2] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[3] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[4] = (short)WrapLow(step1[4] + step1[5]);
+ step2[5] = (short)WrapLow(step1[4] - step1[5]);
+ step2[6] = (short)WrapLow(-step1[6] + step1[7]);
+ step2[7] = (short)WrapLow(step1[6] + step1[7]);
+
+ // stage 3
+ // step2[4] and step2[7] pass through; step2[5] and step2[6] are rotated by CosPi16_64.
+ step1[0] = (short)WrapLow(step2[0] + step2[3]);
+ step1[1] = (short)WrapLow(step2[1] + step2[2]);
+ step1[2] = (short)WrapLow(step2[1] - step2[2]);
+ step1[3] = (short)WrapLow(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * CosPi16_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[7] = step2[7];
+
+ // stage 4
+ // Final butterfly: output[k] and output[7 - k] are the sum and difference of
+ // step1[k] and step1[7 - k].
+ output[0] = WrapLow(step1[0] + step1[7]);
+ output[1] = WrapLow(step1[1] + step1[6]);
+ output[2] = WrapLow(step1[2] + step1[5]);
+ output[3] = WrapLow(step1[3] + step1[4]);
+ output[4] = WrapLow(step1[3] - step1[4]);
+ output[5] = WrapLow(step1[2] - step1[5]);
+ output[6] = WrapLow(step1[1] - step1[6]);
+ output[7] = WrapLow(step1[0] - step1[7]);
+ }
+
+ public static void Idct8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     const int Size = 8;
+
+     Span<int> rowPass = stackalloc int[Size * Size];
+     Span<int> column = stackalloc int[Size];
+     Span<int> result = stackalloc int[Size];
+
+     // Row pass: all eight rows of coefficients are transformed into rowPass.
+     for (int row = 0; row < Size; ++row)
+     {
+         Idct8(input.Slice(row * Size), rowPass.Slice(row * Size));
+     }
+
+     // Column pass: gather one column, transform it, round by 5 bits and add to dest.
+     for (int col = 0; col < Size; ++col)
+     {
+         for (int k = 0; k < Size; ++k)
+         {
+             column[k] = rowPass[k * Size + col];
+         }
+
+         Idct8(column, result);
+
+         for (int k = 0; k < Size; ++k)
+         {
+             int pos = k * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(result[k], 5));
+         }
+     }
+ }
+
+ // 8x8 inverse DCT + add for blocks whose non-zero coefficients all lie in the first four
+ // rows: only those rows go through the row pass, then all eight columns are transformed
+ // and added to dest (rows are `stride` bytes apart).
+ public static void Idct8x812Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[8 * 8];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[8];
+     Span<int> tempOut = stackalloc int[8];
+
+     // Rows 4..7 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear them
+     // explicitly instead of relying on the runtime's locals-init behavior.
+     output.Slice(4 * 8).Clear();
+
+     // First transform rows
+     // Only the first 4 rows have non-zero coefficients
+     for (i = 0; i < 4; ++i)
+     {
+         Idct8(input, outptr);
+         input = input.Slice(8);
+         outptr = outptr.Slice(8);
+     }
+
+     // Then transform columns
+     for (i = 0; i < 8; ++i)
+     {
+         for (j = 0; j < 8; ++j)
+         {
+             tempIn[j] = output[j * 8 + i];
+         }
+
+         Idct8(tempIn, tempOut);
+         for (j = 0; j < 8; ++j)
+         {
+             dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i],
+                 BitUtils.RoundPowerOfTwo(tempOut[j], 5));
+         }
+     }
+ }
+
+ // DC-only 8x8 inverse DCT: derives a single residual from input[0] and adds it to every
+ // pixel of the 8x8 block at the start of dest (rows are `stride` bytes apart).
+ public static void Idct8x81Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     // Apply the CosPi16_64 scaling twice (once per transform dimension), wrapping each time.
+     int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64));
+     output = WrapLow(DctConstRoundShift(output * CosPi16_64));
+
+     long a1 = BitUtils.RoundPowerOfTwo(output, 5);
+
+     // Rows are addressed by offset instead of re-slicing dest after each row: slicing by
+     // `stride` after the final row throws when fewer than `stride` bytes remain past the
+     // last row start, even though every pixel that is written lies inside dest.
+     int rowStart = 0;
+     for (int j = 0; j < 8; ++j)
+     {
+         for (int i = 0; i < 8; ++i)
+         {
+             dest[rowStart + i] = ClipPixelAdd(dest[rowStart + i], a1);
+         }
+
+         rowStart += stride;
+     }
+ }
+
+ // 16-point 1-D inverse ADST (per the method name). Reads input[0..15] and writes
+ // output[0..15]. All arithmetic is done in long; every stage result is wrapped to 16 bits
+ // through WrapLow.
+ public static void Iadst16(ReadOnlySpan<int> input, Span<int> output)
+ {
+ long s0, s1, s2, s3, s4, s5, s6, s7, s8;
+ long s9, s10, s11, s12, s13, s14, s15;
+ // Inputs are loaded in a permuted order.
+ long x0 = input[15];
+ long x1 = input[0];
+ long x2 = input[13];
+ long x3 = input[2];
+ long x4 = input[11];
+ long x5 = input[4];
+ long x6 = input[9];
+ long x7 = input[6];
+ long x8 = input[7];
+ long x9 = input[8];
+ long x10 = input[5];
+ long x11 = input[10];
+ long x12 = input[3];
+ long x13 = input[12];
+ long x14 = input[1];
+ long x15 = input[14];
+
+ // All-zero input: the output is all zero, so skip the arithmetic.
+ if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15) == 0)
+ {
+ output.Slice(0, 16).Fill(0);
+ return;
+ }
+
+ // stage 1
+ // Rotate each (x[2k], x[2k + 1]) pair, then combine s[k] with s[k + 8].
+ s0 = x0 * CosPi1_64 + x1 * CosPi31_64;
+ s1 = x0 * CosPi31_64 - x1 * CosPi1_64;
+ s2 = x2 * CosPi5_64 + x3 * CosPi27_64;
+ s3 = x2 * CosPi27_64 - x3 * CosPi5_64;
+ s4 = x4 * CosPi9_64 + x5 * CosPi23_64;
+ s5 = x4 * CosPi23_64 - x5 * CosPi9_64;
+ s6 = x6 * CosPi13_64 + x7 * CosPi19_64;
+ s7 = x6 * CosPi19_64 - x7 * CosPi13_64;
+ s8 = x8 * CosPi17_64 + x9 * CosPi15_64;
+ s9 = x8 * CosPi15_64 - x9 * CosPi17_64;
+ s10 = x10 * CosPi21_64 + x11 * CosPi11_64;
+ s11 = x10 * CosPi11_64 - x11 * CosPi21_64;
+ s12 = x12 * CosPi25_64 + x13 * CosPi7_64;
+ s13 = x12 * CosPi7_64 - x13 * CosPi25_64;
+ s14 = x14 * CosPi29_64 + x15 * CosPi3_64;
+ s15 = x14 * CosPi3_64 - x15 * CosPi29_64;
+
+ x0 = WrapLow(DctConstRoundShift(s0 + s8));
+ x1 = WrapLow(DctConstRoundShift(s1 + s9));
+ x2 = WrapLow(DctConstRoundShift(s2 + s10));
+ x3 = WrapLow(DctConstRoundShift(s3 + s11));
+ x4 = WrapLow(DctConstRoundShift(s4 + s12));
+ x5 = WrapLow(DctConstRoundShift(s5 + s13));
+ x6 = WrapLow(DctConstRoundShift(s6 + s14));
+ x7 = WrapLow(DctConstRoundShift(s7 + s15));
+ x8 = WrapLow(DctConstRoundShift(s0 - s8));
+ x9 = WrapLow(DctConstRoundShift(s1 - s9));
+ x10 = WrapLow(DctConstRoundShift(s2 - s10));
+ x11 = WrapLow(DctConstRoundShift(s3 - s11));
+ x12 = WrapLow(DctConstRoundShift(s4 - s12));
+ x13 = WrapLow(DctConstRoundShift(s5 - s13));
+ x14 = WrapLow(DctConstRoundShift(s6 - s14));
+ x15 = WrapLow(DctConstRoundShift(s7 - s15));
+
+ // stage 2
+ // x0..x7 pass through unscaled; x8..x15 are rotated in pairs.
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * CosPi4_64 + x9 * CosPi28_64;
+ s9 = x8 * CosPi28_64 - x9 * CosPi4_64;
+ s10 = x10 * CosPi20_64 + x11 * CosPi12_64;
+ s11 = x10 * CosPi12_64 - x11 * CosPi20_64;
+ s12 = -x12 * CosPi28_64 + x13 * CosPi4_64;
+ s13 = x12 * CosPi4_64 + x13 * CosPi28_64;
+ s14 = -x14 * CosPi12_64 + x15 * CosPi20_64;
+ s15 = x14 * CosPi20_64 + x15 * CosPi12_64;
+
+ x0 = WrapLow(s0 + s4);
+ x1 = WrapLow(s1 + s5);
+ x2 = WrapLow(s2 + s6);
+ x3 = WrapLow(s3 + s7);
+ x4 = WrapLow(s0 - s4);
+ x5 = WrapLow(s1 - s5);
+ x6 = WrapLow(s2 - s6);
+ x7 = WrapLow(s3 - s7);
+ x8 = WrapLow(DctConstRoundShift(s8 + s12));
+ x9 = WrapLow(DctConstRoundShift(s9 + s13));
+ x10 = WrapLow(DctConstRoundShift(s10 + s14));
+ x11 = WrapLow(DctConstRoundShift(s11 + s15));
+ x12 = WrapLow(DctConstRoundShift(s8 - s12));
+ x13 = WrapLow(DctConstRoundShift(s9 - s13));
+ x14 = WrapLow(DctConstRoundShift(s10 - s14));
+ x15 = WrapLow(DctConstRoundShift(s11 - s15));
+
+ // stage 3
+ // x0..x3 and x8..x11 pass through unscaled; x4..x7 and x12..x15 are rotated by
+ // CosPi8_64 / CosPi24_64.
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * CosPi8_64 + x5 * CosPi24_64;
+ s5 = x4 * CosPi24_64 - x5 * CosPi8_64;
+ s6 = -x6 * CosPi24_64 + x7 * CosPi8_64;
+ s7 = x6 * CosPi8_64 + x7 * CosPi24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * CosPi8_64 + x13 * CosPi24_64;
+ s13 = x12 * CosPi24_64 - x13 * CosPi8_64;
+ s14 = -x14 * CosPi24_64 + x15 * CosPi8_64;
+ s15 = x14 * CosPi8_64 + x15 * CosPi24_64;
+
+ x0 = WrapLow(s0 + s2);
+ x1 = WrapLow(s1 + s3);
+ x2 = WrapLow(s0 - s2);
+ x3 = WrapLow(s1 - s3);
+ x4 = WrapLow(DctConstRoundShift(s4 + s6));
+ x5 = WrapLow(DctConstRoundShift(s5 + s7));
+ x6 = WrapLow(DctConstRoundShift(s4 - s6));
+ x7 = WrapLow(DctConstRoundShift(s5 - s7));
+ x8 = WrapLow(s8 + s10);
+ x9 = WrapLow(s9 + s11);
+ x10 = WrapLow(s8 - s10);
+ x11 = WrapLow(s9 - s11);
+ x12 = WrapLow(DctConstRoundShift(s12 + s14));
+ x13 = WrapLow(DctConstRoundShift(s13 + s15));
+ x14 = WrapLow(DctConstRoundShift(s12 - s14));
+ x15 = WrapLow(DctConstRoundShift(s13 - s15));
+
+ // stage 4
+ // Only the pairs (x2, x3), (x6, x7), (x10, x11) and (x14, x15) are modified here,
+ // each scaled by +/- CosPi16_64.
+ s2 = (-CosPi16_64) * (x2 + x3);
+ s3 = CosPi16_64 * (x2 - x3);
+ s6 = CosPi16_64 * (x6 + x7);
+ s7 = CosPi16_64 * (-x6 + x7);
+ s10 = CosPi16_64 * (x10 + x11);
+ s11 = CosPi16_64 * (-x10 + x11);
+ s14 = (-CosPi16_64) * (x14 + x15);
+ s15 = CosPi16_64 * (x14 - x15);
+
+ x2 = WrapLow(DctConstRoundShift(s2));
+ x3 = WrapLow(DctConstRoundShift(s3));
+ x6 = WrapLow(DctConstRoundShift(s6));
+ x7 = WrapLow(DctConstRoundShift(s7));
+ x10 = WrapLow(DctConstRoundShift(s10));
+ x11 = WrapLow(DctConstRoundShift(s11));
+ x14 = WrapLow(DctConstRoundShift(s14));
+ x15 = WrapLow(DctConstRoundShift(s15));
+
+ // Outputs are written in a permuted order; outputs 1, 3, 13 and 15 are negated.
+ output[0] = WrapLow(x0);
+ output[1] = WrapLow(-x8);
+ output[2] = WrapLow(x12);
+ output[3] = WrapLow(-x4);
+ output[4] = WrapLow(x6);
+ output[5] = WrapLow(x14);
+ output[6] = WrapLow(x10);
+ output[7] = WrapLow(x2);
+ output[8] = WrapLow(x3);
+ output[9] = WrapLow(x11);
+ output[10] = WrapLow(x15);
+ output[11] = WrapLow(x7);
+ output[12] = WrapLow(x5);
+ output[13] = WrapLow(-x13);
+ output[14] = WrapLow(x9);
+ output[15] = WrapLow(-x1);
+ }
+
+ // 16-point 1-D inverse DCT (per the method name). Reads input[0..15] and writes
+ // output[0..15]. Works in seven stages that alternate between the step1 and step2 scratch
+ // buffers; every computed intermediate is wrapped to 16 bits through WrapLow and stored
+ // as short.
+ public static void Idct16(ReadOnlySpan<int> input, Span<int> output)
+ {
+ Span<short> step1 = stackalloc short[16];
+ Span<short> step2 = stackalloc short[16];
+ long temp1, temp2;
+
+ // stage 1
+ // Pure reordering (inputs truncated to short). The indices are written as n / 2 and
+ // evaluate to 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15.
+ step1[0] = (short)input[0 / 2];
+ step1[1] = (short)input[16 / 2];
+ step1[2] = (short)input[8 / 2];
+ step1[3] = (short)input[24 / 2];
+ step1[4] = (short)input[4 / 2];
+ step1[5] = (short)input[20 / 2];
+ step1[6] = (short)input[12 / 2];
+ step1[7] = (short)input[28 / 2];
+ step1[8] = (short)input[2 / 2];
+ step1[9] = (short)input[18 / 2];
+ step1[10] = (short)input[10 / 2];
+ step1[11] = (short)input[26 / 2];
+ step1[12] = (short)input[6 / 2];
+ step1[13] = (short)input[22 / 2];
+ step1[14] = (short)input[14 / 2];
+ step1[15] = (short)input[30 / 2];
+
+ // stage 2
+ // Entries 0..7 pass through; entries 8..15 are rotated in mirrored pairs (k, 23 - k).
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * CosPi30_64 - step1[15] * CosPi2_64;
+ temp2 = step1[8] * CosPi2_64 + step1[15] * CosPi30_64;
+ step2[8] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[15] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[9] * CosPi14_64 - step1[14] * CosPi18_64;
+ temp2 = step1[9] * CosPi18_64 + step1[14] * CosPi14_64;
+ step2[9] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[14] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[10] * CosPi22_64 - step1[13] * CosPi10_64;
+ temp2 = step1[10] * CosPi10_64 + step1[13] * CosPi22_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[11] * CosPi6_64 - step1[12] * CosPi26_64;
+ temp2 = step1[11] * CosPi26_64 + step1[12] * CosPi6_64;
+ step2[11] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[12] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ // stage 3
+ // Entries 0..3 pass through; 4..7 are rotated; 8..15 are combined by add/subtract.
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * CosPi28_64 - step2[7] * CosPi4_64;
+ temp2 = step2[4] * CosPi4_64 + step2[7] * CosPi28_64;
+ step1[4] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[7] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = step2[5] * CosPi12_64 - step2[6] * CosPi20_64;
+ temp2 = step2[5] * CosPi20_64 + step2[6] * CosPi12_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ step1[8] = (short)WrapLow(step2[8] + step2[9]);
+ step1[9] = (short)WrapLow(step2[8] - step2[9]);
+ step1[10] = (short)WrapLow(-step2[10] + step2[11]);
+ step1[11] = (short)WrapLow(step2[10] + step2[11]);
+ step1[12] = (short)WrapLow(step2[12] + step2[13]);
+ step1[13] = (short)WrapLow(step2[12] - step2[13]);
+ step1[14] = (short)WrapLow(-step2[14] + step2[15]);
+ step1[15] = (short)WrapLow(step2[14] + step2[15]);
+
+ // stage 4
+ // Entries 0..3 are rotated, 4..7 are combined by add/subtract; of 8..15 only
+ // the pairs (9, 14) and (10, 13) are rotated, the rest pass through.
+ temp1 = (step1[0] + step1[1]) * CosPi16_64;
+ temp2 = (step1[0] - step1[1]) * CosPi16_64;
+ step2[0] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[1] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = step1[2] * CosPi24_64 - step1[3] * CosPi8_64;
+ temp2 = step1[2] * CosPi8_64 + step1[3] * CosPi24_64;
+ step2[2] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[3] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[4] = (short)WrapLow(step1[4] + step1[5]);
+ step2[5] = (short)WrapLow(step1[4] - step1[5]);
+ step2[6] = (short)WrapLow(-step1[6] + step1[7]);
+ step2[7] = (short)WrapLow(step1[6] + step1[7]);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * CosPi8_64 + step1[14] * CosPi24_64;
+ temp2 = step1[9] * CosPi24_64 + step1[14] * CosPi8_64;
+ step2[9] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[14] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step1[10] * CosPi24_64 - step1[13] * CosPi8_64;
+ temp2 = -step1[10] * CosPi8_64 + step1[13] * CosPi24_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ // Entries 0..3 and 8..15 are combined by add/subtract; 5 and 6 are rotated by
+ // CosPi16_64; 4 and 7 pass through.
+ step1[0] = (short)WrapLow(step2[0] + step2[3]);
+ step1[1] = (short)WrapLow(step2[1] + step2[2]);
+ step1[2] = (short)WrapLow(step2[1] - step2[2]);
+ step1[3] = (short)WrapLow(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * CosPi16_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[7] = step2[7];
+
+ step1[8] = (short)WrapLow(step2[8] + step2[11]);
+ step1[9] = (short)WrapLow(step2[9] + step2[10]);
+ step1[10] = (short)WrapLow(step2[9] - step2[10]);
+ step1[11] = (short)WrapLow(step2[8] - step2[11]);
+ step1[12] = (short)WrapLow(-step2[12] + step2[15]);
+ step1[13] = (short)WrapLow(-step2[13] + step2[14]);
+ step1[14] = (short)WrapLow(step2[13] + step2[14]);
+ step1[15] = (short)WrapLow(step2[12] + step2[15]);
+
+ // stage 6
+ // Entries 0..7 are combined by add/subtract (k with 7 - k); the pairs (10, 13) and
+ // (11, 12) are rotated by CosPi16_64; 8, 9, 14 and 15 pass through.
+ step2[0] = (short)WrapLow(step1[0] + step1[7]);
+ step2[1] = (short)WrapLow(step1[1] + step1[6]);
+ step2[2] = (short)WrapLow(step1[2] + step1[5]);
+ step2[3] = (short)WrapLow(step1[3] + step1[4]);
+ step2[4] = (short)WrapLow(step1[3] - step1[4]);
+ step2[5] = (short)WrapLow(step1[2] - step1[5]);
+ step2[6] = (short)WrapLow(step1[1] - step1[6]);
+ step2[7] = (short)WrapLow(step1[0] - step1[7]);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * CosPi16_64;
+ temp2 = (step1[10] + step1[13]) * CosPi16_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (-step1[11] + step1[12]) * CosPi16_64;
+ temp2 = (step1[11] + step1[12]) * CosPi16_64;
+ step2[11] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[12] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7
+ // Final butterfly: output[k] and output[15 - k] are the sum and difference of
+ // step2[k] and step2[15 - k].
+ output[0] = WrapLow(step2[0] + step2[15]);
+ output[1] = WrapLow(step2[1] + step2[14]);
+ output[2] = WrapLow(step2[2] + step2[13]);
+ output[3] = WrapLow(step2[3] + step2[12]);
+ output[4] = WrapLow(step2[4] + step2[11]);
+ output[5] = WrapLow(step2[5] + step2[10]);
+ output[6] = WrapLow(step2[6] + step2[9]);
+ output[7] = WrapLow(step2[7] + step2[8]);
+ output[8] = WrapLow(step2[7] - step2[8]);
+ output[9] = WrapLow(step2[6] - step2[9]);
+ output[10] = WrapLow(step2[5] - step2[10]);
+ output[11] = WrapLow(step2[4] - step2[11]);
+ output[12] = WrapLow(step2[3] - step2[12]);
+ output[13] = WrapLow(step2[2] - step2[13]);
+ output[14] = WrapLow(step2[1] - step2[14]);
+ output[15] = WrapLow(step2[0] - step2[15]);
+ }
+
+ public static void Idct16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     const int Size = 16;
+
+     Span<int> rowPass = stackalloc int[Size * Size];
+     Span<int> column = stackalloc int[Size];
+     Span<int> result = stackalloc int[Size];
+
+     // Row pass: all sixteen rows of coefficients are transformed into rowPass.
+     for (int row = 0; row < Size; ++row)
+     {
+         Idct16(input.Slice(row * Size), rowPass.Slice(row * Size));
+     }
+
+     // Column pass: gather one column, transform it, round by 6 bits and add to dest.
+     for (int col = 0; col < Size; ++col)
+     {
+         for (int k = 0; k < Size; ++k)
+         {
+             column[k] = rowPass[k * Size + col];
+         }
+
+         Idct16(column, result);
+
+         for (int k = 0; k < Size; ++k)
+         {
+             int pos = k * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(result[k], 6));
+         }
+     }
+ }
+
+ // 16x16 inverse DCT + add for blocks whose non-zero coefficients all lie in the upper-left
+ // 8x8 area: only the first eight rows go through the row pass, then all sixteen columns
+ // are transformed and added to dest (rows are `stride` bytes apart).
+ public static void Idct16x1638Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[16 * 16];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[16];
+     Span<int> tempOut = stackalloc int[16];
+
+     // Rows 8..15 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear them
+     // explicitly instead of relying on the runtime's locals-init behavior.
+     output.Slice(8 * 16).Clear();
+
+     // First transform rows. Since all non-zero dct coefficients are in
+     // upper-left 8x8 area, we only need to calculate first 8 rows here.
+     for (i = 0; i < 8; ++i)
+     {
+         Idct16(input, outptr);
+         input = input.Slice(16);
+         outptr = outptr.Slice(16);
+     }
+
+     // Then transform columns
+     for (i = 0; i < 16; ++i)
+     {
+         for (j = 0; j < 16; ++j)
+         {
+             tempIn[j] = output[j * 16 + i];
+         }
+
+         Idct16(tempIn, tempOut);
+         for (j = 0; j < 16; ++j)
+         {
+             dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
+         }
+     }
+ }
+
+ // 16x16 inverse DCT + add for blocks whose non-zero coefficients all lie in the upper-left
+ // 4x4 area: only the first four rows go through the row pass, then all sixteen columns
+ // are transformed and added to dest (rows are `stride` bytes apart).
+ public static void Idct16x1610Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[16 * 16];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[16];
+     Span<int> tempOut = stackalloc int[16];
+
+     // Rows 4..15 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear them
+     // explicitly instead of relying on the runtime's locals-init behavior.
+     output.Slice(4 * 16).Clear();
+
+     // First transform rows. Since all non-zero dct coefficients are in
+     // upper-left 4x4 area, we only need to calculate first 4 rows here.
+     for (i = 0; i < 4; ++i)
+     {
+         Idct16(input, outptr);
+         input = input.Slice(16);
+         outptr = outptr.Slice(16);
+     }
+
+     // Then transform columns
+     for (i = 0; i < 16; ++i)
+     {
+         for (j = 0; j < 16; ++j)
+         {
+             tempIn[j] = output[j * 16 + i];
+         }
+
+         Idct16(tempIn, tempOut);
+         for (j = 0; j < 16; ++j)
+         {
+             dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
+         }
+     }
+ }
+
+ // DC-only 16x16 inverse DCT: derives a single residual from input[0] and adds it to every
+ // pixel of the 16x16 block at the start of dest (rows are `stride` bytes apart).
+ public static void Idct16x161Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     // Apply the CosPi16_64 scaling twice (once per transform dimension), wrapping each time.
+     int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64));
+     output = WrapLow(DctConstRoundShift(output * CosPi16_64));
+
+     long a1 = BitUtils.RoundPowerOfTwo(output, 6);
+
+     // Rows are addressed by offset instead of re-slicing dest after each row: slicing by
+     // `stride` after the final row throws when fewer than `stride` bytes remain past the
+     // last row start, even though every pixel that is written lies inside dest.
+     int rowStart = 0;
+     for (int j = 0; j < 16; ++j)
+     {
+         for (int i = 0; i < 16; ++i)
+         {
+             dest[rowStart + i] = ClipPixelAdd(dest[rowStart + i], a1);
+         }
+
+         rowStart += stride;
+     }
+ }
+
+ public static void Idct32(ReadOnlySpan<int> input, Span<int> output)
+ {
+ Span<short> step1 = stackalloc short[32];
+ Span<short> step2 = stackalloc short[32];
+ long temp1, temp2;
+
+ // stage 1
+ step1[0] = (short)input[0];
+ step1[1] = (short)input[16];
+ step1[2] = (short)input[8];
+ step1[3] = (short)input[24];
+ step1[4] = (short)input[4];
+ step1[5] = (short)input[20];
+ step1[6] = (short)input[12];
+ step1[7] = (short)input[28];
+ step1[8] = (short)input[2];
+ step1[9] = (short)input[18];
+ step1[10] = (short)input[10];
+ step1[11] = (short)input[26];
+ step1[12] = (short)input[6];
+ step1[13] = (short)input[22];
+ step1[14] = (short)input[14];
+ step1[15] = (short)input[30];
+
+ temp1 = (short)input[1] * CosPi31_64 - (short)input[31] * CosPi1_64;
+ temp2 = (short)input[1] * CosPi1_64 + (short)input[31] * CosPi31_64;
+ step1[16] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[31] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[17] * CosPi15_64 - (short)input[15] * CosPi17_64;
+ temp2 = (short)input[17] * CosPi17_64 + (short)input[15] * CosPi15_64;
+ step1[17] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[30] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[9] * CosPi23_64 - (short)input[23] * CosPi9_64;
+ temp2 = (short)input[9] * CosPi9_64 + (short)input[23] * CosPi23_64;
+ step1[18] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[29] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[25] * CosPi7_64 - (short)input[7] * CosPi25_64;
+ temp2 = (short)input[25] * CosPi25_64 + (short)input[7] * CosPi7_64;
+ step1[19] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[28] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[5] * CosPi27_64 - (short)input[27] * CosPi5_64;
+ temp2 = (short)input[5] * CosPi5_64 + (short)input[27] * CosPi27_64;
+ step1[20] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[27] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[21] * CosPi11_64 - (short)input[11] * CosPi21_64;
+ temp2 = (short)input[21] * CosPi21_64 + (short)input[11] * CosPi11_64;
+ step1[21] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[26] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[13] * CosPi19_64 - (short)input[19] * CosPi13_64;
+ temp2 = (short)input[13] * CosPi13_64 + (short)input[19] * CosPi19_64;
+ step1[22] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[25] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = (short)input[29] * CosPi3_64 - (short)input[3] * CosPi29_64;
+ temp2 = (short)input[29] * CosPi29_64 + (short)input[3] * CosPi3_64;
+ step1[23] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[24] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * CosPi30_64 - step1[15] * CosPi2_64;
+ temp2 = step1[8] * CosPi2_64 + step1[15] * CosPi30_64;
+ step2[8] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[15] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[9] * CosPi14_64 - step1[14] * CosPi18_64;
+ temp2 = step1[9] * CosPi18_64 + step1[14] * CosPi14_64;
+ step2[9] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[14] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[10] * CosPi22_64 - step1[13] * CosPi10_64;
+ temp2 = step1[10] * CosPi10_64 + step1[13] * CosPi22_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ temp1 = step1[11] * CosPi6_64 - step1[12] * CosPi26_64;
+ temp2 = step1[11] * CosPi26_64 + step1[12] * CosPi6_64;
+ step2[11] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[12] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ step2[16] = (short)WrapLow(step1[16] + step1[17]);
+ step2[17] = (short)WrapLow(step1[16] - step1[17]);
+ step2[18] = (short)WrapLow(-step1[18] + step1[19]);
+ step2[19] = (short)WrapLow(step1[18] + step1[19]);
+ step2[20] = (short)WrapLow(step1[20] + step1[21]);
+ step2[21] = (short)WrapLow(step1[20] - step1[21]);
+ step2[22] = (short)WrapLow(-step1[22] + step1[23]);
+ step2[23] = (short)WrapLow(step1[22] + step1[23]);
+ step2[24] = (short)WrapLow(step1[24] + step1[25]);
+ step2[25] = (short)WrapLow(step1[24] - step1[25]);
+ step2[26] = (short)WrapLow(-step1[26] + step1[27]);
+ step2[27] = (short)WrapLow(step1[26] + step1[27]);
+ step2[28] = (short)WrapLow(step1[28] + step1[29]);
+ step2[29] = (short)WrapLow(step1[28] - step1[29]);
+ step2[30] = (short)WrapLow(-step1[30] + step1[31]);
+ step2[31] = (short)WrapLow(step1[30] + step1[31]);
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * CosPi28_64 - step2[7] * CosPi4_64;
+ temp2 = step2[4] * CosPi4_64 + step2[7] * CosPi28_64;
+ step1[4] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[7] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = step2[5] * CosPi12_64 - step2[6] * CosPi20_64;
+ temp2 = step2[5] * CosPi20_64 + step2[6] * CosPi12_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+
+ step1[8] = (short)WrapLow(step2[8] + step2[9]);
+ step1[9] = (short)WrapLow(step2[8] - step2[9]);
+ step1[10] = (short)WrapLow(-step2[10] + step2[11]);
+ step1[11] = (short)WrapLow(step2[10] + step2[11]);
+ step1[12] = (short)WrapLow(step2[12] + step2[13]);
+ step1[13] = (short)WrapLow(step2[12] - step2[13]);
+ step1[14] = (short)WrapLow(-step2[14] + step2[15]);
+ step1[15] = (short)WrapLow(step2[14] + step2[15]);
+
+ step1[16] = step2[16];
+ step1[31] = step2[31];
+ temp1 = -step2[17] * CosPi4_64 + step2[30] * CosPi28_64;
+ temp2 = step2[17] * CosPi28_64 + step2[30] * CosPi4_64;
+ step1[17] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[30] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step2[18] * CosPi28_64 - step2[29] * CosPi4_64;
+ temp2 = -step2[18] * CosPi4_64 + step2[29] * CosPi28_64;
+ step1[18] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[29] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[19] = step2[19];
+ step1[20] = step2[20];
+ temp1 = -step2[21] * CosPi20_64 + step2[26] * CosPi12_64;
+ temp2 = step2[21] * CosPi12_64 + step2[26] * CosPi20_64;
+ step1[21] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[26] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step2[22] * CosPi12_64 - step2[25] * CosPi20_64;
+ temp2 = -step2[22] * CosPi20_64 + step2[25] * CosPi12_64;
+ step1[22] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[25] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[27] = step2[27];
+ step1[28] = step2[28];
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * CosPi16_64;
+ temp2 = (step1[0] - step1[1]) * CosPi16_64;
+ step2[0] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[1] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = step1[2] * CosPi24_64 - step1[3] * CosPi8_64;
+ temp2 = step1[2] * CosPi8_64 + step1[3] * CosPi24_64;
+ step2[2] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[3] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[4] = (short)WrapLow(step1[4] + step1[5]);
+ step2[5] = (short)WrapLow(step1[4] - step1[5]);
+ step2[6] = (short)WrapLow(-step1[6] + step1[7]);
+ step2[7] = (short)WrapLow(step1[6] + step1[7]);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * CosPi8_64 + step1[14] * CosPi24_64;
+ temp2 = step1[9] * CosPi24_64 + step1[14] * CosPi8_64;
+ step2[9] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[14] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step1[10] * CosPi24_64 - step1[13] * CosPi8_64;
+ temp2 = -step1[10] * CosPi8_64 + step1[13] * CosPi24_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ step2[16] = (short)WrapLow(step1[16] + step1[19]);
+ step2[17] = (short)WrapLow(step1[17] + step1[18]);
+ step2[18] = (short)WrapLow(step1[17] - step1[18]);
+ step2[19] = (short)WrapLow(step1[16] - step1[19]);
+ step2[20] = (short)WrapLow(-step1[20] + step1[23]);
+ step2[21] = (short)WrapLow(-step1[21] + step1[22]);
+ step2[22] = (short)WrapLow(step1[21] + step1[22]);
+ step2[23] = (short)WrapLow(step1[20] + step1[23]);
+
+ step2[24] = (short)WrapLow(step1[24] + step1[27]);
+ step2[25] = (short)WrapLow(step1[25] + step1[26]);
+ step2[26] = (short)WrapLow(step1[25] - step1[26]);
+ step2[27] = (short)WrapLow(step1[24] - step1[27]);
+ step2[28] = (short)WrapLow(-step1[28] + step1[31]);
+ step2[29] = (short)WrapLow(-step1[29] + step1[30]);
+ step2[30] = (short)WrapLow(step1[29] + step1[30]);
+ step2[31] = (short)WrapLow(step1[28] + step1[31]);
+
+ // stage 5
+ step1[0] = (short)WrapLow(step2[0] + step2[3]);
+ step1[1] = (short)WrapLow(step2[1] + step2[2]);
+ step1[2] = (short)WrapLow(step2[1] - step2[2]);
+ step1[3] = (short)WrapLow(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * CosPi16_64;
+ step1[5] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[6] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[7] = step2[7];
+
+ step1[8] = (short)WrapLow(step2[8] + step2[11]);
+ step1[9] = (short)WrapLow(step2[9] + step2[10]);
+ step1[10] = (short)WrapLow(step2[9] - step2[10]);
+ step1[11] = (short)WrapLow(step2[8] - step2[11]);
+ step1[12] = (short)WrapLow(-step2[12] + step2[15]);
+ step1[13] = (short)WrapLow(-step2[13] + step2[14]);
+ step1[14] = (short)WrapLow(step2[13] + step2[14]);
+ step1[15] = (short)WrapLow(step2[12] + step2[15]);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ temp1 = -step2[18] * CosPi8_64 + step2[29] * CosPi24_64;
+ temp2 = step2[18] * CosPi24_64 + step2[29] * CosPi8_64;
+ step1[18] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[29] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step2[19] * CosPi8_64 + step2[28] * CosPi24_64;
+ temp2 = step2[19] * CosPi24_64 + step2[28] * CosPi8_64;
+ step1[19] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[28] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step2[20] * CosPi24_64 - step2[27] * CosPi8_64;
+ temp2 = -step2[20] * CosPi8_64 + step2[27] * CosPi24_64;
+ step1[20] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[27] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = -step2[21] * CosPi24_64 - step2[26] * CosPi8_64;
+ temp2 = -step2[21] * CosPi8_64 + step2[26] * CosPi24_64;
+ step1[21] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[26] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[22] = step2[22];
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[25] = step2[25];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // stage 6
+ step2[0] = (short)WrapLow(step1[0] + step1[7]);
+ step2[1] = (short)WrapLow(step1[1] + step1[6]);
+ step2[2] = (short)WrapLow(step1[2] + step1[5]);
+ step2[3] = (short)WrapLow(step1[3] + step1[4]);
+ step2[4] = (short)WrapLow(step1[3] - step1[4]);
+ step2[5] = (short)WrapLow(step1[2] - step1[5]);
+ step2[6] = (short)WrapLow(step1[1] - step1[6]);
+ step2[7] = (short)WrapLow(step1[0] - step1[7]);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * CosPi16_64;
+ temp2 = (step1[10] + step1[13]) * CosPi16_64;
+ step2[10] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[13] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (-step1[11] + step1[12]) * CosPi16_64;
+ temp2 = (step1[11] + step1[12]) * CosPi16_64;
+ step2[11] = (short)WrapLow(DctConstRoundShift(temp1));
+ step2[12] = (short)WrapLow(DctConstRoundShift(temp2));
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ step2[16] = (short)WrapLow(step1[16] + step1[23]);
+ step2[17] = (short)WrapLow(step1[17] + step1[22]);
+ step2[18] = (short)WrapLow(step1[18] + step1[21]);
+ step2[19] = (short)WrapLow(step1[19] + step1[20]);
+ step2[20] = (short)WrapLow(step1[19] - step1[20]);
+ step2[21] = (short)WrapLow(step1[18] - step1[21]);
+ step2[22] = (short)WrapLow(step1[17] - step1[22]);
+ step2[23] = (short)WrapLow(step1[16] - step1[23]);
+
+ step2[24] = (short)WrapLow(-step1[24] + step1[31]);
+ step2[25] = (short)WrapLow(-step1[25] + step1[30]);
+ step2[26] = (short)WrapLow(-step1[26] + step1[29]);
+ step2[27] = (short)WrapLow(-step1[27] + step1[28]);
+ step2[28] = (short)WrapLow(step1[27] + step1[28]);
+ step2[29] = (short)WrapLow(step1[26] + step1[29]);
+ step2[30] = (short)WrapLow(step1[25] + step1[30]);
+ step2[31] = (short)WrapLow(step1[24] + step1[31]);
+
+ // stage 7
+ step1[0] = (short)WrapLow(step2[0] + step2[15]);
+ step1[1] = (short)WrapLow(step2[1] + step2[14]);
+ step1[2] = (short)WrapLow(step2[2] + step2[13]);
+ step1[3] = (short)WrapLow(step2[3] + step2[12]);
+ step1[4] = (short)WrapLow(step2[4] + step2[11]);
+ step1[5] = (short)WrapLow(step2[5] + step2[10]);
+ step1[6] = (short)WrapLow(step2[6] + step2[9]);
+ step1[7] = (short)WrapLow(step2[7] + step2[8]);
+ step1[8] = (short)WrapLow(step2[7] - step2[8]);
+ step1[9] = (short)WrapLow(step2[6] - step2[9]);
+ step1[10] = (short)WrapLow(step2[5] - step2[10]);
+ step1[11] = (short)WrapLow(step2[4] - step2[11]);
+ step1[12] = (short)WrapLow(step2[3] - step2[12]);
+ step1[13] = (short)WrapLow(step2[2] - step2[13]);
+ step1[14] = (short)WrapLow(step2[1] - step2[14]);
+ step1[15] = (short)WrapLow(step2[0] - step2[15]);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ step1[18] = step2[18];
+ step1[19] = step2[19];
+ temp1 = (-step2[20] + step2[27]) * CosPi16_64;
+ temp2 = (step2[20] + step2[27]) * CosPi16_64;
+ step1[20] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[27] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (-step2[21] + step2[26]) * CosPi16_64;
+ temp2 = (step2[21] + step2[26]) * CosPi16_64;
+ step1[21] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[26] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (-step2[22] + step2[25]) * CosPi16_64;
+ temp2 = (step2[22] + step2[25]) * CosPi16_64;
+ step1[22] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[25] = (short)WrapLow(DctConstRoundShift(temp2));
+ temp1 = (-step2[23] + step2[24]) * CosPi16_64;
+ temp2 = (step2[23] + step2[24]) * CosPi16_64;
+ step1[23] = (short)WrapLow(DctConstRoundShift(temp1));
+ step1[24] = (short)WrapLow(DctConstRoundShift(temp2));
+ step1[28] = step2[28];
+ step1[29] = step2[29];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // final stage
+ output[0] = WrapLow(step1[0] + step1[31]);
+ output[1] = WrapLow(step1[1] + step1[30]);
+ output[2] = WrapLow(step1[2] + step1[29]);
+ output[3] = WrapLow(step1[3] + step1[28]);
+ output[4] = WrapLow(step1[4] + step1[27]);
+ output[5] = WrapLow(step1[5] + step1[26]);
+ output[6] = WrapLow(step1[6] + step1[25]);
+ output[7] = WrapLow(step1[7] + step1[24]);
+ output[8] = WrapLow(step1[8] + step1[23]);
+ output[9] = WrapLow(step1[9] + step1[22]);
+ output[10] = WrapLow(step1[10] + step1[21]);
+ output[11] = WrapLow(step1[11] + step1[20]);
+ output[12] = WrapLow(step1[12] + step1[19]);
+ output[13] = WrapLow(step1[13] + step1[18]);
+ output[14] = WrapLow(step1[14] + step1[17]);
+ output[15] = WrapLow(step1[15] + step1[16]);
+ output[16] = WrapLow(step1[15] - step1[16]);
+ output[17] = WrapLow(step1[14] - step1[17]);
+ output[18] = WrapLow(step1[13] - step1[18]);
+ output[19] = WrapLow(step1[12] - step1[19]);
+ output[20] = WrapLow(step1[11] - step1[20]);
+ output[21] = WrapLow(step1[10] - step1[21]);
+ output[22] = WrapLow(step1[9] - step1[22]);
+ output[23] = WrapLow(step1[8] - step1[23]);
+ output[24] = WrapLow(step1[7] - step1[24]);
+ output[25] = WrapLow(step1[6] - step1[25]);
+ output[26] = WrapLow(step1[5] - step1[26]);
+ output[27] = WrapLow(step1[4] - step1[27]);
+ output[28] = WrapLow(step1[3] - step1[28]);
+ output[29] = WrapLow(step1[2] - step1[29]);
+ output[30] = WrapLow(step1[1] - step1[30]);
+ output[31] = WrapLow(step1[0] - step1[31]);
+ }
+
+ // 32x32 inverse DCT, general case: all 32 coefficient rows are examined.
+ // Each row goes through Idct32 (rows whose coefficients are all zero in their low
+ // 16 bits are written as zeros directly); each column of that intermediate result
+ // then goes through Idct32 again, is rounded by a shift of 6 and added to dest.
+ //   input:  coefficients, consumed as 32 consecutive rows of 32 values.
+ //   dest:   pixels updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ public static void Idct32x321024Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     Span<int> rowPass = stackalloc int[32 * 32];
+     Span<int> rowCursor = rowPass;
+     Span<int> columnIn = stackalloc int[32];
+     Span<int> columnOut = stackalloc int[32];
+
+     // Row pass
+     for (int row = 0; row < 32; ++row)
+     {
+         // Only the low 16 bits of every coefficient take part in the zero test.
+         bool rowHasCoeff = false;
+         for (int k = 0; k < 32; ++k)
+         {
+             if ((short)input[k] != 0)
+             {
+                 rowHasCoeff = true;
+             }
+         }
+
+         if (!rowHasCoeff)
+         {
+             rowCursor.Slice(0, 32).Fill(0);
+         }
+         else
+         {
+             Idct32(input, rowCursor);
+         }
+
+         // Advance both windows to the next row.
+         input = input.Slice(32);
+         rowCursor = rowCursor.Slice(32);
+     }
+
+     // Column pass
+     for (int col = 0; col < 32; ++col)
+     {
+         // Gather one column of the row-pass result into a contiguous buffer.
+         for (int row = 0; row < 32; ++row)
+         {
+             columnIn[row] = rowPass[row * 32 + col];
+         }
+
+         Idct32(columnIn, columnOut);
+
+         for (int row = 0; row < 32; ++row)
+         {
+             int pixel = row * stride + col;
+             dest[pixel] = ClipPixelAdd(dest[pixel], BitUtils.RoundPowerOfTwo(columnOut[row], 6));
+         }
+     }
+ }
+
+ // 32x32 inverse DCT for blocks where only the upper-left 16x16 coefficients are
+ // non-zero: the row pass transforms the first 16 rows only, the column pass then
+ // transforms all 32 columns and adds the result (rounded by a shift of 6) to dest.
+ //   input:  coefficients, consumed as consecutive rows of 32 values (first 16 rows read).
+ //   dest:   pixels updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ public static void Idct32x32135Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[32 * 32];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[32];
+     Span<int> tempOut = stackalloc int[32];
+
+     // Rows 16..31 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear
+     // that part of the buffer explicitly instead of relying on the runtime.
+     output.Slice(16 * 32).Fill(0);
+
+     // Rows
+     // Only upper-left 16x16 has non-zero coeff
+     for (i = 0; i < 16; ++i)
+     {
+         Idct32(input, outptr);
+         input = input.Slice(32);
+         outptr = outptr.Slice(32);
+     }
+
+     // Columns
+     for (i = 0; i < 32; ++i)
+     {
+         for (j = 0; j < 32; ++j)
+         {
+             tempIn[j] = output[j * 32 + i];
+         }
+
+         Idct32(tempIn, tempOut);
+         for (j = 0; j < 32; ++j)
+         {
+             dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
+         }
+     }
+ }
+
+ // 32x32 inverse DCT for blocks where only the upper-left 8x8 coefficients are
+ // non-zero: the row pass transforms the first 8 rows only, the column pass then
+ // transforms all 32 columns and adds the result (rounded by a shift of 6) to dest.
+ //   input:  coefficients, consumed as consecutive rows of 32 values (first 8 rows read).
+ //   dest:   pixels updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ public static void Idct32x3234Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[32 * 32];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[32];
+     Span<int> tempOut = stackalloc int[32];
+
+     // Rows 8..31 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear
+     // that part of the buffer explicitly instead of relying on the runtime.
+     output.Slice(8 * 32).Fill(0);
+
+     // Rows
+     // Only upper-left 8x8 has non-zero coeff
+     for (i = 0; i < 8; ++i)
+     {
+         Idct32(input, outptr);
+         input = input.Slice(32);
+         outptr = outptr.Slice(32);
+     }
+
+     // Columns
+     for (i = 0; i < 32; ++i)
+     {
+         for (j = 0; j < 32; ++j)
+         {
+             tempIn[j] = output[j * 32 + i];
+         }
+
+         Idct32(tempIn, tempOut);
+         for (j = 0; j < 32; ++j)
+         {
+             dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
+         }
+     }
+ }
+
+ // 32x32 inverse DCT for the DC-only case: a single value is derived from input[0]
+ // (two multiply/round steps by CosPi16_64, then a rounding shift by 6) and added to
+ // every pixel of the 32x32 block in dest.
+ //   input:  coefficients; only input[0] is read, and only its low 16 bits are used.
+ //   dest:   pixels updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ public static void Idct32x321Add(ReadOnlySpan<int> input, Span<byte> dest, int stride)
+ {
+     int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64));
+
+     output = WrapLow(DctConstRoundShift(output * CosPi16_64));
+     long a1 = BitUtils.RoundPowerOfTwo(output, 6);
+
+     // Address rows by offset instead of re-slicing dest after every row: slicing by
+     // stride after the last row would throw when dest holds exactly the
+     // 31 * stride + 32 elements that are written, which the other 32x32 variants accept.
+     for (int j = 0; j < 32; ++j)
+     {
+         int rowStart = j * stride;
+         for (int i = 0; i < 32; ++i)
+         {
+             dest[rowStart + i] = ClipPixelAdd(dest[rowStart + i], a1);
+         }
+     }
+ }
+
+ // 4x4 inverse Walsh-Hadamard transform whose result is added to a 16-bit destination.
+ // The first loop transforms four groups of four consecutive inputs into a 16-entry
+ // scratch buffer; the second loop transforms that buffer column-wise and adds each
+ // result to dest through HighbdClipPixelAdd.
+ //   input:  coefficients; 16 values are read, each shifted right by UnitQuantShift.
+ //   dest:   updated in place; column c, row r is addressed as dest[r * stride + c].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdWrapLow / HighbdClipPixelAdd
+ //           (presumably the sample bit depth - confirm against those helpers).
+ public static void HighbdIwht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+ /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+ 0.5 shifts per pixel. */
+ int i;
+ Span<int> output = stackalloc int[16];
+ long a1, b1, c1, d1, e1;
+ ReadOnlySpan<int> ip = input;
+ Span<int> op = output;
+
+ // First pass: one group of four consecutive inputs per iteration.
+ for (i = 0; i < 4; i++)
+ {
+ // Note the load order: a, c, d, b come from positions 0, 1, 2, 3.
+ a1 = ip[0] >> UnitQuantShift;
+ c1 = ip[1] >> UnitQuantShift;
+ d1 = ip[2] >> UnitQuantShift;
+ b1 = ip[3] >> UnitQuantShift;
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ op[0] = HighbdWrapLow(a1, bd);
+ op[1] = HighbdWrapLow(b1, bd);
+ op[2] = HighbdWrapLow(c1, bd);
+ op[3] = HighbdWrapLow(d1, bd);
+ // Move both windows forward by one group of four.
+ ip = ip.Slice(4);
+ op = op.Slice(4);
+ }
+
+ // Second pass: one column of the scratch buffer per iteration (elements 4 apart).
+ ReadOnlySpan<int> ip2 = output;
+ for (i = 0; i < 4; i++)
+ {
+ a1 = ip2[4 * 0];
+ c1 = ip2[4 * 1];
+ d1 = ip2[4 * 2];
+ b1 = ip2[4 * 3];
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ // The four results go to four vertically adjacent destination entries.
+ dest[stride * 0] = HighbdClipPixelAdd(dest[stride * 0], HighbdWrapLow(a1, bd), bd);
+ dest[stride * 1] = HighbdClipPixelAdd(dest[stride * 1], HighbdWrapLow(b1, bd), bd);
+ dest[stride * 2] = HighbdClipPixelAdd(dest[stride * 2], HighbdWrapLow(c1, bd), bd);
+ dest[stride * 3] = HighbdClipPixelAdd(dest[stride * 3], HighbdWrapLow(d1, bd), bd);
+
+ // Step one column to the right in both the scratch buffer and dest.
+ ip2 = ip2.Slice(1);
+ dest = dest.Slice(1);
+ }
+ }
+
+ // 4x4 inverse Walsh-Hadamard transform for the case where only input[0] is used.
+ // input[0] is shifted right by UnitQuantShift and split into two parts; the first
+ // part seeds column 0 and the second part seeds columns 1..3. Each column value is
+ // then split the same way: one part is added to row 0 and the other to rows 1..3.
+ //   input:  coefficients; only input[0] is read.
+ //   dest:   updated in place; column c, row r is addressed as dest[r * stride + c].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdWrapLow / HighbdClipPixelAdd.
+ public static void HighbdIwht4x41Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     long first = input[0] >> UnitQuantShift;
+     long rest = first >> 1;
+     first -= rest;
+
+     // Per-column seed values.
+     Span<int> columns = stackalloc int[4];
+     int wrappedRest = HighbdWrapLow(rest, bd);
+     columns[0] = HighbdWrapLow(first, bd);
+     columns[3] = wrappedRest;
+     columns[2] = wrappedRest;
+     columns[1] = wrappedRest;
+
+     for (int col = 0; col < 4; col++)
+     {
+         long lower = columns[col] >> 1;
+         long top = columns[col] - lower;
+
+         dest[0] = HighbdClipPixelAdd(dest[0], top, bd);
+         dest[stride] = HighbdClipPixelAdd(dest[stride], lower, bd);
+         dest[stride * 2] = HighbdClipPixelAdd(dest[stride * 2], lower, bd);
+         dest[stride * 3] = HighbdClipPixelAdd(dest[stride * 3], lower, bd);
+
+         // Step one column to the right.
+         dest = dest.Slice(1);
+     }
+ }
+
+ // 4-point high-bit-depth inverse ADST (per the method name).
+ // Reads input[0..3] and writes output[0..3]. When DetectInvalidHighbdInput flags the
+ // input, or when all four inputs are zero, the four outputs are set to zero instead.
+ //   bd: forwarded unchanged to HighbdWrapLow.
+ public static void HighbdIadst4(ReadOnlySpan<int> input, Span<int> output, int bd)
+ {
+     int in0 = input[0];
+     int in1 = input[1];
+     int in2 = input[2];
+     int in3 = input[3];
+
+     bool invalid = DetectInvalidHighbdInput(input, 4) != 0;
+     if (invalid)
+     {
+         Debug.Assert(false, "invalid highbd txfm input");
+     }
+
+     if (invalid || (in0 | in1 | in2 | in3) == 0)
+     {
+         output.Slice(0, 4).Fill(0);
+         return;
+     }
+
+     // Every product is formed in 64-bit arithmetic.
+     long sumA = (long)SinPi1_9 * in0 + (long)SinPi4_9 * in2 + (long)SinPi2_9 * in3;
+     long sumB = (long)SinPi2_9 * in0 - (long)SinPi1_9 * in2 - (long)SinPi4_9 * in3;
+     long mid = (long)SinPi3_9 * in1;
+     long wrapped = HighbdWrapLow(in0 - in2 + in3, bd);
+     long cross = SinPi3_9 * wrapped;
+
+     // 1-D transform scaling factor is sqrt(2).
+     // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+     // + 1b (addition) = 29b.
+     // Hence the output bit depth is 15b.
+     output[0] = HighbdWrapLow(DctConstRoundShift(sumA + mid), bd);
+     output[1] = HighbdWrapLow(DctConstRoundShift(sumB + mid), bd);
+     output[2] = HighbdWrapLow(DctConstRoundShift(cross), bd);
+     output[3] = HighbdWrapLow(DctConstRoundShift(sumA + sumB - mid), bd);
+ }
+
+ // 4-point high-bit-depth inverse DCT (per the method name).
+ // Reads input[0..3] and writes output[0..3]; when DetectInvalidHighbdInput flags the
+ // input the four outputs are set to zero instead. All four inputs are consumed before
+ // any output is written, so input and output may refer to the same memory.
+ //   bd: forwarded unchanged to HighbdWrapLow.
+ public static void HighbdIdct4(ReadOnlySpan<int> input, Span<int> output, int bd)
+ {
+     if (DetectInvalidHighbdInput(input, 4) != 0)
+     {
+         Debug.Assert(false, "invalid highbd txfm input");
+         output.Slice(0, 4).Fill(0);
+         return;
+     }
+
+     // stage 1: even inputs (0, 2)
+     long evenSum = (input[0] + input[2]) * (long)CosPi16_64;
+     long evenDiff = (input[0] - input[2]) * (long)CosPi16_64;
+     int even0 = HighbdWrapLow(DctConstRoundShift(evenSum), bd);
+     int even1 = HighbdWrapLow(DctConstRoundShift(evenDiff), bd);
+
+     // stage 1: odd inputs (1, 3)
+     long oddLow = input[1] * (long)CosPi24_64 - input[3] * (long)CosPi8_64;
+     long oddHigh = input[1] * (long)CosPi8_64 + input[3] * (long)CosPi24_64;
+     int odd0 = HighbdWrapLow(DctConstRoundShift(oddLow), bd);
+     int odd1 = HighbdWrapLow(DctConstRoundShift(oddHigh), bd);
+
+     // stage 2: butterfly of the two halves
+     output[0] = HighbdWrapLow(even0 + odd1, bd);
+     output[1] = HighbdWrapLow(even1 + odd0, bd);
+     output[2] = HighbdWrapLow(even1 - odd0, bd);
+     output[3] = HighbdWrapLow(even0 - odd1, bd);
+ }
+
+ // 4x4 high-bit-depth inverse DCT: HighbdIdct4 is applied to each of the 4 rows, then
+ // to each of the 4 columns; the column results are rounded by a shift of 4 and added
+ // to dest.
+ //   input:  coefficients, consumed as 4 consecutive rows of 4 values.
+ //   dest:   updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdIdct4 / HighbdClipPixelAdd.
+ public static void HighbdIdct4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     Span<int> rowPass = stackalloc int[4 * 4];
+     Span<int> rowCursor = rowPass;
+     Span<int> columnIn = stackalloc int[4];
+     Span<int> columnOut = stackalloc int[4];
+
+     // Row pass
+     for (int row = 0; row < 4; ++row)
+     {
+         HighbdIdct4(input, rowCursor, bd);
+         input = input.Slice(4);
+         rowCursor = rowCursor.Slice(4);
+     }
+
+     // Column pass
+     for (int col = 0; col < 4; ++col)
+     {
+         // Gather one column of the row-pass result into a contiguous buffer.
+         for (int row = 0; row < 4; ++row)
+         {
+             columnIn[row] = rowPass[row * 4 + col];
+         }
+
+         HighbdIdct4(columnIn, columnOut, bd);
+
+         for (int row = 0; row < 4; ++row)
+         {
+             int pixel = row * stride + col;
+             dest[pixel] = HighbdClipPixelAdd(dest[pixel], BitUtils.RoundPowerOfTwo(columnOut[row], 4), bd);
+         }
+     }
+ }
+
+ // 4x4 high-bit-depth inverse DCT for the DC-only case: a single value is derived from
+ // input[0] (two multiply/round steps by CosPi16_64, then a rounding shift by 4) and
+ // added to every entry of the 4x4 block in dest.
+ //   input:  coefficients; only input[0] is read.
+ //   dest:   updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdWrapLow / HighbdClipPixelAdd.
+ public static void HighbdIdct4x41Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
+
+     output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd);
+     long a1 = BitUtils.RoundPowerOfTwo(output, 4);
+
+     // Address rows by offset instead of re-slicing dest after every row: slicing by
+     // stride after the last row would throw when dest holds exactly the
+     // 3 * stride + 4 elements that are written, which HighbdIdct4x416Add accepts.
+     for (int row = 0; row < 4; row++)
+     {
+         int rowStart = row * stride;
+         for (int col = 0; col < 4; col++)
+         {
+             dest[rowStart + col] = HighbdClipPixelAdd(dest[rowStart + col], a1, bd);
+         }
+     }
+ }
+
+ // 8-point high-bit-depth inverse ADST (per the method name).
+ // Reads input[0..7] and writes output[0..7]. When DetectInvalidHighbdInput flags the
+ // input, or when all eight inputs are zero, the eight outputs are set to zero instead.
+ // The x* variables hold the working values of each stage and are overwritten in
+ // place; the s* variables hold 64-bit intermediates.
+ //   bd: forwarded unchanged to HighbdWrapLow.
+ public static void HighbdIadst8(ReadOnlySpan<int> input, Span<int> output, int bd)
+ {
+ long s0, s1, s2, s3, s4, s5, s6, s7;
+ // Inputs are loaded in permuted order: 7, 0, 5, 2, 3, 4, 1, 6.
+ int x0 = input[7];
+ int x1 = input[0];
+ int x2 = input[5];
+ int x3 = input[2];
+ int x4 = input[3];
+ int x5 = input[4];
+ int x6 = input[1];
+ int x7 = input[6];
+
+ // Rejected input: assert in debug builds, produce an all-zero result otherwise.
+ if (DetectInvalidHighbdInput(input, 8) != 0)
+ {
+ Debug.Assert(false, "invalid highbd txfm input");
+ output.Slice(0, 8).Fill(0);
+ return;
+ }
+
+ // Shortcut: all-zero input gives all-zero output.
+ if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7) == 0)
+ {
+ output.Slice(0, 8).Fill(0);
+ return;
+ }
+
+ // stage 1
+ // Constants are widened to long first so every product is formed in 64 bits.
+ s0 = (long)CosPi2_64 * x0 + (long)CosPi30_64 * x1;
+ s1 = (long)CosPi30_64 * x0 - (long)CosPi2_64 * x1;
+ s2 = (long)CosPi10_64 * x2 + (long)CosPi22_64 * x3;
+ s3 = (long)CosPi22_64 * x2 - (long)CosPi10_64 * x3;
+ s4 = (long)CosPi18_64 * x4 + (long)CosPi14_64 * x5;
+ s5 = (long)CosPi14_64 * x4 - (long)CosPi18_64 * x5;
+ s6 = (long)CosPi26_64 * x6 + (long)CosPi6_64 * x7;
+ s7 = (long)CosPi6_64 * x6 - (long)CosPi26_64 * x7;
+
+ x0 = HighbdWrapLow(DctConstRoundShift(s0 + s4), bd);
+ x1 = HighbdWrapLow(DctConstRoundShift(s1 + s5), bd);
+ x2 = HighbdWrapLow(DctConstRoundShift(s2 + s6), bd);
+ x3 = HighbdWrapLow(DctConstRoundShift(s3 + s7), bd);
+ x4 = HighbdWrapLow(DctConstRoundShift(s0 - s4), bd);
+ x5 = HighbdWrapLow(DctConstRoundShift(s1 - s5), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s2 - s6), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s3 - s7), bd);
+
+ // stage 2
+ // x0..x3 pass through unscaled (no round shift below); x4..x7 are rotated.
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = (long)CosPi8_64 * x4 + (long)CosPi24_64 * x5;
+ s5 = (long)CosPi24_64 * x4 - (long)CosPi8_64 * x5;
+ s6 = (long)(-CosPi24_64) * x6 + (long)CosPi8_64 * x7;
+ s7 = (long)CosPi8_64 * x6 + (long)CosPi24_64 * x7;
+
+ x0 = HighbdWrapLow(s0 + s2, bd);
+ x1 = HighbdWrapLow(s1 + s3, bd);
+ x2 = HighbdWrapLow(s0 - s2, bd);
+ x3 = HighbdWrapLow(s1 - s3, bd);
+ x4 = HighbdWrapLow(DctConstRoundShift(s4 + s6), bd);
+ x5 = HighbdWrapLow(DctConstRoundShift(s5 + s7), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s4 - s6), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s5 - s7), bd);
+
+ // stage 3
+ // Only x2, x3, x6 and x7 change in this stage.
+ s2 = (long)CosPi16_64 * (x2 + x3);
+ s3 = (long)CosPi16_64 * (x2 - x3);
+ s6 = (long)CosPi16_64 * (x6 + x7);
+ s7 = (long)CosPi16_64 * (x6 - x7);
+
+ x2 = HighbdWrapLow(DctConstRoundShift(s2), bd);
+ x3 = HighbdWrapLow(DctConstRoundShift(s3), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s6), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s7), bd);
+
+ // Outputs are written in permuted order, with the odd-indexed ones negated.
+ output[0] = HighbdWrapLow(x0, bd);
+ output[1] = HighbdWrapLow(-x4, bd);
+ output[2] = HighbdWrapLow(x6, bd);
+ output[3] = HighbdWrapLow(-x2, bd);
+ output[4] = HighbdWrapLow(x3, bd);
+ output[5] = HighbdWrapLow(-x7, bd);
+ output[6] = HighbdWrapLow(x5, bd);
+ output[7] = HighbdWrapLow(-x1, bd);
+ }
+
+ // 8-point high-bit-depth inverse DCT (per the method name).
+ // Reads input[0..7] and writes output[0..7]; when DetectInvalidHighbdInput flags the
+ // input the eight outputs are set to zero instead. The even-indexed inputs are handled
+ // by HighbdIdct4, the odd-indexed inputs by the "odd half" stages below, and the two
+ // halves are combined in stage 4.
+ //   bd: forwarded unchanged to HighbdWrapLow / HighbdIdct4.
+ public static void HighbdIdct8(ReadOnlySpan<int> input, Span<int> output, int bd)
+ {
+ Span<int> step1 = stackalloc int[8];
+ Span<int> step2 = stackalloc int[8];
+ long temp1, temp2;
+
+ // Rejected input: assert in debug builds, produce an all-zero result otherwise.
+ if (DetectInvalidHighbdInput(input, 8) != 0)
+ {
+ Debug.Assert(false, "invalid highbd txfm input");
+ output.Slice(0, 8).Fill(0);
+ return;
+ }
+
+ // stage 1
+ // Even-indexed inputs go to step1[0..3] in the order 0, 2, 4, 6.
+ step1[0] = input[0];
+ step1[2] = input[4];
+ step1[1] = input[2];
+ step1[3] = input[6];
+ // Odd-indexed inputs are rotated in pairs (1, 7) and (5, 3) into step1[4..7].
+ temp1 = input[1] * (long)CosPi28_64 - input[7] * (long)CosPi4_64;
+ temp2 = input[1] * (long)CosPi4_64 + input[7] * (long)CosPi28_64;
+ step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = input[5] * (long)CosPi12_64 - input[3] * (long)CosPi20_64;
+ temp2 = input[5] * (long)CosPi20_64 + input[3] * (long)CosPi12_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ // stage 2 & stage 3 - even half
+ // In-place call: HighbdIdct4 reads elements 0..3 and overwrites the same four
+ // elements; step1[4..7] are left untouched by it.
+ HighbdIdct4(step1, step1, bd);
+
+ // stage 2 - odd half
+ step2[4] = HighbdWrapLow(step1[4] + step1[5], bd);
+ step2[5] = HighbdWrapLow(step1[4] - step1[5], bd);
+ step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd);
+ step2[7] = HighbdWrapLow(step1[6] + step1[7], bd);
+
+ // stage 3 - odd half
+ // Entries 4 and 7 are copied through; 5 and 6 are scaled by CosPi16_64.
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * (long)CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * (long)CosPi16_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[7] = step2[7];
+
+ // stage 4
+ // Final butterfly: output[k] and output[7 - k] are the sum and difference of
+ // step1[k] and step1[7 - k].
+ output[0] = HighbdWrapLow(step1[0] + step1[7], bd);
+ output[1] = HighbdWrapLow(step1[1] + step1[6], bd);
+ output[2] = HighbdWrapLow(step1[2] + step1[5], bd);
+ output[3] = HighbdWrapLow(step1[3] + step1[4], bd);
+ output[4] = HighbdWrapLow(step1[3] - step1[4], bd);
+ output[5] = HighbdWrapLow(step1[2] - step1[5], bd);
+ output[6] = HighbdWrapLow(step1[1] - step1[6], bd);
+ output[7] = HighbdWrapLow(step1[0] - step1[7], bd);
+ }
+
+ // 8x8 high-bit-depth inverse DCT, general case: HighbdIdct8 is applied to each of the
+ // 8 rows, then to each of the 8 columns; the column results are rounded by a shift of
+ // 5 and added to dest.
+ //   input:  coefficients, consumed as 8 consecutive rows of 8 values.
+ //   dest:   updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdIdct8 / HighbdClipPixelAdd.
+ public static void HighbdIdct8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     Span<int> rowPass = stackalloc int[8 * 8];
+     Span<int> rowCursor = rowPass;
+     Span<int> columnIn = stackalloc int[8];
+     Span<int> columnOut = stackalloc int[8];
+
+     // Row pass
+     for (int row = 0; row < 8; ++row)
+     {
+         HighbdIdct8(input, rowCursor, bd);
+         input = input.Slice(8);
+         rowCursor = rowCursor.Slice(8);
+     }
+
+     // Column pass
+     for (int col = 0; col < 8; ++col)
+     {
+         // Gather one column of the row-pass result into a contiguous buffer.
+         for (int row = 0; row < 8; ++row)
+         {
+             columnIn[row] = rowPass[row * 8 + col];
+         }
+
+         HighbdIdct8(columnIn, columnOut, bd);
+
+         for (int row = 0; row < 8; ++row)
+         {
+             int pixel = row * stride + col;
+             dest[pixel] = HighbdClipPixelAdd(dest[pixel], BitUtils.RoundPowerOfTwo(columnOut[row], 5), bd);
+         }
+     }
+ }
+
+ // 8x8 high-bit-depth inverse DCT for blocks where only the first 4 rows hold non-zero
+ // coefficients: the row pass transforms those 4 rows only, the column pass then
+ // transforms all 8 columns and adds the result (rounded by a shift of 5) to dest.
+ //   input:  coefficients, consumed as consecutive rows of 8 values (first 4 rows read).
+ //   dest:   updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdIdct8 / HighbdClipPixelAdd.
+ public static void HighbdIdct8x812Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[8 * 8];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[8];
+     Span<int> tempOut = stackalloc int[8];
+
+     // Rows 4..7 are never written by the row pass but are read by the column pass.
+     // The contents of stackalloc memory are not guaranteed to be zero, so clear
+     // that part of the buffer explicitly instead of relying on the runtime.
+     output.Slice(4 * 8).Fill(0);
+
+     // First transform rows
+     // Only the first 4 rows have non-zero coefficients
+     for (i = 0; i < 4; ++i)
+     {
+         HighbdIdct8(input, outptr, bd);
+         input = input.Slice(8);
+         outptr = outptr.Slice(8);
+     }
+
+     // Then transform columns
+     for (i = 0; i < 8; ++i)
+     {
+         for (j = 0; j < 8; ++j)
+         {
+             tempIn[j] = output[j * 8 + i];
+         }
+
+         HighbdIdct8(tempIn, tempOut, bd);
+         for (j = 0; j < 8; ++j)
+         {
+             dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
+         }
+     }
+ }
+
+ // 8x8 high-bit-depth inverse DCT for the DC-only case: a single value is derived from
+ // input[0] (two multiply/round steps by CosPi16_64, then a rounding shift by 5) and
+ // added to every entry of the 8x8 block in dest.
+ // NOTE(review): the name does not follow the naming of the neighbouring methods
+ // (e.g. HighbdIdct4x41Add); it is kept as-is because callers reference it.
+ //   input:  coefficients; only input[0] is read.
+ //   dest:   updated in place; (row, col) is addressed as dest[row * stride + col].
+ //   stride: distance in elements between vertically adjacent entries of dest.
+ //   bd:     forwarded unchanged to HighbdWrapLow / HighbdClipPixelAdd.
+ public static void vpx_Highbdidct8x8_1_add_c(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
+
+     output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd);
+     long a1 = BitUtils.RoundPowerOfTwo(output, 5);
+
+     // Address rows by offset instead of re-slicing dest after every row: slicing by
+     // stride after the last row would throw when dest holds exactly the
+     // 7 * stride + 8 elements that are written, which HighbdIdct8x864Add accepts.
+     for (int j = 0; j < 8; ++j)
+     {
+         int rowStart = j * stride;
+         for (int i = 0; i < 8; ++i)
+         {
+             dest[rowStart + i] = HighbdClipPixelAdd(dest[rowStart + i], a1, bd);
+         }
+     }
+ }
+
+ public static void HighbdIadst16(ReadOnlySpan<int> input, Span<int> output, int bd)
+ {
+ long s0, s1, s2, s3, s4, s5, s6, s7, s8;
+ long s9, s10, s11, s12, s13, s14, s15;
+ int x0 = input[15];
+ int x1 = input[0];
+ int x2 = input[13];
+ int x3 = input[2];
+ int x4 = input[11];
+ int x5 = input[4];
+ int x6 = input[9];
+ int x7 = input[6];
+ int x8 = input[7];
+ int x9 = input[8];
+ int x10 = input[5];
+ int x11 = input[10];
+ int x12 = input[3];
+ int x13 = input[12];
+ int x14 = input[1];
+ int x15 = input[14];
+ if (DetectInvalidHighbdInput(input, 16) != 0)
+ {
+ Debug.Assert(false, "invalid highbd txfm input");
+ output.Slice(0, 16).Fill(0);
+ return;
+ }
+
+ if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15) == 0)
+ {
+ output.Slice(0, 16).Fill(0);
+ return;
+ }
+
+ // stage 1
+ s0 = x0 * (long)CosPi1_64 + x1 * (long)CosPi31_64;
+ s1 = x0 * (long)CosPi31_64 - x1 * (long)CosPi1_64;
+ s2 = x2 * (long)CosPi5_64 + x3 * (long)CosPi27_64;
+ s3 = x2 * (long)CosPi27_64 - x3 * (long)CosPi5_64;
+ s4 = x4 * (long)CosPi9_64 + x5 * (long)CosPi23_64;
+ s5 = x4 * (long)CosPi23_64 - x5 * (long)CosPi9_64;
+ s6 = x6 * (long)CosPi13_64 + x7 * (long)CosPi19_64;
+ s7 = x6 * (long)CosPi19_64 - x7 * (long)CosPi13_64;
+ s8 = x8 * (long)CosPi17_64 + x9 * (long)CosPi15_64;
+ s9 = x8 * (long)CosPi15_64 - x9 * (long)CosPi17_64;
+ s10 = x10 * (long)CosPi21_64 + x11 * (long)CosPi11_64;
+ s11 = x10 * (long)CosPi11_64 - x11 * (long)CosPi21_64;
+ s12 = x12 * (long)CosPi25_64 + x13 * (long)CosPi7_64;
+ s13 = x12 * (long)CosPi7_64 - x13 * (long)CosPi25_64;
+ s14 = x14 * (long)CosPi29_64 + x15 * (long)CosPi3_64;
+ s15 = x14 * (long)CosPi3_64 - x15 * (long)CosPi29_64;
+
+ x0 = HighbdWrapLow(DctConstRoundShift(s0 + s8), bd);
+ x1 = HighbdWrapLow(DctConstRoundShift(s1 + s9), bd);
+ x2 = HighbdWrapLow(DctConstRoundShift(s2 + s10), bd);
+ x3 = HighbdWrapLow(DctConstRoundShift(s3 + s11), bd);
+ x4 = HighbdWrapLow(DctConstRoundShift(s4 + s12), bd);
+ x5 = HighbdWrapLow(DctConstRoundShift(s5 + s13), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s6 + s14), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s7 + s15), bd);
+ x8 = HighbdWrapLow(DctConstRoundShift(s0 - s8), bd);
+ x9 = HighbdWrapLow(DctConstRoundShift(s1 - s9), bd);
+ x10 = HighbdWrapLow(DctConstRoundShift(s2 - s10), bd);
+ x11 = HighbdWrapLow(DctConstRoundShift(s3 - s11), bd);
+ x12 = HighbdWrapLow(DctConstRoundShift(s4 - s12), bd);
+ x13 = HighbdWrapLow(DctConstRoundShift(s5 - s13), bd);
+ x14 = HighbdWrapLow(DctConstRoundShift(s6 - s14), bd);
+ x15 = HighbdWrapLow(DctConstRoundShift(s7 - s15), bd);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * (long)CosPi4_64 + x9 * (long)CosPi28_64;
+ s9 = x8 * (long)CosPi28_64 - x9 * (long)CosPi4_64;
+ s10 = x10 * (long)CosPi20_64 + x11 * (long)CosPi12_64;
+ s11 = x10 * (long)CosPi12_64 - x11 * (long)CosPi20_64;
+ s12 = -x12 * (long)CosPi28_64 + x13 * (long)CosPi4_64;
+ s13 = x12 * (long)CosPi4_64 + x13 * (long)CosPi28_64;
+ s14 = -x14 * (long)CosPi12_64 + x15 * (long)CosPi20_64;
+ s15 = x14 * (long)CosPi20_64 + x15 * (long)CosPi12_64;
+
+ x0 = HighbdWrapLow(s0 + s4, bd);
+ x1 = HighbdWrapLow(s1 + s5, bd);
+ x2 = HighbdWrapLow(s2 + s6, bd);
+ x3 = HighbdWrapLow(s3 + s7, bd);
+ x4 = HighbdWrapLow(s0 - s4, bd);
+ x5 = HighbdWrapLow(s1 - s5, bd);
+ x6 = HighbdWrapLow(s2 - s6, bd);
+ x7 = HighbdWrapLow(s3 - s7, bd);
+ x8 = HighbdWrapLow(DctConstRoundShift(s8 + s12), bd);
+ x9 = HighbdWrapLow(DctConstRoundShift(s9 + s13), bd);
+ x10 = HighbdWrapLow(DctConstRoundShift(s10 + s14), bd);
+ x11 = HighbdWrapLow(DctConstRoundShift(s11 + s15), bd);
+ x12 = HighbdWrapLow(DctConstRoundShift(s8 - s12), bd);
+ x13 = HighbdWrapLow(DctConstRoundShift(s9 - s13), bd);
+ x14 = HighbdWrapLow(DctConstRoundShift(s10 - s14), bd);
+ x15 = HighbdWrapLow(DctConstRoundShift(s11 - s15), bd);
+
+ // stage 3
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * (long)CosPi8_64 + x5 * (long)CosPi24_64;
+ s5 = x4 * (long)CosPi24_64 - x5 * (long)CosPi8_64;
+ s6 = -x6 * (long)CosPi24_64 + x7 * (long)CosPi8_64;
+ s7 = x6 * (long)CosPi8_64 + x7 * (long)CosPi24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * (long)CosPi8_64 + x13 * (long)CosPi24_64;
+ s13 = x12 * (long)CosPi24_64 - x13 * (long)CosPi8_64;
+ s14 = -x14 * (long)CosPi24_64 + x15 * (long)CosPi8_64;
+ s15 = x14 * (long)CosPi8_64 + x15 * (long)CosPi24_64;
+
+ x0 = HighbdWrapLow(s0 + s2, bd);
+ x1 = HighbdWrapLow(s1 + s3, bd);
+ x2 = HighbdWrapLow(s0 - s2, bd);
+ x3 = HighbdWrapLow(s1 - s3, bd);
+ x4 = HighbdWrapLow(DctConstRoundShift(s4 + s6), bd);
+ x5 = HighbdWrapLow(DctConstRoundShift(s5 + s7), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s4 - s6), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s5 - s7), bd);
+ x8 = HighbdWrapLow(s8 + s10, bd);
+ x9 = HighbdWrapLow(s9 + s11, bd);
+ x10 = HighbdWrapLow(s8 - s10, bd);
+ x11 = HighbdWrapLow(s9 - s11, bd);
+ x12 = HighbdWrapLow(DctConstRoundShift(s12 + s14), bd);
+ x13 = HighbdWrapLow(DctConstRoundShift(s13 + s15), bd);
+ x14 = HighbdWrapLow(DctConstRoundShift(s12 - s14), bd);
+ x15 = HighbdWrapLow(DctConstRoundShift(s13 - s15), bd);
+
+ // stage 4
+ s2 = (long)(-CosPi16_64) * (x2 + x3);
+ s3 = (long)CosPi16_64 * (x2 - x3);
+ s6 = (long)CosPi16_64 * (x6 + x7);
+ s7 = (long)CosPi16_64 * (-x6 + x7);
+ s10 = (long)CosPi16_64 * (x10 + x11);
+ s11 = (long)CosPi16_64 * (-x10 + x11);
+ s14 = (long)(-CosPi16_64) * (x14 + x15);
+ s15 = (long)CosPi16_64 * (x14 - x15);
+
+ x2 = HighbdWrapLow(DctConstRoundShift(s2), bd);
+ x3 = HighbdWrapLow(DctConstRoundShift(s3), bd);
+ x6 = HighbdWrapLow(DctConstRoundShift(s6), bd);
+ x7 = HighbdWrapLow(DctConstRoundShift(s7), bd);
+ x10 = HighbdWrapLow(DctConstRoundShift(s10), bd);
+ x11 = HighbdWrapLow(DctConstRoundShift(s11), bd);
+ x14 = HighbdWrapLow(DctConstRoundShift(s14), bd);
+ x15 = HighbdWrapLow(DctConstRoundShift(s15), bd);
+
+ output[0] = HighbdWrapLow(x0, bd);
+ output[1] = HighbdWrapLow(-x8, bd);
+ output[2] = HighbdWrapLow(x12, bd);
+ output[3] = HighbdWrapLow(-x4, bd);
+ output[4] = HighbdWrapLow(x6, bd);
+ output[5] = HighbdWrapLow(x14, bd);
+ output[6] = HighbdWrapLow(x10, bd);
+ output[7] = HighbdWrapLow(x2, bd);
+ output[8] = HighbdWrapLow(x3, bd);
+ output[9] = HighbdWrapLow(x11, bd);
+ output[10] = HighbdWrapLow(x15, bd);
+ output[11] = HighbdWrapLow(x7, bd);
+ output[12] = HighbdWrapLow(x5, bd);
+ output[13] = HighbdWrapLow(-x13, bd);
+ output[14] = HighbdWrapLow(x9, bd);
+ output[15] = HighbdWrapLow(-x1, bd);
+ }
+
+ public static void HighbdIdct16(ReadOnlySpan<int> input, Span<int> output, int bd) // 16-point 1-D inverse DCT (high bit-depth variant): reads input[0..15], writes output[0..15]; bd is forwarded to every HighbdWrapLow call.
+ {
+ Span<int> step1 = stackalloc int[16]; // step1/step2 are ping-pong scratch buffers: each stage reads one and writes the other
+ Span<int> step2 = stackalloc int[16];
+ long temp1, temp2; // 64-bit accumulators for the cosine products before DctConstRoundShift
+
+ if (DetectInvalidHighbdInput(input, 16) != 0) // input rejected by the validity check: assert in debug builds, otherwise emit 16 zeros and bail out
+ {
+ Debug.Assert(false, "invalid highbd txfm input");
+ output.Slice(0, 16).Fill(0);
+ return;
+ }
+
+ // stage 1: gather the coefficients in permuted order (input indices 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15).
+ // The numerators below are the even-coefficient indices used by HighbdIdct32's stage 1, halved.
+ step1[0] = input[0 / 2];
+ step1[1] = input[16 / 2];
+ step1[2] = input[8 / 2];
+ step1[3] = input[24 / 2];
+ step1[4] = input[4 / 2];
+ step1[5] = input[20 / 2];
+ step1[6] = input[12 / 2];
+ step1[7] = input[28 / 2];
+ step1[8] = input[2 / 2];
+ step1[9] = input[18 / 2];
+ step1[10] = input[10 / 2];
+ step1[11] = input[26 / 2];
+ step1[12] = input[6 / 2];
+ step1[13] = input[22 / 2];
+ step1[14] = input[14 / 2];
+ step1[15] = input[30 / 2];
+
+ // stage 2: 0..7 pass through; pairs (8,15), (9,14), (10,13), (11,12) are combined with cosine weights
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * (long)CosPi30_64 - step1[15] * (long)CosPi2_64;
+ temp2 = step1[8] * (long)CosPi2_64 + step1[15] * (long)CosPi30_64;
+ step2[8] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[15] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[9] * (long)CosPi14_64 - step1[14] * (long)CosPi18_64;
+ temp2 = step1[9] * (long)CosPi18_64 + step1[14] * (long)CosPi14_64;
+ step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[10] * (long)CosPi22_64 - step1[13] * (long)CosPi10_64;
+ temp2 = step1[10] * (long)CosPi10_64 + step1[13] * (long)CosPi22_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[11] * (long)CosPi6_64 - step1[12] * (long)CosPi26_64;
+ temp2 = step1[11] * (long)CosPi26_64 + step1[12] * (long)CosPi6_64;
+ step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ // stage 3: 0..3 pass through; cosine-combine pairs (4,7) and (5,6); add/subtract adjacent pairs in 8..15
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * (long)CosPi28_64 - step2[7] * (long)CosPi4_64;
+ temp2 = step2[4] * (long)CosPi4_64 + step2[7] * (long)CosPi28_64;
+ step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = step2[5] * (long)CosPi12_64 - step2[6] * (long)CosPi20_64;
+ temp2 = step2[5] * (long)CosPi20_64 + step2[6] * (long)CosPi12_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ step1[8] = HighbdWrapLow(step2[8] + step2[9], bd);
+ step1[9] = HighbdWrapLow(step2[8] - step2[9], bd);
+ step1[10] = HighbdWrapLow(-step2[10] + step2[11], bd);
+ step1[11] = HighbdWrapLow(step2[10] + step2[11], bd);
+ step1[12] = HighbdWrapLow(step2[12] + step2[13], bd);
+ step1[13] = HighbdWrapLow(step2[12] - step2[13], bd);
+ step1[14] = HighbdWrapLow(-step2[14] + step2[15], bd);
+ step1[15] = HighbdWrapLow(step2[14] + step2[15], bd);
+
+ // stage 4: cosine-combine (0,1) and (2,3); add/subtract within 4..7; cosine-combine (9,14) and (10,13); 8, 11, 12, 15 pass through
+ temp1 = (step1[0] + step1[1]) * (long)CosPi16_64;
+ temp2 = (step1[0] - step1[1]) * (long)CosPi16_64;
+ step2[0] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[1] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = step1[2] * (long)CosPi24_64 - step1[3] * (long)CosPi8_64;
+ temp2 = step1[2] * (long)CosPi8_64 + step1[3] * (long)CosPi24_64;
+ step2[2] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[3] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[4] = HighbdWrapLow(step1[4] + step1[5], bd);
+ step2[5] = HighbdWrapLow(step1[4] - step1[5], bd);
+ step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd);
+ step2[7] = HighbdWrapLow(step1[6] + step1[7], bd);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * (long)CosPi8_64 + step1[14] * (long)CosPi24_64;
+ temp2 = step1[9] * (long)CosPi24_64 + step1[14] * (long)CosPi8_64;
+ step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step1[10] * (long)CosPi24_64 - step1[13] * (long)CosPi8_64;
+ temp2 = -step1[10] * (long)CosPi8_64 + step1[13] * (long)CosPi24_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5: add/subtract within 0..3; CosPi16_64-combine (5,6) while 4 and 7 pass through; add/subtract within 8..11 and 12..15
+ step1[0] = HighbdWrapLow(step2[0] + step2[3], bd);
+ step1[1] = HighbdWrapLow(step2[1] + step2[2], bd);
+ step1[2] = HighbdWrapLow(step2[1] - step2[2], bd);
+ step1[3] = HighbdWrapLow(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * (long)CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * (long)CosPi16_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = HighbdWrapLow(step2[8] + step2[11], bd);
+ step1[9] = HighbdWrapLow(step2[9] + step2[10], bd);
+ step1[10] = HighbdWrapLow(step2[9] - step2[10], bd);
+ step1[11] = HighbdWrapLow(step2[8] - step2[11], bd);
+ step1[12] = HighbdWrapLow(-step2[12] + step2[15], bd);
+ step1[13] = HighbdWrapLow(-step2[13] + step2[14], bd);
+ step1[14] = HighbdWrapLow(step2[13] + step2[14], bd);
+ step1[15] = HighbdWrapLow(step2[12] + step2[15], bd);
+
+ // stage 6: mirrored add/subtract of k and 7-k for k = 0..3; CosPi16_64-combine (10,13) and (11,12); 8, 9, 14, 15 pass through
+ step2[0] = HighbdWrapLow(step1[0] + step1[7], bd);
+ step2[1] = HighbdWrapLow(step1[1] + step1[6], bd);
+ step2[2] = HighbdWrapLow(step1[2] + step1[5], bd);
+ step2[3] = HighbdWrapLow(step1[3] + step1[4], bd);
+ step2[4] = HighbdWrapLow(step1[3] - step1[4], bd);
+ step2[5] = HighbdWrapLow(step1[2] - step1[5], bd);
+ step2[6] = HighbdWrapLow(step1[1] - step1[6], bd);
+ step2[7] = HighbdWrapLow(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * (long)CosPi16_64;
+ temp2 = (step1[10] + step1[13]) * (long)CosPi16_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * (long)CosPi16_64;
+ temp2 = (step1[11] + step1[12]) * (long)CosPi16_64;
+ step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7: output[k] = step2[k] + step2[15-k] and output[15-k] = step2[k] - step2[15-k] for k = 0..7
+ output[0] = HighbdWrapLow(step2[0] + step2[15], bd);
+ output[1] = HighbdWrapLow(step2[1] + step2[14], bd);
+ output[2] = HighbdWrapLow(step2[2] + step2[13], bd);
+ output[3] = HighbdWrapLow(step2[3] + step2[12], bd);
+ output[4] = HighbdWrapLow(step2[4] + step2[11], bd);
+ output[5] = HighbdWrapLow(step2[5] + step2[10], bd);
+ output[6] = HighbdWrapLow(step2[6] + step2[9], bd);
+ output[7] = HighbdWrapLow(step2[7] + step2[8], bd);
+ output[8] = HighbdWrapLow(step2[7] - step2[8], bd);
+ output[9] = HighbdWrapLow(step2[6] - step2[9], bd);
+ output[10] = HighbdWrapLow(step2[5] - step2[10], bd);
+ output[11] = HighbdWrapLow(step2[4] - step2[11], bd);
+ output[12] = HighbdWrapLow(step2[3] - step2[12], bd);
+ output[13] = HighbdWrapLow(step2[2] - step2[13], bd);
+ output[14] = HighbdWrapLow(step2[1] - step2[14], bd);
+ output[15] = HighbdWrapLow(step2[0] - step2[15], bd);
+ }
+
+ public static void HighbdIdct16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // Full 16x16 inverse transform: HighbdIdct16 is applied to all 16 rows of
+     // `input`, then to all 16 columns of the intermediate result. Each column
+     // result goes through BitUtils.RoundPowerOfTwo(value, 6) and is merged into
+     // `dest` by HighbdClipPixelAdd. `dest` is addressed as row * stride + column.
+     Span<int> output = stackalloc int[16 * 16];
+     Span<int> tempIn = stackalloc int[16];
+     Span<int> tempOut = stackalloc int[16];
+
+     // Pass 1: one 16-coefficient row per iteration, advancing both windows.
+     Span<int> rowOut = output;
+     for (int row = 0; row < 16; row++)
+     {
+         HighbdIdct16(input, rowOut, bd);
+         input = input.Slice(16);
+         rowOut = rowOut.Slice(16);
+     }
+
+     // Pass 2: gather each column, transform it, and accumulate into dest.
+     for (int col = 0; col < 16; col++)
+     {
+         for (int k = 0; k < 16; k++)
+         {
+             tempIn[k] = output[k * 16 + col];
+         }
+
+         HighbdIdct16(tempIn, tempOut, bd);
+
+         for (int k = 0; k < 16; k++)
+         {
+             int pos = k * stride + col;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[k], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct16x1638Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 16x16 inverse transform for blocks whose non-zero coefficients all lie in
+     // the upper-left 8x8 area: only the first 8 rows go through the row pass,
+     // the remaining rows of the intermediate buffer are zero. All 16 columns
+     // are then transformed and accumulated into `dest` (row * stride + column).
+     const int Size = 16;
+     const int NonZeroRows = 8;
+
+     Span<int> output = stackalloc int[Size * Size];
+     Span<int> tempIn = stackalloc int[Size];
+     Span<int> tempOut = stackalloc int[Size];
+
+     // First transform rows.
+     Span<int> outptr = output;
+     for (int i = 0; i < NonZeroRows; ++i)
+     {
+         HighbdIdct16(input, outptr, bd);
+         input = input.Slice(Size);
+         outptr = outptr.Slice(Size);
+     }
+
+     // The column pass reads every row. stackalloc memory is not guaranteed to
+     // be zeroed, so clear the rows the loop above did not write.
+     outptr.Clear();
+
+     // Then transform columns.
+     for (int i = 0; i < Size; ++i)
+     {
+         for (int j = 0; j < Size; ++j)
+         {
+             tempIn[j] = output[j * Size + i];
+         }
+
+         HighbdIdct16(tempIn, tempOut, bd);
+
+         // Index dest directly instead of re-slicing by `stride` after every row:
+         // slicing past the last row throws when the span ends inside that row.
+         for (int j = 0; j < Size; ++j)
+         {
+             int pos = j * stride + i;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct16x1610Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 16x16 inverse transform for blocks whose non-zero coefficients all lie in
+     // the upper-left 4x4 area: only the first 4 rows go through the row pass,
+     // the remaining rows of the intermediate buffer are zero. All 16 columns
+     // are then transformed and accumulated into `dest` (row * stride + column).
+     const int Size = 16;
+     const int NonZeroRows = 4;
+
+     Span<int> output = stackalloc int[Size * Size];
+     Span<int> tempIn = stackalloc int[Size];
+     Span<int> tempOut = stackalloc int[Size];
+
+     // First transform rows.
+     Span<int> outptr = output;
+     for (int i = 0; i < NonZeroRows; ++i)
+     {
+         HighbdIdct16(input, outptr, bd);
+         input = input.Slice(Size);
+         outptr = outptr.Slice(Size);
+     }
+
+     // The column pass reads every row. stackalloc memory is not guaranteed to
+     // be zeroed, so clear the rows the loop above did not write.
+     outptr.Clear();
+
+     // Then transform columns.
+     for (int i = 0; i < Size; ++i)
+     {
+         for (int j = 0; j < Size; ++j)
+         {
+             tempIn[j] = output[j * Size + i];
+         }
+
+         HighbdIdct16(tempIn, tempOut, bd);
+
+         for (int j = 0; j < Size; ++j)
+         {
+             int pos = j * stride + i;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct16x161Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 16x16 inverse transform for a block where only input[0] is used: the
+     // coefficient is scaled by CosPi16_64 twice (rounded and wrapped each time),
+     // passed through BitUtils.RoundPowerOfTwo(value, 6), and the resulting
+     // single value is added to every pixel of the 16x16 area of `dest`.
+     int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
+     output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd);
+
+     long a1 = BitUtils.RoundPowerOfTwo(output, 6);
+
+     // Address dest as row * stride + column. Re-slicing dest by `stride` after
+     // each row would throw after the final row whenever the span holds fewer
+     // than 16 * stride elements, even though every pixel access is in range.
+     for (int j = 0; j < 16; ++j)
+     {
+         int rowStart = j * stride;
+         for (int i = 0; i < 16; ++i)
+         {
+             dest[rowStart + i] = HighbdClipPixelAdd(dest[rowStart + i], a1, bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct32(ReadOnlySpan<int> input, Span<int> output, int bd) // 32-point 1-D inverse DCT (high bit-depth variant): reads input[0..31], writes output[0..31]; bd is forwarded to every HighbdWrapLow call.
+ {
+ Span<int> step1 = stackalloc int[32]; // step1/step2 are ping-pong scratch buffers: each stage reads one and writes the other
+ Span<int> step2 = stackalloc int[32];
+ long temp1, temp2; // 64-bit accumulators for the cosine products before DctConstRoundShift
+
+ if (DetectInvalidHighbdInput(input, 32) != 0) // input rejected by the validity check: assert in debug builds, otherwise emit 32 zeros and bail out
+ {
+ Debug.Assert(false, "invalid highbd txfm input");
+ output.Slice(0, 32).Fill(0);
+ return;
+ }
+
+ // stage 1: even-indexed coefficients are copied (permuted) into step1[0..15];
+ // odd-indexed coefficients are cosine-combined in pairs into step1[16..31]
+ step1[0] = input[0];
+ step1[1] = input[16];
+ step1[2] = input[8];
+ step1[3] = input[24];
+ step1[4] = input[4];
+ step1[5] = input[20];
+ step1[6] = input[12];
+ step1[7] = input[28];
+ step1[8] = input[2];
+ step1[9] = input[18];
+ step1[10] = input[10];
+ step1[11] = input[26];
+ step1[12] = input[6];
+ step1[13] = input[22];
+ step1[14] = input[14];
+ step1[15] = input[30];
+
+ temp1 = input[1] * (long)CosPi31_64 - input[31] * (long)CosPi1_64;
+ temp2 = input[1] * (long)CosPi1_64 + input[31] * (long)CosPi31_64;
+ step1[16] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[31] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[17] * (long)CosPi15_64 - input[15] * (long)CosPi17_64;
+ temp2 = input[17] * (long)CosPi17_64 + input[15] * (long)CosPi15_64;
+ step1[17] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[30] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[9] * (long)CosPi23_64 - input[23] * (long)CosPi9_64;
+ temp2 = input[9] * (long)CosPi9_64 + input[23] * (long)CosPi23_64;
+ step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[25] * (long)CosPi7_64 - input[7] * (long)CosPi25_64;
+ temp2 = input[25] * (long)CosPi25_64 + input[7] * (long)CosPi7_64;
+ step1[19] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[28] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[5] * (long)CosPi27_64 - input[27] * (long)CosPi5_64;
+ temp2 = input[5] * (long)CosPi5_64 + input[27] * (long)CosPi27_64;
+ step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[21] * (long)CosPi11_64 - input[11] * (long)CosPi21_64;
+ temp2 = input[21] * (long)CosPi21_64 + input[11] * (long)CosPi11_64;
+ step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[13] * (long)CosPi19_64 - input[19] * (long)CosPi13_64;
+ temp2 = input[13] * (long)CosPi13_64 + input[19] * (long)CosPi19_64;
+ step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = input[29] * (long)CosPi3_64 - input[3] * (long)CosPi29_64;
+ temp2 = input[29] * (long)CosPi29_64 + input[3] * (long)CosPi3_64;
+ step1[23] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[24] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ // stage 2: 0..7 pass through; cosine-combine pairs (8,15), (9,14), (10,13), (11,12); add/subtract adjacent pairs in 16..31
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * (long)CosPi30_64 - step1[15] * (long)CosPi2_64;
+ temp2 = step1[8] * (long)CosPi2_64 + step1[15] * (long)CosPi30_64;
+ step2[8] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[15] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[9] * (long)CosPi14_64 - step1[14] * (long)CosPi18_64;
+ temp2 = step1[9] * (long)CosPi18_64 + step1[14] * (long)CosPi14_64;
+ step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[10] * (long)CosPi22_64 - step1[13] * (long)CosPi10_64;
+ temp2 = step1[10] * (long)CosPi10_64 + step1[13] * (long)CosPi22_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ temp1 = step1[11] * (long)CosPi6_64 - step1[12] * (long)CosPi26_64;
+ temp2 = step1[11] * (long)CosPi26_64 + step1[12] * (long)CosPi6_64;
+ step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ step2[16] = HighbdWrapLow(step1[16] + step1[17], bd);
+ step2[17] = HighbdWrapLow(step1[16] - step1[17], bd);
+ step2[18] = HighbdWrapLow(-step1[18] + step1[19], bd);
+ step2[19] = HighbdWrapLow(step1[18] + step1[19], bd);
+ step2[20] = HighbdWrapLow(step1[20] + step1[21], bd);
+ step2[21] = HighbdWrapLow(step1[20] - step1[21], bd);
+ step2[22] = HighbdWrapLow(-step1[22] + step1[23], bd);
+ step2[23] = HighbdWrapLow(step1[22] + step1[23], bd);
+ step2[24] = HighbdWrapLow(step1[24] + step1[25], bd);
+ step2[25] = HighbdWrapLow(step1[24] - step1[25], bd);
+ step2[26] = HighbdWrapLow(-step1[26] + step1[27], bd);
+ step2[27] = HighbdWrapLow(step1[26] + step1[27], bd);
+ step2[28] = HighbdWrapLow(step1[28] + step1[29], bd);
+ step2[29] = HighbdWrapLow(step1[28] - step1[29], bd);
+ step2[30] = HighbdWrapLow(-step1[30] + step1[31], bd);
+ step2[31] = HighbdWrapLow(step1[30] + step1[31], bd);
+
+ // stage 3: 0..3 pass through; cosine-combine (4,7) and (5,6); add/subtract adjacent pairs in 8..15;
+ // cosine-combine (17,30), (18,29), (21,26), (22,25); the other entries of 16..31 pass through
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * (long)CosPi28_64 - step2[7] * (long)CosPi4_64;
+ temp2 = step2[4] * (long)CosPi4_64 + step2[7] * (long)CosPi28_64;
+ step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = step2[5] * (long)CosPi12_64 - step2[6] * (long)CosPi20_64;
+ temp2 = step2[5] * (long)CosPi20_64 + step2[6] * (long)CosPi12_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+ step1[8] = HighbdWrapLow(step2[8] + step2[9], bd);
+ step1[9] = HighbdWrapLow(step2[8] - step2[9], bd);
+ step1[10] = HighbdWrapLow(-step2[10] + step2[11], bd);
+ step1[11] = HighbdWrapLow(step2[10] + step2[11], bd);
+ step1[12] = HighbdWrapLow(step2[12] + step2[13], bd);
+ step1[13] = HighbdWrapLow(step2[12] - step2[13], bd);
+ step1[14] = HighbdWrapLow(-step2[14] + step2[15], bd);
+ step1[15] = HighbdWrapLow(step2[14] + step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[31] = step2[31];
+ temp1 = -step2[17] * (long)CosPi4_64 + step2[30] * (long)CosPi28_64;
+ temp2 = step2[17] * (long)CosPi28_64 + step2[30] * (long)CosPi4_64;
+ step1[17] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[30] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step2[18] * (long)CosPi28_64 - step2[29] * (long)CosPi4_64;
+ temp2 = -step2[18] * (long)CosPi4_64 + step2[29] * (long)CosPi28_64;
+ step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[19] = step2[19];
+ step1[20] = step2[20];
+ temp1 = -step2[21] * (long)CosPi20_64 + step2[26] * (long)CosPi12_64;
+ temp2 = step2[21] * (long)CosPi12_64 + step2[26] * (long)CosPi20_64;
+ step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step2[22] * (long)CosPi12_64 - step2[25] * (long)CosPi20_64;
+ temp2 = -step2[22] * (long)CosPi20_64 + step2[25] * (long)CosPi12_64;
+ step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[27] = step2[27];
+ step1[28] = step2[28];
+
+ // stage 4: cosine-combine (0,1) and (2,3); add/subtract within 4..7; cosine-combine (9,14) and (10,13);
+ // add/subtract within 16..31 in groups of four
+ temp1 = (step1[0] + step1[1]) * (long)CosPi16_64;
+ temp2 = (step1[0] - step1[1]) * (long)CosPi16_64;
+ step2[0] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[1] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = step1[2] * (long)CosPi24_64 - step1[3] * (long)CosPi8_64;
+ temp2 = step1[2] * (long)CosPi8_64 + step1[3] * (long)CosPi24_64;
+ step2[2] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[3] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[4] = HighbdWrapLow(step1[4] + step1[5], bd);
+ step2[5] = HighbdWrapLow(step1[4] - step1[5], bd);
+ step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd);
+ step2[7] = HighbdWrapLow(step1[6] + step1[7], bd);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * (long)CosPi8_64 + step1[14] * (long)CosPi24_64;
+ temp2 = step1[9] * (long)CosPi24_64 + step1[14] * (long)CosPi8_64;
+ step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step1[10] * (long)CosPi24_64 - step1[13] * (long)CosPi8_64;
+ temp2 = -step1[10] * (long)CosPi8_64 + step1[13] * (long)CosPi24_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ step2[16] = HighbdWrapLow(step1[16] + step1[19], bd);
+ step2[17] = HighbdWrapLow(step1[17] + step1[18], bd);
+ step2[18] = HighbdWrapLow(step1[17] - step1[18], bd);
+ step2[19] = HighbdWrapLow(step1[16] - step1[19], bd);
+ step2[20] = HighbdWrapLow(-step1[20] + step1[23], bd);
+ step2[21] = HighbdWrapLow(-step1[21] + step1[22], bd);
+ step2[22] = HighbdWrapLow(step1[21] + step1[22], bd);
+ step2[23] = HighbdWrapLow(step1[20] + step1[23], bd);
+
+ step2[24] = HighbdWrapLow(step1[24] + step1[27], bd);
+ step2[25] = HighbdWrapLow(step1[25] + step1[26], bd);
+ step2[26] = HighbdWrapLow(step1[25] - step1[26], bd);
+ step2[27] = HighbdWrapLow(step1[24] - step1[27], bd);
+ step2[28] = HighbdWrapLow(-step1[28] + step1[31], bd);
+ step2[29] = HighbdWrapLow(-step1[29] + step1[30], bd);
+ step2[30] = HighbdWrapLow(step1[29] + step1[30], bd);
+ step2[31] = HighbdWrapLow(step1[28] + step1[31], bd);
+
+ // stage 5: add/subtract within 0..3; CosPi16_64-combine (5,6); add/subtract within 8..15;
+ // cosine-combine (18,29), (19,28), (20,27), (21,26); the other entries of 16..31 pass through
+ step1[0] = HighbdWrapLow(step2[0] + step2[3], bd);
+ step1[1] = HighbdWrapLow(step2[1] + step2[2], bd);
+ step1[2] = HighbdWrapLow(step2[1] - step2[2], bd);
+ step1[3] = HighbdWrapLow(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * (long)CosPi16_64;
+ temp2 = (step2[5] + step2[6]) * (long)CosPi16_64;
+ step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = HighbdWrapLow(step2[8] + step2[11], bd);
+ step1[9] = HighbdWrapLow(step2[9] + step2[10], bd);
+ step1[10] = HighbdWrapLow(step2[9] - step2[10], bd);
+ step1[11] = HighbdWrapLow(step2[8] - step2[11], bd);
+ step1[12] = HighbdWrapLow(-step2[12] + step2[15], bd);
+ step1[13] = HighbdWrapLow(-step2[13] + step2[14], bd);
+ step1[14] = HighbdWrapLow(step2[13] + step2[14], bd);
+ step1[15] = HighbdWrapLow(step2[12] + step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ temp1 = -step2[18] * (long)CosPi8_64 + step2[29] * (long)CosPi24_64;
+ temp2 = step2[18] * (long)CosPi24_64 + step2[29] * (long)CosPi8_64;
+ step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step2[19] * (long)CosPi8_64 + step2[28] * (long)CosPi24_64;
+ temp2 = step2[19] * (long)CosPi24_64 + step2[28] * (long)CosPi8_64;
+ step1[19] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[28] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step2[20] * (long)CosPi24_64 - step2[27] * (long)CosPi8_64;
+ temp2 = -step2[20] * (long)CosPi8_64 + step2[27] * (long)CosPi24_64;
+ step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = -step2[21] * (long)CosPi24_64 - step2[26] * (long)CosPi8_64;
+ temp2 = -step2[21] * (long)CosPi8_64 + step2[26] * (long)CosPi24_64;
+ step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[22] = step2[22];
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[25] = step2[25];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // stage 6: mirrored add/subtract within 0..7; CosPi16_64-combine (10,13) and (11,12);
+ // mirrored add/subtract within 16..23 and within 24..31
+ step2[0] = HighbdWrapLow(step1[0] + step1[7], bd);
+ step2[1] = HighbdWrapLow(step1[1] + step1[6], bd);
+ step2[2] = HighbdWrapLow(step1[2] + step1[5], bd);
+ step2[3] = HighbdWrapLow(step1[3] + step1[4], bd);
+ step2[4] = HighbdWrapLow(step1[3] - step1[4], bd);
+ step2[5] = HighbdWrapLow(step1[2] - step1[5], bd);
+ step2[6] = HighbdWrapLow(step1[1] - step1[6], bd);
+ step2[7] = HighbdWrapLow(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * (long)CosPi16_64;
+ temp2 = (step1[10] + step1[13]) * (long)CosPi16_64;
+ step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * (long)CosPi16_64;
+ temp2 = (step1[11] + step1[12]) * (long)CosPi16_64;
+ step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ step2[16] = HighbdWrapLow(step1[16] + step1[23], bd);
+ step2[17] = HighbdWrapLow(step1[17] + step1[22], bd);
+ step2[18] = HighbdWrapLow(step1[18] + step1[21], bd);
+ step2[19] = HighbdWrapLow(step1[19] + step1[20], bd);
+ step2[20] = HighbdWrapLow(step1[19] - step1[20], bd);
+ step2[21] = HighbdWrapLow(step1[18] - step1[21], bd);
+ step2[22] = HighbdWrapLow(step1[17] - step1[22], bd);
+ step2[23] = HighbdWrapLow(step1[16] - step1[23], bd);
+
+ step2[24] = HighbdWrapLow(-step1[24] + step1[31], bd);
+ step2[25] = HighbdWrapLow(-step1[25] + step1[30], bd);
+ step2[26] = HighbdWrapLow(-step1[26] + step1[29], bd);
+ step2[27] = HighbdWrapLow(-step1[27] + step1[28], bd);
+ step2[28] = HighbdWrapLow(step1[27] + step1[28], bd);
+ step2[29] = HighbdWrapLow(step1[26] + step1[29], bd);
+ step2[30] = HighbdWrapLow(step1[25] + step1[30], bd);
+ step2[31] = HighbdWrapLow(step1[24] + step1[31], bd);
+
+ // stage 7: mirrored add/subtract of k and 15-k for k = 0..7; CosPi16_64-combine (20,27), (21,26), (22,25), (23,24);
+ // 16..19 and 28..31 pass through
+ step1[0] = HighbdWrapLow(step2[0] + step2[15], bd);
+ step1[1] = HighbdWrapLow(step2[1] + step2[14], bd);
+ step1[2] = HighbdWrapLow(step2[2] + step2[13], bd);
+ step1[3] = HighbdWrapLow(step2[3] + step2[12], bd);
+ step1[4] = HighbdWrapLow(step2[4] + step2[11], bd);
+ step1[5] = HighbdWrapLow(step2[5] + step2[10], bd);
+ step1[6] = HighbdWrapLow(step2[6] + step2[9], bd);
+ step1[7] = HighbdWrapLow(step2[7] + step2[8], bd);
+ step1[8] = HighbdWrapLow(step2[7] - step2[8], bd);
+ step1[9] = HighbdWrapLow(step2[6] - step2[9], bd);
+ step1[10] = HighbdWrapLow(step2[5] - step2[10], bd);
+ step1[11] = HighbdWrapLow(step2[4] - step2[11], bd);
+ step1[12] = HighbdWrapLow(step2[3] - step2[12], bd);
+ step1[13] = HighbdWrapLow(step2[2] - step2[13], bd);
+ step1[14] = HighbdWrapLow(step2[1] - step2[14], bd);
+ step1[15] = HighbdWrapLow(step2[0] - step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ step1[18] = step2[18];
+ step1[19] = step2[19];
+ temp1 = (-step2[20] + step2[27]) * (long)CosPi16_64;
+ temp2 = (step2[20] + step2[27]) * (long)CosPi16_64;
+ step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = (-step2[21] + step2[26]) * (long)CosPi16_64;
+ temp2 = (step2[21] + step2[26]) * (long)CosPi16_64;
+ step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = (-step2[22] + step2[25]) * (long)CosPi16_64;
+ temp2 = (step2[22] + step2[25]) * (long)CosPi16_64;
+ step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ temp1 = (-step2[23] + step2[24]) * (long)CosPi16_64;
+ temp2 = (step2[23] + step2[24]) * (long)CosPi16_64;
+ step1[23] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+ step1[24] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+ step1[28] = step2[28];
+ step1[29] = step2[29];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // final stage: output[k] = step1[k] + step1[31-k] and output[31-k] = step1[k] - step1[31-k] for k = 0..15
+ output[0] = HighbdWrapLow(step1[0] + step1[31], bd);
+ output[1] = HighbdWrapLow(step1[1] + step1[30], bd);
+ output[2] = HighbdWrapLow(step1[2] + step1[29], bd);
+ output[3] = HighbdWrapLow(step1[3] + step1[28], bd);
+ output[4] = HighbdWrapLow(step1[4] + step1[27], bd);
+ output[5] = HighbdWrapLow(step1[5] + step1[26], bd);
+ output[6] = HighbdWrapLow(step1[6] + step1[25], bd);
+ output[7] = HighbdWrapLow(step1[7] + step1[24], bd);
+ output[8] = HighbdWrapLow(step1[8] + step1[23], bd);
+ output[9] = HighbdWrapLow(step1[9] + step1[22], bd);
+ output[10] = HighbdWrapLow(step1[10] + step1[21], bd);
+ output[11] = HighbdWrapLow(step1[11] + step1[20], bd);
+ output[12] = HighbdWrapLow(step1[12] + step1[19], bd);
+ output[13] = HighbdWrapLow(step1[13] + step1[18], bd);
+ output[14] = HighbdWrapLow(step1[14] + step1[17], bd);
+ output[15] = HighbdWrapLow(step1[15] + step1[16], bd);
+ output[16] = HighbdWrapLow(step1[15] - step1[16], bd);
+ output[17] = HighbdWrapLow(step1[14] - step1[17], bd);
+ output[18] = HighbdWrapLow(step1[13] - step1[18], bd);
+ output[19] = HighbdWrapLow(step1[12] - step1[19], bd);
+ output[20] = HighbdWrapLow(step1[11] - step1[20], bd);
+ output[21] = HighbdWrapLow(step1[10] - step1[21], bd);
+ output[22] = HighbdWrapLow(step1[9] - step1[22], bd);
+ output[23] = HighbdWrapLow(step1[8] - step1[23], bd);
+ output[24] = HighbdWrapLow(step1[7] - step1[24], bd);
+ output[25] = HighbdWrapLow(step1[6] - step1[25], bd);
+ output[26] = HighbdWrapLow(step1[5] - step1[26], bd);
+ output[27] = HighbdWrapLow(step1[4] - step1[27], bd);
+ output[28] = HighbdWrapLow(step1[3] - step1[28], bd);
+ output[29] = HighbdWrapLow(step1[2] - step1[29], bd);
+ output[30] = HighbdWrapLow(step1[1] - step1[30], bd);
+ output[31] = HighbdWrapLow(step1[0] - step1[31], bd);
+ }
+
+ public static void HighbdIdct32x321024Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // Full 32x32 inverse transform: every row of `input` is processed (rows
+     // that are entirely zero are written as zeros without calling
+     // HighbdIdct32), then every column of the intermediate result is
+     // transformed, passed through BitUtils.RoundPowerOfTwo(value, 6) and
+     // merged into `dest` (row * stride + column) by HighbdClipPixelAdd.
+     Span<int> output = stackalloc int[32 * 32];
+     Span<int> tempIn = stackalloc int[32];
+     Span<int> tempOut = stackalloc int[32];
+
+     // Rows
+     Span<int> rowOut = output;
+     for (int row = 0; row < 32; row++)
+     {
+         // OR all 32 coefficients together; the result is 0 only for an all-zero row.
+         int orBits = 0;
+         for (int k = 0; k < 32; k++)
+         {
+             orBits |= input[k];
+         }
+
+         if (orBits == 0)
+         {
+             rowOut.Slice(0, 32).Fill(0);
+         }
+         else
+         {
+             HighbdIdct32(input, rowOut, bd);
+         }
+
+         input = input.Slice(32);
+         rowOut = rowOut.Slice(32);
+     }
+
+     // Columns
+     for (int col = 0; col < 32; col++)
+     {
+         for (int k = 0; k < 32; k++)
+         {
+             tempIn[k] = output[k * 32 + col];
+         }
+
+         HighbdIdct32(tempIn, tempOut, bd);
+
+         for (int k = 0; k < 32; k++)
+         {
+             int pos = k * stride + col;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[k], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct32x32135Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 32x32 inverse transform for blocks whose non-zero coefficients all lie in
+     // the upper-left 16x16 area: only the first 16 rows go through the row
+     // pass, the remaining rows of the intermediate buffer are zero. All 32
+     // columns are then transformed and accumulated into `dest`
+     // (row * stride + column).
+     const int Size = 32;
+     const int NonZeroRows = 16;
+
+     Span<int> output = stackalloc int[Size * Size];
+     Span<int> tempIn = stackalloc int[Size];
+     Span<int> tempOut = stackalloc int[Size];
+
+     // Rows
+     Span<int> outptr = output;
+     for (int i = 0; i < NonZeroRows; ++i)
+     {
+         HighbdIdct32(input, outptr, bd);
+         input = input.Slice(Size);
+         outptr = outptr.Slice(Size);
+     }
+
+     // The column pass reads every row. stackalloc memory is not guaranteed to
+     // be zeroed, so clear the rows the loop above did not write.
+     outptr.Clear();
+
+     // Columns
+     for (int i = 0; i < Size; ++i)
+     {
+         for (int j = 0; j < Size; ++j)
+         {
+             tempIn[j] = output[j * Size + i];
+         }
+
+         HighbdIdct32(tempIn, tempOut, bd);
+
+         // Index dest directly instead of re-slicing by `stride` after every row:
+         // slicing past the last row throws when the span ends inside that row.
+         for (int j = 0; j < Size; ++j)
+         {
+             int pos = j * stride + i;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct32x3234Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 32x32 inverse transform for blocks whose non-zero coefficients all lie in
+     // the upper-left 8x8 area: only the first 8 rows go through the row pass,
+     // the remaining rows of the intermediate buffer are zero. All 32 columns
+     // are then transformed and accumulated into `dest` (row * stride + column).
+     const int Size = 32;
+     const int NonZeroRows = 8;
+
+     Span<int> output = stackalloc int[Size * Size];
+     Span<int> tempIn = stackalloc int[Size];
+     Span<int> tempOut = stackalloc int[Size];
+
+     // Rows
+     Span<int> outptr = output;
+     for (int i = 0; i < NonZeroRows; ++i)
+     {
+         HighbdIdct32(input, outptr, bd);
+         input = input.Slice(Size);
+         outptr = outptr.Slice(Size);
+     }
+
+     // The column pass reads every row. stackalloc memory is not guaranteed to
+     // be zeroed, so clear the rows the loop above did not write.
+     outptr.Clear();
+
+     // Columns
+     for (int i = 0; i < Size; ++i)
+     {
+         for (int j = 0; j < Size; ++j)
+         {
+             tempIn[j] = output[j * Size + i];
+         }
+
+         HighbdIdct32(tempIn, tempOut, bd);
+
+         for (int j = 0; j < Size; ++j)
+         {
+             int pos = j * stride + i;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+         }
+     }
+ }
+
+ public static void HighbdIdct32x321Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
+ {
+     // 32x32 inverse transform for a block where only input[0] is used: the
+     // coefficient is scaled by CosPi16_64 twice (rounded and wrapped each time),
+     // passed through BitUtils.RoundPowerOfTwo(value, 6), and the resulting
+     // single value is added to every pixel of the 32x32 area of `dest`.
+     int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
+     output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd);
+
+     int a1 = BitUtils.RoundPowerOfTwo(output, 6);
+
+     // Address dest as row * stride + column. Re-slicing dest by `stride` after
+     // each row would throw after the final row whenever the span holds fewer
+     // than 32 * stride elements, even though every pixel access is in range.
+     for (int j = 0; j < 32; ++j)
+     {
+         int rowStart = j * stride;
+         for (int i = 0; i < 32; ++i)
+         {
+             dest[rowStart + i] = HighbdClipPixelAdd(dest[rowStart + i], a1, bd);
+         }
+     }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs
new file mode 100644
index 00000000..0d5e8b6e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs
@@ -0,0 +1,73 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ // Helpers for computing and merging 8-bit probabilities from symbol counts.
+ internal static class Prob
+ {
+     public const int MaxProb = 255;
+
+     private const int ModeMvCountSat = 20;
+
+     // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT;
+     private static readonly uint[] CountToUpdateFactor = new uint[]
+     {
+         0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64,
+         70, 76, 83, 89, 96, 102, 108, 115, 121, 128
+     };
+
+     // Returns num / den scaled to 256 with rounding, forced into the [1, 255] range.
+     private static byte GetProb(uint num, uint den)
+     {
+         Debug.Assert(den != 0);
+
+         ulong rounded = (ulong)num * 256 + (den >> 1);
+         int p = (int)(rounded / den);
+
+         // (p > 255) ? 255 : (p < 1) ? 1 : p;
+         int saturateHigh = (255 - p) >> 23;
+         int forceNonZero = p == 0 ? 1 : 0;
+
+         return (byte)(p | saturateHigh | forceNonZero);
+     }
+
+     /* This function assumes prob1 and prob2 are already within [1,255] range. */
+     public static byte WeightedProb(int prob1, int prob2, int factor)
+     {
+         // Blend with weights (256 - factor) and factor, then divide by 256 with rounding.
+         int blended = prob1 * (256 - factor) + prob2 * factor;
+
+         return (byte)BitUtils.RoundPowerOfTwo(blended, 8);
+     }
+
+     // Moves 'preProb' towards the probability observed in the counts (ct0 out of
+     // ct0 + ct1). The weight of the observation comes from CountToUpdateFactor,
+     // indexed by the total count saturated at ModeMvCountSat. With no
+     // observations the previous probability is returned unchanged.
+     public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
+     {
+         uint total = ct0 + ct1;
+         if (total == 0)
+         {
+             return preProb;
+         }
+
+         uint saturated = Math.Min(total, ModeMvCountSat);
+         int factor = (int)CountToUpdateFactor[(int)saturated];
+         byte observed = GetProb(ct0, total);
+
+         return WeightedProb(preProb, observed, factor);
+     }
+
+     // Recursively merges the probabilities of the tree node starting at index 'i'.
+     // An entry <= 0 is a leaf whose count is counts[-entry]; a positive entry is the
+     // index of a child node. Returns the total count below this node.
+     private static uint TreeMergeProbsImpl(
+         uint i,
+         sbyte[] tree,
+         ReadOnlySpan<byte> preProbs,
+         ReadOnlySpan<uint> counts,
+         Span<byte> probs)
+     {
+         int left = tree[i];
+         uint leftCount;
+         if (left <= 0)
+         {
+             leftCount = counts[-left];
+         }
+         else
+         {
+             leftCount = TreeMergeProbsImpl((uint)left, tree, preProbs, counts, probs);
+         }
+
+         int right = tree[i + 1];
+         uint rightCount;
+         if (right <= 0)
+         {
+             rightCount = counts[-right];
+         }
+         else
+         {
+             rightCount = TreeMergeProbsImpl((uint)right, tree, preProbs, counts, probs);
+         }
+
+         // Each node occupies two tree entries, so its probability lives at i / 2.
+         int node = (int)(i >> 1);
+         probs[node] = ModeMvMergeProbs(preProbs[node], leftCount, rightCount);
+
+         return leftCount + rightCount;
+     }
+
+     // Merges the probabilities of the whole tree, starting from the root at index 0.
+     public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
+     {
+         TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs
new file mode 100644
index 00000000..94aa6979
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs
@@ -0,0 +1,237 @@
+using System;
+using System.Buffers.Binary;
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ internal struct Reader
+ {
+ private static readonly byte[] Norm = new byte[]
+ {
+ 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ private const int BdValueSize = sizeof(ulong) * 8;
+
+ // This is meant to be a large, positive constant that can still be efficiently
+ // loaded as an immediate (on platforms like ARM, for example).
+ // Even relatively modest values like 100 would work fine.
+ private const int LotsOfBits = 0x40000000;
+
+ public ulong Value;
+ public uint Range;
+ public int Count;
+ private ArrayPtr<byte> _buffer;
+
+ // Points the reader at the first 'size' bytes of 'buffer', resets the decoding
+ // state, loads the first bits and reads the leading marker bit.
+ // Returns true when 'buffer' is null while 'size' is non-zero, or when the marker
+ // bit is set; false otherwise.
+ // NOTE(review): callers presumably treat a true result as an error - confirm.
+ public bool Init(ArrayPtr<byte> buffer, int size)
+ {
+ if (size != 0 && buffer.IsNull)
+ {
+ return true;
+ }
+ else
+ {
+ // NOTE(review): this path is also taken when size == 0; whether buffer[0] is
+ // safe for a null/empty buffer depends on ArrayPtr - confirm.
+ _buffer = new ArrayPtr<byte>(ref buffer[0], size);
+ Value = 0;
+ Count = -8;
+ Range = 255;
+ Fill();
+ return ReadBit() != 0; // Marker bit
+ }
+ }
+
+ // Loads more bits from '_buffer' into 'Value', advances '_buffer' past the bytes
+ // that were consumed and updates 'Count' to match.
+ private void Fill()
+ {
+ ReadOnlySpan<byte> buffer = _buffer.ToSpan();
+ ReadOnlySpan<byte> bufferStart = buffer;
+ ulong value = Value;
+ int count = Count;
+ ulong bytesLeft = (ulong)buffer.Length;
+ ulong bitsLeft = bytesLeft * 8;
+ // Bit position inside 'value' where the next loaded byte is placed.
+ int shift = BdValueSize - 8 - (count + 8);
+
+ if (bitsLeft > BdValueSize)
+ {
+ // More than BdValueSize bits remain: read 8 bytes in one go and keep only
+ // the top 'bits' bits of them ('bits' is a multiple of 8).
+ int bits = (shift & unchecked((int)0xfffffff8)) + 8;
+ ulong nv;
+ ulong bigEndianValues = BinaryPrimitives.ReadUInt64BigEndian(buffer);
+ nv = bigEndianValues >> (BdValueSize - bits);
+ count += bits;
+ // Only bits / 8 bytes were actually consumed.
+ buffer = buffer.Slice(bits >> 3);
+ value = Value | (nv << (shift & 0x7));
+ }
+ else
+ {
+ // Near the end of the input: load the remaining bytes one at a time.
+ int bitsOver = shift + 8 - (int)bitsLeft;
+ int loopEnd = 0;
+ if (bitsOver >= 0)
+ {
+ // The remaining input cannot fill the free space: mark the end of the
+ // data by adding LotsOfBits to the count, and stop loading once the
+ // available bytes are used up.
+ count += LotsOfBits;
+ loopEnd = bitsOver;
+ }
+
+ if (bitsOver < 0 || bitsLeft != 0)
+ {
+ while (shift >= loopEnd)
+ {
+ count += 8;
+ value |= (ulong)buffer[0] << shift;
+ buffer = buffer.Slice(1);
+ shift -= 8;
+ }
+ }
+ }
+
+ // NOTE: Variable 'buffer' may not relate to '_buffer' after decryption,
+ // so we increase '_buffer' by the amount that 'buffer' moved, rather than
+ // assign 'buffer' to '_buffer'.
+ _buffer = _buffer.Slice(bufferStart.Length - buffer.Length);
+ Value = value;
+ Count = count;
+ }
+
+ // Reports whether bits were decoded after the end of the stream was reached.
+ public bool HasError()
+ {
+     // 'Count' holds the number of bits buffered in 'Value', minus 8: the top byte
+     // of 'Value' belongs to the algorithm and the rest is waiting to be shifted
+     // into it. Count == 8 therefore means the top 16 bits of 'Value' are in use.
+     //
+     // Every byte taken from the user's buffer adds 8 to 'Count'. When the data
+     // runs out, LotsOfBits is added on top, so count == LotsOfBits - 1 means the
+     // user's data has been exhausted.
+     //
+     // true:  we have tried to decode bits after the end of stream was encountered.
+     // false: no error.
+     bool aboveValueSize = Count > BdValueSize;
+     bool belowLotsOfBits = Count < LotsOfBits;
+
+     return aboveValueSize && belowLotsOfBits;
+ }
+
+ // Decodes one binary symbol. 'prob' (on a scale of 256) sets where the current
+ // range is split. Returns 0 when the buffered value lies below the split and 1
+ // otherwise, then renormalizes Range/Value/Count using the Norm table.
+ public int Read(int prob)
+ {
+     uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;
+
+     if (Count < 0)
+     {
+         Fill();
+     }
+
+     // The split is compared against the top byte of the value window.
+     ulong bigsplit = (ulong)split << (BdValueSize - 8);
+     ulong currentValue = Value;
+     uint newRange;
+     int decoded;
+
+     if (currentValue < bigsplit)
+     {
+         newRange = split;
+         decoded = 0;
+     }
+     else
+     {
+         newRange = Range - split;
+         currentValue -= bigsplit;
+         decoded = 1;
+     }
+
+     // Renormalize: shift the range left by the amount given by the Norm table,
+     // consuming the same number of buffered bits.
+     int shift = Norm[newRange];
+
+     Value = currentValue << shift;
+     Count -= shift;
+     Range = newRange << shift;
+
+     return decoded;
+ }
+
+ // Reads one bit using a probability of 128 out of 256.
+ public int ReadBit()
+ {
+     const int ProbHalf = 128; // vpx_prob_half
+
+     return Read(ProbHalf);
+ }
+
+ // Reads an unsigned literal of 'bits' bits, most significant bit first.
+ // Returns 0 without reading anything when 'bits' is zero or negative.
+ public int ReadLiteral(int bits)
+ {
+     int result = 0;
+
+     for (int remaining = bits; remaining > 0; remaining--)
+     {
+         result |= ReadBit() << (remaining - 1);
+     }
+
+     return result;
+ }
+
+ // Walks 'tree' from index 0, reading one symbol per node with the probability
+ // stored at probs[node / 2] and following tree[node + symbol]. The walk stops at
+ // the first entry that is <= 0, and the negated entry is returned.
+ public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
+ {
+     int node = 0;
+
+     do
+     {
+         int branch = Read(probs[node >> 1]);
+         node = tree[node + branch];
+     }
+     while (node > 0);
+
+     return -node;
+ }
+
+ // Same decoding step as Read, but working on decoder state held by the caller
+ // ('value', 'count' and 'range' are passed by reference and updated in place).
+ // The Value/Count fields are only synchronized around the call to Fill().
+ public int ReadBool(int prob, ref ulong value, ref int count, ref uint range)
+ {
+     uint split = (range * (uint)prob + (256 - (uint)prob)) >> 8;
+     ulong bigsplit = (ulong)split << (BdValueSize - 8);
+
+     if (count < 0)
+     {
+         // Fill() operates on the fields, so hand the caller's state over and
+         // take the refreshed state back afterwards.
+         Value = value;
+         Count = count;
+         Fill();
+         value = Value;
+         count = Count;
+     }
+
+     int decoded;
+
+     if (value < bigsplit)
+     {
+         range = split;
+         decoded = 0;
+     }
+     else
+     {
+         range -= split;
+         value -= bigsplit;
+         decoded = 1;
+     }
+
+     // Renormalization is identical for both outcomes.
+     int shift = Norm[range];
+     range <<= shift;
+     value <<= shift;
+     count -= shift;
+
+     return decoded;
+ }
+
+ // Moves '_buffer' back by one byte for every 8 bits taken off 'Count', for as
+ // long as 'Count' stays strictly between 8 and BdValueSize, and returns the
+ // resulting pointer.
+ public ArrayPtr<byte> FindEnd()
+ {
+     // Find the end of the coded buffer
+     while (!(Count <= 8 || Count >= BdValueSize))
+     {
+         Count -= 8;
+         _buffer = _buffer.Slice(-1);
+     }
+
+     return _buffer;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs
new file mode 100644
index 00000000..e041f2e0
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs
@@ -0,0 +1,54 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
+{
+ // Fixed-point constants shared by the forward and inverse transform code.
+ internal static class TxfmCommon
+ {
+ // Constants used by all idct/dct functions
+ // Number of fractional bits of the trigonometric constants below (they are scaled by 1 << 14 = 16384).
+ public const int DctConstBits = 14;
+ // Half of 1 << DctConstBits.
+ public const int DctConstRounding = 1 << (DctConstBits - 1);
+
+ public const int UnitQuantShift = 2;
+ public const int UnitQuantFactor = 1 << UnitQuantShift;
+
+ // Constants:
+ // for (int i = 1; i < 32; ++i)
+ // Console.WriteLine("public const short CosPi{0}_64 = {1};", i, MathF.Round(16384 * MathF.Cos(i * MathF.PI / 64)));
+ // Note: sin(k * Pi / 64) = cos((32 - k) * Pi / 64)
+ public const short CosPi1_64 = 16364;
+ public const short CosPi2_64 = 16305;
+ public const short CosPi3_64 = 16207;
+ public const short CosPi4_64 = 16069;
+ public const short CosPi5_64 = 15893;
+ public const short CosPi6_64 = 15679;
+ public const short CosPi7_64 = 15426;
+ public const short CosPi8_64 = 15137;
+ public const short CosPi9_64 = 14811;
+ public const short CosPi10_64 = 14449;
+ public const short CosPi11_64 = 14053;
+ public const short CosPi12_64 = 13623;
+ public const short CosPi13_64 = 13160;
+ public const short CosPi14_64 = 12665;
+ public const short CosPi15_64 = 12140;
+ public const short CosPi16_64 = 11585;
+ public const short CosPi17_64 = 11003;
+ public const short CosPi18_64 = 10394;
+ public const short CosPi19_64 = 9760;
+ public const short CosPi20_64 = 9102;
+ public const short CosPi21_64 = 8423;
+ public const short CosPi22_64 = 7723;
+ public const short CosPi23_64 = 7005;
+ public const short CosPi24_64 = 6270;
+ public const short CosPi25_64 = 5520;
+ public const short CosPi26_64 = 4756;
+ public const short CosPi27_64 = 3981;
+ public const short CosPi28_64 = 3196;
+ public const short CosPi29_64 = 2404;
+ public const short CosPi30_64 = 1606;
+ public const short CosPi31_64 = 804;
+
+ // 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3
+ public const short SinPi1_9 = 5283;
+ public const short SinPi2_9 = 9929;
+ public const short SinPi3_9 = 13377;
+ public const short SinPi4_9 = 15212;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs
new file mode 100644
index 00000000..9fa5842a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs
@@ -0,0 +1,536 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using System;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class Idct
+ {
+ private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);
+ private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);
+
+ // Pair of 1-D transforms that together form a 2-D transform: 'Cols' is applied
+ // to the columns and 'Rows' to the rows.
+ private struct Transform2D
+ {
+ public Transform1D Cols, Rows; // Vertical and horizontal
+
+ // Note the argument order: column transform first, row transform second.
+ public Transform2D(Transform1D cols, Transform1D rows)
+ {
+ Cols = cols;
+ Rows = rows;
+ }
+ }
+
+ // High bit depth counterpart of Transform2D: the 1-D transforms additionally
+ // take the bit depth as an argument.
+ private struct HighbdTransform2D
+ {
+ public HighbdTransform1D Cols, Rows; // Vertical and horizontal
+
+ // Note the argument order: column transform first, row transform second.
+ public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
+ {
+ Cols = cols;
+ Rows = rows;
+ }
+ }
+
+ private static readonly Transform2D[] Iht4 = new Transform2D[]
+ {
+ new Transform2D(Idct4, Idct4), // DCT_DCT = 0
+ new Transform2D(Iadst4, Idct4), // ADST_DCT = 1
+ new Transform2D(Idct4, Iadst4), // DCT_ADST = 2
+ new Transform2D(Iadst4, Iadst4) // ADST_ADST = 3
+ };
+
+ // 4x4 inverse hybrid transform. 'txType' indexes the Iht4 table to pick the row
+ // and column transforms. The result is rounded by 4 bits and added to 'dest'
+ // (rows are 'stride' elements apart) through ClipPixelAdd.
+ public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
+ {
+     Transform2D ht = Iht4[txType];
+     Span<int> intermediate = stackalloc int[4 * 4];
+     Span<int> column = stackalloc int[4];
+     Span<int> transformed = stackalloc int[4];
+
+     // Inverse transform row vectors
+     Span<int> rowOut = intermediate;
+     for (int row = 0; row < 4; row++)
+     {
+         ht.Rows(input, rowOut);
+         input = input.Slice(4);
+         rowOut = rowOut.Slice(4);
+     }
+
+     // Inverse transform column vectors
+     for (int col = 0; col < 4; col++)
+     {
+         for (int row = 0; row < 4; row++)
+         {
+             column[row] = intermediate[row * 4 + col];
+         }
+
+         ht.Cols(column, transformed);
+
+         for (int row = 0; row < 4; row++)
+         {
+             int pos = row * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(transformed[row], 4));
+         }
+     }
+ }
+
+ private static readonly Transform2D[] Iht8 = new Transform2D[]
+ {
+ new Transform2D(Idct8, Idct8), // DCT_DCT = 0
+ new Transform2D(Iadst8, Idct8), // ADST_DCT = 1
+ new Transform2D(Idct8, Iadst8), // DCT_ADST = 2
+ new Transform2D(Iadst8, Iadst8) // ADST_ADST = 3
+ };
+
+ // 8x8 inverse hybrid transform. 'txType' indexes the Iht8 table to pick the row
+ // and column transforms. The result is rounded by 5 bits and added to 'dest'
+ // (rows are 'stride' elements apart) through ClipPixelAdd.
+ public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
+ {
+     Transform2D ht = Iht8[txType];
+     Span<int> intermediate = stackalloc int[8 * 8];
+     Span<int> column = stackalloc int[8];
+     Span<int> transformed = stackalloc int[8];
+
+     // Inverse transform row vectors
+     Span<int> rowOut = intermediate;
+     for (int row = 0; row < 8; row++)
+     {
+         ht.Rows(input, rowOut);
+         input = input.Slice(8);
+         rowOut = rowOut.Slice(8);
+     }
+
+     // Inverse transform column vectors
+     for (int col = 0; col < 8; col++)
+     {
+         for (int row = 0; row < 8; row++)
+         {
+             column[row] = intermediate[row * 8 + col];
+         }
+
+         ht.Cols(column, transformed);
+
+         for (int row = 0; row < 8; row++)
+         {
+             int pos = row * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(transformed[row], 5));
+         }
+     }
+ }
+
+ private static readonly Transform2D[] Iht16 = new Transform2D[]
+ {
+ new Transform2D(Idct16, Idct16), // DCT_DCT = 0
+ new Transform2D(Iadst16, Idct16), // ADST_DCT = 1
+ new Transform2D(Idct16, Iadst16), // DCT_ADST = 2
+ new Transform2D(Iadst16, Iadst16) // ADST_ADST = 3
+ };
+
+ // 16x16 inverse hybrid transform. 'txType' indexes the Iht16 table to pick the
+ // row and column transforms. The result is rounded by 6 bits and added to 'dest'
+ // (rows are 'stride' elements apart) through ClipPixelAdd.
+ public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
+ {
+     Transform2D ht = Iht16[txType];
+     Span<int> intermediate = stackalloc int[16 * 16];
+     Span<int> column = stackalloc int[16];
+     Span<int> transformed = stackalloc int[16];
+
+     // Rows
+     Span<int> rowOut = intermediate;
+     for (int row = 0; row < 16; row++)
+     {
+         ht.Rows(input, rowOut);
+         input = input.Slice(16);
+         rowOut = rowOut.Slice(16);
+     }
+
+     // Columns
+     for (int col = 0; col < 16; col++)
+     {
+         for (int row = 0; row < 16; row++)
+         {
+             column[row] = intermediate[row * 16 + col];
+         }
+
+         ht.Cols(column, transformed);
+
+         for (int row = 0; row < 16; row++)
+         {
+             int pos = row * stride + col;
+             dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(transformed[row], 6));
+         }
+     }
+ }
+
+ // Idct
+ // Picks the 4x4 inverse DCT variant from 'eob': the single-coefficient version
+ // when eob is 1 or less, the full 16-coefficient version otherwise.
+ public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     if (eob <= 1)
+     {
+         Idct4x41Add(input, dest, stride);
+         return;
+     }
+
+     Idct4x416Add(input, dest, stride);
+ }
+
+ // Picks the 4x4 inverse WHT variant from 'eob': the single-coefficient version
+ // when eob is 1 or less, the full 16-coefficient version otherwise.
+ public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     if (eob <= 1)
+     {
+         Iwht4x41Add(input, dest, stride);
+         return;
+     }
+
+     Iwht4x416Add(input, dest, stride);
+ }
+
+ // Picks the 8x8 inverse DCT variant from 'eob'.
+ public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     // If dc is 1, then input[0] is the reconstructed value, do not need
+     // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
+
+     // The fewer non-zero dct coefficients there are, the simpler the calculation
+     // can be; eob selects the cheapest variant that still covers them.
+     if (eob == 1)
+     {
+         // DC only DCT coefficient
+         Idct8x81Add(input, dest, stride);
+         return;
+     }
+
+     if (eob <= 12)
+     {
+         Idct8x812Add(input, dest, stride);
+         return;
+     }
+
+     Idct8x864Add(input, dest, stride);
+ }
+
+ // Picks the 16x16 inverse DCT variant from 'eob'.
+ public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     // The fewer non-zero dct coefficients there are, the simpler the calculation
+     // can be; eob separates the different cases.
+     if (eob == 1)
+     {
+         // DC only DCT coefficient.
+         Idct16x161Add(input, dest, stride);
+         return;
+     }
+
+     if (eob <= 10)
+     {
+         Idct16x1610Add(input, dest, stride);
+         return;
+     }
+
+     if (eob <= 38)
+     {
+         Idct16x1638Add(input, dest, stride);
+         return;
+     }
+
+     Idct16x16256Add(input, dest, stride);
+ }
+
+ // Picks the 32x32 inverse DCT variant from 'eob'.
+ public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     if (eob == 1)
+     {
+         Idct32x321Add(input, dest, stride);
+         return;
+     }
+
+     if (eob <= 34)
+     {
+         // Non-zero coeff only in upper-left 8x8
+         Idct32x3234Add(input, dest, stride);
+         return;
+     }
+
+     if (eob <= 135)
+     {
+         // Non-zero coeff only in upper-left 16x16
+         Idct32x32135Add(input, dest, stride);
+         return;
+     }
+
+     Idct32x321024Add(input, dest, stride);
+ }
+
+ // Iht
+ // 4x4 inverse transform entry point: DCT in both directions goes through
+ // Idct4x4Add (which uses 'eob'), every other type through the hybrid transform.
+ public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     if (txType != TxType.DctDct)
+     {
+         Iht4x416Add(input, dest, stride, (int)txType);
+         return;
+     }
+
+     Idct4x4Add(input, dest, stride, eob);
+ }
+
+ // 8x8 inverse transform entry point: DCT in both directions goes through
+ // Idct8x8Add (which uses 'eob'), every other type through the hybrid transform.
+ public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
+ {
+     if (txType != TxType.DctDct)
+     {
+         Iht8x864Add(input, dest, stride, (int)txType);
+         return;
+     }
+
+     Idct8x8Add(input, dest, stride, eob);
+ }
+
+ // 16x16 inverse transform entry point: DCT in both directions goes through
+ // Idct16x16Add (which uses 'eob'), every other type through the hybrid transform.
+ public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
+     int stride, int eob)
+ {
+     if (txType != TxType.DctDct)
+     {
+         Iht16x16256Add(input, dest, stride, (int)txType);
+         return;
+     }
+
+     Idct16x16Add(input, dest, stride, eob);
+ }
+
+ private static readonly HighbdTransform2D[] HighbdIht4 = new HighbdTransform2D[]
+ {
+ new HighbdTransform2D(HighbdIdct4, HighbdIdct4), // DCT_DCT = 0
+ new HighbdTransform2D(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1
+ new HighbdTransform2D(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2
+ new HighbdTransform2D(HighbdIadst4, HighbdIadst4) // ADST_ADST = 3
+ };
+
+ // High bit depth 4x4 inverse hybrid transform. 'txType' indexes the HighbdIht4
+ // table to pick the row and column transforms. The result is rounded by 4 bits
+ // and added to 'dest' (rows are 'stride' elements apart) through HighbdClipPixelAdd.
+ public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
+ {
+     HighbdTransform2D ht = HighbdIht4[txType];
+     Span<int> intermediate = stackalloc int[4 * 4];
+     Span<int> column = stackalloc int[4];
+     Span<int> transformed = stackalloc int[4];
+
+     // Inverse transform row vectors.
+     Span<int> rowOut = intermediate;
+     for (int row = 0; row < 4; row++)
+     {
+         ht.Rows(input, rowOut, bd);
+         input = input.Slice(4);
+         rowOut = rowOut.Slice(4);
+     }
+
+     // Inverse transform column vectors.
+     for (int col = 0; col < 4; col++)
+     {
+         for (int row = 0; row < 4; row++)
+         {
+             column[row] = intermediate[row * 4 + col];
+         }
+
+         ht.Cols(column, transformed, bd);
+
+         for (int row = 0; row < 4; row++)
+         {
+             int pos = row * stride + col;
+             dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(transformed[row], 4), bd);
+         }
+     }
+ }
+
+ private static readonly HighbdTransform2D[] HighIht8 = new HighbdTransform2D[]
+ {
+ new HighbdTransform2D(HighbdIdct8, HighbdIdct8), // DCT_DCT = 0
+ new HighbdTransform2D(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1
+ new HighbdTransform2D(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2
+ new HighbdTransform2D(HighbdIadst8, HighbdIadst8) // ADST_ADST = 3
+ };
+
+ // High bit depth 8x8 inverse hybrid transform. 'txType' indexes the HighIht8
+ // table to pick the row and column transforms. The result is rounded by 5 bits
+ // and added to 'dest' (rows are 'stride' elements apart) through HighbdClipPixelAdd.
+ public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[8 * 8];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[8];
+     Span<int> tempOut = stackalloc int[8];
+     HighbdTransform2D ht = HighIht8[txType];
+
+     // Inverse transform row vectors.
+     for (i = 0; i < 8; ++i)
+     {
+         ht.Rows(input, outptr, bd);
+         input = input.Slice(8);
+         // Advance relative to the current position. Slicing from 'output' would
+         // pin the write position to row 1 for every row after the first.
+         outptr = outptr.Slice(8);
+     }
+
+     // Inverse transform column vectors.
+     for (i = 0; i < 8; ++i)
+     {
+         for (j = 0; j < 8; ++j)
+         {
+             tempIn[j] = output[j * 8 + i];
+         }
+
+         ht.Cols(tempIn, tempOut, bd);
+         for (j = 0; j < 8; ++j)
+         {
+             dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
+         }
+     }
+ }
+
+ private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[]
+ {
+ new HighbdTransform2D(HighbdIdct16, HighbdIdct16), // DCT_DCT = 0
+ new HighbdTransform2D(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
+ new HighbdTransform2D(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
+ new HighbdTransform2D(HighbdIadst16, HighbdIadst16) // ADST_ADST = 3
+ };
+
+ // High bit depth 16x16 inverse hybrid transform. 'txType' indexes the HighIht16
+ // table to pick the row and column transforms. The result is rounded by 6 bits
+ // and added to 'dest' (rows are 'stride' elements apart) through HighbdClipPixelAdd.
+ public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
+ {
+     int i, j;
+     Span<int> output = stackalloc int[16 * 16];
+     Span<int> outptr = output;
+     Span<int> tempIn = stackalloc int[16];
+     Span<int> tempOut = stackalloc int[16];
+     HighbdTransform2D ht = HighIht16[txType];
+
+     // Rows
+     for (i = 0; i < 16; ++i)
+     {
+         ht.Rows(input, outptr, bd);
+         input = input.Slice(16);
+         // Advance relative to the current position. Slicing from 'output' would
+         // pin the write position to row 1 for every row after the first.
+         outptr = outptr.Slice(16);
+     }
+
+     // Columns
+     for (i = 0; i < 16; ++i)
+     {
+         for (j = 0; j < 16; ++j)
+         {
+             tempIn[j] = output[j * 16 + i];
+         }
+
+         ht.Cols(tempIn, tempOut, bd);
+         for (j = 0; j < 16; ++j)
+         {
+             dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+         }
+     }
+ }
+
+ // Idct
+ // Picks the high bit depth 4x4 inverse DCT variant from 'eob': the
+ // single-coefficient version when eob is 1 or less, the full version otherwise.
+ public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (eob <= 1)
+     {
+         HighbdIdct4x41Add(input, dest, stride, bd);
+         return;
+     }
+
+     HighbdIdct4x416Add(input, dest, stride, bd);
+ }
+
+ // Picks the high bit depth 4x4 inverse WHT variant from 'eob': the
+ // single-coefficient version when eob is 1 or less, the full version otherwise.
+ public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (eob <= 1)
+     {
+         HighbdIwht4x41Add(input, dest, stride, bd);
+         return;
+     }
+
+     HighbdIwht4x416Add(input, dest, stride, bd);
+ }
+
+ // Picks the high bit depth 8x8 inverse DCT variant from 'eob'.
+ public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     // If dc is 1, then input[0] is the reconstructed value, do not need
+     // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
+
+     // The fewer non-zero dct coefficients there are, the simpler the calculation
+     // can be; eob selects the cheapest variant that still covers them.
+     if (eob == 1)
+     {
+         // DC only DCT coefficient
+         vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
+         return;
+     }
+
+     if (eob <= 12)
+     {
+         HighbdIdct8x812Add(input, dest, stride, bd);
+         return;
+     }
+
+     HighbdIdct8x864Add(input, dest, stride, bd);
+ }
+
+ // Picks the high bit depth 16x16 inverse DCT variant from 'eob'.
+ public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     // The fewer non-zero dct coefficients there are, the simpler the calculation
+     // can be; eob separates the different cases.
+     if (eob == 1)
+     {
+         // DC only DCT coefficient.
+         HighbdIdct16x161Add(input, dest, stride, bd);
+         return;
+     }
+
+     if (eob <= 10)
+     {
+         HighbdIdct16x1610Add(input, dest, stride, bd);
+         return;
+     }
+
+     if (eob <= 38)
+     {
+         HighbdIdct16x1638Add(input, dest, stride, bd);
+         return;
+     }
+
+     HighbdIdct16x16256Add(input, dest, stride, bd);
+ }
+
+ // Picks the high bit depth 32x32 inverse DCT variant from 'eob'.
+ public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (eob == 1)
+     {
+         HighbdIdct32x321Add(input, dest, stride, bd);
+         return;
+     }
+
+     if (eob <= 34)
+     {
+         // Non-zero coeff only in upper-left 8x8
+         HighbdIdct32x3234Add(input, dest, stride, bd);
+         return;
+     }
+
+     if (eob <= 135)
+     {
+         HighbdIdct32x32135Add(input, dest, stride, bd);
+         return;
+     }
+
+     HighbdIdct32x321024Add(input, dest, stride, bd);
+ }
+
+ // Iht
+ // High bit depth 4x4 inverse transform entry point: DCT in both directions goes
+ // through HighbdIdct4x4Add (which uses 'eob'), every other type through the
+ // hybrid transform.
+ public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (txType != TxType.DctDct)
+     {
+         HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
+         return;
+     }
+
+     HighbdIdct4x4Add(input, dest, stride, eob, bd);
+ }
+
+ // High bit depth 8x8 inverse transform entry point: DCT in both directions goes
+ // through HighbdIdct8x8Add (which uses 'eob'), every other type through the
+ // hybrid transform.
+ public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (txType != TxType.DctDct)
+     {
+         HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
+         return;
+     }
+
+     HighbdIdct8x8Add(input, dest, stride, eob, bd);
+ }
+
+ // High bit depth 16x16 inverse transform entry point: DCT in both directions
+ // goes through HighbdIdct16x16Add (which uses 'eob'), every other type through
+ // the hybrid transform.
+ public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
+ {
+     if (txType != TxType.DctDct)
+     {
+         HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
+         return;
+     }
+
+     HighbdIdct16x16Add(input, dest, stride, eob, bd);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs
new file mode 100644
index 00000000..baa0ab99
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Exception type thrown by InternalErrorInfo.InternalError.
+ class InternalErrorException : Exception
+ {
+ // Creates the exception with the given message.
+ public InternalErrorException(string message) : base(message)
+ {
+ }
+
+ // Creates the exception with the given message and the exception that caused it.
+ public InternalErrorException(string message, Exception innerException) : base(message, innerException)
+ {
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs
new file mode 100644
index 00000000..68e9cb4b
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Holds the error code of the last reported internal error.
+ internal struct InternalErrorInfo
+ {
+ public CodecErr ErrorCode;
+
+ // Stores 'error' in ErrorCode and then always throws an InternalErrorException
+ // carrying 'message'; this method never returns normally.
+ public void InternalError(CodecErr error, string message)
+ {
+ ErrorCode = error;
+
+ throw new InternalErrorException(message);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs b/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs
new file mode 100644
index 00000000..13006934
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs
@@ -0,0 +1,418 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class LoopFilter
+ {
+ public const int MaxLoopFilter = 63;
+
+ public const int MaxRefLfDeltas = 4;
+ public const int MaxModeLfDeltas = 2;
+
+ // 64 bit masks for left transform size. Each 1 represents a position where
+ // we should apply a loop filter across the left border of an 8x8 block
+ // boundary.
+ //
+ // In the case of TX_16X16 (low order byte first) we end up with
+ // a mask that looks like this:
+ //
+ // 10101010
+ // 10101010
+ // 10101010
+ // 10101010
+ // 10101010
+ // 10101010
+ // 10101010
+ // 10101010
+ //
+ // A loopfilter should be applied to every other 8x8 horizontally.
+ private static readonly ulong[] Left64X64TxformMask = new ulong[]
+ {
+ 0xffffffffffffffffUL, // TX_4X4
+ 0xffffffffffffffffUL, // TX_8x8
+ 0x5555555555555555UL, // TX_16x16
+ 0x1111111111111111UL, // TX_32x32
+ };
+
+ // 64 bit masks for above transform size. Each 1 represents a position where
+ // we should apply a loop filter across the top border of an 8x8 block
+ // boundary.
+ //
+ // In the case of TX_32x32 (low order byte first) we end up with
+ // a mask that looks like this:
+ //
+ // 11111111
+ // 00000000
+ // 00000000
+ // 00000000
+ // 11111111
+ // 00000000
+ // 00000000
+ // 00000000
+ //
+ // A loopfilter should be applied to every 4th row vertically.
+ private static readonly ulong[] Above64X64TxformMask = new ulong[]
+ {
+ 0xffffffffffffffffUL, // TX_4X4
+ 0xffffffffffffffffUL, // TX_8x8
+ 0x00ff00ff00ff00ffUL, // TX_16x16
+ 0x000000ff000000ffUL, // TX_32x32
+ };
+
+ // 64 bit masks for prediction sizes (left). Each 1 represents a position
+ // where left border of an 8x8 block. These are aligned to the right most
+ // appropriate bit, and then shifted into place.
+ //
+ // In the case of TX_16x32 -> ( low order byte first ) we end up with
+ // a mask that looks like this :
+ //
+ // 10000000
+ // 10000000
+ // 10000000
+ // 10000000
+ // 00000000
+ // 00000000
+ // 00000000
+ // 00000000
+ private static readonly ulong[] LeftPredictionMask = new ulong[]
+ {
+ 0x0000000000000001UL, // BLOCK_4X4,
+ 0x0000000000000001UL, // BLOCK_4X8,
+ 0x0000000000000001UL, // BLOCK_8X4,
+ 0x0000000000000001UL, // BLOCK_8X8,
+ 0x0000000000000101UL, // BLOCK_8X16,
+ 0x0000000000000001UL, // BLOCK_16X8,
+ 0x0000000000000101UL, // BLOCK_16X16,
+ 0x0000000001010101UL, // BLOCK_16X32,
+ 0x0000000000000101UL, // BLOCK_32X16,
+ 0x0000000001010101UL, // BLOCK_32X32,
+ 0x0101010101010101UL, // BLOCK_32X64,
+ 0x0000000001010101UL, // BLOCK_64X32,
+ 0x0101010101010101UL, // BLOCK_64X64
+ };
+
+ // 64 bit mask to shift and set for each prediction size.
+ private static readonly ulong[] AbovePredictionMask = new ulong[]
+ {
+ 0x0000000000000001UL, // BLOCK_4X4
+ 0x0000000000000001UL, // BLOCK_4X8
+ 0x0000000000000001UL, // BLOCK_8X4
+ 0x0000000000000001UL, // BLOCK_8X8
+ 0x0000000000000001UL, // BLOCK_8X16,
+ 0x0000000000000003UL, // BLOCK_16X8
+ 0x0000000000000003UL, // BLOCK_16X16
+ 0x0000000000000003UL, // BLOCK_16X32,
+ 0x000000000000000fUL, // BLOCK_32X16,
+ 0x000000000000000fUL, // BLOCK_32X32,
+ 0x000000000000000fUL, // BLOCK_32X64,
+ 0x00000000000000ffUL, // BLOCK_64X32,
+ 0x00000000000000ffUL, // BLOCK_64X64
+ };
+
+ // 64 bit mask to shift and set for each prediction size. A bit is set for
+ // each 8x8 block that would be in the left most block of the given block
+ // size in the 64x64 block.
+ private static readonly ulong[] SizeMask = new ulong[]
+ {
+ 0x0000000000000001UL, // BLOCK_4X4
+ 0x0000000000000001UL, // BLOCK_4X8
+ 0x0000000000000001UL, // BLOCK_8X4
+ 0x0000000000000001UL, // BLOCK_8X8
+ 0x0000000000000101UL, // BLOCK_8X16,
+ 0x0000000000000003UL, // BLOCK_16X8
+ 0x0000000000000303UL, // BLOCK_16X16
+ 0x0000000003030303UL, // BLOCK_16X32,
+ 0x0000000000000f0fUL, // BLOCK_32X16,
+ 0x000000000f0f0f0fUL, // BLOCK_32X32,
+ 0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64,
+ 0x00000000ffffffffUL, // BLOCK_64X32,
+ 0xffffffffffffffffUL, // BLOCK_64X64
+ };
+
+ // These are used for masking the left and above borders.
+ private const ulong LeftBorder = 0x1111111111111111UL;
+ private const ulong AboveBorder = 0x000000ff000000ffUL;
+
+ // 16 bit masks for uv transform sizes.
+ private static readonly ushort[] Left64X64TxformMaskUv = new ushort[]
+ {
+ 0xffff, // TX_4X4
+ 0xffff, // TX_8x8
+ 0x5555, // TX_16x16
+ 0x1111, // TX_32x32
+ };
+
+ private static readonly ushort[] Above64X64TxformMaskUv = new ushort[]
+ {
+ 0xffff, // TX_4X4
+ 0xffff, // TX_8x8
+ 0x0f0f, // TX_16x16
+ 0x000f, // TX_32x32
+ };
+
+ // 16 bit left mask to shift and set for each uv prediction size.
+ private static readonly ushort[] LeftPredictionMaskUv = new ushort[]
+ {
+ 0x0001, // BLOCK_4X4,
+ 0x0001, // BLOCK_4X8,
+ 0x0001, // BLOCK_8X4,
+ 0x0001, // BLOCK_8X8,
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8,
+ 0x0001, // BLOCK_16X16,
+ 0x0011, // BLOCK_16X32,
+ 0x0001, // BLOCK_32X16,
+ 0x0011, // BLOCK_32X32,
+ 0x1111, // BLOCK_32X64
+ 0x0011, // BLOCK_64X32,
+ 0x1111, // BLOCK_64X64
+ };
+
+ // 16 bit above mask to shift and set for uv each prediction size.
+ private static readonly ushort[] AbovePredictionMaskUv = new ushort[]
+ {
+ 0x0001, // BLOCK_4X4
+ 0x0001, // BLOCK_4X8
+ 0x0001, // BLOCK_8X4
+ 0x0001, // BLOCK_8X8
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8
+ 0x0001, // BLOCK_16X16
+ 0x0001, // BLOCK_16X32,
+ 0x0003, // BLOCK_32X16,
+ 0x0003, // BLOCK_32X32,
+ 0x0003, // BLOCK_32X64,
+ 0x000f, // BLOCK_64X32,
+ 0x000f, // BLOCK_64X64
+ };
+
+ // 16 bit mask to shift and set for each uv prediction size
+ private static readonly ushort[] SizeMaskUv = new ushort[]
+ {
+ 0x0001, // BLOCK_4X4
+ 0x0001, // BLOCK_4X8
+ 0x0001, // BLOCK_8X4
+ 0x0001, // BLOCK_8X8
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8
+ 0x0001, // BLOCK_16X16
+ 0x0011, // BLOCK_16X32,
+ 0x0003, // BLOCK_32X16,
+ 0x0033, // BLOCK_32X32,
+ 0x3333, // BLOCK_32X64,
+ 0x00ff, // BLOCK_64X32,
+ 0xffff, // BLOCK_64X64
+ };
+
+ private const ushort LeftBorderUv = 0x1111;
+ private const ushort AboveBorderUv = 0x000f;
+
+ private static readonly int[] ModeLfLut = new int[]
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
+ 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
+ };
+
+ // Looks up the filter level for a block in lfiN.Lvl, indexed by the block's
+ // segment id, its first reference frame and the ModeLfLut entry for its mode.
+ private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
+ {
+     int modeIndex = ModeLfLut[(int)mi.Mode];
+
+     return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][modeIndex];
+ }
+
+ // Returns a reference to the LoopFilterMask entry covering (miRow, miCol).
+ // Both coordinates are divided by 8, and rows are lf.LfmStride entries apart.
+ private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
+ {
+     int maskCol = miCol >> 3;
+     int maskRow = miRow >> 3;
+     int index = maskCol + (maskRow * lf.LfmStride);
+
+     return ref lf.Lfm[index];
+ }
+
+ // 8x8 blocks in a superblock, indexed as [rowInSb][colInSb]. A "1" represents
+ // the first block in a 16x16 or greater area (even row and even column).
+ // BuildMask only updates the uv masks for the positions marked with "1".
+ private static readonly byte[][] FirstBlockIn16x16 = new byte[][]
+ {
+ new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
+ new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
+ new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
+ new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }
+ };
+
+        // Sets up the bit masks for the block located at (miRow, miCol) inside its
+        // 64x64 region, accumulating them into the LoopFilterMask returned by GetLfm.
+        // The block's filter level is also written to LflY: bh runs of bw entries,
+        // each run starting 8 entries after the previous one.
+        // Nothing is modified when the filter level of the block is zero.
+        public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
+        {
+            int sizeIdx = (int)mi.SbType;
+            TxSize lumaTx = mi.TxSize;
+            byte level = GetFilterLevel(ref cm.LfInfo, ref mi);
+            TxSize chromaTx = Luts.UvTxsizeLookup[sizeIdx][(int)lumaTx][1][1];
+
+            ref LoopFilterMask mask = ref GetLfm(ref cm.Lf, miRow, miCol);
+            ref ulong leftLuma = ref mask.LeftY[(int)lumaTx];
+            ref ulong aboveLuma = ref mask.AboveY[(int)lumaTx];
+            ref ulong innerLuma = ref mask.Int4x4Y;
+            ref ushort leftChroma = ref mask.LeftUv[(int)chromaTx];
+            ref ushort aboveChroma = ref mask.AboveUv[(int)chromaTx];
+            ref ushort innerChroma = ref mask.Int4x4Uv;
+
+            // Position of the block inside the superblock: the luma masks use an 8x8 bit grid,
+            // the chroma masks a 4x4 one.
+            int sbRow = miRow & 7;
+            int sbCol = miCol & 7;
+            int lumaShift = (sbRow << 3) + sbCol;
+            int chromaShift = ((sbRow >> 1) << 2) + (sbCol >> 1);
+            bool hasChroma = FirstBlockIn16x16[sbRow][sbCol] != 0;
+
+            if (level == 0)
+            {
+                return;
+            }
+
+            for (int row = 0, offset = lumaShift; row < bh; row++, offset += 8)
+            {
+                MemoryMarshal.CreateSpan(ref mask.LflY[offset], 64 - offset).Slice(0, bw).Fill(level);
+            }
+
+            // Mark the prediction block edges. For a 32x16 block, for instance:
+            //   above = 1111      left = 1000
+            //           0000             1000
+            // NOTE: the left most position is the low bit, so ( 1000 ) above is stored as 1, not 8.
+            //
+            // The U and V masks work on a 16 bit scale.
+            aboveLuma |= AbovePredictionMask[sizeIdx] << lumaShift;
+            leftLuma |= LeftPredictionMask[sizeIdx] << lumaShift;
+
+            if (hasChroma)
+            {
+                aboveChroma |= (ushort)(AbovePredictionMaskUv[sizeIdx] << chromaShift);
+                leftChroma |= (ushort)(LeftPredictionMaskUv[sizeIdx] << chromaShift);
+            }
+
+            // A block without coefficients that is not intra gets no filtering
+            // on its transform edges.
+            if (mi.Skip != 0 && mi.IsInterBlock())
+            {
+                return;
+            }
+
+            // The transform size masks are laid out for a 64x64 prediction block: restrict them
+            // to the size of the current block and then shift them into place.
+            var lumaSize = SizeMask[sizeIdx];
+            ushort chromaSize = SizeMaskUv[sizeIdx];
+
+            aboveLuma |= (lumaSize & Above64X64TxformMask[(int)lumaTx]) << lumaShift;
+            leftLuma |= (lumaSize & Left64X64TxformMask[(int)lumaTx]) << lumaShift;
+
+            if (hasChroma)
+            {
+                aboveChroma |= (ushort)((chromaSize & Above64X64TxformMaskUv[(int)chromaTx]) << chromaShift);
+                leftChroma |= (ushort)((chromaSize & Left64X64TxformMaskUv[(int)chromaTx]) << chromaShift);
+            }
+
+            // Internal 4x4 boundaries differ from the 4x4 boundaries on the outside edge of an 8x8:
+            // they can be skipped and do not depend on the prediction block size.
+            if (lumaTx == TxSize.Tx4x4)
+            {
+                innerLuma |= lumaSize << lumaShift;
+            }
+
+            if (hasChroma && chromaTx == TxSize.Tx4x4)
+            {
+                innerChroma |= (ushort)((chromaSize & 0xffff) << chromaShift);
+            }
+        }
+
+        // Overwrites the loop filter masks of the frame with default LoopFilterMask values.
+        // Does nothing when the frame filter level is zero.
+        public static unsafe void ResetLfm(ref Vp9Common cm)
+        {
+            if (cm.Lf.FilterLevel == 0)
+            {
+                return;
+            }
+
+            // (MiRows + MiBlockSize - 1) >> 3 mask rows, each holding LfmStride masks.
+            int maskRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;
+            int maskCount = maskRows * cm.Lf.LfmStride;
+
+            MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), maskCount);
+        }
+
+        // Fills the Lim and Mblim thresholds of every filter level (0 to MaxLoopFilter inclusive)
+        // according to the given sharpness level.
+        private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
+        {
+            // The level is shifted right by one bit when sharpness is above 0,
+            // and by one more when it is above 4.
+            int shift = (sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0);
+
+            for (int level = 0; level <= MaxLoopFilter; level++)
+            {
+                int insideLimit = level >> shift;
+
+                // A non zero sharpness also caps the limit at (9 - sharpness).
+                if (sharpnessLvl > 0)
+                {
+                    insideLimit = Math.Min(insideLimit, 9 - sharpnessLvl);
+                }
+
+                // The limit is never allowed to drop below 1.
+                insideLimit = Math.Max(insideLimit, 1);
+
+                lfi.Lfthr[level].Lim.ToSpan().Fill((byte)insideLimit);
+                lfi.Lfthr[level].Mblim.ToSpan().Fill((byte)((2 * (level + 2)) + insideLimit));
+            }
+        }
+
+        // Prepares the loop filter state of a frame: refreshes the sharpness dependent
+        // thresholds when the sharpness level changed, then fills the filter level table
+        // (lfi.Lvl) of every segment starting from defaultFiltLvl.
+        public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
+        {
+            ref LoopFilterInfoN lfi = ref cm.LfInfo;
+            ref Types.LoopFilter lf = ref cm.Lf;
+            ref Segmentation seg = ref cm.Seg;
+
+            // scale is the multiplier applied to the reference and mode deltas:
+            // 1 when defaultFiltLvl is between 0 and 31, 2 when it is between 32 and 63
+            int scale = 1 << (defaultFiltLvl >> 5);
+
+            // Update limits if sharpness has changed
+            if (lf.LastSharpnessLevel != lf.SharpnessLevel)
+            {
+                UpdateSharpness(ref lfi, lf.SharpnessLevel);
+                lf.LastSharpnessLevel = lf.SharpnessLevel;
+            }
+
+            for (int segment = 0; segment < Constants.MaxSegments; segment++)
+            {
+                int baseLevel = defaultFiltLvl;
+
+                if (seg.IsSegFeatureActive(segment, SegLvlFeatures.SegLvlAltLf) != 0)
+                {
+                    // The segment data either replaces the default level or is added to it.
+                    int altLf = seg.GetSegData(segment, SegLvlFeatures.SegLvlAltLf);
+                    int requested = seg.AbsDelta == Constants.SegmentAbsData ? altLf : defaultFiltLvl + altLf;
+                    baseLevel = Math.Clamp(requested, 0, MaxLoopFilter);
+                }
+
+                if (lf.ModeRefDeltaEnabled)
+                {
+                    // Intra frame: only the reference delta applies, stored in mode slot 0.
+                    int intraLevel = baseLevel + (lf.RefDeltas[Constants.IntraFrame] * scale);
+                    lfi.Lvl[segment][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLevel, 0, MaxLoopFilter);
+
+                    for (int refFrame = Constants.LastFrame; refFrame < Constants.MaxRefFrames; refFrame++)
+                    {
+                        int refLevel = baseLevel + (lf.RefDeltas[refFrame] * scale);
+
+                        for (int mode = 0; mode < MaxModeLfDeltas; mode++)
+                        {
+                            int interLevel = refLevel + (lf.ModeDeltas[mode] * scale);
+                            lfi.Lvl[segment][refFrame][mode] = (byte)Math.Clamp(interLevel, 0, MaxLoopFilter);
+                        }
+                    }
+                }
+                else
+                {
+                    // We could get rid of this if we assume that deltas are set to
+                    // zero when not in use; encoder always uses deltas
+                    MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segment].ToSpan()).Fill((byte)baseLevel);
+                }
+            }
+        }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs b/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs
new file mode 100644
index 00000000..f703d214
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs
@@ -0,0 +1,1612 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class Luts
+ {
+ public static readonly byte[] SizeGroupLookup = new byte[]
+ {
+ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3
+ };
+
+ public static readonly BlockSize[][] SubsizeLookup = new BlockSize[][]
+ {
+ new BlockSize[]
+ { // PARTITION_NONE
+ BlockSize.Block4x4, BlockSize.Block4x8, BlockSize.Block8x4, BlockSize.Block8x8, BlockSize.Block8x16, BlockSize.Block16x8,
+ BlockSize.Block16x16, BlockSize.Block16x32, BlockSize.Block32x16, BlockSize.Block32x32, BlockSize.Block32x64,
+ BlockSize.Block64x32, BlockSize.Block64x64
+ },
+ new BlockSize[]
+ { // PARTITION_HORZ
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block8x4, BlockSize.BlockInvalid,
+ BlockSize.BlockInvalid, BlockSize.Block16x8, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block32x16,
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block64x32
+ },
+ new BlockSize[]
+ { // PARTITION_VERT
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block4x8, BlockSize.BlockInvalid,
+ BlockSize.BlockInvalid, BlockSize.Block8x16, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block16x32,
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block32x64
+ },
+ new BlockSize[]
+ { // PARTITION_SPLIT
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block4x4, BlockSize.BlockInvalid,
+ BlockSize.BlockInvalid, BlockSize.Block8x8, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block16x16,
+ BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block32x32
+ }
+ };
+
+ public static readonly TxSize[] MaxTxSizeLookup = new TxSize[]
+ {
+ TxSize.Tx4x4, TxSize.Tx4x4, TxSize.Tx4x4, TxSize.Tx8x8, TxSize.Tx8x8, TxSize.Tx8x8, TxSize.Tx16x16,
+ TxSize.Tx16x16, TxSize.Tx16x16, TxSize.Tx32x32, TxSize.Tx32x32, TxSize.Tx32x32, TxSize.Tx32x32
+ };
+
+ public static readonly TxSize[] TxModeToBiggestTxSize = new TxSize[]
+ {
+ TxSize.Tx4x4, // ONLY_4X4
+ TxSize.Tx8x8, // ALLOW_8X8
+ TxSize.Tx16x16, // ALLOW_16X16
+ TxSize.Tx32x32, // ALLOW_32X32
+ TxSize.Tx32x32, // TX_MODE_SELECT
+ };
+
+ public static readonly BlockSize[][][] SsSizeLookup = new BlockSize[][][]
+ {
+ // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
+ // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block4x4, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.BlockInvalid } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block4x8, BlockSize.Block4x4 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.BlockInvalid } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block8x4, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block4x4, BlockSize.BlockInvalid } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block8x8, BlockSize.Block8x4 }, new BlockSize[] { BlockSize.Block4x8, BlockSize.Block4x4 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block8x16, BlockSize.Block8x8 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block4x8 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block16x8, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block8x8, BlockSize.Block8x4 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block16x16, BlockSize.Block16x8 }, new BlockSize[] { BlockSize.Block8x16, BlockSize.Block8x8 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block16x32, BlockSize.Block16x16 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block8x16 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block32x16, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block16x16, BlockSize.Block16x8 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block32x32, BlockSize.Block32x16 }, new BlockSize[] { BlockSize.Block16x32, BlockSize.Block16x16 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block32x64, BlockSize.Block32x32 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block16x32 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block64x32, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block32x32, BlockSize.Block32x16 } },
+ new BlockSize[][] { new BlockSize[] { BlockSize.Block64x64, BlockSize.Block64x32 }, new BlockSize[] { BlockSize.Block32x64, BlockSize.Block32x32 } },
+ };
+
+ public static readonly TxSize[][][][] UvTxsizeLookup = new TxSize[][][][]
+ {
+ // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1
+ // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1
+ new TxSize[][][]
+ {
+ // BLOCK_4X4
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_4X8
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_8X4
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_8X8
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_8X16
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_16X8
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_16X16
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_16X32
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_32X16
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_32X32
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_32X64
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx32x32 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_64X32
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 } },
+ },
+ new TxSize[][][]
+ {
+ // BLOCK_64X64
+ new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } },
+ new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx32x32 }, new TxSize[] { TxSize.Tx32x32, TxSize.Tx32x32 } },
+ },
+ };
+
+        // Pair of partition context values, one for the above context and one for the left context.
+        public struct PartitionContextPair
+        {
+            public sbyte Above;
+            public sbyte Left;
+
+            // Stores both values exactly as given.
+            public PartitionContextPair(sbyte above, sbyte left)
+            {
+                this.Above = above;
+                this.Left = left;
+            }
+        }
+
+ // One { above, left } pair of 4 bit fields per block size. Each bit set to 1
+ // represents a block size partition: 1111 means we split 64x64, 32x32, 16x16
+ // and 8x8, while 1000 means we just split the 64x64 to 32x32.
+ public static readonly PartitionContextPair[] PartitionContextLookup = new PartitionContextPair[]
+ {
+ new PartitionContextPair(15, 15), // 4X4 - {0b1111, 0b1111}
+ new PartitionContextPair(15, 14), // 4X8 - {0b1111, 0b1110}
+ new PartitionContextPair(14, 15), // 8X4 - {0b1110, 0b1111}
+ new PartitionContextPair(14, 14), // 8X8 - {0b1110, 0b1110}
+ new PartitionContextPair(14, 12), // 8X16 - {0b1110, 0b1100}
+ new PartitionContextPair(12, 14), // 16X8 - {0b1100, 0b1110}
+ new PartitionContextPair(12, 12), // 16X16 - {0b1100, 0b1100}
+ new PartitionContextPair(12, 8), // 16X32 - {0b1100, 0b1000}
+ new PartitionContextPair(8, 12), // 32X16 - {0b1000, 0b1100}
+ new PartitionContextPair(8, 8), // 32X32 - {0b1000, 0b1000}
+ new PartitionContextPair(8, 0), // 32X64 - {0b1000, 0b0000}
+ new PartitionContextPair(0, 8), // 64X32 - {0b0000, 0b1000}
+ new PartitionContextPair(0, 0), // 64X64 - {0b0000, 0b0000}
+ };
+
+ // Filter
+
+ private static readonly Array8<short>[] BilinearFilters = new Array8<short>[]
+ {
+ NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(0, 0, 0, 120, 8, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 112, 16, 0, 0, 0), NewArray8Short(0, 0, 0, 104, 24, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 96, 32, 0, 0, 0), NewArray8Short(0, 0, 0, 88, 40, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 80, 48, 0, 0, 0), NewArray8Short(0, 0, 0, 72, 56, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 64, 64, 0, 0, 0), NewArray8Short(0, 0, 0, 56, 72, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 48, 80, 0, 0, 0), NewArray8Short(0, 0, 0, 40, 88, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 32, 96, 0, 0, 0), NewArray8Short(0, 0, 0, 24, 104, 0, 0, 0),
+ NewArray8Short(0, 0, 0, 16, 112, 0, 0, 0), NewArray8Short(0, 0, 0, 8, 120, 0, 0, 0)
+ };
+
+ // Lagrangian interpolation filter
+ private static readonly Array8<short>[] SubPelFilters8 = new Array8<short>[]
+ {
+ NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(0, 1, -5, 126, 8, -3, 1, 0),
+ NewArray8Short(-1, 3, -10, 122, 18, -6, 2, 0), NewArray8Short(-1, 4, -13, 118, 27, -9, 3, -1),
+ NewArray8Short(-1, 4, -16, 112, 37, -11, 4, -1), NewArray8Short(-1, 5, -18, 105, 48, -14, 4, -1),
+ NewArray8Short(-1, 5, -19, 97, 58, -16, 5, -1), NewArray8Short(-1, 6, -19, 88, 68, -18, 5, -1),
+ NewArray8Short(-1, 6, -19, 78, 78, -19, 6, -1), NewArray8Short(-1, 5, -18, 68, 88, -19, 6, -1),
+ NewArray8Short(-1, 5, -16, 58, 97, -19, 5, -1), NewArray8Short(-1, 4, -14, 48, 105, -18, 5, -1),
+ NewArray8Short(-1, 4, -11, 37, 112, -16, 4, -1), NewArray8Short(-1, 3, -9, 27, 118, -13, 4, -1),
+ NewArray8Short(0, 2, -6, 18, 122, -10, 3, -1), NewArray8Short(0, 1, -3, 8, 126, -5, 1, 0)
+ };
+
+ // DCT based filter
+ private static readonly Array8<short>[] SubPelFilters8S = new Array8<short>[]
+ {
+ NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(-1, 3, -7, 127, 8, -3, 1, 0),
+ NewArray8Short(-2, 5, -13, 125, 17, -6, 3, -1), NewArray8Short(-3, 7, -17, 121, 27, -10, 5, -2),
+ NewArray8Short(-4, 9, -20, 115, 37, -13, 6, -2), NewArray8Short(-4, 10, -23, 108, 48, -16, 8, -3),
+ NewArray8Short(-4, 10, -24, 100, 59, -19, 9, -3), NewArray8Short(-4, 11, -24, 90, 70, -21, 10, -4),
+ NewArray8Short(-4, 11, -23, 80, 80, -23, 11, -4), NewArray8Short(-4, 10, -21, 70, 90, -24, 11, -4),
+ NewArray8Short(-3, 9, -19, 59, 100, -24, 10, -4), NewArray8Short(-3, 8, -16, 48, 108, -23, 10, -4),
+ NewArray8Short(-2, 6, -13, 37, 115, -20, 9, -4), NewArray8Short(-2, 5, -10, 27, 121, -17, 7, -3),
+ NewArray8Short(-1, 3, -6, 17, 125, -13, 5, -2), NewArray8Short(0, 1, -3, 8, 127, -7, 3, -1)
+ };
+
+ // freqmultiplier = 0.5
+ private static readonly Array8<short>[] SubPelFilters8Lp = new Array8<short>[]
+ {
+ NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(-3, -1, 32, 64, 38, 1, -3, 0),
+ NewArray8Short(-2, -2, 29, 63, 41, 2, -3, 0), NewArray8Short(-2, -2, 26, 63, 43, 4, -4, 0),
+ NewArray8Short(-2, -3, 24, 62, 46, 5, -4, 0), NewArray8Short(-2, -3, 21, 60, 49, 7, -4, 0),
+ NewArray8Short(-1, -4, 18, 59, 51, 9, -4, 0), NewArray8Short(-1, -4, 16, 57, 53, 12, -4, -1),
+ NewArray8Short(-1, -4, 14, 55, 55, 14, -4, -1), NewArray8Short(-1, -4, 12, 53, 57, 16, -4, -1),
+ NewArray8Short(0, -4, 9, 51, 59, 18, -4, -1), NewArray8Short(0, -4, 7, 49, 60, 21, -3, -2),
+ NewArray8Short(0, -4, 5, 46, 62, 24, -3, -2), NewArray8Short(0, -4, 4, 43, 63, 26, -2, -2),
+ NewArray8Short(0, -3, 2, 41, 63, 29, -2, -2), NewArray8Short(0, -3, 1, 38, 64, 32, -1, -3)
+ };
+
+        // Packs eight values into an Array8<short>, keeping the argument order
+        // (e0 ends up at index 0, e7 at index 7).
+        private static Array8<short> NewArray8Short(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7)
+        {
+            var packed = new Array8<short>();
+
+            packed[0] = e0;
+            packed[1] = e1;
+            packed[2] = e2;
+            packed[3] = e3;
+
+            packed[4] = e4;
+            packed[5] = e5;
+            packed[6] = e6;
+            packed[7] = e7;
+
+            return packed;
+        }
+
+ // Filter kernel sets, in the order SubPelFilters8, SubPelFilters8Lp, SubPelFilters8S, BilinearFilters.
+ // NOTE(review): the index is presumably the interpolation filter type - confirm against the callers.
+ // This field has to stay below the four tables it references: static field initializers
+ // run in textual order, so declaring it above them would store null entries.
+ public static readonly Array8<short>[][] Vp9FilterKernels = new Array8<short>[][]
+ {
+ SubPelFilters8, SubPelFilters8Lp, SubPelFilters8S, BilinearFilters
+ };
+
+ // Scan
+
+ private static readonly short[] DefaultScan4X4 = new short[]
+ {
+ 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15,
+ };
+
+ private static readonly short[] ColScan4X4 = new short[]
+ {
+ 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15,
+ };
+
+ private static readonly short[] RowScan4X4 = new short[]
+ {
+ 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15,
+ };
+
+ private static readonly short[] DefaultScan8X8 = new short[]
+ {
+ 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26,
+ 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6,
+ 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30,
+ 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
+ };
+
+ private static readonly short[] ColScan8X8 = new short[]
+ {
+ 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3,
+ 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28,
+ 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7,
+ 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63,
+ };
+
+ private static readonly short[] RowScan8X8 = new short[]
+ {
+ 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12,
+ 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35,
+ 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51,
+ 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63,
+ };
+
+ private static readonly short[] DefaultScan16X16 = new short[]
+ {
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65,
+ 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112,
+ 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7,
+ 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116,
+ 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147,
+ 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192,
+ 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11,
+ 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43,
+ 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137,
+ 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212,
+ 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45,
+ 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15,
+ 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216,
+ 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232,
+ 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219,
+ 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
+ 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
+ 255,
+ };
+
+ private static readonly short[] ColScan16X16 = new short[]
+ {
+ 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18,
+ 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51,
+ 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161,
+ 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6,
+ 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70,
+ 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55,
+ 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72,
+ 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57,
+ 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198,
+ 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184,
+ 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75,
+ 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76,
+ 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61,
+ 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172,
+ 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126,
+ 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
+ 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
+ 255,
+ };
+
+ private static readonly short[] RowScan16X16 = new short[]
+ {
+ 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48,
+ 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9,
+ 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54,
+ 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26,
+ 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72,
+ 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145,
+ 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161,
+ 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45,
+ 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122,
+ 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209,
+ 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183,
+ 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78,
+ 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243,
+ 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245,
+ 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188,
+ 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
+ 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
+ 255,
+ };
+
+ private static readonly short[] DefaultScan32X32 = new short[]
+ {
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66,
+ 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5,
+ 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6,
+ 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320,
+ 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71,
+ 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
+ 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104,
+ 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418,
+ 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137,
+ 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358,
+ 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75,
+ 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
+ 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329,
+ 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330,
+ 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299,
+ 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363,
+ 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111,
+ 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19,
+ 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672,
+ 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673,
+ 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146,
+ 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457,
+ 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54,
+ 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303,
+ 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366,
+ 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644,
+ 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397,
+ 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584,
+ 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57,
+ 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523,
+ 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120,
+ 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
+ 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
+ 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
+ 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152,
+ 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277,
+ 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650,
+ 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92,
+ 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
+ 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279,
+ 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838,
+ 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311,
+ 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622,
+ 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
+ 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901,
+ 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964,
+ 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437,
+ 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841,
+ 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438,
+ 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873,
+ 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346,
+ 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502,
+ 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904,
+ 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533,
+ 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658,
+ 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907,
+ 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504,
+ 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815,
+ 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002,
+ 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879,
+ 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785,
+ 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724,
+ 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880,
+ 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
+ 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602,
+ 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510,
+ 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820,
+ 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697,
+ 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760,
+ 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
+ 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638,
+ 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700,
+ 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950,
+ 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889,
+ 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891,
+ 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
+ 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862,
+ 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926,
+ 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
+ };
+
+ // Neighborhood 2-tuples for various scans and block sizes,
+ // in {top, left} order for each position in corresponding scan order.
+ // Each neighbor table is a flat list of pairs and ends with a trailing 0, 0 pair.
+ //
+ // Neighbor pairs for the default 4x4 scan (34 values, i.e. 17 pairs).
+ // Bundled with DefaultScan4X4 and Vp9DefaultIscan4X4 in the ScanOrder tables.
+ private static readonly short[] DefaultScan4X4Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2,
+ 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the column 4x4 scan (34 values, i.e. 17 pairs,
+ // the last of which is 0, 0). Bundled with ColScan4X4 and Vp9ColIscan4X4 in Vp9ScanOrders.
+ private static readonly short[] ColScan4X4Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9,
+ 9, 2, 2, 6, 6, 2, 2, 3, 3, 10, 10, 7, 7, 11, 11, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the row 4x4 scan (34 values, i.e. 17 pairs,
+ // the last of which is 0, 0). Bundled with RowScan4X4 and Vp9RowIscan4X4 in Vp9ScanOrders.
+ private static readonly short[] RowScan4X4Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8,
+ 8, 6, 6, 8, 8, 9, 9, 12, 12, 10, 10, 13, 13, 14, 14, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the column 8x8 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with ColScan8X8 and Vp9ColIscan8X8 in Vp9ScanOrders.
+ private static readonly short[] ColScan8X8Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32,
+ 32, 17, 17, 2, 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3,
+ 48, 48, 11, 11, 26, 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12,
+ 12, 49, 49, 42, 42, 20, 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43,
+ 13, 13, 36, 36, 5, 5, 21, 21, 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6,
+ 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, 30, 45, 45, 15, 15, 38, 38, 23, 23,
+ 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, 47, 47, 55, 55, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the row 8x8 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with RowScan8X8 and Vp9RowIscan8X8 in Vp9ScanOrders.
+ private static readonly short[] RowScan8X8Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16,
+ 16, 10, 10, 16, 16, 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24,
+ 5, 5, 12, 12, 19, 19, 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27,
+ 27, 40, 40, 13, 13, 34, 34, 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21,
+ 42, 42, 14, 14, 48, 48, 36, 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50,
+ 50, 57, 57, 44, 44, 37, 37, 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59,
+ 38, 38, 60, 60, 46, 46, 53, 53, 54, 54, 61, 61, 62, 62, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the default 8x8 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with DefaultScan8X8 and Vp9DefaultIscan8X8 in the ScanOrder tables.
+ private static readonly short[] DefaultScan8X8Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2,
+ 2, 10, 17, 17, 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32,
+ 4, 11, 26, 33, 19, 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34,
+ 41, 20, 27, 13, 20, 5, 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6,
+ 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22,
+ 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45,
+ 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the column 16x16 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with ColScan16X16 and Vp9ColIscan16X16 in Vp9ScanOrders.
+ private static readonly short[] ColScan16X16Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64,
+ 64, 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2,
+ 65, 65, 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128,
+ 128, 3, 3, 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35,
+ 113, 113, 3, 3, 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67,
+ 67, 20, 20, 83, 83, 114, 114, 36, 36, 176, 176, 4, 4, 145, 145,
+ 52, 52, 99, 99, 5, 5, 130, 130, 68, 68, 192, 192, 161, 161, 21,
+ 21, 115, 115, 84, 84, 37, 37, 146, 146, 208, 208, 53, 53, 5, 5,
+ 100, 100, 177, 177, 131, 131, 69, 69, 6, 6, 224, 224, 116, 116, 22,
+ 22, 162, 162, 85, 85, 147, 147, 38, 38, 193, 193, 101, 101, 54, 54,
+ 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, 209, 209, 7, 7, 117,
+ 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, 225, 225, 39, 39,
+ 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, 71, 71, 210,
+ 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, 56, 56,
+ 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, 72,
+ 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41,
+ 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151,
+ 151, 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182,
+ 10, 10, 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121,
+ 121, 213, 213, 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168,
+ 10, 10, 90, 90, 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27,
+ 27, 199, 199, 43, 43, 184, 184, 122, 122, 169, 169, 230, 230, 59, 59,
+ 11, 11, 75, 75, 138, 138, 200, 200, 215, 215, 91, 91, 12, 12, 28,
+ 28, 185, 185, 107, 107, 154, 154, 44, 44, 231, 231, 216, 216, 60, 60,
+ 123, 123, 12, 12, 76, 76, 201, 201, 170, 170, 232, 232, 139, 139, 92,
+ 92, 13, 13, 108, 108, 29, 29, 186, 186, 217, 217, 155, 155, 45, 45,
+ 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, 77, 77, 14, 14, 171,
+ 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, 46, 46, 156, 156,
+ 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, 31, 31, 172,
+ 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, 110, 110,
+ 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, 142,
+ 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, 220,
+ 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, 175,
+ 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, 223,
+ 239, 239, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the row 16x16 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with RowScan16X16 and Vp9RowIscan16X16 in Vp9ScanOrders.
+ private static readonly short[] RowScan16X16Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17,
+ 17, 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32,
+ 19, 19, 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7,
+ 7, 35, 35, 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8,
+ 65, 65, 51, 51, 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52,
+ 52, 23, 23, 81, 81, 67, 67, 80, 80, 38, 38, 10, 10, 53, 53,
+ 82, 82, 96, 96, 68, 68, 24, 24, 97, 97, 83, 83, 39, 39, 96,
+ 96, 54, 54, 11, 11, 69, 69, 98, 98, 112, 112, 84, 84, 25, 25,
+ 40, 40, 55, 55, 113, 113, 99, 99, 12, 12, 70, 70, 112, 112, 85,
+ 85, 26, 26, 114, 114, 100, 100, 128, 128, 41, 41, 56, 56, 71, 71,
+ 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, 128, 128, 72, 72, 130,
+ 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, 42, 42, 144, 144,
+ 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, 88, 88, 132,
+ 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, 160, 160,
+ 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, 74,
+ 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44,
+ 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135,
+ 135, 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121,
+ 60, 60, 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45,
+ 45, 165, 165, 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208,
+ 122, 122, 152, 152, 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181,
+ 181, 224, 224, 107, 107, 196, 196, 61, 61, 153, 153, 224, 224, 182, 182,
+ 168, 168, 210, 210, 46, 46, 138, 138, 92, 92, 183, 183, 225, 225, 211,
+ 211, 240, 240, 197, 197, 169, 169, 123, 123, 154, 154, 198, 198, 77, 77,
+ 212, 212, 184, 184, 108, 108, 226, 226, 199, 199, 62, 62, 227, 227, 241,
+ 241, 139, 139, 213, 213, 170, 170, 185, 185, 155, 155, 228, 228, 242, 242,
+ 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, 215, 215, 229, 229, 140,
+ 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, 156, 156, 244, 244,
+ 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, 202, 202, 246,
+ 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, 157, 187, 187,
+ 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, 203, 203, 142,
+ 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, 219, 174, 174,
+ 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, 206, 206, 236,
+ 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, 238, 253, 253,
+ 254, 254, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the default 16x16 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with DefaultScan16X16 and Vp9DefaultIscan16X16 in the ScanOrder tables.
+ private static readonly short[] DefaultScan16X16Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17,
+ 32, 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64,
+ 64, 64, 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80,
+ 80, 35, 50, 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96,
+ 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52,
+ 67, 112, 112, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 6, 6,
+ 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 128, 114, 129, 69,
+ 84, 38, 53, 7, 22, 7, 7, 129, 144, 23, 38, 54, 69, 100, 115,
+ 85, 100, 115, 130, 144, 144, 130, 145, 39, 54, 70, 85, 8, 23, 55,
+ 70, 116, 131, 101, 116, 145, 160, 24, 39, 8, 8, 86, 101, 131, 146,
+ 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161,
+ 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 9, 9, 176, 176,
+ 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148,
+ 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, 10, 10,
+ 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164,
+ 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42,
+ 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43,
+ 58, 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136,
+ 209, 224, 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181,
+ 196, 12, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211,
+ 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168,
+ 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44,
+ 76, 91, 13, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154,
+ 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 14, 14,
+ 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170,
+ 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215,
+ 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171,
+ 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231,
+ 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110,
+ 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218,
+ 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234,
+ 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250,
+ 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236,
+ 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238,
+ 239, 254, 0, 0,
+ };
+
+ // {top, left} neighbor pairs for the default 32x32 scan, stored as a flat list of pairs
+ // that ends with a trailing 0, 0 pair.
+ // Bundled with DefaultScan32X32 and Vp9DefaultIscan32X32 in the ScanOrder tables;
+ // it is the only 32x32 neighbor table referenced there.
+ private static readonly short[] DefaultScan32X32Neighbors = new short[]
+ {
+ 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64,
+ 33, 64, 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128,
+ 97, 128, 3, 34, 66, 97, 3, 3, 35, 66, 98, 129, 129, 160,
+ 160, 160, 4, 35, 67, 98, 192, 192, 4, 4, 130, 161, 161, 192,
+ 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 224,
+ 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, 225, 256, 256, 256,
+ 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, 195, 226,
+ 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289,
+ 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7,
+ 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352,
+ 352, 352, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322,
+ 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 8, 8,
+ 384, 384, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103,
+ 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386,
+ 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, 199, 230,
+ 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262,
+ 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73,
+ 294, 325, 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419,
+ 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200,
+ 11, 42, 106, 137, 480, 480, 450, 481, 358, 389, 264, 295, 201, 232,
+ 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 11, 11, 481, 512,
+ 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 512,
+ 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483,
+ 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265,
+ 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545,
+ 453, 484, 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329,
+ 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485,
+ 45, 76, 172, 203, 330, 361, 576, 576, 13, 13, 267, 298, 546, 577,
+ 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578,
+ 14, 45, 14, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204,
+ 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173,
+ 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142,
+ 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238,
+ 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50,
+ 51, 82, 83, 114, 608, 608, 484, 515, 360, 391, 236, 267, 112, 143,
+ 19, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392,
+ 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 20, 20, 672, 672,
+ 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424,
+ 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145,
+ 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642,
+ 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425,
+ 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208,
+ 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736,
+ 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457,
+ 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178,
+ 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582,
+ 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117,
+ 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118,
+ 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23,
+ 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427,
+ 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24,
+ 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583,
+ 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304,
+ 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 25, 25,
+ 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677,
+ 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460,
+ 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243,
+ 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 26, 26,
+ 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616,
+ 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337,
+ 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58,
+ 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493,
+ 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90,
+ 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122,
+ 864, 864, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151,
+ 27, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648,
+ 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183,
+ 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, 897, 804, 835,
+ 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556,
+ 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277,
+ 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960,
+ 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774,
+ 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557,
+ 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340,
+ 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123,
+ 61, 92, 30, 61, 30, 30, 961, 992, 930, 961, 899, 930, 837, 868,
+ 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589,
+ 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310,
+ 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993,
+ 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590,
+ 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125,
+ 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374,
+ 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403,
+ 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683,
+ 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280,
+ 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808,
+ 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529,
+ 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219,
+ 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871,
+ 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654,
+ 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437,
+ 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220,
+ 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841,
+ 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562,
+ 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252,
+ 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749,
+ 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346,
+ 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502,
+ 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407,
+ 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656,
+ 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967,
+ 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688,
+ 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409,
+ 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906,
+ 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689,
+ 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472,
+ 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000,
+ 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721,
+ 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442,
+ 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846,
+ 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381,
+ 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382,
+ 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908,
+ 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443,
+ 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785,
+ 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475,
+ 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879,
+ 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662,
+ 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445,
+ 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818,
+ 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508,
+ 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757,
+ 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882,
+ 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539,
+ 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571,
+ 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789,
+ 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007,
+ 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790,
+ 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573,
+ 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822,
+ 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009,
+ 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606,
+ 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667,
+ 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979,
+ 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700,
+ 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856,
+ 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012,
+ 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733,
+ 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734,
+ 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920,
+ 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828,
+ 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860,
+ 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861,
+ 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894,
+ 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019,
+ 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021,
+ 959, 990, 991, 1022, 0, 0,
+ };
+
+ // "Iscan" table paired with DefaultScan4X4 in the ScanOrder tables; 16 entries holding
+ // each value 0..15 exactly once.
+ // NOTE(review): presumably the inverse of DefaultScan4X4 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9DefaultIscan4X4 = new short[]
+ {
+ 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15,
+ };
+
+ // "Iscan" table paired with ColScan4X4 in Vp9ScanOrders; 16 entries holding
+ // each value 0..15 exactly once.
+ // NOTE(review): presumably the inverse of ColScan4X4 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9ColIscan4X4 = new short[]
+ {
+ 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15,
+ };
+
+ // "Iscan" table paired with RowScan4X4 in Vp9ScanOrders; 16 entries holding
+ // each value 0..15 exactly once.
+ // NOTE(review): presumably the inverse of RowScan4X4 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9RowIscan4X4 = new short[]
+ {
+ 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15,
+ };
+
+ // "Iscan" table paired with ColScan8X8 in Vp9ScanOrders (64 entries, laid out 16 per line).
+ // NOTE(review): presumably the inverse of ColScan8X8 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9ColIscan8X8 = new short[]
+ {
+ 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51,
+ 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56,
+ 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60,
+ 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63,
+ };
+
+ // "Iscan" table paired with RowScan8X8 in Vp9ScanOrders (64 entries, laid out 16 per line).
+ // NOTE(review): presumably the inverse of RowScan8X8 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9RowIscan8X8 = new short[]
+ {
+ 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39,
+ 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52,
+ 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59,
+ 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63,
+ };
+
+ // "Iscan" table paired with DefaultScan8X8 in the ScanOrder tables (64 entries, laid out 16 per line).
+ // NOTE(review): presumably the inverse of DefaultScan8X8 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9DefaultIscan8X8 = new short[]
+ {
+ 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44,
+ 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53,
+ 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60,
+ 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63,
+ };
+
+ // "Iscan" table paired with ColScan16X16 in Vp9ScanOrders (laid out 16 values per line, 16 lines).
+ // NOTE(review): presumably the inverse of ColScan16X16 (raster position -> scan index) — confirm.
+ private static readonly short[] Vp9ColIscan16X16 = new short[]
+ {
+ 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198,
+ 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212,
+ 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216,
+ 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218,
+ 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223,
+ 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228,
+ 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230,
+ 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235,
+ 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237,
+ 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240,
+ 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244,
+ 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247,
+ 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251,
+ 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253,
+ 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254,
+ 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255,
+ };
+
+ // "Iscan" table paired with RowScan16X16 in Vp9ScanOrders.
+ // NOTE(review): presumably the inverse of RowScan16X16 (raster position -> scan index) — confirm.
+ // The line breaks below do not follow the 16-wide block rows.
+ private static readonly short[] Vp9RowIscan16X16 = new short[]
+ {
+ 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76,
+ 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99,
+ 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103,
+ 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100,
+ 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94,
+ 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96,
+ 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98,
+ 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102,
+ 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108,
+ 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114,
+ 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123,
+ 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129,
+ 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137,
+ 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149,
+ 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152,
+ 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253,
+ 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254,
+ 255,
+ };
+
+ // "Iscan" table paired with DefaultScan16X16 in the ScanOrder tables.
+ // NOTE(review): presumably the inverse of DefaultScan16X16 (raster position -> scan index) — confirm.
+ // The line breaks below do not follow the 16-wide block rows.
+ private static readonly short[] Vp9DefaultIscan16X16 = new short[]
+ {
+ 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166,
+ 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154,
+ 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148,
+ 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127,
+ 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114,
+ 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106,
+ 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99,
+ 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95,
+ 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93,
+ 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94,
+ 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97,
+ 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102,
+ 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111,
+ 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115,
+ 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121,
+ 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254,
+ 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253,
+ 255,
+ };
+
+ // "Iscan" table paired with DefaultScan32X32 in the ScanOrder tables; it is the only
+ // 32x32 iscan table referenced there.
+ // NOTE(review): presumably the inverse of DefaultScan32X32 (raster position -> scan index) — confirm.
+ // The line breaks below do not follow the 32-wide block rows.
+ private static readonly short[] Vp9DefaultIscan32X32 = new short[]
+ {
+ 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145,
+ 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356,
+ 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45,
+ 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237,
+ 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3,
+ 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189,
+ 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425,
+ 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73,
+ 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296,
+ 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, 9, 14,
+ 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214,
+ 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523,
+ 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105,
+ 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373,
+ 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33,
+ 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227,
+ 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615,
+ 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139,
+ 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437,
+ 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56,
+ 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340,
+ 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705,
+ 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177,
+ 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545,
+ 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89,
+ 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395,
+ 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764,
+ 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207,
+ 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638,
+ 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122,
+ 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486,
+ 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833,
+ 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349,
+ 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719,
+ 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169,
+ 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561,
+ 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117,
+ 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415,
+ 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774,
+ 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283,
+ 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691,
+ 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166,
+ 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510,
+ 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845,
+ 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363,
+ 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737,
+ 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206,
+ 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571,
+ 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900,
+ 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461,
+ 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807,
+ 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302,
+ 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698,
+ 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944,
+ 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535,
+ 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854,
+ 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382,
+ 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754,
+ 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971,
+ 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625,
+ 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929,
+ 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476,
+ 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820,
+ 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001,
+ 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709,
+ 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951,
+ 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554,
+ 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860,
+ 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448,
+ 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 727, 784, 790,
+ 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984,
+ 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648,
+ 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931,
+ 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497,
+ 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835,
+ 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003,
+ 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724,
+ 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958,
+ 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023,
+ };
+
+ // Immutable bundle of the three lookup tables that describe one scan:
+ // the scan table, its "iscan" companion and the neighbor table.
+ // The arrays are stored by reference; no copy is made.
+ public class ScanOrder
+ {
+     private readonly short[] _scanTable;
+     private readonly short[] _iScanTable;
+     private readonly short[] _neighborTable;
+
+     // scan:      scan table.
+     // iScan:     matching "iscan" table.
+     // neighbors: matching neighbor table.
+     public ScanOrder(short[] scan, short[] iScan, short[] neighbors)
+     {
+         _scanTable = scan;
+         _iScanTable = iScan;
+         _neighborTable = neighbors;
+     }
+
+     // Scan table passed to the constructor.
+     public short[] Scan
+     {
+         get { return _scanTable; }
+     }
+
+     // "Iscan" table passed to the constructor.
+     public short[] IScan
+     {
+         get { return _iScanTable; }
+     }
+
+     // Neighbor table passed to the constructor.
+     public short[] Neighbors
+     {
+         get { return _neighborTable; }
+     }
+ }
+
+ // Default scan order for each block size, in the order 4x4, 8x8, 16x16, 32x32.
+ // NOTE(review): the index is presumably the transform size (TX_4X4..TX_32X32) — confirm against callers.
+ public static readonly ScanOrder[] Vp9DefaultScanOrders =
+ {
+     new ScanOrder(scan: DefaultScan4X4, iScan: Vp9DefaultIscan4X4, neighbors: DefaultScan4X4Neighbors),
+     new ScanOrder(scan: DefaultScan8X8, iScan: Vp9DefaultIscan8X8, neighbors: DefaultScan8X8Neighbors),
+     new ScanOrder(scan: DefaultScan16X16, iScan: Vp9DefaultIscan16X16, neighbors: DefaultScan16X16Neighbors),
+     new ScanOrder(scan: DefaultScan32X32, iScan: Vp9DefaultIscan32X32, neighbors: DefaultScan32X32Neighbors),
+ };
+
+ // Scan orders grouped by transform size (outer index, see the per-group comments).
+ // Each group holds four entries in the order default, row, column, default;
+ // the 32x32 group uses the default scan for all four.
+ // NOTE(review): the inner index is presumably the transform type - confirm against callers.
+ public static readonly ScanOrder[][] Vp9ScanOrders =
+ {
+     // TX_4X4
+     new[]
+     {
+         new ScanOrder(DefaultScan4X4, Vp9DefaultIscan4X4, DefaultScan4X4Neighbors),
+         new ScanOrder(RowScan4X4, Vp9RowIscan4X4, RowScan4X4Neighbors),
+         new ScanOrder(ColScan4X4, Vp9ColIscan4X4, ColScan4X4Neighbors),
+         new ScanOrder(DefaultScan4X4, Vp9DefaultIscan4X4, DefaultScan4X4Neighbors)
+     },
+     // TX_8X8
+     new[]
+     {
+         new ScanOrder(DefaultScan8X8, Vp9DefaultIscan8X8, DefaultScan8X8Neighbors),
+         new ScanOrder(RowScan8X8, Vp9RowIscan8X8, RowScan8X8Neighbors),
+         new ScanOrder(ColScan8X8, Vp9ColIscan8X8, ColScan8X8Neighbors),
+         new ScanOrder(DefaultScan8X8, Vp9DefaultIscan8X8, DefaultScan8X8Neighbors)
+     },
+     // TX_16X16
+     new[]
+     {
+         new ScanOrder(DefaultScan16X16, Vp9DefaultIscan16X16, DefaultScan16X16Neighbors),
+         new ScanOrder(RowScan16X16, Vp9RowIscan16X16, RowScan16X16Neighbors),
+         new ScanOrder(ColScan16X16, Vp9ColIscan16X16, ColScan16X16Neighbors),
+         new ScanOrder(DefaultScan16X16, Vp9DefaultIscan16X16, DefaultScan16X16Neighbors)
+     },
+     // TX_32X32
+     new[]
+     {
+         new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors),
+         new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors),
+         new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors),
+         new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors)
+     }
+ };
+
+ // Entropy MV
+
+ // Tree table over the four MvJointType values, written two entries per line.
+ // NOTE(review): presumably a negated enum value is a leaf and a positive value
+ // is the index of the next pair - confirm against the tree reader.
+ public static readonly sbyte[] Vp9MvJointTree =
+ {
+     -(sbyte)MvJointType.MvJointZero, 2,
+     -(sbyte)MvJointType.MvJointHnzvz, 4,
+     -(sbyte)MvJointType.MvJointHzvnz, -(sbyte)MvJointType.MvJointHnzvnz
+ };
+
+ // Tree table over MvClassType values MvClass0..MvClass10, written two entries per line.
+ // NOTE(review): presumably a negated enum value is a leaf and a positive value
+ // is the index of the next pair - confirm against the tree reader.
+ public static readonly sbyte[] Vp9MvClassTree =
+ {
+     -(sbyte)MvClassType.MvClass0, 2,
+     -(sbyte)MvClassType.MvClass1, 4,
+     6, 8,
+     -(sbyte)MvClassType.MvClass2, -(sbyte)MvClassType.MvClass3,
+     10, 12,
+     -(sbyte)MvClassType.MvClass4, -(sbyte)MvClassType.MvClass5,
+     -(sbyte)MvClassType.MvClass6, 14,
+     16, 18,
+     -(sbyte)MvClassType.MvClass7, -(sbyte)MvClassType.MvClass8,
+     -(sbyte)MvClassType.MvClass9, -(sbyte)MvClassType.MvClass10,
+ };
+
+ // Six-entry tree table with the literal leaf values 0..3, written two entries per line.
+ public static readonly sbyte[] Vp9MvFPTree =
+ {
+     -0, 2,
+     -1, 4,
+     -2, -3
+ };
+
+ // Entropy
+
+ // Probability tables for token categories 1 through 6.
+ // Categories 1-5 hold as many entries as their category number; category 6 holds 14.
+ public static readonly byte[] Vp9Cat1Prob = { 159 };
+ public static readonly byte[] Vp9Cat2Prob = { 165, 145 };
+ public static readonly byte[] Vp9Cat3Prob = { 173, 148, 140 };
+ public static readonly byte[] Vp9Cat4Prob = { 176, 155, 140, 135 };
+ public static readonly byte[] Vp9Cat5Prob = { 180, 157, 141, 134, 130 };
+ public static readonly byte[] Vp9Cat6Prob =
+ {
+     254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
+ };
+
+ // Category-6 probabilities for the high bit depth case (18 entries).
+ // The table is four leading 255 entries followed by the same 14 values as
+ // Vp9Cat6Prob. Entry 6 previously read 54, which broke that correspondence
+ // (a dropped digit); it is 254.
+ // NOTE(review): per the name this is the 12-bit table - confirm how callers
+ // offset into it for lower bit depths.
+ public static readonly byte[] Vp9Cat6ProbHigh12 = new byte[]
+ {
+     255, 255, 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
+ };
+
+ // Band lookup returned by get_band_translate for every size other than Tx4x4.
+ // 1024 entries: the 22 explicit leading values below, then 5 for every remaining index.
+ private static readonly byte[] Vp9CoefbandTrans8X8Plus = BuildCoefbandTrans8X8Plus();
+
+ // Builds the 1024-entry table programmatically instead of spelling out every 5.
+ private static byte[] BuildCoefbandTrans8X8Plus()
+ {
+     byte[] leading =
+     {
+         0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5
+     };
+
+     byte[] table = new byte[1024];
+
+     for (int i = 0; i < table.Length; i++)
+     {
+         // Beyond MAXBAND_INDEX+1 all values are filled as 5
+         table[i] = i < leading.Length ? leading[i] : (byte)5;
+     }
+
+     return table;
+ }
+
+ // Band lookup returned by get_band_translate for Tx4x4 (16 entries).
+ private static readonly byte[] Vp9CoefbandTrans4X4 =
+ {
+     0, 1, 1, 2,
+     2, 2, 3, 3,
+     3, 3, 4, 4,
+     4, 5, 5, 5,
+ };
+
+ /// <summary>
+ /// Selects the band translation table for a transform size:
+ /// the 4x4 table for <c>TxSize.Tx4x4</c>, the 8x8-and-larger table otherwise.
+ /// </summary>
+ /// <returns>The shared static table (not a copy).</returns>
+ public static byte[] get_band_translate(TxSize txSize)
+ {
+     if (txSize == TxSize.Tx4x4)
+     {
+         return Vp9CoefbandTrans4X4;
+     }
+
+     return Vp9CoefbandTrans8X8Plus;
+ }
+
+ public static readonly byte[][] Vp9Pareto8Full = new byte[][]
+ {
+ new byte[] { 3, 86, 128, 6, 86, 23, 88, 29 },
+ new byte[] { 6, 86, 128, 11, 87, 42, 91, 52 },
+ new byte[] { 9, 86, 129, 17, 88, 61, 94, 76 },
+ new byte[] { 12, 86, 129, 22, 88, 77, 97, 93 },
+ new byte[] { 15, 87, 129, 28, 89, 93, 100, 110 },
+ new byte[] { 17, 87, 129, 33, 90, 105, 103, 123 },
+ new byte[] { 20, 88, 130, 38, 91, 118, 106, 136 },
+ new byte[] { 23, 88, 130, 43, 91, 128, 108, 146 },
+ new byte[] { 26, 89, 131, 48, 92, 139, 111, 156 },
+ new byte[] { 28, 89, 131, 53, 93, 147, 114, 163 },
+ new byte[] { 31, 90, 131, 58, 94, 156, 117, 171 },
+ new byte[] { 34, 90, 131, 62, 94, 163, 119, 177 },
+ new byte[] { 37, 90, 132, 66, 95, 171, 122, 184 },
+ new byte[] { 39, 90, 132, 70, 96, 177, 124, 189 },
+ new byte[] { 42, 91, 132, 75, 97, 183, 127, 194 },
+ new byte[] { 44, 91, 132, 79, 97, 188, 129, 198 },
+ new byte[] { 47, 92, 133, 83, 98, 193, 132, 202 },
+ new byte[] { 49, 92, 133, 86, 99, 197, 134, 205 },
+ new byte[] { 52, 93, 133, 90, 100, 201, 137, 208 },
+ new byte[] { 54, 93, 133, 94, 100, 204, 139, 211 },
+ new byte[] { 57, 94, 134, 98, 101, 208, 142, 214 },
+ new byte[] { 59, 94, 134, 101, 102, 211, 144, 216 },
+ new byte[] { 62, 94, 135, 105, 103, 214, 146, 218 },
+ new byte[] { 64, 94, 135, 108, 103, 216, 148, 220 },
+ new byte[] { 66, 95, 135, 111, 104, 219, 151, 222 },
+ new byte[] { 68, 95, 135, 114, 105, 221, 153, 223 },
+ new byte[] { 71, 96, 136, 117, 106, 224, 155, 225 },
+ new byte[] { 73, 96, 136, 120, 106, 225, 157, 226 },
+ new byte[] { 76, 97, 136, 123, 107, 227, 159, 228 },
+ new byte[] { 78, 97, 136, 126, 108, 229, 160, 229 },
+ new byte[] { 80, 98, 137, 129, 109, 231, 162, 231 },
+ new byte[] { 82, 98, 137, 131, 109, 232, 164, 232 },
+ new byte[] { 84, 98, 138, 134, 110, 234, 166, 233 },
+ new byte[] { 86, 98, 138, 137, 111, 235, 168, 234 },
+ new byte[] { 89, 99, 138, 140, 112, 236, 170, 235 },
+ new byte[] { 91, 99, 138, 142, 112, 237, 171, 235 },
+ new byte[] { 93, 100, 139, 145, 113, 238, 173, 236 },
+ new byte[] { 95, 100, 139, 147, 114, 239, 174, 237 },
+ new byte[] { 97, 101, 140, 149, 115, 240, 176, 238 },
+ new byte[] { 99, 101, 140, 151, 115, 241, 177, 238 },
+ new byte[] { 101, 102, 140, 154, 116, 242, 179, 239 },
+ new byte[] { 103, 102, 140, 156, 117, 242, 180, 239 },
+ new byte[] { 105, 103, 141, 158, 118, 243, 182, 240 },
+ new byte[] { 107, 103, 141, 160, 118, 243, 183, 240 },
+ new byte[] { 109, 104, 141, 162, 119, 244, 185, 241 },
+ new byte[] { 111, 104, 141, 164, 119, 244, 186, 241 },
+ new byte[] { 113, 104, 142, 166, 120, 245, 187, 242 },
+ new byte[] { 114, 104, 142, 168, 121, 245, 188, 242 },
+ new byte[] { 116, 105, 143, 170, 122, 246, 190, 243 },
+ new byte[] { 118, 105, 143, 171, 122, 246, 191, 243 },
+ new byte[] { 120, 106, 143, 173, 123, 247, 192, 244 },
+ new byte[] { 121, 106, 143, 175, 124, 247, 193, 244 },
+ new byte[] { 123, 107, 144, 177, 125, 248, 195, 244 },
+ new byte[] { 125, 107, 144, 178, 125, 248, 196, 244 },
+ new byte[] { 127, 108, 145, 180, 126, 249, 197, 245 },
+ new byte[] { 128, 108, 145, 181, 127, 249, 198, 245 },
+ new byte[] { 130, 109, 145, 183, 128, 249, 199, 245 },
+ new byte[] { 132, 109, 145, 184, 128, 249, 200, 245 },
+ new byte[] { 134, 110, 146, 186, 129, 250, 201, 246 },
+ new byte[] { 135, 110, 146, 187, 130, 250, 202, 246 },
+ new byte[] { 137, 111, 147, 189, 131, 251, 203, 246 },
+ new byte[] { 138, 111, 147, 190, 131, 251, 204, 246 },
+ new byte[] { 140, 112, 147, 192, 132, 251, 205, 247 },
+ new byte[] { 141, 112, 147, 193, 132, 251, 206, 247 },
+ new byte[] { 143, 113, 148, 194, 133, 251, 207, 247 },
+ new byte[] { 144, 113, 148, 195, 134, 251, 207, 247 },
+ new byte[] { 146, 114, 149, 197, 135, 252, 208, 248 },
+ new byte[] { 147, 114, 149, 198, 135, 252, 209, 248 },
+ new byte[] { 149, 115, 149, 199, 136, 252, 210, 248 },
+ new byte[] { 150, 115, 149, 200, 137, 252, 210, 248 },
+ new byte[] { 152, 115, 150, 201, 138, 252, 211, 248 },
+ new byte[] { 153, 115, 150, 202, 138, 252, 212, 248 },
+ new byte[] { 155, 116, 151, 204, 139, 253, 213, 249 },
+ new byte[] { 156, 116, 151, 205, 139, 253, 213, 249 },
+ new byte[] { 158, 117, 151, 206, 140, 253, 214, 249 },
+ new byte[] { 159, 117, 151, 207, 141, 253, 215, 249 },
+ new byte[] { 161, 118, 152, 208, 142, 253, 216, 249 },
+ new byte[] { 162, 118, 152, 209, 142, 253, 216, 249 },
+ new byte[] { 163, 119, 153, 210, 143, 253, 217, 249 },
+ new byte[] { 164, 119, 153, 211, 143, 253, 217, 249 },
+ new byte[] { 166, 120, 153, 212, 144, 254, 218, 250 },
+ new byte[] { 167, 120, 153, 212, 145, 254, 219, 250 },
+ new byte[] { 168, 121, 154, 213, 146, 254, 220, 250 },
+ new byte[] { 169, 121, 154, 214, 146, 254, 220, 250 },
+ new byte[] { 171, 122, 155, 215, 147, 254, 221, 250 },
+ new byte[] { 172, 122, 155, 216, 147, 254, 221, 250 },
+ new byte[] { 173, 123, 155, 217, 148, 254, 222, 250 },
+ new byte[] { 174, 123, 155, 217, 149, 254, 222, 250 },
+ new byte[] { 176, 124, 156, 218, 150, 254, 223, 250 },
+ new byte[] { 177, 124, 156, 219, 150, 254, 223, 250 },
+ new byte[] { 178, 125, 157, 220, 151, 254, 224, 251 },
+ new byte[] { 179, 125, 157, 220, 151, 254, 224, 251 },
+ new byte[] { 180, 126, 157, 221, 152, 254, 225, 251 },
+ new byte[] { 181, 126, 157, 221, 152, 254, 225, 251 },
+ new byte[] { 183, 127, 158, 222, 153, 254, 226, 251 },
+ new byte[] { 184, 127, 158, 223, 154, 254, 226, 251 },
+ new byte[] { 185, 128, 159, 224, 155, 255, 227, 251 },
+ new byte[] { 186, 128, 159, 224, 155, 255, 227, 251 },
+ new byte[] { 187, 129, 160, 225, 156, 255, 228, 251 },
+ new byte[] { 188, 130, 160, 225, 156, 255, 228, 251 },
+ new byte[] { 189, 131, 160, 226, 157, 255, 228, 251 },
+ new byte[] { 190, 131, 160, 226, 158, 255, 228, 251 },
+ new byte[] { 191, 132, 161, 227, 159, 255, 229, 251 },
+ new byte[] { 192, 132, 161, 227, 159, 255, 229, 251 },
+ new byte[] { 193, 133, 162, 228, 160, 255, 230, 252 },
+ new byte[] { 194, 133, 162, 229, 160, 255, 230, 252 },
+ new byte[] { 195, 134, 163, 230, 161, 255, 231, 252 },
+ new byte[] { 196, 134, 163, 230, 161, 255, 231, 252 },
+ new byte[] { 197, 135, 163, 231, 162, 255, 231, 252 },
+ new byte[] { 198, 135, 163, 231, 162, 255, 231, 252 },
+ new byte[] { 199, 136, 164, 232, 163, 255, 232, 252 },
+ new byte[] { 200, 136, 164, 232, 164, 255, 232, 252 },
+ new byte[] { 201, 137, 165, 233, 165, 255, 233, 252 },
+ new byte[] { 201, 137, 165, 233, 165, 255, 233, 252 },
+ new byte[] { 202, 138, 166, 233, 166, 255, 233, 252 },
+ new byte[] { 203, 138, 166, 233, 166, 255, 233, 252 },
+ new byte[] { 204, 139, 166, 234, 167, 255, 234, 252 },
+ new byte[] { 205, 139, 166, 234, 167, 255, 234, 252 },
+ new byte[] { 206, 140, 167, 235, 168, 255, 235, 252 },
+ new byte[] { 206, 140, 167, 235, 168, 255, 235, 252 },
+ new byte[] { 207, 141, 168, 236, 169, 255, 235, 252 },
+ new byte[] { 208, 141, 168, 236, 170, 255, 235, 252 },
+ new byte[] { 209, 142, 169, 237, 171, 255, 236, 252 },
+ new byte[] { 209, 143, 169, 237, 171, 255, 236, 252 },
+ new byte[] { 210, 144, 169, 237, 172, 255, 236, 252 },
+ new byte[] { 211, 144, 169, 237, 172, 255, 236, 252 },
+ new byte[] { 212, 145, 170, 238, 173, 255, 237, 252 },
+ new byte[] { 213, 145, 170, 238, 173, 255, 237, 252 },
+ new byte[] { 214, 146, 171, 239, 174, 255, 237, 253 },
+ new byte[] { 214, 146, 171, 239, 174, 255, 237, 253 },
+ new byte[] { 215, 147, 172, 240, 175, 255, 238, 253 },
+ new byte[] { 215, 147, 172, 240, 175, 255, 238, 253 },
+ new byte[] { 216, 148, 173, 240, 176, 255, 238, 253 },
+ new byte[] { 217, 148, 173, 240, 176, 255, 238, 253 },
+ new byte[] { 218, 149, 173, 241, 177, 255, 239, 253 },
+ new byte[] { 218, 149, 173, 241, 178, 255, 239, 253 },
+ new byte[] { 219, 150, 174, 241, 179, 255, 239, 253 },
+ new byte[] { 219, 151, 174, 241, 179, 255, 239, 253 },
+ new byte[] { 220, 152, 175, 242, 180, 255, 240, 253 },
+ new byte[] { 221, 152, 175, 242, 180, 255, 240, 253 },
+ new byte[] { 222, 153, 176, 242, 181, 255, 240, 253 },
+ new byte[] { 222, 153, 176, 242, 181, 255, 240, 253 },
+ new byte[] { 223, 154, 177, 243, 182, 255, 240, 253 },
+ new byte[] { 223, 154, 177, 243, 182, 255, 240, 253 },
+ new byte[] { 224, 155, 178, 244, 183, 255, 241, 253 },
+ new byte[] { 224, 155, 178, 244, 183, 255, 241, 253 },
+ new byte[] { 225, 156, 178, 244, 184, 255, 241, 253 },
+ new byte[] { 225, 157, 178, 244, 184, 255, 241, 253 },
+ new byte[] { 226, 158, 179, 244, 185, 255, 242, 253 },
+ new byte[] { 227, 158, 179, 244, 185, 255, 242, 253 },
+ new byte[] { 228, 159, 180, 245, 186, 255, 242, 253 },
+ new byte[] { 228, 159, 180, 245, 186, 255, 242, 253 },
+ new byte[] { 229, 160, 181, 245, 187, 255, 242, 253 },
+ new byte[] { 229, 160, 181, 245, 187, 255, 242, 253 },
+ new byte[] { 230, 161, 182, 246, 188, 255, 243, 253 },
+ new byte[] { 230, 162, 182, 246, 188, 255, 243, 253 },
+ new byte[] { 231, 163, 183, 246, 189, 255, 243, 253 },
+ new byte[] { 231, 163, 183, 246, 189, 255, 243, 253 },
+ new byte[] { 232, 164, 184, 247, 190, 255, 243, 253 },
+ new byte[] { 232, 164, 184, 247, 190, 255, 243, 253 },
+ new byte[] { 233, 165, 185, 247, 191, 255, 244, 253 },
+ new byte[] { 233, 165, 185, 247, 191, 255, 244, 253 },
+ new byte[] { 234, 166, 185, 247, 192, 255, 244, 253 },
+ new byte[] { 234, 167, 185, 247, 192, 255, 244, 253 },
+ new byte[] { 235, 168, 186, 248, 193, 255, 244, 253 },
+ new byte[] { 235, 168, 186, 248, 193, 255, 244, 253 },
+ new byte[] { 236, 169, 187, 248, 194, 255, 244, 253 },
+ new byte[] { 236, 169, 187, 248, 194, 255, 244, 253 },
+ new byte[] { 236, 170, 188, 248, 195, 255, 245, 253 },
+ new byte[] { 236, 170, 188, 248, 195, 255, 245, 253 },
+ new byte[] { 237, 171, 189, 249, 196, 255, 245, 254 },
+ new byte[] { 237, 172, 189, 249, 196, 255, 245, 254 },
+ new byte[] { 238, 173, 190, 249, 197, 255, 245, 254 },
+ new byte[] { 238, 173, 190, 249, 197, 255, 245, 254 },
+ new byte[] { 239, 174, 191, 249, 198, 255, 245, 254 },
+ new byte[] { 239, 174, 191, 249, 198, 255, 245, 254 },
+ new byte[] { 240, 175, 192, 249, 199, 255, 246, 254 },
+ new byte[] { 240, 176, 192, 249, 199, 255, 246, 254 },
+ new byte[] { 240, 177, 193, 250, 200, 255, 246, 254 },
+ new byte[] { 240, 177, 193, 250, 200, 255, 246, 254 },
+ new byte[] { 241, 178, 194, 250, 201, 255, 246, 254 },
+ new byte[] { 241, 178, 194, 250, 201, 255, 246, 254 },
+ new byte[] { 242, 179, 195, 250, 202, 255, 246, 254 },
+ new byte[] { 242, 180, 195, 250, 202, 255, 246, 254 },
+ new byte[] { 242, 181, 196, 250, 203, 255, 247, 254 },
+ new byte[] { 242, 181, 196, 250, 203, 255, 247, 254 },
+ new byte[] { 243, 182, 197, 251, 204, 255, 247, 254 },
+ new byte[] { 243, 183, 197, 251, 204, 255, 247, 254 },
+ new byte[] { 244, 184, 198, 251, 205, 255, 247, 254 },
+ new byte[] { 244, 184, 198, 251, 205, 255, 247, 254 },
+ new byte[] { 244, 185, 199, 251, 206, 255, 247, 254 },
+ new byte[] { 244, 185, 199, 251, 206, 255, 247, 254 },
+ new byte[] { 245, 186, 200, 251, 207, 255, 247, 254 },
+ new byte[] { 245, 187, 200, 251, 207, 255, 247, 254 },
+ new byte[] { 246, 188, 201, 252, 207, 255, 248, 254 },
+ new byte[] { 246, 188, 201, 252, 207, 255, 248, 254 },
+ new byte[] { 246, 189, 202, 252, 208, 255, 248, 254 },
+ new byte[] { 246, 190, 202, 252, 208, 255, 248, 254 },
+ new byte[] { 247, 191, 203, 252, 209, 255, 248, 254 },
+ new byte[] { 247, 191, 203, 252, 209, 255, 248, 254 },
+ new byte[] { 247, 192, 204, 252, 210, 255, 248, 254 },
+ new byte[] { 247, 193, 204, 252, 210, 255, 248, 254 },
+ new byte[] { 248, 194, 205, 252, 211, 255, 248, 254 },
+ new byte[] { 248, 194, 205, 252, 211, 255, 248, 254 },
+ new byte[] { 248, 195, 206, 252, 212, 255, 249, 254 },
+ new byte[] { 248, 196, 206, 252, 212, 255, 249, 254 },
+ new byte[] { 249, 197, 207, 253, 213, 255, 249, 254 },
+ new byte[] { 249, 197, 207, 253, 213, 255, 249, 254 },
+ new byte[] { 249, 198, 208, 253, 214, 255, 249, 254 },
+ new byte[] { 249, 199, 209, 253, 214, 255, 249, 254 },
+ new byte[] { 250, 200, 210, 253, 215, 255, 249, 254 },
+ new byte[] { 250, 200, 210, 253, 215, 255, 249, 254 },
+ new byte[] { 250, 201, 211, 253, 215, 255, 249, 254 },
+ new byte[] { 250, 202, 211, 253, 215, 255, 249, 254 },
+ new byte[] { 250, 203, 212, 253, 216, 255, 249, 254 },
+ new byte[] { 250, 203, 212, 253, 216, 255, 249, 254 },
+ new byte[] { 251, 204, 213, 253, 217, 255, 250, 254 },
+ new byte[] { 251, 205, 213, 253, 217, 255, 250, 254 },
+ new byte[] { 251, 206, 214, 254, 218, 255, 250, 254 },
+ new byte[] { 251, 206, 215, 254, 218, 255, 250, 254 },
+ new byte[] { 252, 207, 216, 254, 219, 255, 250, 254 },
+ new byte[] { 252, 208, 216, 254, 219, 255, 250, 254 },
+ new byte[] { 252, 209, 217, 254, 220, 255, 250, 254 },
+ new byte[] { 252, 210, 217, 254, 220, 255, 250, 254 },
+ new byte[] { 252, 211, 218, 254, 221, 255, 250, 254 },
+ new byte[] { 252, 212, 218, 254, 221, 255, 250, 254 },
+ new byte[] { 253, 213, 219, 254, 222, 255, 250, 254 },
+ new byte[] { 253, 213, 220, 254, 222, 255, 250, 254 },
+ new byte[] { 253, 214, 221, 254, 223, 255, 250, 254 },
+ new byte[] { 253, 215, 221, 254, 223, 255, 250, 254 },
+ new byte[] { 253, 216, 222, 254, 224, 255, 251, 254 },
+ new byte[] { 253, 217, 223, 254, 224, 255, 251, 254 },
+ new byte[] { 253, 218, 224, 254, 225, 255, 251, 254 },
+ new byte[] { 253, 219, 224, 254, 225, 255, 251, 254 },
+ new byte[] { 254, 220, 225, 254, 225, 255, 251, 254 },
+ new byte[] { 254, 221, 226, 254, 225, 255, 251, 254 },
+ new byte[] { 254, 222, 227, 255, 226, 255, 251, 254 },
+ new byte[] { 254, 223, 227, 255, 226, 255, 251, 254 },
+ new byte[] { 254, 224, 228, 255, 227, 255, 251, 254 },
+ new byte[] { 254, 225, 229, 255, 227, 255, 251, 254 },
+ new byte[] { 254, 226, 230, 255, 228, 255, 251, 254 },
+ new byte[] { 254, 227, 230, 255, 229, 255, 251, 254 },
+ new byte[] { 255, 228, 231, 255, 230, 255, 251, 254 },
+ new byte[] { 255, 229, 232, 255, 230, 255, 251, 254 },
+ new byte[] { 255, 230, 233, 255, 231, 255, 252, 254 },
+ new byte[] { 255, 231, 234, 255, 231, 255, 252, 254 },
+ new byte[] { 255, 232, 235, 255, 232, 255, 252, 254 },
+ new byte[] { 255, 233, 236, 255, 232, 255, 252, 254 },
+ new byte[] { 255, 235, 237, 255, 233, 255, 252, 254 },
+ new byte[] { 255, 236, 238, 255, 234, 255, 252, 254 },
+ new byte[] { 255, 238, 240, 255, 235, 255, 252, 255 },
+ new byte[] { 255, 239, 241, 255, 235, 255, 252, 254 },
+ new byte[] { 255, 241, 243, 255, 236, 255, 252, 254 },
+ new byte[] { 255, 243, 245, 255, 237, 255, 252, 254 },
+ new byte[] { 255, 246, 247, 255, 239, 255, 253, 255 },
+ };
+
+ // Array indices are identical to previously-existing INTRAMODECONTEXTNODES.
+ // One node (two entries) per line; the trailing comment names the node.
+ public static readonly sbyte[] Vp9IntraModeTree =
+ {
+     -(sbyte)PredictionMode.DcPred, 2,                                    // 0 = DC_NODE
+     -(sbyte)PredictionMode.TmPred, 4,                                    // 1 = TM_NODE
+     -(sbyte)PredictionMode.VPred, 6,                                     // 2 = V_NODE
+     8, 12,                                                               // 3 = COM_NODE
+     -(sbyte)PredictionMode.HPred, 10,                                    // 4 = H_NODE
+     -(sbyte)PredictionMode.D135Pred, -(sbyte)PredictionMode.D117Pred,    // 5 = D135_NODE
+     -(sbyte)PredictionMode.D45Pred, 14,                                  // 6 = D45_NODE
+     -(sbyte)PredictionMode.D63Pred, 16,                                  // 7 = D63_NODE
+     -(sbyte)PredictionMode.D153Pred, -(sbyte)PredictionMode.D207Pred     // 8 = D153_NODE
+ };
+
+ // Tree table over the inter prediction modes. Each leaf is stored as the
+ // negated offset of the mode from PredictionMode.NearestMv.
+ public static readonly sbyte[] Vp9InterModeTree =
+ {
+     -((sbyte)PredictionMode.ZeroMv - (sbyte)PredictionMode.NearestMv), 2,
+     -((sbyte)PredictionMode.NearestMv - (sbyte)PredictionMode.NearestMv), 4,
+     -((sbyte)PredictionMode.NearMv - (sbyte)PredictionMode.NearestMv),
+     -((sbyte)PredictionMode.NewMv - (sbyte)PredictionMode.NearestMv)
+ };
+
+ // Tree table over the four PartitionType values, written two entries per line.
+ public static readonly sbyte[] Vp9PartitionTree =
+ {
+     -(sbyte)PartitionType.PartitionNone, 2,
+     -(sbyte)PartitionType.PartitionHorz, 4,
+     -(sbyte)PartitionType.PartitionVert, -(sbyte)PartitionType.PartitionSplit
+ };
+
+ // Tree table over the three switchable interpolation filter constants.
+ public static readonly sbyte[] Vp9SwitchableInterpTree =
+ {
+     -Constants.EightTap, 2,
+     -Constants.EightTapSmooth, -Constants.EightTapSharp
+ };
+
+ // Tree table with eight leaves (0 and -1 .. -7), written two entries per line.
+ public static readonly sbyte[] Vp9SegmentTree =
+ {
+     2, 4,
+     6, 8,
+     10, 12,
+     0, -1,
+     -2, -3,
+     -4, -5,
+     -6, -7
+ };
+
+ // MV Ref
+
+ // Per-mode weight used to derive a context for the ref blocks.
+ // Instead of keeping a 3x3x3 array of counts (0, 1 or 2 for each of 3 choices),
+ // the counts are flattened into one number: every intra block contributes 9,
+ // every zero mv contributes 3 and every new motion vector contributes 1.
+ // That number is then turned into a context with one lookup in CounterToContext.
+ public static readonly int[] Mode2Counter =
+ {
+     // Intra modes
+     9, // DC_PRED
+     9, // V_PRED
+     9, // H_PRED
+     9, // D45_PRED
+     9, // D135_PRED
+     9, // D117_PRED
+     9, // D153_PRED
+     9, // D207_PRED
+     9, // D63_PRED
+     9, // TM_PRED
+
+     // Inter modes
+     0, // NEARESTMV
+     0, // NEARMV
+     3, // ZEROMV
+     1, // NEWMV
+ };
+
+ // Maps a flattened counter (0..18) to its motion vector context.
+ // Three counts of 0, 1 or 2 give 3^3 combinations, but the actual count never
+ // exceeds 2, so 18 is the highest counter needed. Counters that cannot be
+ // formed by adding two Mode2Counter weights map to InvalidCase.
+ public static readonly MotionVectorContext[] CounterToContext =
+ {
+     /*  0 */ MotionVectorContext.BothPredicted,
+     /*  1 */ MotionVectorContext.NewPlusNonIntra,
+     /*  2 */ MotionVectorContext.BothNew,
+     /*  3 */ MotionVectorContext.ZeroPlusPredicted,
+     /*  4 */ MotionVectorContext.NewPlusNonIntra,
+     /*  5 */ MotionVectorContext.InvalidCase,
+     /*  6 */ MotionVectorContext.BothZero,
+     /*  7 */ MotionVectorContext.InvalidCase,
+     /*  8 */ MotionVectorContext.InvalidCase,
+     /*  9 */ MotionVectorContext.IntraPlusNonIntra,
+     /* 10 */ MotionVectorContext.IntraPlusNonIntra,
+     /* 11 */ MotionVectorContext.InvalidCase,
+     /* 12 */ MotionVectorContext.IntraPlusNonIntra,
+     /* 13 */ MotionVectorContext.InvalidCase,
+     /* 14 */ MotionVectorContext.InvalidCase,
+     /* 15 */ MotionVectorContext.InvalidCase,
+     /* 16 */ MotionVectorContext.InvalidCase,
+     /* 17 */ MotionVectorContext.InvalidCase,
+     /* 18 */ MotionVectorContext.BothIntra
+ };
+
+ // Eight candidate positions per block size, one group per size in the order
+ // given by the group comments (4X4 .. 64X64).
+ // NOTE(review): presumably these are offsets relative to the current block
+ // and the outer index is the block size enum - confirm against callers.
+ public static readonly Position[][] MvRefBlocks =
+ {
+     // 4X4
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, -1), new Position(-2, 0),
+         new Position(0, -2), new Position(-2, -1), new Position(-1, -2), new Position(-2, -2)
+     },
+     // 4X8
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, -1), new Position(-2, 0),
+         new Position(0, -2), new Position(-2, -1), new Position(-1, -2), new Position(-2, -2)
+     },
+     // 8X4
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, -1), new Position(-2, 0),
+         new Position(0, -2), new Position(-2, -1), new Position(-1, -2), new Position(-2, -2)
+     },
+     // 8X8
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, -1), new Position(-2, 0),
+         new Position(0, -2), new Position(-2, -1), new Position(-1, -2), new Position(-2, -2)
+     },
+     // 8X16
+     new[]
+     {
+         new Position(0, -1), new Position(-1, 0), new Position(1, -1), new Position(-1, -1),
+         new Position(0, -2), new Position(-2, 0), new Position(-2, -1), new Position(-1, -2)
+     },
+     // 16X8
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, 1), new Position(-1, -1),
+         new Position(-2, 0), new Position(0, -2), new Position(-1, -2), new Position(-2, -1)
+     },
+     // 16X16
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, 1), new Position(1, -1),
+         new Position(-1, -1), new Position(-3, 0), new Position(0, -3), new Position(-3, -3)
+     },
+     // 16X32
+     new[]
+     {
+         new Position(0, -1), new Position(-1, 0), new Position(2, -1), new Position(-1, -1),
+         new Position(-1, 1), new Position(0, -3), new Position(-3, 0), new Position(-3, -3)
+     },
+     // 32X16
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, 2), new Position(-1, -1),
+         new Position(1, -1), new Position(-3, 0), new Position(0, -3), new Position(-3, -3)
+     },
+     // 32X32
+     new[]
+     {
+         new Position(-1, 1), new Position(1, -1), new Position(-1, 2), new Position(2, -1),
+         new Position(-1, -1), new Position(-3, 0), new Position(0, -3), new Position(-3, -3)
+     },
+     // 32X64
+     new[]
+     {
+         new Position(0, -1), new Position(-1, 0), new Position(4, -1), new Position(-1, 2),
+         new Position(-1, -1), new Position(0, -3), new Position(-3, 0), new Position(2, -1)
+     },
+     // 64X32
+     new[]
+     {
+         new Position(-1, 0), new Position(0, -1), new Position(-1, 4), new Position(2, -1),
+         new Position(-1, -1), new Position(-3, 0), new Position(0, -3), new Position(-1, 2)
+     },
+     // 64X64
+     new[]
+     {
+         new Position(-1, 3), new Position(3, -1), new Position(-1, 4), new Position(4, -1),
+         new Position(-1, -1), new Position(-1, 0), new Position(0, -1), new Position(-1, 6)
+     }
+ };
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs b/Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs
new file mode 100644
index 00000000..a9da1042
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs
@@ -0,0 +1,389 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class PredCommon
+ {
+ // Returns the context number (asserted to lie in [0, Constants.CompInterContexts)) for
+ // the reference mode. The value depends on which of the above / left neighbours exist,
+ // whether they use a second reference (compound prediction), whether their first
+ // reference equals cm.CompFixedRef and, in the mixed case, whether the single-reference
+ // neighbour is an inter block.
+ public static int GetReferenceModeContext(ref Vp9Common cm, ref MacroBlockD xd)
+ {
+     // Note:
+     // The mode info data structure has a one element border above and to the
+     // left of the entries corresponding to real macroblocks.
+     // The prediction flags in these dummy entries are initialized to 0.
+     bool hasAbove = !xd.AboveMi.IsNull;
+     bool hasLeft = !xd.LeftMi.IsNull;
+     int ctx;
+
+     if (hasAbove && hasLeft)
+     {
+         ref ModeInfo above = ref xd.AboveMi.Value;
+         ref ModeInfo left = ref xd.LeftMi.Value;
+         bool aboveComp = above.HasSecondRef();
+         bool leftComp = left.HasSecondRef();
+
+         if (aboveComp && leftComp)
+         {
+             // Both neighbours use comp pred (4)
+             ctx = 4;
+         }
+         else if (!aboveComp && !leftComp)
+         {
+             // Neither neighbour uses comp pred (0/1): 1 when exactly one of them
+             // has the fixed reference as its first reference.
+             bool aboveFixed = above.RefFrame[0] == cm.CompFixedRef;
+             bool leftFixed = left.RefFrame[0] == cm.CompFixedRef;
+             ctx = aboveFixed != leftFixed ? 1 : 0;
+         }
+         else
+         {
+             // Exactly one neighbour uses comp pred (2/3): inspect the other one.
+             ref ModeInfo single = ref aboveComp ? ref left : ref above;
+             ctx = 2 + (single.RefFrame[0] == cm.CompFixedRef || !single.IsInterBlock() ? 1 : 0);
+         }
+     }
+     else if (hasAbove || hasLeft)
+     {
+         // Only one neighbour exists.
+         ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
+
+         if (edge.HasSecondRef())
+         {
+             // Neighbour uses comp pred (3)
+             ctx = 3;
+         }
+         else
+         {
+             // Neighbour does not use comp pred (0/1)
+             ctx = edge.RefFrame[0] == cm.CompFixedRef ? 1 : 0;
+         }
+     }
+     else
+     {
+         // No neighbour exists (1)
+         ctx = 1;
+     }
+
+     Debug.Assert(ctx >= 0 && ctx < Constants.CompInterContexts);
+     return ctx;
+ }
+
+ // Returns a context number for the given MB prediction signal.
+ // The result (asserted to lie in [0, Constants.RefContexts)) is derived from the
+ // reference frames used by the above and left neighbours of the current block,
+ // compared against cm.CompFixedRef and cm.CompVarRef.
+ //   cm: supplies CompFixedRef, CompVarRef and RefFrameSignBias.
+ //   xd: supplies the AboveMi / LeftMi neighbour pointers; either may be null.
+ public static int GetPredContextCompRefP(ref Vp9Common cm, ref MacroBlockD xd)
+ {
+ int predContext;
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ // fixRefIdx is the sign bias recorded for the fixed compound reference; varRefIdx is
+ // the opposite value and is used below to index RefFrame[] of compound neighbours.
+ int fixRefIdx = cm.RefFrameSignBias[cm.CompFixedRef];
+ int varRefIdx = fixRefIdx == 0 ? 1 : 0;
+
+ if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
+ { // Both edges available
+ bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
+ bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
+
+ if (aboveIntra && leftIntra)
+ { // Intra/Intra (2)
+ predContext = 2;
+ }
+ else if (aboveIntra || leftIntra)
+ { // Intra/Inter
+ // Only the inter neighbour carries reference information.
+ ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
+
+ if (!edgeMi.HasSecondRef()) // single pred (1/3)
+ {
+ predContext = 1 + 2 * (edgeMi.RefFrame[0] != cm.CompVarRef[1] ? 1 : 0);
+ }
+ else // Comp pred (1/3)
+ {
+ predContext = 1 + 2 * (edgeMi.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 1 : 0);
+ }
+ }
+ else
+ { // Inter/Inter
+ // lSg / aSg: the left / above neighbour uses a single reference.
+ // vrfa / vrfl: the reference of the above / left neighbour that is compared with
+ // CompVarRef: RefFrame[0] when single, RefFrame[varRefIdx] when compound.
+ bool lSg = !xd.LeftMi.Value.HasSecondRef();
+ bool aSg = !xd.AboveMi.Value.HasSecondRef();
+ sbyte vrfa = aSg ? xd.AboveMi.Value.RefFrame[0] : xd.AboveMi.Value.RefFrame[varRefIdx];
+ sbyte vrfl = lSg ? xd.LeftMi.Value.RefFrame[0] : xd.LeftMi.Value.RefFrame[varRefIdx];
+
+ if (vrfa == vrfl && cm.CompVarRef[1] == vrfa)
+ {
+ predContext = 0;
+ }
+ else if (lSg && aSg)
+ { // Single/Single
+ if ((vrfa == cm.CompFixedRef && vrfl == cm.CompVarRef[0]) ||
+ (vrfl == cm.CompFixedRef && vrfa == cm.CompVarRef[0]))
+ {
+ predContext = 4;
+ }
+ else if (vrfa == vrfl)
+ {
+ predContext = 3;
+ }
+ else
+ {
+ predContext = 1;
+ }
+ }
+ else if (lSg || aSg)
+ { // Single/Comp
+ // vrfc: reference taken from the compound neighbour.
+ // rfs: reference taken from the single-reference neighbour.
+ sbyte vrfc = lSg ? vrfa : vrfl;
+ sbyte rfs = aSg ? vrfa : vrfl;
+ if (vrfc == cm.CompVarRef[1] && rfs != cm.CompVarRef[1])
+ {
+ predContext = 1;
+ }
+ else if (rfs == cm.CompVarRef[1] && vrfc != cm.CompVarRef[1])
+ {
+ predContext = 2;
+ }
+ else
+ {
+ predContext = 4;
+ }
+ }
+ else if (vrfa == vrfl)
+ { // Comp/Comp
+ predContext = 4;
+ }
+ else
+ {
+ predContext = 2;
+ }
+ }
+ }
+ else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
+ { // One edge available
+ ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
+
+ if (!edgeMi.IsInterBlock())
+ {
+ predContext = 2;
+ }
+ else
+ {
+ // Inter neighbour: 0 when its reference equals CompVarRef[1], otherwise 4 (compound) or 3 (single).
+ if (edgeMi.HasSecondRef())
+ {
+ predContext = 4 * (edgeMi.RefFrame[varRefIdx] != cm.CompVarRef[1] ? 1 : 0);
+ }
+ else
+ {
+ predContext = 3 * (edgeMi.RefFrame[0] != cm.CompVarRef[1] ? 1 : 0);
+ }
+ }
+ }
+ else
+ { // No edges available (2)
+ predContext = 2;
+ }
+ Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
+ return predContext;
+ }
+
+ // Returns the context number (asserted to lie in [0, Constants.RefContexts)) for the
+ // first single-reference prediction signal. The value depends on whether the above /
+ // left neighbours exist, whether they are intra, single-reference or compound blocks,
+ // and on which of their references equal Constants.LastFrame.
+ //   xd: supplies the AboveMi / LeftMi neighbour pointers; either may be null.
+ public static int GetPredContextSingleRefP1(ref MacroBlockD xd)
+ {
+ int predContext;
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
+ { // Both edges available
+ bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
+ bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
+
+ if (aboveIntra && leftIntra)
+ { // Intra/Intra
+ predContext = 2;
+ }
+ else if (aboveIntra || leftIntra)
+ { // Intra/Inter or Inter/Intra
+ // Only the inter neighbour carries reference information.
+ ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
+ if (!edgeMi.HasSecondRef())
+ {
+ predContext = 4 * (edgeMi.RefFrame[0] == Constants.LastFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
+ edgeMi.RefFrame[1] == Constants.LastFrame ? 1 : 0);
+ }
+ }
+ else
+ { // Inter/Inter
+ bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
+ bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
+ sbyte above0 = xd.AboveMi.Value.RefFrame[0];
+ sbyte above1 = xd.AboveMi.Value.RefFrame[1];
+ sbyte left0 = xd.LeftMi.Value.RefFrame[0];
+ sbyte left1 = xd.LeftMi.Value.RefFrame[1];
+
+ if (aboveHasSecond && leftHasSecond)
+ {
+ // Comp/Comp: 2 when any of the four references is LastFrame, otherwise 1.
+ predContext = 1 + (above0 == Constants.LastFrame || above1 == Constants.LastFrame ||
+ left0 == Constants.LastFrame || left1 == Constants.LastFrame ? 1 : 0);
+ }
+ else if (aboveHasSecond || leftHasSecond)
+ {
+ // Single/Comp: rfs is the reference of the single-reference neighbour,
+ // crf1 / crf2 are the two references of the compound neighbour.
+ sbyte rfs = !aboveHasSecond ? above0 : left0;
+ sbyte crf1 = aboveHasSecond ? above0 : left0;
+ sbyte crf2 = aboveHasSecond ? above1 : left1;
+
+ if (rfs == Constants.LastFrame)
+ {
+ predContext = 3 + (crf1 == Constants.LastFrame || crf2 == Constants.LastFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = (crf1 == Constants.LastFrame || crf2 == Constants.LastFrame ? 1 : 0);
+ }
+ }
+ else
+ {
+ // Single/Single: 2 for each neighbour whose reference is LastFrame (0, 2 or 4).
+ predContext = 2 * (above0 == Constants.LastFrame ? 1 : 0) + 2 * (left0 == Constants.LastFrame ? 1 : 0);
+ }
+ }
+ }
+ else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
+ { // One edge available
+ ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
+ if (!edgeMi.IsInterBlock())
+ { // Intra
+ predContext = 2;
+ }
+ else
+ { // Inter
+ if (!edgeMi.HasSecondRef())
+ {
+ predContext = 4 * (edgeMi.RefFrame[0] == Constants.LastFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = 1 + (edgeMi.RefFrame[0] == Constants.LastFrame ||
+ edgeMi.RefFrame[1] == Constants.LastFrame ? 1 : 0);
+ }
+ }
+ }
+ else
+ { // No edges available
+ predContext = 2;
+ }
+ Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
+ return predContext;
+ }
+
+ // Returns the context number (asserted to lie in [0, Constants.RefContexts)) for the
+ // second single-reference prediction signal. The value depends on whether the above /
+ // left neighbours exist, whether they are intra, single-reference or compound blocks,
+ // and on how their references compare with Constants.LastFrame, Constants.GoldenFrame
+ // and Constants.AltRefFrame.
+ //   xd: supplies the AboveMi / LeftMi neighbour pointers; either may be null.
+ public static int GetPredContextSingleRefP2(ref MacroBlockD xd)
+ {
+ int predContext;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (!xd.AboveMi.IsNull && !xd.LeftMi.IsNull)
+ { // Both edges available
+ bool aboveIntra = !xd.AboveMi.Value.IsInterBlock();
+ bool leftIntra = !xd.LeftMi.Value.IsInterBlock();
+
+ if (aboveIntra && leftIntra)
+ { // Intra/Intra
+ predContext = 2;
+ }
+ else if (aboveIntra || leftIntra)
+ { // Intra/Inter or Inter/Intra
+ // Only the inter neighbour carries reference information.
+ ref ModeInfo edgeMi = ref aboveIntra ? ref xd.LeftMi.Value : ref xd.AboveMi.Value;
+ if (!edgeMi.HasSecondRef())
+ {
+ if (edgeMi.RefFrame[0] == Constants.LastFrame)
+ {
+ predContext = 3;
+ }
+ else
+ {
+ predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
+ }
+ }
+ else
+ {
+ predContext = 1 + 2 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
+ edgeMi.RefFrame[1] == Constants.GoldenFrame ? 1 : 0);
+ }
+ }
+ else
+ { // Inter/Inter
+ bool aboveHasSecond = xd.AboveMi.Value.HasSecondRef();
+ bool leftHasSecond = xd.LeftMi.Value.HasSecondRef();
+ sbyte above0 = xd.AboveMi.Value.RefFrame[0];
+ sbyte above1 = xd.AboveMi.Value.RefFrame[1];
+ sbyte left0 = xd.LeftMi.Value.RefFrame[0];
+ sbyte left1 = xd.LeftMi.Value.RefFrame[1];
+
+ if (aboveHasSecond && leftHasSecond)
+ {
+ // Comp/Comp: identical reference pairs give 3 (GoldenFrame present) or 0,
+ // differing pairs give 2.
+ if (above0 == left0 && above1 == left1)
+ {
+ predContext = 3 * (above0 == Constants.GoldenFrame || above1 == Constants.GoldenFrame ||
+ left0 == Constants.GoldenFrame || left1 == Constants.GoldenFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = 2;
+ }
+ }
+ else if (aboveHasSecond || leftHasSecond)
+ {
+ // Single/Comp: rfs is the reference of the single-reference neighbour,
+ // crf1 / crf2 are the two references of the compound neighbour.
+ sbyte rfs = !aboveHasSecond ? above0 : left0;
+ sbyte crf1 = aboveHasSecond ? above0 : left0;
+ sbyte crf2 = aboveHasSecond ? above1 : left1;
+
+ if (rfs == Constants.GoldenFrame)
+ {
+ predContext = 3 + (crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0);
+ }
+ else if (rfs == Constants.AltRefFrame)
+ {
+ predContext = crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0;
+ }
+ else
+ {
+ predContext = 1 + 2 * (crf1 == Constants.GoldenFrame || crf2 == Constants.GoldenFrame ? 1 : 0);
+ }
+ }
+ else
+ {
+ // Single/Single
+ if (above0 == Constants.LastFrame && left0 == Constants.LastFrame)
+ {
+ predContext = 3;
+ }
+ else if (above0 == Constants.LastFrame || left0 == Constants.LastFrame)
+ {
+ // edge0 is the reference of the neighbour that does not use LastFrame.
+ sbyte edge0 = (above0 == Constants.LastFrame) ? left0 : above0;
+ predContext = 4 * (edge0 == Constants.GoldenFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = 2 * (above0 == Constants.GoldenFrame ? 1 : 0) + 2 * (left0 == Constants.GoldenFrame ? 1 : 0);
+ }
+ }
+ }
+ }
+ else if (!xd.AboveMi.IsNull || !xd.LeftMi.IsNull)
+ { // One edge available
+ ref ModeInfo edgeMi = ref !xd.AboveMi.IsNull ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;
+
+ // An intra neighbour, or a single-reference neighbour using LastFrame, gives 2.
+ if (!edgeMi.IsInterBlock() || (edgeMi.RefFrame[0] == Constants.LastFrame && !edgeMi.HasSecondRef()))
+ {
+ predContext = 2;
+ }
+ else if (!edgeMi.HasSecondRef())
+ {
+ predContext = 4 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ? 1 : 0);
+ }
+ else
+ {
+ predContext = 3 * (edgeMi.RefFrame[0] == Constants.GoldenFrame ||
+ edgeMi.RefFrame[1] == Constants.GoldenFrame ? 1 : 0);
+ }
+ }
+ else
+ { // No edges available (2)
+ predContext = 2;
+ }
+ Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);
+ return predContext;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs b/Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs
new file mode 100644
index 00000000..5c52c32f
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs
@@ -0,0 +1,203 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class QuantCommon
+ {
+ public const int MinQ = 0;
+ public const int MaxQ = 255;
+
+ private static readonly short[] DcQlookup = new short[]
+ {
+ 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
+ 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
+ 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
+ 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
+ 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
+ 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
+ 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
+ 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
+ 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
+ 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
+ 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
+ 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
+ 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
+ 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
+ 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
+ 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
+ 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
+ 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
+ 1184, 1232, 1282, 1336,
+ };
+
+ private static readonly short[] DcQlookup10 = new short[]
+ {
+ 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
+ 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
+ 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
+ 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
+ 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
+ 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
+ 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
+ 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
+ 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
+ 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
+ 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
+ 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
+ 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
+ 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
+ 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
+ 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
+ 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
+ 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
+ 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
+ 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
+ };
+
+ private static readonly short[] DcQlookup12 = new short[]
+ {
+ 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
+ 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
+ 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
+ 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
+ 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
+ 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
+ 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
+ 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
+ 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
+ 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
+ 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
+ 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
+ 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
+ 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
+ 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
+ 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
+ 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
+ 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
+ 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
+ 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
+ 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
+ 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
+ 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
+ 19718, 20521, 21387,
+ };
+
+ private static readonly short[] AcQlookup = new short[]
+ {
+ 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
+ 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
+ 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+ 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
+ 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
+ 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
+ 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
+ 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
+ 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
+ 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
+ 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
+ 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
+ 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
+ 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
+ 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+ };
+
+ private static readonly short[] AcQlookup10 = new short[]
+ {
+ 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
+ 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
+ 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
+ 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
+ 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
+ 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
+ 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
+ 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
+ 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
+ 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
+ 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
+ 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
+ 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
+ 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
+ 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
+ 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
+ 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
+ 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
+ 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
+ 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
+ };
+
+ private static readonly short[] AcQlookup12 = new short[]
+ {
+ 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
+ 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
+ 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
+ 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
+ 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
+ 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
+ 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
+ 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
+ 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
+ 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
+ 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
+ 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
+ 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
+ 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
+ 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
+ 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
+ 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
+ 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
+ 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
+ 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
+ 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
+ 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
+ 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
+ 28143, 28687, 29247,
+ };
+
+ // Looks up the DC quantizer value for the given bit depth. The table index is
+ // qindex + delta clamped to [MinQ, MaxQ]. An unsupported bit depth trips a debug
+ // assertion and yields -1.
+ public static short DcQuant(int qindex, int delta, BitDepth bitDepth)
+ {
+     short[] table;
+
+     if (bitDepth == BitDepth.Bits8)
+     {
+         table = DcQlookup;
+     }
+     else if (bitDepth == BitDepth.Bits10)
+     {
+         table = DcQlookup10;
+     }
+     else if (bitDepth == BitDepth.Bits12)
+     {
+         table = DcQlookup12;
+     }
+     else
+     {
+         Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+         return -1;
+     }
+
+     return table[Math.Clamp(qindex + delta, MinQ, MaxQ)];
+ }
+
+ // Looks up the AC quantizer value for the given bit depth. The table index is
+ // qindex + delta clamped to [MinQ, MaxQ]. An unsupported bit depth trips a debug
+ // assertion and yields -1.
+ public static short AcQuant(int qindex, int delta, BitDepth bitDepth)
+ {
+     short[] table;
+
+     if (bitDepth == BitDepth.Bits8)
+     {
+         table = AcQlookup;
+     }
+     else if (bitDepth == BitDepth.Bits10)
+     {
+         table = AcQlookup10;
+     }
+     else if (bitDepth == BitDepth.Bits12)
+     {
+         table = AcQlookup12;
+     }
+     else
+     {
+         Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+         return -1;
+     }
+
+     return table[Math.Clamp(qindex + delta, MinQ, MaxQ)];
+ }
+
+ // Returns the quantizer index to use for a segment. When the segment's SegLvlAltQ
+ // feature is inactive this is baseQIndex unchanged. Otherwise the segment data either
+ // replaces baseQIndex (seg.AbsDelta == Constants.SegmentAbsData) or is added to it,
+ // and the result is clamped to [MinQ, MaxQ].
+ public static int GetQIndex(ref Segmentation seg, int segmentId, int baseQIndex)
+ {
+     if (seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlAltQ) == 0)
+     {
+         return baseQIndex;
+     }
+
+     int altQ = seg.GetSegData(segmentId, SegLvlFeatures.SegLvlAltQ);
+     int qIndex;
+
+     if (seg.AbsDelta == Constants.SegmentAbsData)
+     {
+         qIndex = altQ;
+     }
+     else
+     {
+         qIndex = baseQIndex + altQ;
+     }
+
+     return Math.Clamp(qIndex, MinQ, MaxQ);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs b/Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs
new file mode 100644
index 00000000..a4c295e5
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs
@@ -0,0 +1,234 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class ReconInter
+ {
+ // Thin wrapper around sf.InterPredict for byte samples. The first two arguments
+ // handed on are 1/0 flags telling whether subpelX / subpelY are non-zero; every other
+ // argument is forwarded unchanged.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static unsafe void InterPredictor(
+     byte* src,
+     int srcStride,
+     byte* dst,
+     int dstStride,
+     int subpelX,
+     int subpelY,
+     ref ScaleFactors sf,
+     int w,
+     int h,
+     int refr,
+     Array8<short>[] kernel,
+     int xs,
+     int ys)
+ {
+     int hasSubpelX = subpelX != 0 ? 1 : 0;
+     int hasSubpelY = subpelY != 0 ? 1 : 0;
+
+     sf.InterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys);
+ }
+
+ // Thin wrapper around sf.HighbdInterPredict for ushort samples. The first two
+ // arguments handed on are 1/0 flags telling whether subpelX / subpelY are non-zero;
+ // every other argument, including the bit depth bd, is forwarded unchanged.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static unsafe void HighbdInterPredictor(
+     ushort* src,
+     int srcStride,
+     ushort* dst,
+     int dstStride,
+     int subpelX,
+     int subpelY,
+     ref ScaleFactors sf,
+     int w,
+     int h,
+     int refr,
+     Array8<short>[] kernel,
+     int xs,
+     int ys,
+     int bd)
+ {
+     int hasSubpelX = subpelX != 0 ? 1 : 0;
+     int hasSubpelY = subpelY != 0 ? 1 : 0;
+
+     sf.HighbdInterPredict(hasSubpelX, hasSubpelY, refr, src, srcStride, dst, dstStride, subpelX, subpelY, w, h, kernel, xs, ys, bd);
+ }
+
+ // Divides value by 4, rounding to the nearest integer with ties away from zero:
+ // a bias of 2 with the sign of value is added before the truncating division.
+ private static int RoundMvCompQ4(int value)
+ {
+     int bias = value < 0 ? -2 : 2;
+     return (value + bias) / 4;
+ }
+
+ // Returns the rounded average of the motion vectors with index idx of the four
+ // sub-blocks mi.Bmi[0..3]. Rows and columns are averaged independently.
+ private static Mv MiMvPredQ4(ref ModeInfo mi, int idx)
+ {
+     int rowSum = 0;
+     int colSum = 0;
+
+     for (int b = 0; b < 4; b++)
+     {
+         rowSum += mi.Bmi[b].Mv[idx].Row;
+         colSum += mi.Bmi[b].Mv[idx].Col;
+     }
+
+     Mv average = new Mv();
+     average.Row = (short)RoundMvCompQ4(rowSum);
+     average.Col = (short)RoundMvCompQ4(colSum);
+     return average;
+ }
+
+ // Divides value by 2, rounding to the nearest integer with ties away from zero:
+ // a bias of 1 with the sign of value is added before the truncating division.
+ private static int RoundMvCompQ2(int value)
+ {
+     int bias = value < 0 ? -1 : 1;
+     return (value + bias) / 2;
+ }
+
+ // Returns the rounded average of the motion vectors with index idx of the two
+ // sub-blocks mi.Bmi[block0] and mi.Bmi[block1]. Rows and columns are averaged
+ // independently.
+ private static Mv MiMvPredQ2(ref ModeInfo mi, int idx, int block0, int block1)
+ {
+     int rowSum = mi.Bmi[block0].Mv[idx].Row + mi.Bmi[block1].Mv[idx].Row;
+     int colSum = mi.Bmi[block0].Mv[idx].Col + mi.Bmi[block1].Mv[idx].Col;
+
+     Mv average = new Mv();
+     average.Row = (short)RoundMvCompQ2(rowSum);
+     average.Col = (short)RoundMvCompQ2(colSum);
+     return average;
+ }
+
+ // Returns srcMv scaled for the plane's subsampling (multiplied by 2 in a direction
+ // with ss == 0, by 1 with ss == 1) and clamped against the block-to-edge distances
+ // stored in xd, widened by a margin derived from the block size (bw, bh).
+ public static Mv ClampMvToUmvBorderSb(ref MacroBlockD xd, ref Mv srcMv, int bw, int bh, int ssX, int ssY)
+ {
+     // Per-direction multiplier applied to both the vector and the edge distances.
+     int scaleX = 1 << (1 - ssX);
+     int scaleY = 1 << (1 - ssY);
+
+     // If the MV points so far into the UMV border that no visible pixels
+     // are used for reconstruction, the subpel part of the MV can be
+     // discarded and the MV limited to 16 pixels with equivalent results.
+     int spelLeft = (Constants.Vp9InterpExtend + bw) << SubpelBits;
+     int spelTop = (Constants.Vp9InterpExtend + bh) << SubpelBits;
+     int spelRight = spelLeft - SubpelShifts;
+     int spelBottom = spelTop - SubpelShifts;
+
+     Mv clampedMv = new Mv();
+     clampedMv.Row = (short)(srcMv.Row * scaleY);
+     clampedMv.Col = (short)(srcMv.Col * scaleX);
+
+     Debug.Assert(ssX <= 1);
+     Debug.Assert(ssY <= 1);
+
+     int minCol = xd.MbToLeftEdge * scaleX - spelLeft;
+     int maxCol = xd.MbToRightEdge * scaleX + spelRight;
+     int minRow = xd.MbToTopEdge * scaleY - spelTop;
+     int maxRow = xd.MbToBottomEdge * scaleY + spelBottom;
+
+     clampedMv.ClampMv(minCol, maxCol, minRow, maxRow);
+
+     return clampedMv;
+ }
+
+ // Returns the motion vector (reference index refr) to use for sub-block `block` of a
+ // plane, depending on the plane's subsampling:
+ //   none             -> the sub-block's own vector
+ //   vertical only    -> rounded average of sub-blocks block and block + 2
+ //   horizontal only  -> rounded average of sub-blocks block and block + 1
+ //   both directions  -> rounded average of all four sub-blocks
+ public static Mv AverageSplitMvs(ref MacroBlockDPlane pd, ref ModeInfo mi, int refr, int block)
+ {
+     bool subsampledX = pd.SubsamplingX > 0;
+     bool subsampledY = pd.SubsamplingY > 0;
+
+     if (subsampledX && subsampledY)
+     {
+         return MiMvPredQ4(ref mi, refr);
+     }
+
+     if (subsampledX)
+     {
+         return MiMvPredQ2(ref mi, refr, block, block + 1);
+     }
+
+     if (subsampledY)
+     {
+         return MiMvPredQ2(ref mi, refr, block, block + 2);
+     }
+
+     return mi.Bmi[block].Mv[refr];
+ }
+
+ // Returns the linear offset y * stride + x of a position inside a buffer. When sf is
+ // non-null the coordinates are first passed through sf.Value.ScaleValueX / ScaleValueY.
+ private static int ScaledBufferOffset(int xOffset, int yOffset, int stride, Ptr<ScaleFactors> sf)
+ {
+     if (sf.IsNull)
+     {
+         return yOffset * stride + xOffset;
+     }
+
+     int scaledX = sf.Value.ScaleValueX(xOffset);
+     int scaledY = sf.Value.ScaleValueY(yOffset);
+     return scaledY * stride + scaledX;
+ }
+
+ // Points dst at the location of mode-info position (miRow, miCol) inside src and
+ // records the stride. The position is converted to sample units with Constants.MiSize,
+ // shifted right by the plane's subsampling, and optionally scaled through `scale`.
+ private static void SetupPredPlanes(
+     ref Buf2D dst,
+     ArrayPtr<byte> src,
+     int stride,
+     int miRow,
+     int miCol,
+     Ptr<ScaleFactors> scale,
+     int subsamplingX,
+     int subsamplingY)
+ {
+     int col = (Constants.MiSize * miCol) >> subsamplingX;
+     int row = (Constants.MiSize * miRow) >> subsamplingY;
+     int offset = ScaledBufferOffset(col, row, stride, scale);
+
+     dst.Buf = src.Slice(offset);
+     dst.Stride = stride;
+ }
+
+ // Points the Dst buffer of every plane at mode-info position (miRow, miCol) inside the
+ // Y, U and V buffers of src. Plane 0 uses src.Stride, planes 1 and 2 use src.UvStride.
+ // No scaling is applied: a null ScaleFactors pointer is passed on.
+ public static void SetupDstPlanes(
+     ref Array3<MacroBlockDPlane> planes,
+     ref Surface src,
+     int miRow,
+     int miCol)
+ {
+     Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
+     planeBuffers[0] = src.YBuffer;
+     planeBuffers[1] = src.UBuffer;
+     planeBuffers[2] = src.VBuffer;
+
+     Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];
+     planeStrides[0] = src.Stride;
+     planeStrides[1] = src.UvStride;
+     planeStrides[2] = src.UvStride;
+
+     for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
+     {
+         ref MacroBlockDPlane pd = ref planes[plane];
+
+         SetupPredPlanes(
+             ref pd.Dst,
+             planeBuffers[plane],
+             planeStrides[plane],
+             miRow,
+             miCol,
+             Ptr<ScaleFactors>.Null,
+             pd.SubsamplingX,
+             pd.SubsamplingY);
+     }
+ }
+
+ // Points the Pre[idx] buffer of every plane of xd at mode-info position
+ // (miRow, miCol) inside the Y, U and V buffers of src, applying the scale factors sf.
+ // Does nothing when any of the three source buffers is null.
+ public static void SetupPrePlanes(
+     ref MacroBlockD xd,
+     int idx,
+     ref Surface src,
+     int miRow,
+     int miCol,
+     Ptr<ScaleFactors> sf)
+ {
+     if (src.YBuffer.IsNull || src.UBuffer.IsNull || src.VBuffer.IsNull)
+     {
+         return;
+     }
+
+     Span<ArrayPtr<byte>> planeBuffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
+     planeBuffers[0] = src.YBuffer;
+     planeBuffers[1] = src.UBuffer;
+     planeBuffers[2] = src.VBuffer;
+
+     Span<int> planeStrides = stackalloc int[Constants.MaxMbPlane];
+     planeStrides[0] = src.Stride;
+     planeStrides[1] = src.UvStride;
+     planeStrides[2] = src.UvStride;
+
+     for (int plane = 0; plane < Constants.MaxMbPlane; plane++)
+     {
+         ref MacroBlockDPlane pd = ref xd.Plane[plane];
+
+         SetupPredPlanes(
+             ref pd.Pre[idx],
+             planeBuffers[plane],
+             planeStrides[plane],
+             miRow,
+             miCol,
+             sf,
+             pd.SubsamplingX,
+             pd.SubsamplingY);
+     }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs b/Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs
new file mode 100644
index 00000000..0e1ddfb3
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs
@@ -0,0 +1,761 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.IntraPred;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ internal static class ReconIntra
+ {
+ // Transform type associated with each intra prediction mode. Entries are listed in the
+ // mode order named by the trailing comments (DC, V, H, D45, D135, D117, D153, D207,
+ // D63, TM); presumably indexed by the numeric value of PredictionMode - confirm
+ // against the callers.
+ public static readonly TxType[] IntraModeToTxTypeLookup = new TxType[]
+ {
+ TxType.DctDct, // DC
+ TxType.AdstDct, // V
+ TxType.DctAdst, // H
+ TxType.DctDct, // D45
+ TxType.AdstAdst, // D135
+ TxType.AdstDct, // D117
+ TxType.DctAdst, // D153
+ TxType.DctAdst, // D207
+ TxType.AdstDct, // D63
+ TxType.AdstAdst // TM
+ };
+
+ // Bit flags naming the neighbouring edges a prediction mode reads: the left column,
+ // the row above, and the row above extended to the right.
+ private const int NeedLeft = 1 << 1;
+ private const int NeedAbove = 1 << 2;
+ private const int NeedAboveRight = 1 << 3;
+
+ // Per-mode combination of the Need* flags, indexed by (int)mode. Entries follow the
+ // mode order named by the trailing comments.
+ private static readonly byte[] ExtendModes = new byte[]
+ {
+ NeedAbove | NeedLeft, // DC
+ NeedAbove, // V
+ NeedLeft, // H
+ NeedAboveRight, // D45
+ NeedLeft | NeedAbove, // D135
+ NeedLeft | NeedAbove, // D117
+ NeedLeft | NeedAbove, // D153
+ NeedLeft, // D207
+ NeedAboveRight, // D63
+ NeedLeft | NeedAbove, // TM
+ };
+
+ private unsafe delegate void IntraPredFn(byte* dst, int stride, byte* above, byte* left);
+
+ private static unsafe IntraPredFn[][] _pred = new IntraPredFn[][]
+ {
+ new IntraPredFn[]
+ {
+ null,
+ null,
+ null,
+ null
+ },
+ new IntraPredFn[]
+ {
+ VPredictor4x4,
+ VPredictor8x8,
+ VPredictor16x16,
+ VPredictor32x32
+ },
+ new IntraPredFn[]
+ {
+ HPredictor4x4,
+ HPredictor8x8,
+ HPredictor16x16,
+ HPredictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D45Predictor4x4,
+ D45Predictor8x8,
+ D45Predictor16x16,
+ D45Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D135Predictor4x4,
+ D135Predictor8x8,
+ D135Predictor16x16,
+ D135Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D117Predictor4x4,
+ D117Predictor8x8,
+ D117Predictor16x16,
+ D117Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D153Predictor4x4,
+ D153Predictor8x8,
+ D153Predictor16x16,
+ D153Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D207Predictor4x4,
+ D207Predictor8x8,
+ D207Predictor16x16,
+ D207Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ D63Predictor4x4,
+ D63Predictor8x8,
+ D63Predictor16x16,
+ D63Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ TMPredictor4x4,
+ TMPredictor8x8,
+ TMPredictor16x16,
+ TMPredictor32x32
+ }
+ };
+
+ private static unsafe IntraPredFn[][][] _dcPred = new IntraPredFn[][][]
+ {
+ new IntraPredFn[][]
+ {
+ new IntraPredFn[]
+ {
+ Dc128Predictor4x4,
+ Dc128Predictor8x8,
+ Dc128Predictor16x16,
+ Dc128Predictor32x32
+ },
+ new IntraPredFn[]
+ {
+ DcTopPredictor4x4,
+ DcTopPredictor8x8,
+ DcTopPredictor16x16,
+ DcTopPredictor32x32
+ }
+ },
+ new IntraPredFn[][]
+ {
+ new IntraPredFn[]
+ {
+ DcLeftPredictor4x4,
+ DcLeftPredictor8x8,
+ DcLeftPredictor16x16,
+ DcLeftPredictor32x32
+ },
+ new IntraPredFn[]
+ {
+ DcPredictor4x4,
+ DcPredictor8x8,
+ DcPredictor16x16,
+ DcPredictor32x32
+ }
+ }
+ };
+
+ private unsafe delegate void IntraHighPredFn(ushort* dst, int stride, ushort* above, ushort* left, int bd);
+
+ private static unsafe IntraHighPredFn[][] _predHigh = new IntraHighPredFn[][]
+ {
+ new IntraHighPredFn[]
+ {
+ null,
+ null,
+ null,
+ null
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdVPredictor4x4,
+ HighbdVPredictor8x8,
+ HighbdVPredictor16x16,
+ HighbdVPredictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdHPredictor4x4,
+ HighbdHPredictor8x8,
+ HighbdHPredictor16x16,
+ HighbdHPredictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD45Predictor4x4,
+ HighbdD45Predictor8x8,
+ HighbdD45Predictor16x16,
+ HighbdD45Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD135Predictor4x4,
+ HighbdD135Predictor8x8,
+ HighbdD135Predictor16x16,
+ HighbdD135Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD117Predictor4x4,
+ HighbdD117Predictor8x8,
+ HighbdD117Predictor16x16,
+ HighbdD117Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD153Predictor4x4,
+ HighbdD153Predictor8x8,
+ HighbdD153Predictor16x16,
+ HighbdD153Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD207Predictor4x4,
+ HighbdD207Predictor8x8,
+ HighbdD207Predictor16x16,
+ HighbdD207Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdD63Predictor4x4,
+ HighbdD63Predictor8x8,
+ HighbdD63Predictor16x16,
+ HighbdD63Predictor32x32
+ },
+ new IntraHighPredFn[]
+ {
+ HighbdTMPredictor4x4,
+ HighbdTMPredictor8x8,
+ HighbdTMPredictor16x16,
+ HighbdTMPredictor32x32
+ }
+ };
+
+ // High bit depth DC predictor table, looked up by BuildIntraPredictorsHigh as
+ // _dcPredHigh[leftAvailable][upAvailable][(int)txSize].
+ // Every innermost row lists the 4x4, 8x8, 16x16 and 32x32 variant of one DC predictor.
+ private static unsafe IntraHighPredFn[][][] _dcPredHigh =
+ {
+ // leftAvailable == 0
+ new IntraHighPredFn[][]
+ {
+ // upAvailable == 0
+ new IntraHighPredFn[] { HighbdDc128Predictor4x4, HighbdDc128Predictor8x8, HighbdDc128Predictor16x16, HighbdDc128Predictor32x32 },
+ // upAvailable == 1
+ new IntraHighPredFn[] { HighbdDcTopPredictor4x4, HighbdDcTopPredictor8x8, HighbdDcTopPredictor16x16, HighbdDcTopPredictor32x32 }
+ },
+ // leftAvailable == 1
+ new IntraHighPredFn[][]
+ {
+ // upAvailable == 0
+ new IntraHighPredFn[] { HighbdDcLeftPredictor4x4, HighbdDcLeftPredictor8x8, HighbdDcLeftPredictor16x16, HighbdDcLeftPredictor32x32 },
+ // upAvailable == 1
+ new IntraHighPredFn[] { HighbdDcPredictor4x4, HighbdDcPredictor8x8, HighbdDcPredictor16x16, HighbdDcPredictor32x32 }
+ }
+ };
+
+ // Gathers the neighbouring reference samples of one high bit depth block into local
+ // buffers (a left column and an above row) and runs the predictor chosen by mode/txSize.
+ //
+ // ref8 and dst8 are reinterpreted as ushort*; refStride is applied in ushort elements.
+ // upAvailable/leftAvailable/rightAvailable are compared against 0 and, for DcPred, the
+ // first two are used directly as indices into _dcPredHigh (so 0 or 1 is expected).
+ // x/y are added to the origin derived from xd.MbToLeftEdge/MbToTopEdge to obtain the
+ // position (x0, y0) that is checked against the width/height of the selected plane.
+ private static unsafe void BuildIntraPredictorsHigh(
+ ref MacroBlockD xd,
+ byte* ref8,
+ int refStride,
+ byte* dst8,
+ int dstStride,
+ PredictionMode mode,
+ TxSize txSize,
+ int upAvailable,
+ int leftAvailable,
+ int rightAvailable,
+ int x,
+ int y,
+ int plane)
+ {
+ int i;
+ ushort* dst = (ushort*)dst8;
+ ushort* refr = (ushort*)ref8;
+ ushort* leftCol = stackalloc ushort[32];
+ // aboveRow starts 16 elements into aboveData so that aboveRow[-1] (top-left sample) is addressable.
+ ushort* aboveData = stackalloc ushort[64 + 16];
+ ushort* aboveRow = aboveData + 16;
+ // Row handed to the predictor; redirected to the frame itself on the copy-free fast path below.
+ ushort* constAboveRow = aboveRow;
+ // Block edge length in samples: 4, 8, 16 or 32 for txSize 0..3.
+ int bs = 4 << (int)txSize;
+ int frameWidth, frameHeight;
+ int x0, y0;
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+ // Which neighbours the selected mode reads, taken from the ExtendModes flag table.
+ int needLeft = ExtendModes[(int)mode] & NeedLeft;
+ int needAbove = ExtendModes[(int)mode] & NeedAbove;
+ int needAboveRight = ExtendModes[(int)mode] & NeedAboveRight;
+ // Mid-grey for the current bit depth: 128 scaled up by (Bd - 8) bits.
+ int baseVal = 128 << (xd.Bd - 8);
+ // 127 127 127 .. 127 127 127 127 127 127
+ // 129 A B .. Y Z
+ // 129 C D .. W X
+ // 129 E F .. U V
+ // 129 G H .. S T T T T T
+ // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
+
+ // Get current frame pointer, width and height.
+ if (plane == 0)
+ {
+ frameWidth = xd.CurBuf.Width;
+ frameHeight = xd.CurBuf.Height;
+ }
+ else
+ {
+ frameWidth = xd.CurBuf.UvWidth;
+ frameHeight = xd.CurBuf.UvHeight;
+ }
+
+ // Get block position in current frame.
+ // MbToLeftEdge/MbToTopEdge are negated and shifted right by 3 plus the plane's subsampling.
+ x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
+ y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
+
+ // NEED_LEFT
+ if (needLeft != 0)
+ {
+ if (leftAvailable != 0)
+ {
+ if (xd.MbToBottomEdge < 0)
+ {
+ /* slower path if the block needs border extension */
+ if (y0 + bs <= frameHeight)
+ {
+ for (i = 0; i < bs; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+ }
+ else
+ {
+ // Only extendBottom rows lie inside the frame; the last of them is repeated for the rest.
+ int extendBottom = frameHeight - y0;
+ for (i = 0; i < extendBottom; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+
+ for (; i < bs; ++i)
+ {
+ leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
+ }
+ }
+ }
+ else
+ {
+ /* faster path if the block does not need extension */
+ for (i = 0; i < bs; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+ }
+ }
+ else
+ {
+ // No left neighbour: use the constant base + 1 (the "129" of the diagram above).
+ MemoryUtil.Fill(leftCol, (ushort)(baseVal + 1), bs);
+ }
+ }
+
+ // NEED_ABOVE
+ if (needAbove != 0)
+ {
+ if (upAvailable != 0)
+ {
+ ushort* aboveRef = refr - refStride;
+ if (xd.MbToRightEdge < 0)
+ {
+ /* slower path if the block needs border extension */
+ if (x0 + bs <= frameWidth)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ }
+ else if (x0 <= frameWidth)
+ {
+ // Copy the r samples inside the frame, then repeat the last one up to bs.
+ // NOTE(review): if x0 == frameWidth then r is 0 and aboveRow[r - 1] reads
+ // aboveRow[-1] before it is assigned below - confirm this cannot happen.
+ int r = frameWidth - x0;
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
+ }
+ }
+ else
+ {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
+ {
+ // Predict straight from the frame row; no copy into the local buffer.
+ constAboveRow = aboveRef;
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ }
+ }
+ // Top-left sample: taken from the frame when a left neighbour exists, else base + 1.
+ aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
+ }
+ else
+ {
+ // No row above: use the constant base - 1 (the "127" of the diagram above).
+ MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs);
+ aboveRow[-1] = (ushort)(baseVal - 1);
+ }
+ }
+
+ // NEED_ABOVERIGHT
+ // Same as NEED_ABOVE, but 2 * bs samples are prepared; real above-right samples are only
+ // copied for 4x4 blocks with rightAvailable set, otherwise the last sample is replicated.
+ if (needAboveRight != 0)
+ {
+ if (upAvailable != 0)
+ {
+ ushort* aboveRef = refr - refStride;
+ if (xd.MbToRightEdge < 0)
+ {
+ /* slower path if the block needs border extension */
+ if (x0 + 2 * bs <= frameWidth)
+ {
+ if (rightAvailable != 0 && bs == 4)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+ }
+ else if (x0 + bs <= frameWidth)
+ {
+ int r = frameWidth - x0;
+ if (rightAvailable != 0 && bs == 4)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+ }
+ else if (x0 <= frameWidth)
+ {
+ int r = frameWidth - x0;
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
+ }
+ aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
+ }
+ else
+ {
+ /* faster path if the block does not need extension */
+ if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
+ {
+ // The frame row is used directly, so aboveRow[-1] is not written on this path.
+ constAboveRow = aboveRef;
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ if (bs == 4 && rightAvailable != 0)
+ {
+ MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
+ }
+ else
+ {
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+
+ aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (ushort)(baseVal + 1);
+ }
+ }
+ }
+ else
+ {
+ MemoryUtil.Fill(aboveRow, (ushort)(baseVal - 1), bs * 2);
+ aboveRow[-1] = (ushort)(baseVal - 1);
+ }
+ }
+
+ // Predict
+ // DcPred picks its variant from the availability flags; every other mode uses _predHigh.
+ if (mode == PredictionMode.DcPred)
+ {
+ _dcPredHigh[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
+ }
+ else
+ {
+ _predHigh[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol, xd.Bd);
+ }
+ }
+
+ // Gathers the neighbouring reference samples of one 8-bit block into local buffers
+ // (a left column and an above row) and runs the predictor chosen by mode/txSize into dst.
+ //
+ // upAvailable/leftAvailable/rightAvailable are compared against 0 and, for DcPred, the
+ // first two are used directly as indices into _dcPred (so 0 or 1 is expected).
+ // x/y are added to the origin derived from xd.MbToLeftEdge/MbToTopEdge to obtain the
+ // position (x0, y0) that is checked against the width/height of the selected plane.
+ public static unsafe void BuildIntraPredictors(
+ ref MacroBlockD xd,
+ byte* refr,
+ int refStride,
+ byte* dst,
+ int dstStride,
+ PredictionMode mode,
+ TxSize txSize,
+ int upAvailable,
+ int leftAvailable,
+ int rightAvailable,
+ int x,
+ int y,
+ int plane)
+ {
+ int i;
+ byte* leftCol = stackalloc byte[32];
+ // aboveRow starts 16 bytes into aboveData so that aboveRow[-1] (top-left sample) is addressable.
+ byte* aboveData = stackalloc byte[64 + 16];
+ byte* aboveRow = aboveData + 16;
+ // Row handed to the predictor; redirected to the frame itself on the copy-free fast path below.
+ byte* constAboveRow = aboveRow;
+ // Block edge length in samples: 4, 8, 16 or 32 for txSize 0..3.
+ int bs = 4 << (int)txSize;
+ int frameWidth, frameHeight;
+ int x0, y0;
+ ref MacroBlockDPlane pd = ref xd.Plane[plane];
+
+ // 127 127 127 .. 127 127 127 127 127 127
+ // 129 A B .. Y Z
+ // 129 C D .. W X
+ // 129 E F .. U V
+ // 129 G H .. S T T T T T
+ // ..
+
+ // Get current frame pointer, width and height.
+ if (plane == 0)
+ {
+ frameWidth = xd.CurBuf.Width;
+ frameHeight = xd.CurBuf.Height;
+ }
+ else
+ {
+ frameWidth = xd.CurBuf.UvWidth;
+ frameHeight = xd.CurBuf.UvHeight;
+ }
+
+ // Get block position in current frame.
+ // MbToLeftEdge/MbToTopEdge are negated and shifted right by 3 plus the plane's subsampling.
+ x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
+ y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;
+
+ // NEED_LEFT
+ if ((ExtendModes[(int)mode] & NeedLeft) != 0)
+ {
+ if (leftAvailable != 0)
+ {
+ if (xd.MbToBottomEdge < 0)
+ {
+ /* Slower path if the block needs border extension */
+ if (y0 + bs <= frameHeight)
+ {
+ for (i = 0; i < bs; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+ }
+ else
+ {
+ // Only extendBottom rows lie inside the frame; the last of them is repeated for the rest.
+ int extendBottom = frameHeight - y0;
+ for (i = 0; i < extendBottom; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+
+ for (; i < bs; ++i)
+ {
+ leftCol[i] = refr[(extendBottom - 1) * refStride - 1];
+ }
+ }
+ }
+ else
+ {
+ /* Faster path if the block does not need extension */
+ for (i = 0; i < bs; ++i)
+ {
+ leftCol[i] = refr[i * refStride - 1];
+ }
+ }
+ }
+ else
+ {
+ // No left neighbour: use the constant 129 shown in the diagram above.
+ MemoryUtil.Fill(leftCol, (byte)129, bs);
+ }
+ }
+
+ // NEED_ABOVE
+ if ((ExtendModes[(int)mode] & NeedAbove) != 0)
+ {
+ if (upAvailable != 0)
+ {
+ byte* aboveRef = refr - refStride;
+ if (xd.MbToRightEdge < 0)
+ {
+ /* Slower path if the block needs border extension */
+ if (x0 + bs <= frameWidth)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ }
+ else if (x0 <= frameWidth)
+ {
+ // Copy the r samples inside the frame, then repeat the last one up to bs.
+ // NOTE(review): if x0 == frameWidth then r is 0 and aboveRow[r - 1] reads
+ // aboveRow[-1] before it is assigned below - confirm this cannot happen.
+ int r = frameWidth - x0;
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
+ }
+ }
+ else
+ {
+ /* Faster path if the block does not need extension */
+ if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
+ {
+ // Predict straight from the frame row; no copy into the local buffer.
+ constAboveRow = aboveRef;
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ }
+ }
+ // Top-left sample: taken from the frame when a left neighbour exists, else 129.
+ aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
+ }
+ else
+ {
+ // No row above: use the constant 127 shown in the diagram above.
+ MemoryUtil.Fill(aboveRow, (byte)127, bs);
+ aboveRow[-1] = 127;
+ }
+ }
+
+ // NEED_ABOVERIGHT
+ // Same as NEED_ABOVE, but 2 * bs samples are prepared; real above-right samples are only
+ // copied for 4x4 blocks with rightAvailable set, otherwise the last sample is replicated.
+ if ((ExtendModes[(int)mode] & NeedAboveRight) != 0)
+ {
+ if (upAvailable != 0)
+ {
+ byte* aboveRef = refr - refStride;
+ if (xd.MbToRightEdge < 0)
+ {
+ /* Slower path if the block needs border extension */
+ if (x0 + 2 * bs <= frameWidth)
+ {
+ if (rightAvailable != 0 && bs == 4)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+ }
+ else if (x0 + bs <= frameWidth)
+ {
+ int r = frameWidth - x0;
+ if (rightAvailable != 0 && bs == 4)
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+ }
+ else if (x0 <= frameWidth)
+ {
+ int r = frameWidth - x0;
+ MemoryUtil.Copy(aboveRow, aboveRef, r);
+ MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
+ }
+ }
+ else
+ {
+ /* Faster path if the block does not need extension */
+ if (bs == 4 && rightAvailable != 0 && leftAvailable != 0)
+ {
+ constAboveRow = aboveRef;
+ }
+ else
+ {
+ MemoryUtil.Copy(aboveRow, aboveRef, bs);
+ if (bs == 4 && rightAvailable != 0)
+ {
+ MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
+ }
+ else
+ {
+ MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
+ }
+ }
+ }
+ // Unlike the high bit depth variant, the top-left sample is written on every path here.
+ aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : (byte)129;
+ }
+ else
+ {
+ MemoryUtil.Fill(aboveRow, (byte)127, bs * 2);
+ aboveRow[-1] = 127;
+ }
+ }
+
+ // Predict
+ // DcPred picks its variant from the availability flags; every other mode uses _pred.
+ if (mode == PredictionMode.DcPred)
+ {
+ _dcPred[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol);
+ }
+ else
+ {
+ _pred[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol);
+ }
+ }
+
+ // Entry point for intra prediction of one transform block.
+ // Works out which neighbours may be read, scales aoff/loff by 4 to get the x/y offsets
+ // and forwards to the high bit depth or the 8-bit builder depending on xd.CurBuf.HighBd.
+ public static unsafe void PredictIntraBlock(
+ ref MacroBlockD xd,
+ int bwlIn,
+ TxSize txSize,
+ PredictionMode mode,
+ byte* refr,
+ int refStride,
+ byte* dst,
+ int dstStride,
+ int aoff,
+ int loff,
+ int plane)
+ {
+ int blockWidth = 1 << bwlIn;
+ int txWidth = 1 << (int)txSize;
+
+ // A neighbour is usable when the transform block is not on the first row/column
+ // of its block, or when the corresponding mode info pointer is set.
+ bool topUsable = loff != 0 || !xd.AboveMi.IsNull;
+ bool leftUsable = aoff != 0 || !xd.LeftMi.IsNull;
+ bool rightUsable = (aoff + txWidth) < blockWidth;
+
+ // The builders take the flags as 0/1 integers.
+ int haveTop = topUsable ? 1 : 0;
+ int haveLeft = leftUsable ? 1 : 0;
+ int haveRight = rightUsable ? 1 : 0;
+
+ int x = aoff * 4;
+ int y = loff * 4;
+
+ if (xd.CurBuf.HighBd)
+ {
+ BuildIntraPredictorsHigh(ref xd, refr, refStride, dst, dstStride, mode, txSize, haveTop, haveLeft, haveRight, x, y, plane);
+ }
+ else
+ {
+ BuildIntraPredictors(ref xd, refr, refStride, dst, dstStride, mode, txSize, haveTop, haveLeft, haveRight, x, y, plane);
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj b/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj
new file mode 100644
index 00000000..8fb9d435
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj
@@ -0,0 +1,20 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ <!-- This project contains unsafe code, so it must be allowed for every configuration and platform, -->
+ <!-- not only for the Debug|AnyCPU and Release|AnyCPU combinations. -->
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs b/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs
new file mode 100644
index 00000000..3b60889b
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs
@@ -0,0 +1,10 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // Pairs a byte buffer with a size value.
+ // NOTE(review): presumably Size is the number of valid bytes in Data for one tile - confirm at the fill site.
+ internal struct TileBuffer
+ {
+ public ArrayPtr<byte> Data;
+ public int Size;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs b/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs
new file mode 100644
index 00000000..2a483702
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Dsp;
+using Ryujinx.Graphics.Nvdec.Vp9.Types;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9
+{
+ // State bundle holding a bit reader, a MacroBlockD and a 32x32 coefficient buffer.
+ // NOTE(review): presumably one instance is used per tile being decoded - confirm against the decoder loop.
+ internal struct TileWorkerData
+ {
+ public Reader BitReader;
+ public MacroBlockD Xd;
+ /* dqcoeff are shared by all the planes. So planes must be decoded serially */
+ public Array32<Array32<int>> Dqcoeff;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs
new file mode 100644
index 00000000..9e1cd8b4
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs
@@ -0,0 +1,10 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Prediction mode and motion vector pair of one sub-block.
+ // ModeInfo keeps four of these in Bmi and reads them when SbType is below Block8x8.
+ internal struct BModeInfo
+ {
+ public PredictionMode Mode;
+ public Array2<Mv> Mv; // First, second inter predictor motion vectors
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs
new file mode 100644
index 00000000..22a48e20
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Block size identifiers, ordered from Block4x4 (0) to Block64x64 (12).
+ // The numeric values are used as lookup table indices (e.g. Luts.MaxTxSizeLookup[(int)SbType])
+ // and are compared with '<' (e.g. SbType < Block8x8), so the order must not change.
+ internal enum BlockSize
+ {
+ Block4x4 = 0,
+ Block4x8 = 1,
+ Block8x4 = 2,
+ Block8x8 = 3,
+ Block8x16 = 4,
+ Block16x8 = 5,
+ Block16x16 = 6,
+ Block16x32 = 7,
+ Block32x16 = 8,
+ Block32x32 = 9,
+ Block32x64 = 10,
+ Block64x32 = 11,
+ Block64x64 = 12,
+ // Number of real block sizes above.
+ BlockSizes = 13,
+ // Sentinel sharing the value of BlockSizes.
+ BlockInvalid = BlockSizes
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs
new file mode 100644
index 00000000..180d5e34
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs
@@ -0,0 +1,10 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // A byte buffer together with a stride value.
+ // NOTE(review): presumably Stride is the distance between consecutive rows of Buf - confirm the unit at the use sites.
+ internal struct Buf2D
+ {
+ public ArrayPtr<byte> Buf;
+ public int Stride;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs
new file mode 100644
index 00000000..a783999e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Frame type identifiers with explicit numeric values (KeyFrame = 0, InterFrame = 1).
+ internal enum FrameType
+ {
+ KeyFrame = 0,
+ InterFrame = 1
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs
new file mode 100644
index 00000000..8dc33bda
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs
@@ -0,0 +1,27 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Loop filter parameters. Several values are stored twice, as a current value and a
+ // "Last" copy (NOTE(review): presumably the values used for the previous frame - confirm).
+ internal struct LoopFilter
+ {
+ public int FilterLevel;
+ public int LastFiltLevel;
+
+ public int SharpnessLevel;
+ public int LastSharpnessLevel;
+
+ public bool ModeRefDeltaEnabled;
+ public bool ModeRefDeltaUpdate;
+
+ // 0 = Intra, Last, GF, ARF
+ public Array4<sbyte> RefDeltas;
+ public Array4<sbyte> LastRefDeltas;
+
+ // 0 = ZERO_MV, MV
+ public Array2<sbyte> ModeDeltas;
+ public Array2<sbyte> LastModeDeltas;
+
+ // Array of filter masks plus a stride value for indexing it.
+ public ArrayPtr<LoopFilterMask> Lfm;
+ public int LfmStride;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs
new file mode 100644
index 00000000..0ac38a7b
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs
@@ -0,0 +1,10 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Loop filter lookup data: 64 LoopFilterThresh entries and an 8 x 4 x 2 table of byte levels.
+ // NOTE(review): presumably Lfthr is indexed by filter level and Lvl by
+ // [segment][reference frame][mode] - confirm against the code that fills them.
+ internal struct LoopFilterInfoN
+ {
+ public Array64<LoopFilterThresh> Lfthr;
+ public Array8<Array4<Array2<byte>>> Lvl;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs
new file mode 100644
index 00000000..4aff843a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs
@@ -0,0 +1,24 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+ // Each 1 bit represents a position in which we want to apply the loop filter.
+ // Left_ entries refer to whether we apply a filter on the border to the
+ // left of the block. Above_ entries refer to whether or not to apply a
+ // filter on the above border. Int_ entries refer to whether or not to
+ // apply borders on the 4x4 edges within the 8x8 block that each bit
+ // represents.
+ // Since each transform is accompanied by a potentially different type of
+ // loop filter there is a different entry in the array for each transform size.
+ internal struct LoopFilterMask
+ {
+ // Y masks are 64 bits wide, UV masks 16 bits wide; the Array4 fields hold one mask per transform size.
+ public Array4<ulong> LeftY;
+ public Array4<ulong> AboveY;
+ public ulong Int4x4Y;
+ public Array4<ushort> LeftUv;
+ public Array4<ushort> AboveUv;
+ public ushort Int4x4Uv;
+ // 64 byte values (NOTE(review): presumably one filter level per 8x8 block of the region - confirm).
+ public Array64<byte> LflY;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs
new file mode 100644
index 00000000..bea1d115
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs
@@ -0,0 +1,13 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Need to align this structure so when it is declared and
+ // passed it can be loaded into vector registers.
+ // NOTE(review): no explicit layout or alignment attribute is applied here; the three
+ // members are 16 bytes each - confirm whether any caller relies on alignment.
+ internal struct LoopFilterThresh
+ {
+ public Array16<byte> Mblim;
+ public Array16<byte> Lim;
+ public Array16<byte> HevThr;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs
new file mode 100644
index 00000000..f1111528
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs
@@ -0,0 +1,179 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Per-block decoding state: plane descriptors, neighbouring mode info pointers,
+ // distances to the frame edges and the context arrays, plus the helpers that derive
+ // entropy coding contexts from the above/left neighbours.
+ internal struct MacroBlockD
+ {
+ public Array3<MacroBlockDPlane> Plane;
+ public byte BmodeBlocksWl;
+ public byte BmodeBlocksHl;
+
+ public Ptr<Vp9BackwardUpdates> Counts;
+ public TileInfo Tile;
+
+ public int MiStride;
+
+ // Grid of 8x8 cells is placed over the block.
+ // If some of them belong to the same mbtree-block
+ // they will just have same mi[i][j] value
+ public ArrayPtr<Ptr<ModeInfo>> Mi;
+ public Ptr<ModeInfo> LeftMi;
+ public Ptr<ModeInfo> AboveMi;
+
+ public uint MaxBlocksWide;
+ public uint MaxBlocksHigh;
+
+ public ArrayPtr<Array3<byte>> PartitionProbs;
+
+ /* Distance of MB away from frame edges */
+ public int MbToLeftEdge;
+ public int MbToRightEdge;
+ public int MbToTopEdge;
+ public int MbToBottomEdge;
+
+ public Ptr<Vp9EntropyProbs> Fc;
+
+ /* pointers to reference frames */
+ public Array2<Ptr<RefBuffer>> BlockRefs;
+
+ /* pointer to current frame */
+ public Surface CurBuf;
+
+ public Array3<ArrayPtr<sbyte>> AboveContext;
+ public Array3<Array16<sbyte>> LeftContext;
+
+ public ArrayPtr<sbyte> AboveSegContext;
+ public Array8<sbyte> LeftSegContext;
+
+ /* Bit depth: 8, 10, 12 */
+ public int Bd;
+
+ public bool Lossless;
+ public bool Corrupted;
+
+ public Ptr<InternalErrorInfo> ErrorInfo;
+
+ // Sum of the SegIdPredicted flags of the above and left neighbours; a missing neighbour counts as 0.
+ public int GetPredContextSegId()
+ {
+ int context = 0;
+
+ if (!AboveMi.IsNull)
+ {
+ context += AboveMi.Value.SegIdPredicted;
+ }
+
+ if (!LeftMi.IsNull)
+ {
+ context += LeftMi.Value.SegIdPredicted;
+ }
+
+ return context;
+ }
+
+ // Sum of the Skip values of the above and left neighbours; a missing neighbour counts as 0.
+ public int GetSkipContext()
+ {
+ int context = 0;
+
+ if (!AboveMi.IsNull)
+ {
+ context += AboveMi.Value.Skip;
+ }
+
+ if (!LeftMi.IsNull)
+ {
+ context += LeftMi.Value.Skip;
+ }
+
+ return context;
+ }
+
+ // Context for the interpolation filter, derived from the filters of the left and above
+ // neighbours. A missing neighbour is represented by Constants.SwitchableFilters.
+ public int GetPredContextSwitchableInterp()
+ {
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ int unknown = Constants.SwitchableFilters;
+ int left = LeftMi.IsNull ? unknown : LeftMi.Value.InterpFilter;
+ int above = AboveMi.IsNull ? unknown : AboveMi.Value.InterpFilter;
+
+ // Both agree (including both unknown).
+ if (left == above)
+ {
+ return left;
+ }
+
+ // Exactly one is known: use it.
+ if (left == unknown)
+ {
+ return above;
+ }
+
+ if (above == unknown)
+ {
+ return left;
+ }
+
+ // Both known but different.
+ return unknown;
+ }
+
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ // 0 - inter/inter, inter/--, --/inter, --/--
+ // 1 - intra/inter, inter/intra
+ // 2 - intra/--, --/intra
+ // 3 - intra/intra
+ public int GetIntraInterContext()
+ {
+ bool hasAbove = !AboveMi.IsNull;
+ bool hasLeft = !LeftMi.IsNull;
+
+ if (hasAbove && hasLeft)
+ {
+ // Both edges available
+ bool aboveIntra = !AboveMi.Value.IsInterBlock();
+ bool leftIntra = !LeftMi.Value.IsInterBlock();
+
+ if (aboveIntra && leftIntra)
+ {
+ return 3;
+ }
+
+ return (aboveIntra || leftIntra) ? 1 : 0;
+ }
+
+ if (hasAbove || hasLeft)
+ {
+ // One edge available
+ bool neighbourInter = hasAbove ? AboveMi.Value.IsInterBlock() : LeftMi.Value.IsInterBlock();
+
+ return neighbourInter ? 0 : 2;
+ }
+
+ return 0;
+ }
+
+ // Returns a context number for the given MB prediction signal
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real blocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ public int GetTxSizeContext()
+ {
+ int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType];
+
+ // A neighbour contributes its own transform size only when it exists and is not skipped.
+ int aboveCtx = maxTxSize;
+ if (!AboveMi.IsNull && AboveMi.Value.Skip == 0)
+ {
+ aboveCtx = (int)AboveMi.Value.TxSize;
+ }
+
+ int leftCtx = maxTxSize;
+ if (!LeftMi.IsNull && LeftMi.Value.Skip == 0)
+ {
+ leftCtx = (int)LeftMi.Value.TxSize;
+ }
+
+ // A missing neighbour mirrors the other side.
+ if (LeftMi.IsNull)
+ {
+ leftCtx = aboveCtx;
+ }
+
+ if (AboveMi.IsNull)
+ {
+ aboveCtx = leftCtx;
+ }
+
+ return (aboveCtx + leftCtx) > maxTxSize ? 1 : 0;
+ }
+
+ // Stores the subsampling shifts on every plane: plane 0 always gets 0/0,
+ // the remaining planes get ssX/ssY.
+ public void SetupBlockPlanes(int ssX, int ssY)
+ {
+ for (int planeIdx = 0; planeIdx < Constants.MaxMbPlane; planeIdx++)
+ {
+ bool firstPlane = planeIdx == 0;
+
+ Plane[planeIdx].SubsamplingX = firstPlane ? 0 : ssX;
+ Plane[planeIdx].SubsamplingY = firstPlane ? 0 : ssY;
+ }
+ }
+
+ // Points every plane's AboveContext/LeftContext at the entries matching miRow/miCol,
+ // scaled down by the plane's subsampling shifts.
+ public void SetSkipContext(int miRow, int miCol)
+ {
+ int aboveIdx = miCol * 2;
+ int leftIdx = (miRow * 2) & 15;
+
+ for (int planeIdx = 0; planeIdx < Constants.MaxMbPlane; ++planeIdx)
+ {
+ ref MacroBlockDPlane pd = ref Plane[planeIdx];
+
+ int aboveStart = aboveIdx >> pd.SubsamplingX;
+ int leftStart = leftIdx >> pd.SubsamplingY;
+
+ pd.AboveContext = AboveContext[planeIdx].Slice(aboveStart);
+ // The left context window covers the rest of the 16-entry array from leftStart.
+ pd.LeftContext = new ArrayPtr<sbyte>(ref LeftContext[planeIdx][leftStart], 16 - leftStart);
+ }
+ }
+
+ // Updates the distances to the frame edges for the block at (miRow, miCol) of size
+ // bh x bw (in mode info units) and selects the above/left neighbours, if any.
+ internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols)
+ {
+ MbToTopEdge = -(miRow * Constants.MiSize * 8);
+ MbToBottomEdge = (miRows - bh - miRow) * Constants.MiSize * 8;
+ MbToLeftEdge = -(miCol * Constants.MiSize * 8);
+ MbToRightEdge = (miCols - bw - miCol) * Constants.MiSize * 8;
+
+ // Are edges available for intra prediction?
+ if (miRow != 0)
+ {
+ AboveMi = Mi[-MiStride];
+ }
+ else
+ {
+ AboveMi = Ptr<ModeInfo>.Null;
+ }
+
+ // The left neighbour must belong to the same tile.
+ if (miCol > tile.MiColStart)
+ {
+ LeftMi = Mi[-1];
+ }
+ else
+ {
+ LeftMi = Ptr<ModeInfo>.Null;
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs
new file mode 100644
index 00000000..ae4ec6f4
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs
@@ -0,0 +1,21 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Per-plane state of a MacroBlockD: coefficient buffer, subsampling shifts,
+ // destination/prediction buffers and the entropy context windows.
+ internal struct MacroBlockDPlane
+ {
+ public ArrayPtr<int> DqCoeff;
+ // Used as right-shift amounts (e.g. "3 + pd.SubsamplingX" in the intra predictor builders).
+ public int SubsamplingX;
+ public int SubsamplingY;
+ public Buf2D Dst;
+ public Array2<Buf2D> Pre;
+ // Both context windows are assigned by MacroBlockD.SetSkipContext.
+ public ArrayPtr<sbyte> AboveContext;
+ public ArrayPtr<sbyte> LeftContext;
+ public Array8<Array2<short>> SegDequant;
+
+ // Number of 4x4s in current block
+ public ushort N4W, N4H;
+ // Log2 of N4W, N4H
+ public byte N4Wl, N4Hl;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs
new file mode 100644
index 00000000..8ef281d8
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs
@@ -0,0 +1,66 @@
+using Ryujinx.Common.Memory;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Mode information of one block: size, prediction modes, transform size, segment data,
+ // reference frames and motion vectors, with small accessors on top.
+ internal struct ModeInfo
+ {
+ // Common for both Inter and Intra blocks
+ public BlockSize SbType;
+ public PredictionMode Mode;
+ public TxSize TxSize;
+ public sbyte Skip;
+ public sbyte SegmentId;
+ public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled
+
+ // Only for Intra blocks
+ public PredictionMode UvMode;
+
+ // Only for Inter blocks
+ public byte InterpFilter;
+
+ // if ref_frame[idx] is equal to AltRefFrame then
+ // MacroBlockD.BlockRef[idx] is an altref
+ public Array2<sbyte> RefFrame;
+
+ public Array2<Mv> Mv;
+
+ public Array4<BModeInfo> Bmi;
+
+ // Prediction mode for the given sub-block: blocks below 8x8 keep one mode per
+ // sub-block in Bmi, every other size uses the block-level Mode.
+ public PredictionMode GetYMode(int block)
+ {
+ if (SbType < BlockSize.Block8x8)
+ {
+ return Bmi[block].Mode;
+ }
+
+ return Mode;
+ }
+
+ // Transform size for a plane with the given subsampling, read from Luts.UvTxsizeLookup.
+ public TxSize GetUvTxSize(ref MacroBlockDPlane pd)
+ {
+ int ssX = pd.SubsamplingX;
+ int ssY = pd.SubsamplingY;
+
+ Debug.Assert(SbType < BlockSize.Block8x8 ||
+ Luts.SsSizeLookup[(int)SbType][ssX][ssY] != BlockSize.BlockInvalid);
+
+ return Luts.UvTxsizeLookup[(int)SbType][(int)TxSize][ssX][ssY];
+ }
+
+ // True when the first reference frame is above Constants.IntraFrame.
+ public bool IsInterBlock() => RefFrame[0] > Constants.IntraFrame;
+
+ // True when the second reference frame is above Constants.IntraFrame.
+ public bool HasSecondRef() => RefFrame[1] > Constants.IntraFrame;
+
+ // Sub-block index to read, indexed as [blockIdx][searchCol == 0 ? 1 : 0].
+ private static readonly int[][] IdxNColumnToSubblock =
+ {
+ new[] { 1, 2 },
+ new[] { 1, 3 },
+ new[] { 3, 2 },
+ new[] { 3, 3 }
+ };
+
+ // This function returns either the appropriate sub block or block's mv
+ // on whether the block_size < 8x8 and we have check_sub_blocks set.
+ public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx)
+ {
+ bool useSubBlock = blockIdx >= 0 && SbType < BlockSize.Block8x8;
+
+ if (!useSubBlock)
+ {
+ return Mv[whichMv];
+ }
+
+ int column = searchCol == 0 ? 1 : 0;
+ int subBlock = IdxNColumnToSubblock[blockIdx][column];
+
+ return Bmi[subBlock].Mv[whichMv];
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs
new file mode 100644
index 00000000..319c8dba
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // Motion vector context identifiers with explicit numeric values.
+ // The values are contiguous from 0 to 6; InvalidCase is 9, leaving 7 and 8 unused.
+ internal enum MotionVectorContext
+ {
+ BothZero = 0,
+ ZeroPlusPredicted = 1,
+ BothPredicted = 2,
+ NewPlusNonIntra = 3,
+ BothNew = 4,
+ IntraPlusNonIntra = 5,
+ BothIntra = 6,
+ InvalidCase = 9
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs
new file mode 100644
index 00000000..c1f99ade
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs
@@ -0,0 +1,189 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ // A motion vector with 16-bit row/column components, plus helpers for classifying,
+ // counting, clamping and reducing the precision of motion vectors.
+ internal struct Mv
+ {
+ public short Row;
+ public short Col;
+
+ // Lookup table with 1025 entries: entry i holds floor(log2(i)) for i >= 1, entry 0 holds 0.
+ // GetMvClass indexes it with (z >> 3).
+ private static readonly byte[] LogInBase2 = new byte[]
+ {
+ 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
+ };
+
+ // Returns true when both components are small enough (absolute value below 64)
+ // for the high-precision 1/8 pel bit to be used.
+ public bool UseMvHp()
+ {
+ const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv
+ // Widen to int before taking the absolute value: Math.Abs(short) throws
+ // OverflowException for short.MinValue, which would abort decoding instead of
+ // simply reporting that the vector is too large.
+ return Math.Abs((int)Row) < kMvRefThresh && Math.Abs((int)Col) < kMvRefThresh;
+ }
+
+ // True when the joint type has a non-zero vertical (row) component.
+ public static bool MvJointVertical(MvJointType type)
+ {
+ return type == MvJointType.MvJointHzvnz || type == MvJointType.MvJointHnzvnz;
+ }
+
+ // True when the joint type has a non-zero horizontal (column) component.
+ public static bool MvJointHorizontal(MvJointType type)
+ {
+ return type == MvJointType.MvJointHnzvz || type == MvJointType.MvJointHnzvnz;
+ }
+
+ // Smallest magnitude offset belonging to class c: 0 for class 0,
+ // otherwise Constants.Class0Size << (c + 2).
+ private static int MvClassBase(MvClassType c)
+ {
+ return c != 0 ? Constants.Class0Size << ((int)c + 2) : 0;
+ }
+
+ // Maps z (magnitude minus one, see IncMvComponent) to its class. Values at or above
+ // Constants.Class0Size * 4096 are MvClass10, the rest are looked up in LogInBase2.
+ // When offset is not null it receives z minus the base of the returned class.
+ private static MvClassType GetMvClass(int z, Ptr<int> offset)
+ {
+ MvClassType c = (z >= Constants.Class0Size * 4096) ? MvClassType.MvClass10 : (MvClassType)LogInBase2[z >> 3];
+ if (!offset.IsNull)
+ {
+ offset.Value = z - MvClassBase(c);
+ }
+
+ return c;
+ }
+
+ // Adds incr to the counters describing one non-zero component v:
+ // sign, class, integer bits, fractional part and (scaled by usehp) the high precision bit.
+ // comp selects the counter set (IncMv passes 0 for Row and 1 for Col).
+ private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp)
+ {
+ int s, z, c, o = 0, d, e, f;
+ Debug.Assert(v != 0); /* Should not be zero */
+ s = v < 0 ? 1 : 0;
+ counts.Sign[comp][s] += (uint)incr;
+ z = (s != 0 ? -v : v) - 1; /* Magnitude - 1 */
+
+ // o receives the offset of z within its class.
+ c = (int)GetMvClass(z, new Ptr<int>(ref o));
+ counts.Classes[comp][c] += (uint)incr;
+
+ d = (o >> 3); /* Int mv data */
+ f = (o >> 1) & 3; /* Fractional pel mv data */
+ e = (o & 1); /* High precision mv data */
+
+ if (c == (int)MvClassType.MvClass0)
+ {
+ counts.Class0[comp][d] += (uint)incr;
+ counts.Class0Fp[comp][d][f] += (uint)incr;
+ counts.Class0Hp[comp][e] += (uint)(usehp * incr);
+ }
+ else
+ {
+ int i;
+ int b = c + Constants.Class0Bits - 1; // Number of bits
+ for (i = 0; i < b; ++i)
+ {
+ counts.Bits[comp][i][((d >> i) & 1)] += (uint)incr;
+ }
+
+ counts.Fp[comp][f] += (uint)incr;
+ counts.Hp[comp][e] += (uint)(usehp * incr);
+ }
+ }
+
+ // Classifies this vector by which of its components are zero.
+ private MvJointType GetMvJoint()
+ {
+ if (Row == 0)
+ {
+ return Col == 0 ? MvJointType.MvJointZero : MvJointType.MvJointHnzvz;
+ }
+ else
+ {
+ return Col == 0 ? MvJointType.MvJointHzvnz : MvJointType.MvJointHnzvnz;
+ }
+ }
+
+ // Records this vector in counts: the joint type always, and each non-zero component
+ // through IncMvComponent. Does nothing when counts is null.
+ internal void IncMv(Ptr<Vp9BackwardUpdates> counts)
+ {
+ if (!counts.IsNull)
+ {
+ MvJointType j = GetMvJoint();
+ ++counts.Value.Joints[(int)j];
+
+ if (MvJointVertical(j))
+ {
+ IncMvComponent(Row, ref counts.Value, 0, 1, 1);
+ }
+
+ if (MvJointHorizontal(j))
+ {
+ IncMvComponent(Col, ref counts.Value, 1, 1, 1);
+ }
+ }
+ }
+
+ // Clamps Col to [minCol, maxCol] and Row to [minRow, maxRow]; the result is cast back to short.
+ public void ClampMv(int minCol, int maxCol, int minRow, int maxRow)
+ {
+ Col = (short)Math.Clamp(Col, minCol, maxCol);
+ Row = (short)Math.Clamp(Row, minRow, maxRow);
+ }
+
+ private const int MvBorder = (16 << 3); // Allow 16 pels in 1/8th pel units
+
+ // Clamps this vector to the block's edge distances in xd, widened by MvBorder on every side.
+ public void ClampMvRef(ref MacroBlockD xd)
+ {
+ ClampMv(
+ xd.MbToLeftEdge - MvBorder,
+ xd.MbToRightEdge + MvBorder,
+ xd.MbToTopEdge - MvBorder,
+ xd.MbToBottomEdge + MvBorder);
+ }
+
+ // Unless high precision is both allowed and applicable (see UseMvHp), makes odd
+ // components even by moving them one step towards zero.
+ public void LowerMvPrecision(bool allowHP)
+ {
+ bool useHP = allowHP && UseMvHp();
+ if (!useHP)
+ {
+ if ((Row & 1) != 0)
+ {
+ Row += (short)(Row > 0 ? -1 : 1);
+ }
+
+ if ((Col & 1) != 0)
+ {
+ Col += (short)(Col > 0 ? -1 : 1);
+ }
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs
new file mode 100644
index 00000000..fb25d18e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal struct Mv32 // Row/column pair with 32-bit components; ScaleFactors.ScaleMv returns its result in this type.
+ {
+ public int Row; // Row component
+ public int Col; // Column component
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs
new file mode 100644
index 00000000..68a0b59a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum MvClassType // Magnitude classes MvClass0..MvClass10; each member's comment gives its range in integer pels.
+ {
+ MvClass0 = 0, /* (0, 2] integer pel */
+ MvClass1 = 1, /* (2, 4] integer pel */
+ MvClass2 = 2, /* (4, 8] integer pel */
+ MvClass3 = 3, /* (8, 16] integer pel */
+ MvClass4 = 4, /* (16, 32] integer pel */
+ MvClass5 = 5, /* (32, 64] integer pel */
+ MvClass6 = 6, /* (64, 128] integer pel */
+ MvClass7 = 7, /* (128, 256] integer pel */
+ MvClass8 = 8, /* (256, 512] integer pel */
+ MvClass9 = 9, /* (512, 1024] integer pel */
+ MvClass10 = 10, /* (1024,2048] integer pel */
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs
new file mode 100644
index 00000000..a20cb6d0
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum MvJointType // Which components of a motion vector are non-zero.
+ {
+ MvJointZero = 0, /* Zero vector */
+ MvJointHnzvz = 1, /* Vert zero, hor nonzero */
+ MvJointHzvnz = 2, /* Hor zero, vert nonzero */
+ MvJointHnzvnz = 3, /* Both components nonzero */
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs
new file mode 100644
index 00000000..71949a09
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs
@@ -0,0 +1,10 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal struct MvRef // Record holding two motion vectors and two reference-frame values.
+ {
+ public Array2<Mv> Mv; // The two motion vectors
+ public Array2<sbyte> RefFrame; // The two reference-frame values (presumably one per vector - confirm against callers)
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs
new file mode 100644
index 00000000..096f9818
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum PartitionType // Partition choices; members are numbered implicitly starting at 0.
+ {
+ PartitionNone, // 0
+ PartitionHorz, // 1
+ PartitionVert, // 2
+ PartitionSplit, // 3
+ PartitionTypes, // 4: number of partition types listed above
+ PartitionInvalid = PartitionTypes // Shares the value of PartitionTypes (presumably an out-of-range marker)
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs
new file mode 100644
index 00000000..790aa2a0
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum PlaneType // Plane categories (names suggest luma vs. chroma - confirm against users).
+ {
+ Y = 0,
+ Uv = 1,
+ PlaneTypes // 2: number of plane types listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs
new file mode 100644
index 00000000..0d3b56f6
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Simple row/column pair.
+ /// </summary>
+ internal struct Position
+ {
+     public int Row;
+     public int Col;
+
+     /// <summary>Creates a position from the given row and column.</summary>
+     public Position(int row, int col) => (Row, Col) = (row, col);
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs
new file mode 100644
index 00000000..bbb9be9a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum PredictionMode // Prediction modes: 0-9 are the predictors described per member, 10-13 are the *Mv modes.
+ {
+ DcPred = 0, // Average of above and left pixels
+ VPred = 1, // Vertical
+ HPred = 2, // Horizontal
+ D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi)
+ D135Pred = 4, // Directional 135 deg = 180 - 45
+ D117Pred = 5, // Directional 117 deg = 180 - 63
+ D153Pred = 6, // Directional 153 deg = 180 - 27
+ D207Pred = 7, // Directional 207 deg = 180 + 27
+ D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi)
+ TmPred = 9, // True-motion
+ NearestMv = 10,
+ NearMv = 11,
+ ZeroMv = 12,
+ NewMv = 13,
+ MbModeCount = 14 // Number of modes listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs
new file mode 100644
index 00000000..9942dd05
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal struct RefBuffer // A reference surface together with its scale factors.
+ {
+ public Surface Buf; // The reference surface; Vp9Common.SetupScaleFactors reads its Width and Height
+ public ScaleFactors Sf; // Scale factors that Vp9Common.SetupScaleFactors fills in from Buf's size and the frame size
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs
new file mode 100644
index 00000000..7cbf9f4e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum ReferenceMode // Reference mode choices, plus a trailing count member.
+ {
+ SingleReference = 0,
+ CompoundReference = 1,
+ ReferenceModeSelect = 2,
+ ReferenceModes = 3 // Number of modes listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs
new file mode 100644
index 00000000..970f9680
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs
@@ -0,0 +1,451 @@
+using Ryujinx.Common.Memory;
+using System.Runtime.CompilerServices;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve;
+using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Fixed-point scale factors between a reference frame and the current frame,
+ /// plus the dispatch tables that pick a convolution routine for prediction.
+ /// </summary>
+ internal struct ScaleFactors
+ {
+     private const int RefScaleShift = 14;
+     private const int RefNoScale = 1 << RefScaleShift;
+     private const int RefInvalidScale = -1;
+
+     // Signature of the 8-bit convolution routines stored in the tables below.
+     private unsafe delegate void ConvolveFn(
+         byte* src,
+         int srcStride,
+         byte* dst,
+         int dstStride,
+         Array8<short>[] filter,
+         int x0Q4,
+         int xStepQ4,
+         int y0Q4,
+         int yStepQ4,
+         int w,
+         int h);
+
+     // Signature of the high bit depth convolution routines stored in the tables below.
+     private unsafe delegate void HighbdConvolveFn(
+         ushort* src,
+         int srcStride,
+         ushort* dst,
+         int dstStride,
+         Array8<short>[] filter,
+         int x0Q4,
+         int xStepQ4,
+         int y0Q4,
+         int yStepQ4,
+         int w,
+         int h,
+         int bd);
+
+     // Every table is indexed as [horiz][vert][avg].
+
+     // Used when both XStepQ4 and YStepQ4 are 16.
+     private static readonly unsafe ConvolveFn[][][] PredictX16Y16 =
+     {
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { ConvolveCopy, ConvolveAvg },
+             new ConvolveFn[] { Convolve8Vert, Convolve8AvgVert }
+         },
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { Convolve8Horiz, Convolve8AvgHoriz },
+             new ConvolveFn[] { Convolve8, Convolve8Avg }
+         }
+     };
+
+     // Used when only XStepQ4 is 16.
+     private static readonly unsafe ConvolveFn[][][] PredictX16 =
+     {
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { ScaledVert, ScaledAvgVert },
+             new ConvolveFn[] { ScaledVert, ScaledAvgVert }
+         },
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D },
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D }
+         }
+     };
+
+     // Used when only YStepQ4 is 16.
+     private static readonly unsafe ConvolveFn[][][] PredictY16 =
+     {
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D }
+         },
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { ScaledHoriz, ScaledAvgHoriz },
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D }
+         }
+     };
+
+     // Used when neither step is 16.
+     private static readonly unsafe ConvolveFn[][][] Predict =
+     {
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D },
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D }
+         },
+         new ConvolveFn[][]
+         {
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D },
+             new ConvolveFn[] { Scaled2D, ScaledAvg2D }
+         }
+     };
+
+     // High bit depth counterpart of PredictX16Y16.
+     private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 =
+     {
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolveCopy, HighbdConvolveAvg },
+             new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert }
+         },
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         }
+     };
+
+     // High bit depth counterpart of PredictX16.
+     private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 =
+     {
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert },
+             new HighbdConvolveFn[] { HighbdConvolve8Vert, HighbdConvolve8AvgVert }
+         },
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         }
+     };
+
+     // High bit depth counterpart of PredictY16.
+     private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 =
+     {
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         },
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8Horiz, HighbdConvolve8AvgHoriz },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         }
+     };
+
+     // High bit depth counterpart of Predict.
+     private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict =
+     {
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         },
+         new HighbdConvolveFn[][]
+         {
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg },
+             new HighbdConvolveFn[] { HighbdConvolve8, HighbdConvolve8Avg }
+         }
+     };
+
+     public int XScaleFP; // Horizontal fixed point scale factor
+     public int YScaleFP; // Vertical fixed point scale factor
+     public int XStepQ4;
+     public int YStepQ4;
+
+     /// <summary>Returns <paramref name="val"/> scaled horizontally, or unchanged when no scaling is in effect.</summary>
+     public int ScaleValueX(int val)
+     {
+         if (!IsScaled())
+         {
+             return val;
+         }
+
+         return ScaledX(val);
+     }
+
+     /// <summary>Returns <paramref name="val"/> scaled vertically, or unchanged when no scaling is in effect.</summary>
+     public int ScaleValueY(int val)
+     {
+         if (!IsScaled())
+         {
+             return val;
+         }
+
+         return ScaledY(val);
+     }
+
+     /// <summary>
+     /// Runs the 8-bit convolution routine selected by the current step sizes and by
+     /// the <paramref name="horiz"/>, <paramref name="vert"/> and <paramref name="avg"/> indices.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public unsafe void InterPredict(
+         int horiz,
+         int vert,
+         int avg,
+         byte* src,
+         int srcStride,
+         byte* dst,
+         int dstStride,
+         int subpelX,
+         int subpelY,
+         int w,
+         int h,
+         Array8<short>[] kernel,
+         int xs,
+         int ys)
+     {
+         bool unscaledX = XStepQ4 == 16;
+         bool unscaledY = YStepQ4 == 16;
+
+         ConvolveFn[][][] table;
+         if (unscaledX)
+         {
+             // No scaling in x; y is either unscaled as well or must always be scaled.
+             table = unscaledY ? PredictX16Y16 : PredictX16;
+         }
+         else
+         {
+             // x must always be scaled; y may or may not be.
+             table = unscaledY ? PredictY16 : Predict;
+         }
+
+         table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h);
+     }
+
+     /// <summary>
+     /// High bit depth variant of <see cref="InterPredict"/>; <paramref name="bd"/> is passed through to the routine.
+     /// </summary>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public unsafe void HighbdInterPredict(
+         int horiz,
+         int vert,
+         int avg,
+         ushort* src,
+         int srcStride,
+         ushort* dst,
+         int dstStride,
+         int subpelX,
+         int subpelY,
+         int w,
+         int h,
+         Array8<short>[] kernel,
+         int xs,
+         int ys,
+         int bd)
+     {
+         bool unscaledX = XStepQ4 == 16;
+         bool unscaledY = YStepQ4 == 16;
+
+         HighbdConvolveFn[][][] table;
+         if (unscaledX)
+         {
+             // No scaling in x; y is either unscaled as well or must always be scaled.
+             table = unscaledY ? HighbdPredictX16Y16 : HighbdPredictX16;
+         }
+         else
+         {
+             // x must always be scaled; y may or may not be.
+             table = unscaledY ? HighbdPredictY16 : HighbdPredict;
+         }
+
+         table[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd);
+     }
+
+     // Multiplies in 64 bits, then drops the RefScaleShift fractional bits.
+     private int ScaledX(int val)
+     {
+         long product = (long)val * XScaleFP;
+         return (int)(product >> RefScaleShift);
+     }
+
+     // Multiplies in 64 bits, then drops the RefScaleShift fractional bits.
+     private int ScaledY(int val)
+     {
+         long product = (long)val * YScaleFP;
+         return (int)(product >> RefScaleShift);
+     }
+
+     private static int GetFixedPointScaleFactor(int otherSize, int thisSize)
+     {
+         // Calculate scaling factor once for each reference frame
+         // and use fixed point scaling factors in decoding and encoding routines.
+         // Hardware implementations can calculate scale factor in device driver
+         // and use multiplication and shifting on hardware instead of division.
+         int numerator = otherSize << RefScaleShift;
+         return numerator / thisSize;
+     }
+
+     /// <summary>
+     /// Scales <paramref name="mv"/> and adds the sub-pel remainder of the scaled
+     /// (<paramref name="x"/>, <paramref name="y"/>) position to each component.
+     /// </summary>
+     public Mv32 ScaleMv(ref Mv mv, int x, int y)
+     {
+         int xOffQ4 = ScaledX(x << SubpelBits) & SubpelMask;
+         int yOffQ4 = ScaledY(y << SubpelBits) & SubpelMask;
+
+         Mv32 scaled;
+         scaled.Row = ScaledY(mv.Row) + yOffQ4;
+         scaled.Col = ScaledX(mv.Col) + xOffQ4;
+         return scaled;
+     }
+
+     /// <summary>True when neither scale factor holds the invalid marker.</summary>
+     public bool IsValidScale()
+     {
+         return !(XScaleFP == RefInvalidScale || YScaleFP == RefInvalidScale);
+     }
+
+     /// <summary>True when the scale is valid and at least one axis differs from the no-scale value.</summary>
+     public bool IsScaled()
+     {
+         if (!IsValidScale())
+         {
+             return false;
+         }
+
+         return XScaleFP != RefNoScale || YScaleFP != RefNoScale;
+     }
+
+     /// <summary>
+     /// Checks that the reference is at most twice and at least one sixteenth
+     /// of the current frame size in each dimension.
+     /// </summary>
+     public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight)
+     {
+         bool refNotTooLarge = 2 * thisWidth >= refWidth && 2 * thisHeight >= refHeight;
+         bool refNotTooSmall = thisWidth <= 16 * refWidth && thisHeight <= 16 * refHeight;
+
+         return refNotTooLarge && refNotTooSmall;
+     }
+
+     /// <summary>
+     /// Computes the scale factors and Q4 steps for a reference of size
+     /// (<paramref name="otherW"/>, <paramref name="otherH"/>) against a frame of size
+     /// (<paramref name="thisW"/>, <paramref name="thisH"/>). When the sizes fail
+     /// <see cref="ValidRefFrameSize"/>, both scale factors are set to the invalid marker
+     /// and the steps are left untouched.
+     /// </summary>
+     public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH)
+     {
+         if (ValidRefFrameSize(otherW, otherH, thisW, thisH))
+         {
+             XScaleFP = GetFixedPointScaleFactor(otherW, thisW);
+             YScaleFP = GetFixedPointScaleFactor(otherH, thisH);
+             XStepQ4 = ScaledX(16);
+             YStepQ4 = ScaledY(16);
+         }
+         else
+         {
+             XScaleFP = RefInvalidScale;
+             YScaleFP = RefInvalidScale;
+         }
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs
new file mode 100644
index 00000000..c3ea3fd8
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum SegLvlFeatures // Per-segment feature ids; Segmentation uses them as table indices and as mask bit positions.
+ {
+ SegLvlAltQ = 0, // Use alternate Quantizer ....
+ SegLvlAltLf = 1, // Use alternate loop filter value...
+ SegLvlRefFrame = 2, // Optional Segment reference frame
+ SegLvlSkip = 3, // Optional Segment (0,0) + skip mode
+ SegLvlMax = 4 // Number of features supported
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs
new file mode 100644
index 00000000..53d1f2cc
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs
@@ -0,0 +1,71 @@
+using Ryujinx.Common.Memory;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Segmentation state: global flags plus, for each of 8 segments, a feature
+ /// bit mask and one signed 16-bit data value per feature.
+ /// </summary>
+ internal struct Segmentation
+ {
+     // Both tables are indexed by (int)SegLvlFeatures.
+     // Non-zero means the feature's data may be negative (checked in SetSegData).
+     private static readonly int[] SegFeatureDataSigned = new int[] { 1, 1, 0, 0 };
+     // Largest magnitude accepted for the feature's data (checked in SetSegData).
+     private static readonly int[] SegFeatureDataMax = new int[] { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 };
+
+     public bool Enabled;
+     public bool UpdateMap;
+     public byte UpdateData;
+     public byte AbsDelta;
+     public bool TemporalUpdate;
+
+     public Array8<Array4<short>> FeatureData; // [segment][feature] data values
+     public Array8<uint> FeatureMask; // [segment] bit n set => feature n enabled
+     public int AqAvOffset;
+
+     /// <summary>
+     /// Looks up the probability in <paramref name="segPredProbs"/> at the index
+     /// returned by <c>xd.GetPredContextSegId()</c>.
+     /// </summary>
+     public static byte GetPredProbSegId(ref Array3<byte> segPredProbs, ref MacroBlockD xd)
+     {
+         return segPredProbs[xd.GetPredContextSegId()];
+     }
+
+     /// <summary>Zeroes all feature data, all feature masks and <c>AqAvOffset</c>.</summary>
+     public void ClearAllSegFeatures()
+     {
+         // Treat the nested fixed-size arrays as one flat run of 8 * 4 shorts / 8 uints.
+         MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Fill(0);
+         MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Fill(0);
+         AqAvOffset = 0;
+     }
+
+     /// <summary>Sets the mask bit of <paramref name="featureId"/> for the given segment.</summary>
+     internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId)
+     {
+         FeatureMask[segmentId] |= 1u << (int)featureId;
+     }
+
+     /// <summary>Returns the table entry giving the largest data magnitude for the feature.</summary>
+     internal static int FeatureDataMax(SegLvlFeatures featureId)
+     {
+         return SegFeatureDataMax[(int)featureId];
+     }
+
+     /// <summary>Returns non-zero when the feature's data may be negative.</summary>
+     internal static int IsSegFeatureSigned(SegLvlFeatures featureId)
+     {
+         return SegFeatureDataSigned[(int)featureId];
+     }
+
+     /// <summary>
+     /// Stores <paramref name="segData"/> (truncated to 16 bits) for the given segment and feature.
+     /// Range and sign are validated with debug assertions only.
+     /// </summary>
+     internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData)
+     {
+         Debug.Assert(segData <= SegFeatureDataMax[(int)featureId]);
+         if (segData < 0)
+         {
+             Debug.Assert(SegFeatureDataSigned[(int)featureId] != 0);
+             Debug.Assert(-segData <= SegFeatureDataMax[(int)featureId]);
+         }
+
+         FeatureData[segmentId][(int)featureId] = (short)segData;
+     }
+
+     /// <summary>
+     /// Returns 1 when segmentation is enabled and the feature's mask bit is set
+     /// for the segment, otherwise 0.
+     /// </summary>
+     internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId)
+     {
+         // Unsigned literal keeps the test in 32-bit unsigned arithmetic, matching
+         // EnableSegFeature; a signed 1 would promote both operands to long.
+         return Enabled && (FeatureMask[segmentId] & (1u << (int)featureId)) != 0 ? 1 : 0;
+     }
+
+     /// <summary>Returns the stored data value for the given segment and feature.</summary>
+     internal short GetSegData(int segmentId, SegLvlFeatures featureId)
+     {
+         return FeatureData[segmentId][(int)featureId];
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs
new file mode 100644
index 00000000..2b2a173e
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs
@@ -0,0 +1,80 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Frame buffer with three byte planes (Y, U, V) carved out of a single
+ /// unmanaged allocation that is released by <see cref="Dispose"/>.
+ /// </summary>
+ internal struct Surface : ISurface
+ {
+     public ArrayPtr<byte> YBuffer;
+     public ArrayPtr<byte> UBuffer;
+     public ArrayPtr<byte> VBuffer;
+
+     public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length);
+     public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length);
+     public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length);
+
+     public int Width { get; }
+     public int Height { get; }
+     public int AlignedWidth { get; }
+     public int AlignedHeight { get; }
+     public int Stride { get; }
+     public int UvWidth { get; }
+     public int UvHeight { get; }
+     public int UvAlignedWidth { get; }
+     public int UvAlignedHeight { get; }
+     public int UvStride { get; }
+     public bool HighBd => false;
+
+     // Start of the unmanaged allocation backing all three planes.
+     private readonly IntPtr _pointer;
+
+     /// <summary>
+     /// Allocates the planes for a frame of the given size. Dimensions are aligned
+     /// up to 8, a 32 pixel border surrounds the Y plane (16 for U/V) and the Y
+     /// stride is aligned up to 32 bytes.
+     /// </summary>
+     public Surface(int width, int height)
+     {
+         const int border = 32;
+         const int ssX = 1;
+         const int ssY = 1;
+         const bool highbd = false;
+
+         // Y plane geometry.
+         int alignedWidth = (width + 7) & ~7;
+         int alignedHeight = (height + 7) & ~7;
+         int yStride = ((alignedWidth + 2 * border) + 31) & ~31;
+         int yplaneSize = (alignedHeight + 2 * border) * yStride;
+
+         // U/V plane geometry (subsampled by ssX / ssY).
+         int uvWidth = alignedWidth >> ssX;
+         int uvHeight = alignedHeight >> ssY;
+         int uvStride = yStride >> ssX;
+         int uvBorderW = border >> ssX;
+         int uvBorderH = border >> ssY;
+         int uvplaneSize = (uvHeight + 2 * uvBorderH) * uvStride;
+
+         int frameSize = (highbd ? 2 : 1) * (yplaneSize + 2 * uvplaneSize);
+
+         IntPtr pointer = Marshal.AllocHGlobal(frameSize);
+
+         _pointer = pointer;
+         Width = width;
+         Height = height;
+         AlignedWidth = alignedWidth;
+         AlignedHeight = alignedHeight;
+         Stride = yStride;
+         UvWidth = (width + ssX) >> ssX;
+         UvHeight = (height + ssY) >> ssY;
+         UvAlignedWidth = uvWidth;
+         UvAlignedHeight = uvHeight;
+         UvStride = uvStride;
+
+         // Each plane pointer skips the top border rows and the left border
+         // columns; the reported length shrinks by the same amount.
+         int yOffset = (border * yStride) + border;
+         int uvOffset = (uvBorderH * uvStride) + uvBorderW;
+
+         YBuffer = new ArrayPtr<byte>(pointer + yOffset, yplaneSize - yOffset);
+         UBuffer = new ArrayPtr<byte>(pointer + yplaneSize + uvOffset, uvplaneSize - uvOffset);
+         VBuffer = new ArrayPtr<byte>(pointer + yplaneSize + uvplaneSize + uvOffset, uvplaneSize - uvOffset);
+     }
+
+     /// <summary>Releases the unmanaged allocation made by the constructor.</summary>
+     public void Dispose()
+     {
+         Marshal.FreeHGlobal(_pointer);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs
new file mode 100644
index 00000000..67289c47
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs
@@ -0,0 +1,85 @@
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Bounds of one tile in mode-info units, plus helpers for tile layout.
+ /// </summary>
+ internal struct TileInfo
+ {
+     private const int MinTileWidthB64 = 4;
+     private const int MaxTileWidthB64 = 64;
+
+     public int MiRowStart, MiRowEnd;
+     public int MiColStart, MiColEnd;
+
+     /// <summary>Aligns <paramref name="nMis"/> via <c>BitUtils.AlignPowerOfTwo</c> with <c>Constants.MiBlockSizeLog2</c>.</summary>
+     public static int MiColsAlignedToSb(int nMis) => BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2);
+
+     // Start offset (in mi units) of tile number idx when mis units are split into
+     // 1 << log2 tiles; the result never exceeds mis.
+     private static int GetTileOffset(int idx, int mis, int log2)
+     {
+         int superblocks = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2;
+         int startSb = (idx * superblocks) >> log2;
+         int startMi = startSb << Constants.MiBlockSizeLog2;
+
+         return Math.Min(startMi, mis);
+     }
+
+     /// <summary>Sets the row bounds for tile row <paramref name="row"/>.</summary>
+     public void SetRow(ref Vp9Common cm, int row)
+     {
+         int rows = cm.MiRows;
+         int log2Rows = cm.Log2TileRows;
+
+         MiRowStart = GetTileOffset(row, rows, log2Rows);
+         MiRowEnd = GetTileOffset(row + 1, rows, log2Rows);
+     }
+
+     /// <summary>Sets the column bounds for tile column <paramref name="col"/>.</summary>
+     public void SetCol(ref Vp9Common cm, int col)
+     {
+         int cols = cm.MiCols;
+         int log2Cols = cm.Log2TileCols;
+
+         MiColStart = GetTileOffset(col, cols, log2Cols);
+         MiColEnd = GetTileOffset(col + 1, cols, log2Cols);
+     }
+
+     /// <summary>Sets both the row and the column bounds.</summary>
+     public void Init(ref Vp9Common cm, int row, int col)
+     {
+         SetRow(ref cm, row);
+         SetCol(ref cm, col);
+     }
+
+     // Checks that the given miRow, miCol and search point
+     // are inside the borders of the tile.
+     public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos)
+     {
+         int targetRow = miRow + miPos.Row;
+         int targetCol = miCol + miPos.Col;
+
+         return targetRow >= 0 &&
+                targetCol >= MiColStart &&
+                targetRow < miRows &&
+                targetCol < MiColEnd;
+     }
+
+     // Smallest log2 for which MaxTileWidthB64 << log2 reaches sb64Cols.
+     private static int GetMinLog2TileCols(int sb64Cols)
+     {
+         int minLog2;
+         for (minLog2 = 0; (MaxTileWidthB64 << minLog2) < sb64Cols; ++minLog2)
+         {
+         }
+
+         return minLog2;
+     }
+
+     // One less than the first log2 (starting at 1) for which sb64Cols >> log2 drops below MinTileWidthB64.
+     private static int GetMaxLog2TileCols(int sb64Cols)
+     {
+         int maxLog2;
+         for (maxLog2 = 1; (sb64Cols >> maxLog2) >= MinTileWidthB64; ++maxLog2)
+         {
+         }
+
+         return maxLog2 - 1;
+     }
+
+     /// <summary>
+     /// Computes the smallest and largest log2 tile column counts for a frame that is
+     /// <paramref name="miCols"/> mode-info units wide.
+     /// </summary>
+     public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols)
+     {
+         int sb64Cols = MiColsAlignedToSb(miCols) >> Constants.MiBlockSizeLog2;
+
+         minLog2TileCols = GetMinLog2TileCols(sb64Cols);
+         maxLog2TileCols = GetMaxLog2TileCols(sb64Cols);
+
+         Debug.Assert(minLog2TileCols <= maxLog2TileCols);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs
new file mode 100644
index 00000000..db914525
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ public enum TxMode // Transform mode choices, plus a trailing count member.
+ {
+ Only4X4 = 0, // Only 4x4 transform used
+ Allow8X8 = 1, // Allow block transform size up to 8x8
+ Allow16X16 = 2, // Allow block transform size up to 16x16
+ Allow32X32 = 3, // Allow block transform size up to 32x32
+ TxModeSelect = 4, // Transform specified for each block
+ TxModes = 5 // Number of modes listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs
new file mode 100644
index 00000000..994deb2c
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ public enum TxSize // Transform sizes, plus a trailing count member.
+ {
+ Tx4x4 = 0, // 4x4 transform
+ Tx8x8 = 1, // 8x8 transform
+ Tx16x16 = 2, // 16x16 transform
+ Tx32x32 = 3, // 32x32 transform
+ TxSizes = 4 // Number of sizes listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs
new file mode 100644
index 00000000..dbf7251c
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ internal enum TxType // Transform type per direction (DCT or ADST), plus a trailing count member.
+ {
+ DctDct = 0, // DCT in both horizontal and vertical
+ AdstDct = 1, // ADST in vertical, DCT in horizontal
+ DctAdst = 2, // DCT in vertical, ADST in horizontal
+ AdstAdst = 3, // ADST in both directions
+ TxTypes = 4 // Number of types listed above
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs
new file mode 100644
index 00000000..0dafb820
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs
@@ -0,0 +1,334 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Nvdec.Vp9.Common;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Vp9.Types
+{
+ /// <summary>
+ /// Frame-level decoder state: frame geometry, quantizer and segmentation
+ /// settings, and the buffers obtained from a <c>MemoryAllocator</c>.
+ /// </summary>
+ internal struct Vp9Common
+ {
+     public MacroBlockD Mb;
+
+     public ArrayPtr<TileWorkerData> TileWorkerData;
+
+     public InternalErrorInfo Error;
+
+     public int Width;
+     public int Height;
+
+     public int SubsamplingX;
+     public int SubsamplingY;
+
+     public ArrayPtr<MvRef> PrevFrameMvs;
+     public ArrayPtr<MvRef> CurFrameMvs;
+
+     public Array3<RefBuffer> FrameRefs;
+
+     public FrameType FrameType;
+
+     // Flag signaling that the frame is encoded using only Intra modes.
+     public bool IntraOnly;
+
+     public bool AllowHighPrecisionMv;
+
+     // MBs, MbRows/Cols is in 16-pixel units; MiRows/Cols is in
+     // ModeInfo (8-pixel) units.
+     public int MBs;
+     public int MbRows, MiRows;
+     public int MbCols, MiCols;
+     public int MiStride;
+
+     // Profile settings
+     public TxMode TxMode;
+
+     public int BaseQindex;
+     public int YDcDeltaQ;
+     public int UvDcDeltaQ;
+     public int UvAcDeltaQ;
+     public Array8<Array2<short>> YDequant;
+     public Array8<Array2<short>> UvDequant;
+
+     // We allocate a ModeInfo struct for each macroblock, together with
+     // an extra row on top and column on the left to simplify prediction.
+     public ArrayPtr<ModeInfo> Mip; // Base of allocated array
+     public ArrayPtr<ModeInfo> Mi; // Corresponds to upper left visible macroblock
+
+     public ArrayPtr<Ptr<ModeInfo>> MiGridBase;
+     public ArrayPtr<Ptr<ModeInfo>> MiGridVisible;
+
+     // Whether to use previous frame's motion vectors for prediction.
+     public bool UsePrevFrameMvs;
+
+     // Persistent mb segment id map used in prediction.
+     public int SegMapIdx;
+     public int PrevSegMapIdx;
+
+     public Array2<ArrayPtr<byte>> SegMapArray;
+     public ArrayPtr<byte> LastFrameSegMap;
+     public ArrayPtr<byte> CurrentFrameSegMap;
+
+     public byte InterpFilter;
+
+     public LoopFilterInfoN LfInfo;
+
+     public Array4<sbyte> RefFrameSignBias; // Two state 0, 1
+
+     public LoopFilter Lf;
+     public Segmentation Seg;
+
+     // Context probabilities for reference frame prediction
+     public sbyte CompFixedRef;
+     public Array2<sbyte> CompVarRef;
+     public ReferenceMode ReferenceMode;
+
+     public Ptr<Vp9EntropyProbs> Fc;
+     public Ptr<Vp9BackwardUpdates> Counts;
+
+     public bool FrameParallelDecodingMode;
+
+     public int Log2TileCols, Log2TileRows;
+
+     public ArrayPtr<sbyte> AboveSegContext;
+     public ArrayPtr<sbyte> AboveContext;
+     public int AboveContextAllocCols;
+
+     /// <summary>True for key frames and for frames flagged <c>IntraOnly</c>.</summary>
+     public bool FrameIsIntraOnly()
+     {
+         if (FrameType == FrameType.KeyFrame)
+         {
+             return true;
+         }
+
+         return IntraOnly;
+     }
+
+     /// <summary>
+     /// True when any sign-bias entry from index 2 upwards differs from the entry at index 1.
+     /// </summary>
+     public bool CompoundReferenceAllowed()
+     {
+         for (int refIdx = 1; refIdx < Constants.RefsPerFrame; ++refIdx)
+         {
+             if (RefFrameSignBias[refIdx + 1] != RefFrameSignBias[1])
+             {
+                 return true;
+             }
+         }
+
+         return false;
+     }
+
+     // Len is in mi units.
+     private static int CalcMiSize(int len) => len + Constants.MiBlockSize;
+
+     /// <summary>
+     /// Derives the mode-info and macroblock dimensions (and the mi stride)
+     /// from a frame size in pixels.
+     /// </summary>
+     public void SetMbMi(int width, int height)
+     {
+         int widthAligned = BitUtils.AlignPowerOfTwo(width, Constants.MiSizeLog2);
+         int heightAligned = BitUtils.AlignPowerOfTwo(height, Constants.MiSizeLog2);
+
+         MiCols = widthAligned >> Constants.MiSizeLog2;
+         MiRows = heightAligned >> Constants.MiSizeLog2;
+         MiStride = CalcMiSize(MiCols);
+
+         // One macroblock covers two mi units in each direction, rounded up.
+         MbCols = (MiCols + 1) >> 1;
+         MbRows = (MiRows + 1) >> 1;
+         MBs = MbRows * MbCols;
+     }
+
+     /// <summary>Allocates one <c>TileWorkerData</c> per tile.</summary>
+     public void AllocTileWorkerData(MemoryAllocator allocator, int tileCols, int tileRows)
+     {
+         int tileCount = tileCols * tileRows;
+         TileWorkerData = allocator.Allocate<TileWorkerData>(tileCount);
+     }
+
+     /// <summary>Returns the tile worker data to the allocator.</summary>
+     public void FreeTileWorkerData(MemoryAllocator allocator)
+     {
+         allocator.Free(TileWorkerData);
+     }
+
+     // Allocates every ping-pong segment map and selects map 0 as current, map 1 as last.
+     private void AllocSegMap(MemoryAllocator allocator, int segMapSize)
+     {
+         for (int buffer = 0; buffer < Constants.NumPingPongBuffers; ++buffer)
+         {
+             SegMapArray[buffer] = allocator.Allocate<byte>(segMapSize);
+         }
+
+         // Init the index.
+         SegMapIdx = 0;
+         PrevSegMapIdx = 1;
+
+         CurrentFrameSegMap = SegMapArray[SegMapIdx];
+         LastFrameSegMap = SegMapArray[PrevSegMapIdx];
+     }
+
+     // Frees every ping-pong segment map and clears all pointers to them.
+     private void FreeSegMap(MemoryAllocator allocator)
+     {
+         for (int buffer = 0; buffer < Constants.NumPingPongBuffers; ++buffer)
+         {
+             allocator.Free(SegMapArray[buffer]);
+             SegMapArray[buffer] = ArrayPtr<byte>.Null;
+         }
+
+         CurrentFrameSegMap = ArrayPtr<byte>.Null;
+         LastFrameSegMap = ArrayPtr<byte>.Null;
+     }
+
+     // Allocates the mode-info array and the matching grid of pointers.
+     private void DecAllocMi(MemoryAllocator allocator, int miSize)
+     {
+         Mip = allocator.Allocate<ModeInfo>(miSize);
+         MiGridBase = allocator.Allocate<Ptr<ModeInfo>>(miSize);
+     }
+
+     // Frees the mode-info array and the pointer grid, clearing both pointers.
+     private void DecFreeMi(MemoryAllocator allocator)
+     {
+         allocator.Free(Mip);
+         Mip = ArrayPtr<ModeInfo>.Null;
+
+         allocator.Free(MiGridBase);
+         MiGridBase = ArrayPtr<Ptr<ModeInfo>>.Null;
+     }
+
+     /// <summary>
+     /// Frees the buffers obtained by <see cref="AllocContextBuffers"/> and clears
+     /// their pointers. The previous-frame motion vectors are only freed while
+     /// <c>UsePrevFrameMvs</c> is set.
+     /// </summary>
+     public void FreeContextBuffers(MemoryAllocator allocator)
+     {
+         DecFreeMi(allocator);
+         FreeSegMap(allocator);
+
+         allocator.Free(AboveContext);
+         AboveContext = ArrayPtr<sbyte>.Null;
+
+         allocator.Free(AboveSegContext);
+         AboveSegContext = ArrayPtr<sbyte>.Null;
+
+         allocator.Free(Lf.Lfm);
+         Lf.Lfm = ArrayPtr<LoopFilterMask>.Null;
+
+         allocator.Free(CurFrameMvs);
+         CurFrameMvs = ArrayPtr<MvRef>.Null;
+
+         if (!UsePrevFrameMvs)
+         {
+             return;
+         }
+
+         allocator.Free(PrevFrameMvs);
+         PrevFrameMvs = ArrayPtr<MvRef>.Null;
+     }
+
+     private void AllocLoopFilter(MemoryAllocator allocator)
+     {
+         // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
+         // stride and rows are rounded up / truncated to a multiple of 8.
+         Lf.LfmStride = (MiCols + (Constants.MiBlockSize - 1)) >> 3;
+
+         int lfmRows = (MiRows + (Constants.MiBlockSize - 1)) >> 3;
+         Lf.Lfm = allocator.Allocate<LoopFilterMask>(lfmRows * Lf.LfmStride);
+     }
+
+     /// <summary>
+     /// Recomputes the frame geometry for the given size and allocates the
+     /// per-frame buffers. Buffers whose computed size is zero are skipped.
+     /// </summary>
+     public void AllocContextBuffers(MemoryAllocator allocator, int width, int height)
+     {
+         SetMbMi(width, height);
+
+         int miSize = MiStride * CalcMiSize(MiRows);
+         if (miSize != 0)
+         {
+             DecAllocMi(allocator, miSize);
+         }
+
+         int miCount = MiRows * MiCols;
+         if (miCount != 0)
+         {
+             // Create the segmentation map structure and set to 0.
+             AllocSegMap(allocator, miCount);
+         }
+
+         if (MiCols != 0)
+         {
+             int alignedCols = TileInfo.MiColsAlignedToSb(MiCols);
+
+             AboveContext = allocator.Allocate<sbyte>(2 * alignedCols * Constants.MaxMbPlane);
+             AboveSegContext = allocator.Allocate<sbyte>(alignedCols);
+         }
+
+         AllocLoopFilter(allocator);
+
+         CurFrameMvs = allocator.Allocate<MvRef>(miCount);
+
+         // Using the same size as the current frame is fine here,
+         // as this is never true when we have a resolution change.
+         if (UsePrevFrameMvs)
+         {
+             PrevFrameMvs = allocator.Allocate<MvRef>(miCount);
+         }
+     }
+
+     // Points Mi / MiGridVisible one row and one column into their base arrays
+     // and fills the first MiStride * (MiRows + 1) grid entries with null pointers.
+     private unsafe void DecSetupMi()
+     {
+         int visibleOffset = MiStride + 1;
+
+         Mi = Mip.Slice(visibleOffset);
+         MiGridVisible = MiGridBase.Slice(visibleOffset);
+
+         MemoryUtil.Fill(MiGridBase.ToPointer(), Ptr<ModeInfo>.Null, MiStride * (MiRows + 1));
+     }
+
+     /// <summary>
+     /// Prepares the mode-info pointers and zeroes the last-frame segment map when one exists.
+     /// </summary>
+     public unsafe void InitContextBuffers()
+     {
+         DecSetupMi();
+
+         if (LastFrameSegMap.IsNull)
+         {
+             return;
+         }
+
+         MemoryUtil.Fill(LastFrameSegMap.ToPointer(), (byte)0, MiRows * MiCols);
+     }
+
+     // Points xd at the key-frame partition probabilities for intra-only frames,
+     // and at the regular ones otherwise (16 entries either way).
+     private void SetPartitionProbs(ref MacroBlockD xd)
+     {
+         if (FrameIsIntraOnly())
+         {
+             xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.KfPartitionProb[0], 16);
+         }
+         else
+         {
+             xd.PartitionProbs = new ArrayPtr<Array3<byte>>(ref Fc.Value.PartitionProb[0], 16);
+         }
+     }
+
+     /// <summary>
+     /// Wires <paramref name="xd"/> to this frame's buffers: coefficient storage,
+     /// above contexts, dequantizer tables, probabilities and error info.
+     /// </summary>
+     internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr<int> dqcoeff)
+     {
+         for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
+         {
+             xd.Plane[plane].DqCoeff = dqcoeff;
+             xd.AboveContext[plane] = AboveContext.Slice(plane * 2 * TileInfo.MiColsAlignedToSb(MiCols));
+
+             // Plane 0 takes the Y table, every other plane the UV table.
+             if (plane != 0)
+             {
+                 MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref UvDequant);
+             }
+             else
+             {
+                 MemoryUtil.Copy(ref xd.Plane[plane].SegDequant, ref YDequant);
+             }
+
+             xd.Fc = new Ptr<Vp9EntropyProbs>(ref Fc.Value);
+         }
+
+         xd.AboveSegContext = AboveSegContext;
+         xd.MiStride = MiStride;
+         xd.ErrorInfo = new Ptr<InternalErrorInfo>(ref Error);
+
+         SetPartitionProbs(ref xd);
+     }
+
+     // Fills the Y and UV dequant entries (index 0 = DC, 1 = AC) of one segment slot.
+     private void SetSegmentDequant(int segmentId, int qIndex, BitDepth bitDepth)
+     {
+         YDequant[segmentId][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth);
+         YDequant[segmentId][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth);
+         UvDequant[segmentId][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth);
+         UvDequant[segmentId][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth);
+     }
+
+     /// <summary>
+     /// Builds the y/uv dequant values, per segment when segmentation is enabled.
+     /// </summary>
+     public void SetupSegmentationDequant()
+     {
+         const BitDepth bitDepth = BitDepth.Bits8; // TODO: Configurable
+
+         if (!Seg.Enabled)
+         {
+             // When segmentation is disabled, only the first value is used. The
+             // remaining are don't cares.
+             SetSegmentDequant(0, BaseQindex, bitDepth);
+             return;
+         }
+
+         // Build y/uv dequant values based on segmentation.
+         for (int segment = 0; segment < Constants.MaxSegments; ++segment)
+         {
+             int qIndex = QuantCommon.GetQIndex(ref Seg, segment, BaseQindex);
+             SetSegmentDequant(segment, qIndex, bitDepth);
+         }
+     }
+
+     /// <summary>
+     /// Recomputes the scale factors of every reference frame against the current frame size.
+     /// </summary>
+     public void SetupScaleFactors()
+     {
+         for (int refIdx = 0; refIdx < Constants.RefsPerFrame; ++refIdx)
+         {
+             ref RefBuffer reference = ref FrameRefs[refIdx];
+             reference.Sf.SetupScaleFactorsForFrame(reference.Buf.Width, reference.Buf.Height, Width, Height);
+         }
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs b/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs
deleted file mode 100644
index c54a95f9..00000000
--- a/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs
+++ /dev/null
@@ -1,103 +0,0 @@
-using Ryujinx.Graphics.Gpu;
-using Ryujinx.Graphics.VDec;
-using Ryujinx.Graphics.Vic;
-using System.Collections.Generic;
-
-namespace Ryujinx.Graphics
-{
- public class CdmaProcessor
- {
- private const int MethSetMethod = 0x10;
- private const int MethSetData = 0x11;
-
- private readonly VideoDecoder _videoDecoder;
- private readonly VideoImageComposer _videoImageComposer;
-
- public CdmaProcessor()
- {
- _videoDecoder = new VideoDecoder();
- _videoImageComposer = new VideoImageComposer(_videoDecoder);
- }
-
- public void PushCommands(GpuContext gpu, int[] cmdBuffer)
- {
- List<ChCommand> commands = new List<ChCommand>();
-
- ChClassId currentClass = 0;
-
- for (int index = 0; index < cmdBuffer.Length; index++)
- {
- int cmd = cmdBuffer[index];
-
- int value = (cmd >> 0) & 0xffff;
- int methodOffset = (cmd >> 16) & 0xfff;
-
- ChSubmissionMode submissionMode = (ChSubmissionMode)((cmd >> 28) & 0xf);
-
- switch (submissionMode)
- {
- case ChSubmissionMode.SetClass: currentClass = (ChClassId)(value >> 6); break;
-
- case ChSubmissionMode.Incrementing:
- {
- int count = value;
-
- for (int argIdx = 0; argIdx < count; argIdx++)
- {
- int argument = cmdBuffer[++index];
-
- commands.Add(new ChCommand(currentClass, methodOffset + argIdx, argument));
- }
-
- break;
- }
-
- case ChSubmissionMode.NonIncrementing:
- {
- int count = value;
-
- int[] arguments = new int[count];
-
- for (int argIdx = 0; argIdx < count; argIdx++)
- {
- arguments[argIdx] = cmdBuffer[++index];
- }
-
- commands.Add(new ChCommand(currentClass, methodOffset, arguments));
-
- break;
- }
- }
- }
-
- ProcessCommands(gpu, commands.ToArray());
- }
-
- private void ProcessCommands(GpuContext gpu, ChCommand[] commands)
- {
- int methodOffset = 0;
-
- foreach (ChCommand command in commands)
- {
- switch (command.MethodOffset)
- {
- case MethSetMethod: methodOffset = command.Arguments[0]; break;
-
- case MethSetData:
- {
- if (command.ClassId == ChClassId.NvDec)
- {
- _videoDecoder.Process(gpu, methodOffset, command.Arguments);
- }
- else if (command.ClassId == ChClassId.GraphicsVic)
- {
- _videoImageComposer.Process(gpu, methodOffset, command.Arguments);
- }
-
- break;
- }
- }
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/ChClassId.cs b/Ryujinx.Graphics.Nvdec/ChClassId.cs
deleted file mode 100644
index 115f0b89..00000000
--- a/Ryujinx.Graphics.Nvdec/ChClassId.cs
+++ /dev/null
@@ -1,20 +0,0 @@
-namespace Ryujinx.Graphics
-{
- enum ChClassId
- {
- Host1X = 0x1,
- VideoEncodeMpeg = 0x20,
- VideoEncodeNvEnc = 0x21,
- VideoStreamingVi = 0x30,
- VideoStreamingIsp = 0x32,
- VideoStreamingIspB = 0x34,
- VideoStreamingViI2c = 0x36,
- GraphicsVic = 0x5d,
- Graphics3D = 0x60,
- GraphicsGpu = 0x61,
- Tsec = 0xe0,
- TsecB = 0xe1,
- NvJpg = 0xc0,
- NvDec = 0xf0
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs b/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs
deleted file mode 100644
index b01b77ed..00000000
--- a/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs
+++ /dev/null
@@ -1,18 +0,0 @@
-namespace Ryujinx.Graphics
-{
- struct ChCommand
- {
- public ChClassId ClassId { get; private set; }
-
- public int MethodOffset { get; private set; }
-
- public int[] Arguments { get; private set; }
-
- public ChCommand(ChClassId classId, int methodOffset, params int[] arguments)
- {
- ClassId = classId;
- MethodOffset = methodOffset;
- Arguments = arguments;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs b/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs
deleted file mode 100644
index 5c653019..00000000
--- a/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs
+++ /dev/null
@@ -1,13 +0,0 @@
-namespace Ryujinx.Graphics
-{
- enum ChSubmissionMode
- {
- SetClass = 0,
- Incrementing = 1,
- NonIncrementing = 2,
- Mask = 3,
- Immediate = 4,
- Restart = 5,
- Gather = 6
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/CodecId.cs b/Ryujinx.Graphics.Nvdec/CodecId.cs
new file mode 100644
index 00000000..9aaa3d02
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/CodecId.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Nvdec
+{
+ // Identifies the video codec selected for a decode operation.
+ // NvdecDevice casts the raw value of the SetCodecID register to this enum,
+ // so the numeric values must not be changed. The numbering has gaps:
+ // no member is defined for 0, 6 or 8.
+ public enum CodecId
+ {
+ Mpeg = 1,
+ Vc1 = 2,
+ H264 = 3,
+ Mpeg4 = 4,
+ Vp8 = 5,
+ Hevc = 7,
+ Vp9 = 9
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs b/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs
new file mode 100644
index 00000000..f5074f48
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Nvdec
+{
+ // Immutable payload of the NvdecDevice.FrameDecoded event.
+ // It can only be created from inside this assembly (internal constructor).
+ public struct FrameDecodedEventArgs
+ {
+ // Codec that produced the frame.
+ public CodecId CodecId { get; }
+ // Luma and chroma offsets of the written frame. H264Decoder passes the raw
+ // values of the surface offset registers here, i.e. values that have not
+ // been shifted into a byte address (see MemoryExtensions.ExtendOffset).
+ public uint LumaOffset { get; }
+ public uint ChromaOffset { get; }
+
+ internal FrameDecodedEventArgs(CodecId codecId, uint lumaOffset, uint chromaOffset)
+ {
+ CodecId = codecId;
+ LumaOffset = lumaOffset;
+ ChromaOffset = chromaOffset;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/H264Decoder.cs
new file mode 100644
index 00000000..57ce12d0
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/H264Decoder.cs
@@ -0,0 +1,43 @@
+using Ryujinx.Graphics.Nvdec.H264;
+using Ryujinx.Graphics.Nvdec.Image;
+using Ryujinx.Graphics.Nvdec.Types.H264;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Connects the NVDEC register state to the H264 decoder: reads the picture
+    /// information and bitstream from device memory, decodes one frame and writes
+    /// the result back to the output surface selected by the guest.
+    /// </summary>
+    static class H264Decoder
+    {
+        // Picture dimensions are supplied in macroblocks; one macroblock is 16x16 pixels.
+        private const int MbSizeInPixels = 16;
+
+        // Single decoder instance shared by every call.
+        private static readonly Decoder _decoder = new Decoder();
+
+        /// <summary>
+        /// Decodes the frame described by the current register state.
+        /// </summary>
+        /// <param name="device">Device that is notified through <c>OnFrameDecoded</c> on success</param>
+        /// <param name="rm">Provides access to device memory and to the surface cache</param>
+        /// <param name="state">Current NVDEC register state</param>
+        public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state)
+        {
+            PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetPictureInfoOffset);
+            H264PictureInfo info = pictureInfo.Convert();
+
+            ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetBitstreamOffset, (int)pictureInfo.BitstreamSize);
+
+            int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels;
+            int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels;
+
+            // Offsets of 0 request a surface that is not loaded from guest memory.
+            ISurface outputSurface = rm.Cache.Get(_decoder, CodecId.H264, 0, 0, width, height);
+
+            try
+            {
+                if (_decoder.Decode(ref info, outputSurface, bitstream))
+                {
+                    int li = (int)pictureInfo.LumaOutputSurfaceIndex;
+                    int ci = (int)pictureInfo.ChromaOutputSurfaceIndex;
+
+                    uint lumaOffset = state.SetSurfaceLumaOffset[li];
+                    uint chromaOffset = state.SetSurfaceChromaOffset[ci];
+
+                    SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset);
+
+                    device.OnFrameDecoded(CodecId.H264, lumaOffset, chromaOffset);
+                }
+            }
+            finally
+            {
+                // Always give the surface back, even when decoding, the write-back or an
+                // event subscriber throws; otherwise the cache entry would keep a raised
+                // reference count and could never be recycled.
+                rm.Cache.Put(outputSurface);
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs
new file mode 100644
index 00000000..c362185f
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs
@@ -0,0 +1,151 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ // Fixed-size, reference-counted pool of decoder surfaces.
+ // The pool array is kept in most-recently-used order: every successful Get
+ // moves the entry it returns to index 0, so the last index always holds the
+ // entry that was used least recently.
+ class SurfaceCache
+ {
+ // Must be equal to at least the maximum number of surfaces
+ // that can be in use simultaneously (which is 17, since H264
+ // can have up to 16 reference frames, and we need another one
+ // for the current frame).
+ // Realistically, most codecs won't ever use more than 4 simultaneously.
+ private const int MaxItems = 17;
+
+ // One pool slot. A slot that was never filled has all fields at their
+ // default values (Surface == null, CodecId == 0).
+ private struct CacheItem
+ {
+ public int ReferenceCount;
+ public uint LumaOffset;
+ public uint ChromaOffset;
+ public int Width;
+ public int Height;
+ public CodecId CodecId;
+ public ISurface Surface;
+ }
+
+ private readonly CacheItem[] _pool = new CacheItem[MaxItems];
+
+ private readonly MemoryManager _gmm;
+
+ public SurfaceCache(MemoryManager gmm)
+ {
+ _gmm = gmm;
+ }
+
+ // Returns a surface for the given codec, offsets and dimensions, and takes
+ // a reference on it. Each call must be balanced by a call to Put.
+ // Lookup happens in three steps: exact match, reuse of an unreferenced
+ // surface with the same codec and size, and finally creation of a new
+ // surface through decoder.CreateSurface.
+ // Throws InvalidOperationException when a new surface is needed but the
+ // least recently used slot is still referenced.
+ public ISurface Get(IDecoder decoder, CodecId codecId, uint lumaOffset, uint chromaOffset, int width, int height)
+ {
+ ISurface surface = null;
+
+ // Try to find a compatible surface with same parameters, and same offsets.
+ // Note that this step does not look at ReferenceCount: an entry that is
+ // already in use is shared, and simply gains another reference.
+ for (int i = 0; i < MaxItems; i++)
+ {
+ ref CacheItem item = ref _pool[i];
+
+ if (item.LumaOffset == lumaOffset &&
+ item.ChromaOffset == chromaOffset &&
+ item.CodecId == codecId &&
+ item.Width == width &&
+ item.Height == height)
+ {
+ // The slot must be updated through the ref before MoveToFront,
+ // as MoveToFront shifts the array contents.
+ item.ReferenceCount++;
+ surface = item.Surface;
+ MoveToFront(i);
+ break;
+ }
+ }
+
+ // If we failed to find a perfect match, now ignore the offsets.
+ // Search backwards to replace the oldest compatible surface,
+ // this avoids thrashing frequently used surfaces.
+ // Now we need to ensure that the surface is not in use, as we'll change the data.
+ if (surface == null)
+ {
+ for (int i = MaxItems - 1; i >= 0; i--)
+ {
+ ref CacheItem item = ref _pool[i];
+
+ if (item.ReferenceCount == 0 && item.CodecId == codecId && item.Width == width && item.Height == height)
+ {
+ item.ReferenceCount = 1;
+ item.LumaOffset = lumaOffset;
+ item.ChromaOffset = chromaOffset;
+ surface = item.Surface;
+
+ // The surface contents are only loaded from memory when at least
+ // one of the offsets is non-zero.
+ if ((lumaOffset | chromaOffset) != 0)
+ {
+ SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset);
+ }
+
+ MoveToFront(i);
+ break;
+ }
+ }
+ }
+
+ // If everything else failed, we try to create a new surface,
+ // and insert it on the pool. We replace the oldest item on the
+ // pool to avoid thrashing frequently used surfaces.
+ // If even the oldest item is in use, that means that the entire pool
+ // is in use, in that case we throw as there's no place to insert
+ // the new surface.
+ if (surface == null)
+ {
+ if (_pool[MaxItems - 1].ReferenceCount == 0)
+ {
+ surface = decoder.CreateSurface(width, height);
+
+ if ((lumaOffset | chromaOffset) != 0)
+ {
+ SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset);
+ }
+
+ // After this call the evicted (oldest) entry sits at index 0, where it
+ // is overwritten; its previous surface, if any, is disposed first.
+ MoveToFront(MaxItems - 1);
+ ref CacheItem item = ref _pool[0];
+ item.Surface?.Dispose();
+ item.ReferenceCount = 1;
+ item.LumaOffset = lumaOffset;
+ item.ChromaOffset = chromaOffset;
+ item.Width = width;
+ item.Height = height;
+ item.CodecId = codecId;
+ item.Surface = surface;
+ }
+ else
+ {
+ throw new InvalidOperationException("No free slot on the surface pool.");
+ }
+ }
+
+ return surface;
+ }
+
+ // Releases one reference previously taken by Get. The first slot whose
+ // Surface equals the argument is decremented; the surface itself stays in
+ // the pool so it can be reused. Nothing happens if no slot matches.
+ public void Put(ISurface surface)
+ {
+ for (int i = 0; i < MaxItems; i++)
+ {
+ ref CacheItem item = ref _pool[i];
+
+ if (item.Surface == surface)
+ {
+ item.ReferenceCount--;
+ // A negative count means Put was called more often than Get.
+ Debug.Assert(item.ReferenceCount >= 0);
+ break;
+ }
+ }
+ }
+
+ // Moves the entry at the given index to index 0, shifting the entries
+ // that were in front of it one position towards the back.
+ private void MoveToFront(int index)
+ {
+ // If index is 0 we don't need to do anything,
+ // as it's already on the front.
+ if (index != 0)
+ {
+ CacheItem temp = _pool[index];
+ Array.Copy(_pool, 0, _pool, 1, index);
+ _pool[0] = temp;
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs
new file mode 100644
index 00000000..6087f5b1
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs
@@ -0,0 +1,26 @@
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+    /// <summary>
+    /// Helpers shared by the code that moves decoded pictures between
+    /// surfaces and device memory.
+    /// </summary>
+    static class SurfaceCommon
+    {
+        /// <summary>
+        /// Gets the total size in bytes, as reported by <c>SizeCalculator</c>, of a
+        /// block linear image with the given dimensions.
+        /// </summary>
+        /// <param name="width">Image width</param>
+        /// <param name="height">Image height</param>
+        /// <param name="bytesPerPixel">Number of bytes used by each pixel</param>
+        /// <returns>Total size of the image data</returns>
+        public static int GetBlockLinearSize(int width, int height, int bytesPerPixel)
+        {
+            var sizeInfo = SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, 2, 1, 1);
+
+            return sizeInfo.TotalSize;
+        }
+
+        /// <summary>
+        /// Copies the Y, U and V planes (in that order) from one surface to another.
+        /// </summary>
+        /// <param name="src">Surface to copy from</param>
+        /// <param name="dst">Surface to copy to</param>
+        public static void Copy(ISurface src, ISurface dst)
+        {
+            CopyPlane(src.YPlane, dst.YPlane);
+            CopyPlane(src.UPlane, dst.UPlane);
+            CopyPlane(src.VPlane, dst.VPlane);
+        }
+
+        /// <summary>
+        /// Exposes the memory of a plane as a span of bytes.
+        /// </summary>
+        /// <param name="plane">Plane that provides the pointer and the length</param>
+        /// <returns>Span covering <c>plane.Length</c> bytes starting at <c>plane.Pointer</c></returns>
+        public unsafe static Span<byte> AsSpan(this Plane plane)
+            => new Span<byte>((void*)plane.Pointer, plane.Length);
+
+        // Copies the bytes of one plane over the bytes of another.
+        private static void CopyPlane(Plane from, Plane to)
+        {
+            Span<byte> source = from.AsSpan();
+            Span<byte> target = to.AsSpan();
+
+            source.CopyTo(target);
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs
new file mode 100644
index 00000000..a8199932
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs
@@ -0,0 +1,133 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ // Loads picture data stored in device memory in block linear layout into the
+ // linear Y, U and V planes of a surface.
+ static class SurfaceReader
+ {
+ // Fills all three planes of the surface.
+ // gmm: memory manager used to access device memory.
+ // surface: destination; its dimensions and strides define how much is read.
+ // lumaOffset/chromaOffset: locations of the luma data (1 byte per pixel) and
+ // of the chroma data (2 bytes per pixel, U followed by V). They are passed
+ // to DeviceGetSpan, which converts them into an address.
+ public static void Read(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset)
+ {
+ int width = surface.Width;
+ int height = surface.Height;
+ int stride = surface.Stride;
+
+ ReadOnlySpan<byte> luma = gmm.DeviceGetSpan(lumaOffset, GetBlockLinearSize(width, height, 1));
+
+ ReadLuma(surface.YPlane.AsSpan(), luma, stride, width, height);
+
+ int uvWidth = surface.UvWidth;
+ int uvHeight = surface.UvHeight;
+ int uvStride = surface.UvStride;
+
+ ReadOnlySpan<byte> chroma = gmm.DeviceGetSpan(chromaOffset, GetBlockLinearSize(uvWidth, uvHeight, 2));
+
+ ReadChroma(surface.UPlane.AsSpan(), surface.VPlane.AsSpan(), chroma, uvStride, uvWidth, uvHeight);
+ }
+
+ // Luma needs no channel separation, so the generic layout converter is used.
+ private static void ReadLuma(Span<byte> dst, ReadOnlySpan<byte> src, int dstStride, int width, int height)
+ {
+ LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, 1, 2, src);
+ }
+
+ // Splits interleaved U/V byte pairs stored in block linear layout into two
+ // separate linear planes. width and height are measured in U/V pairs;
+ // dstStride is the length of one destination row.
+ private unsafe static void ReadChroma(
+ Span<byte> dstU,
+ Span<byte> dstV,
+ ReadOnlySpan<byte> src,
+ int dstStride,
+ int width,
+ int height)
+ {
+ // NOTE(review): the trailing "2, 2" arguments are presumably the block
+ // height and the bytes per pixel - confirm against OffsetCalculator.
+ OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);
+
+ if (Sse2.IsSupported)
+ {
+ // Number of source bytes per row that can be handled in whole 64 byte steps.
+ int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);
+
+ // Destination bytes to skip at the end of each row.
+ int outStrideGap = dstStride - width;
+
+ fixed (byte* dstUPtr = dstU, dstVPtr = dstV, dataPtr = src)
+ {
+ byte* uPtr = dstUPtr;
+ byte* vPtr = dstVPtr;
+
+ for (int y = 0; y < height; y++)
+ {
+ calc.SetY(y);
+
+ // Vector path: x counts source bytes. Each iteration consumes 64 source
+ // bytes (32 U/V pairs) and produces 32 bytes for each output plane.
+ for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32)
+ {
+ // The 64 bytes are fetched as four 16 byte vectors located at
+ // +0, +0x20, +0x100 and +0x120 from the computed offset.
+ // NOTE(review): presumably dictated by the block linear layout - confirm.
+ byte* offset = dataPtr + calc.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)offset;
+ Vector128<byte> value2 = *(Vector128<byte>*)offset2;
+ Vector128<byte> value3 = *(Vector128<byte>*)offset3;
+ Vector128<byte> value4 = *(Vector128<byte>*)offset4;
+
+ // Four rounds of low/high byte unpacking on each vector pair separate
+ // the even (U) bytes from the odd (V) bytes: after the last round
+ // u30/u31 hold the U bytes and v30/v31 the V bytes, in source order.
+ Vector128<byte> u00 = Sse2.UnpackLow(value, value2);
+ Vector128<byte> v00 = Sse2.UnpackHigh(value, value2);
+ Vector128<byte> u01 = Sse2.UnpackLow(value3, value4);
+ Vector128<byte> v01 = Sse2.UnpackHigh(value3, value4);
+
+ Vector128<byte> u10 = Sse2.UnpackLow(u00, v00);
+ Vector128<byte> v10 = Sse2.UnpackHigh(u00, v00);
+ Vector128<byte> u11 = Sse2.UnpackLow(u01, v01);
+ Vector128<byte> v11 = Sse2.UnpackHigh(u01, v01);
+
+ Vector128<byte> u20 = Sse2.UnpackLow(u10, v10);
+ Vector128<byte> v20 = Sse2.UnpackHigh(u10, v10);
+ Vector128<byte> u21 = Sse2.UnpackLow(u11, v11);
+ Vector128<byte> v21 = Sse2.UnpackHigh(u11, v11);
+
+ Vector128<byte> u30 = Sse2.UnpackLow(u20, v20);
+ Vector128<byte> v30 = Sse2.UnpackHigh(u20, v20);
+ Vector128<byte> u31 = Sse2.UnpackLow(u21, v21);
+ Vector128<byte> v31 = Sse2.UnpackHigh(u21, v21);
+
+ *(Vector128<byte>*)uPtr = u30;
+ *(Vector128<byte>*)(uPtr + 16) = u31;
+ *(Vector128<byte>*)vPtr = v30;
+ *(Vector128<byte>*)(vPtr + 16) = v31;
+ }
+
+ // Scalar tail: here x counts U/V pairs, so it resumes at half the
+ // number of bytes consumed by the vector loop.
+ for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++)
+ {
+ byte* offset = dataPtr + calc.GetOffset(x);
+
+ *uPtr = *offset;
+ *vPtr = *(offset + 1);
+ }
+
+ // Both pointers advanced by width in this row; move on to the next row.
+ uPtr += outStrideGap;
+ vPtr += outStrideGap;
+ }
+ }
+ }
+ else
+ {
+ // Portable fallback: one U/V pair at a time.
+ for (int y = 0; y < height; y++)
+ {
+ int dstBaseOffset = y * dstStride;
+
+ calc.SetY(y);
+
+ for (int x = 0; x < width; x++)
+ {
+ int srcOffset = calc.GetOffset(x);
+
+ dstU[dstBaseOffset + x] = src[srcOffset];
+ dstV[dstBaseOffset + x] = src[srcOffset + 1];
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs
new file mode 100644
index 00000000..5c294621
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs
@@ -0,0 +1,126 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;
+using static Ryujinx.Graphics.Nvdec.MemoryExtensions;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ // Stores the linear Y, U and V planes of a surface into device memory
+ // using block linear layout.
+ static class SurfaceWriter
+ {
+ // Writes all three planes of the surface.
+ // gmm: memory manager used to access device memory.
+ // surface: source; its dimensions and strides define how much is written.
+ // lumaOffset/chromaOffset: destinations of the luma data (1 byte per pixel)
+ // and of the chroma data (2 bytes per pixel, U followed by V). They are
+ // converted into addresses with ExtendOffset.
+ public static void Write(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset)
+ {
+ int lumaSize = GetBlockLinearSize(surface.Width, surface.Height, 1);
+
+ // Both writable regions are disposed when this method returns (using declarations).
+ using var luma = gmm.GetWritableRegion(ExtendOffset(lumaOffset), lumaSize);
+
+ WriteLuma(
+ luma.Memory.Span,
+ surface.YPlane.AsSpan(),
+ surface.Stride,
+ surface.Width,
+ surface.Height);
+
+ int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight, 2);
+
+ using var chroma = gmm.GetWritableRegion(ExtendOffset(chromaOffset), chromaSize);
+
+ WriteChroma(
+ chroma.Memory.Span,
+ surface.UPlane.AsSpan(),
+ surface.VPlane.AsSpan(),
+ surface.UvStride,
+ surface.UvWidth,
+ surface.UvHeight);
+ }
+
+ // Luma needs no channel merging, so the generic layout converter is used.
+ private static void WriteLuma(Span<byte> dst, ReadOnlySpan<byte> src, int srcStride, int width, int height)
+ {
+ LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src);
+ }
+
+ // Merges two separate linear planes into interleaved U/V byte pairs stored
+ // in block linear layout. width and height are measured in U/V pairs;
+ // srcStride is the length of one source row.
+ private unsafe static void WriteChroma(
+ Span<byte> dst,
+ ReadOnlySpan<byte> srcU,
+ ReadOnlySpan<byte> srcV,
+ int srcStride,
+ int width,
+ int height)
+ {
+ // NOTE(review): the trailing "2, 2" arguments are presumably the block
+ // height and the bytes per pixel - confirm against OffsetCalculator.
+ OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);
+
+ if (Sse2.IsSupported)
+ {
+ // Number of output bytes per row that can be handled in whole 64 byte steps.
+ int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);
+
+ // Source bytes to skip at the end of each row.
+ int inStrideGap = srcStride - width;
+
+ fixed (byte* outputPtr = dst, srcUPtr = srcU, srcVPtr = srcV)
+ {
+ byte* inUPtr = srcUPtr;
+ byte* inVPtr = srcVPtr;
+
+ for (int y = 0; y < height; y++)
+ {
+ calc.SetY(y);
+
+ // Vector path: x counts output bytes. Each iteration consumes 32 bytes
+ // from each input plane and produces 64 output bytes (32 U/V pairs).
+ for (int x = 0; x < strideTrunc64; x += 64, inUPtr += 32, inVPtr += 32)
+ {
+ // The 64 bytes are stored as four 16 byte vectors located at
+ // +0, +0x20, +0x100 and +0x120 from the computed offset.
+ // NOTE(review): presumably dictated by the block linear layout - confirm.
+ byte* offset = outputPtr + calc.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)inUPtr;
+ Vector128<byte> value2 = *(Vector128<byte>*)inVPtr;
+ Vector128<byte> value3 = *(Vector128<byte>*)(inUPtr + 16);
+ Vector128<byte> value4 = *(Vector128<byte>*)(inVPtr + 16);
+
+ // A single round of byte unpacking interleaves the U and V bytes:
+ // uv0..uv3 hold U/V pairs 0-7, 8-15, 16-23 and 24-31.
+ Vector128<byte> uv0 = Sse2.UnpackLow(value, value2);
+ Vector128<byte> uv1 = Sse2.UnpackHigh(value, value2);
+ Vector128<byte> uv2 = Sse2.UnpackLow(value3, value4);
+ Vector128<byte> uv3 = Sse2.UnpackHigh(value3, value4);
+
+ *(Vector128<byte>*)offset = uv0;
+ *(Vector128<byte>*)offset2 = uv1;
+ *(Vector128<byte>*)offset3 = uv2;
+ *(Vector128<byte>*)offset4 = uv3;
+ }
+
+ // Scalar tail: here x counts U/V pairs, so it resumes at half the
+ // number of bytes produced by the vector loop.
+ for (int x = strideTrunc64 / 2; x < width; x++, inUPtr++, inVPtr++)
+ {
+ byte* offset = outputPtr + calc.GetOffset(x);
+
+ *offset = *inUPtr;
+ *(offset + 1) = *inVPtr;
+ }
+
+ // Both pointers advanced by width in this row; move on to the next row.
+ inUPtr += inStrideGap;
+ inVPtr += inStrideGap;
+ }
+ }
+ }
+ else
+ {
+ // Portable fallback: one U/V pair at a time.
+ for (int y = 0; y < height; y++)
+ {
+ int srcBaseOffset = y * srcStride;
+
+ calc.SetY(y);
+
+ for (int x = 0; x < width; x++)
+ {
+ int dstOffset = calc.GetOffset(x);
+
+ dst[dstOffset + 0] = srcU[srcBaseOffset + x];
+ dst[dstOffset + 1] = srcV[srcBaseOffset + x];
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs b/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs
new file mode 100644
index 00000000..2855a8c7
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs
@@ -0,0 +1,28 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Memory manager helpers that take 32-bit device offsets. Such an offset is
+    /// turned into an address by shifting it left by 8 bits.
+    /// </summary>
+    static class MemoryExtensions
+    {
+        // Number of bits a device offset is shifted by to obtain an address.
+        private const int OffsetShift = 8;
+
+        /// <summary>
+        /// Reads a value of type <typeparamref name="T"/> from the given device offset.
+        /// </summary>
+        /// <typeparam name="T">Type of the value to read</typeparam>
+        /// <param name="gmm">Memory manager to read from</param>
+        /// <param name="offset">Device offset of the value</param>
+        /// <returns>The value read</returns>
+        public static T DeviceRead<T>(this MemoryManager gmm, uint offset) where T : unmanaged
+        {
+            ulong address = ExtendOffset(offset);
+
+            return gmm.Read<T>(address);
+        }
+
+        /// <summary>
+        /// Gets a read-only span of memory starting at the given device offset.
+        /// </summary>
+        /// <param name="gmm">Memory manager to read from</param>
+        /// <param name="offset">Device offset of the data</param>
+        /// <param name="size">Size of the span in bytes</param>
+        /// <returns>Span with the requested data</returns>
+        public static ReadOnlySpan<byte> DeviceGetSpan(this MemoryManager gmm, uint offset, int size)
+        {
+            ulong address = ExtendOffset(offset);
+
+            return gmm.GetSpan(address, size);
+        }
+
+        /// <summary>
+        /// Writes data at the given device offset.
+        /// </summary>
+        /// <param name="gmm">Memory manager to write to</param>
+        /// <param name="offset">Device offset to write at</param>
+        /// <param name="data">Data to be written</param>
+        public static void DeviceWrite(this MemoryManager gmm, uint offset, ReadOnlySpan<byte> data)
+        {
+            ulong address = ExtendOffset(offset);
+
+            gmm.Write(address, data);
+        }
+
+        /// <summary>
+        /// Converts a 32-bit device offset into a 64-bit address.
+        /// </summary>
+        /// <param name="offset">Device offset</param>
+        /// <returns>The offset shifted left by 8 bits</returns>
+        public static ulong ExtendOffset(uint offset) => (ulong)offset << OffsetShift;
+    }
+}
diff --git a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs
new file mode 100644
index 00000000..cc22cb2a
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs
@@ -0,0 +1,55 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Video decoder device. Register accesses are forwarded to the register state,
+    /// and a write to the <c>Execute</c> register starts decoding with the codec
+    /// selected by the <c>SetCodecID</c> register.
+    /// </summary>
+    public class NvdecDevice : IDeviceState
+    {
+        private readonly ResourceManager _rm;
+        private readonly DeviceState<NvdecRegisters> _state;
+
+        /// <summary>
+        /// Raised by the codec specific decoders through <see cref="OnFrameDecoded"/>.
+        /// </summary>
+        public event Action<FrameDecodedEventArgs> FrameDecoded;
+
+        /// <summary>
+        /// Creates a new instance of the device.
+        /// </summary>
+        /// <param name="gmm">Memory manager used to access device memory</param>
+        public NvdecDevice(MemoryManager gmm)
+        {
+            _rm = new ResourceManager(gmm, new SurfaceCache(gmm));
+
+            // Only the Execute register needs a write callback.
+            var callbacks = new Dictionary<string, RwCallback>();
+            callbacks.Add(nameof(NvdecRegisters.Execute), new RwCallback(Execute, null));
+
+            _state = new DeviceState<NvdecRegisters>(callbacks);
+        }
+
+        /// <summary>
+        /// Reads a register.
+        /// </summary>
+        /// <param name="offset">Register offset</param>
+        /// <returns>Value returned by the register state</returns>
+        public int Read(int offset)
+        {
+            return _state.Read(offset);
+        }
+
+        /// <summary>
+        /// Writes a register.
+        /// </summary>
+        /// <param name="offset">Register offset</param>
+        /// <param name="data">Value to write</param>
+        public void Write(int offset, int data)
+        {
+            _state.Write(offset, data);
+        }
+
+        // Write callback of the Execute register; the written value is not used.
+        private void Execute(int data) => Decode((CodecId)_state.State.SetCodecID);
+
+        // Hands the current register state to the decoder of the given codec.
+        private void Decode(CodecId codecId)
+        {
+            if (codecId == CodecId.H264)
+            {
+                H264Decoder.Decode(this, _rm, ref _state.State);
+            }
+            else if (codecId == CodecId.Vp9)
+            {
+                Vp9Decoder.Decode(this, _rm, ref _state.State);
+            }
+            else
+            {
+                Logger.PrintError(LogClass.Nvdec, $"Unsupported codec \"{codecId}\".");
+            }
+        }
+
+        /// <summary>
+        /// Raises <see cref="FrameDecoded"/> if there is at least one subscriber.
+        /// </summary>
+        /// <param name="codecId">Codec that produced the frame</param>
+        /// <param name="lumaOffset">Luma offset of the frame</param>
+        /// <param name="chromaOffset">Chroma offset of the frame</param>
+        internal void OnFrameDecoded(CodecId codecId, uint lumaOffset, uint chromaOffset)
+        {
+            Action<FrameDecodedEventArgs> handler = FrameDecoded;
+
+            if (handler != null)
+            {
+                handler(new FrameDecodedEventArgs(codecId, lumaOffset, chromaOffset));
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs b/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs
new file mode 100644
index 00000000..b40e08b0
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs
@@ -0,0 +1,41 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ // Register file of the NVDEC device, used as the state type of NvdecDevice.
+ // The order and size of the fields define the register offsets, so fields
+ // must not be added, removed or reordered. The hexadecimal suffix of the
+ // Reserved*/Unknown* names is the byte offset of that field (for example,
+ // Reserved0 is 128 uints = 0x200 bytes, which places SetCodecID at 0x200 and
+ // Reserved204 right after it).
+ // The *Offset registers are passed to the MemoryExtensions helpers, which
+ // shift them left by 8 bits to obtain an address.
+ // Note: Most of those names are not official.
+ unsafe struct NvdecRegisters
+ {
+ public fixed uint Reserved0[128];
+ // Codec selection; NvdecDevice casts this value to CodecId.
+ public uint SetCodecID;
+ public fixed uint Reserved204[63];
+ // NvdecDevice registers a write callback on this register that starts decoding.
+ public uint Execute;
+ public fixed uint Reserved304[63];
+ public uint SetPlatformID;
+ public uint SetPictureInfoOffset;
+ public uint SetBitstreamOffset;
+ public uint SetFrameNumber;
+ public uint SetH264SliceDataOffsetsOffset; // Also used by VC1
+ public uint SetH264MvDumpOffset; // Also used by VC1
+ public uint Unknown418; // Used by VC1
+ public uint Unknown41C;
+ public uint Unknown420; // Used by VC1
+ public uint SetFrameStatsOffset;
+ public uint SetH264LastSurfaceLumaOffset;
+ public uint SetH264LastSurfaceChromaOffset;
+ // Output surface offsets; H264Decoder indexes these arrays with the surface
+ // indices stored in the picture info.
+ public Array17<uint> SetSurfaceLumaOffset;
+ public Array17<uint> SetSurfaceChromaOffset;
+ public uint Unknown4B8;
+ public uint Unknown4BC;
+ public uint SetCryptoData0Offset;
+ public uint SetCryptoData1Offset;
+ public Array62<uint> Unknown4C8;
+ public uint SetVp9EntropyProbsOffset;
+ public uint SetVp9BackwardUpdatesOffset;
+ public uint SetVp9LastFrameSegMapOffset;
+ public uint SetVp9CurrFrameSegMapOffset;
+ public uint Unknown5D0;
+ public uint SetVp9LastFrameMvsOffset;
+ public uint SetVp9CurrFrameMvsOffset;
+ public uint Unknown5DC;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/ResourceManager.cs b/Ryujinx.Graphics.Nvdec/ResourceManager.cs
new file mode 100644
index 00000000..6e0d9ab2
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/ResourceManager.cs
@@ -0,0 +1,17 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ // Bundles the two objects that the codec specific decoders receive from
+ // NvdecDevice: the memory manager and the surface cache.
+ // Both members are references, so copies of this struct share the same objects.
+ struct ResourceManager
+ {
+ // Memory manager used to access device memory.
+ public MemoryManager Gmm { get; }
+ // Pool of decoder surfaces.
+ public SurfaceCache Cache { get; }
+
+ public ResourceManager(MemoryManager gmm, SurfaceCache cache)
+ {
+ Gmm = gmm;
+ Cache = cache;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj
index ddc3a8af..3561cf80 100644
--- a/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj
+++ b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
@@ -15,11 +15,13 @@
</PropertyGroup>
<ItemGroup>
- <PackageReference Include="FFmpeg.AutoGen" Version="4.2.2.1" />
- </ItemGroup>
-
- <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.H264\Ryujinx.Graphics.Nvdec.H264.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
</ItemGroup>
</Project>
diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs
new file mode 100644
index 00000000..92767e35
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs
@@ -0,0 +1,120 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.H264
+{
+ // H264 picture parameters in the layout that H264Decoder reads from device
+ // memory (at SetPictureInfoOffset). The order and size of the fields define
+ // that layout, so fields must not be added, removed or reordered. The
+ // hexadecimal suffix of the Unknown*/Padding* names is the byte offset of
+ // that field. Convert() turns the data into the decoder's H264PictureInfo.
+ struct PictureInfo
+ {
+ // Not accessed by the code; occupies the first 18 uints (0x48 bytes).
+ Array18<uint> Unknown0;
+ // Size of the bitstream, used by H264Decoder as the length of the span to read.
+ public uint BitstreamSize;
+ public uint NumSlices;
+ public uint Unknown50;
+ public uint Unknown54;
+ public uint Log2MaxPicOrderCntLsbMinus4;
+ public uint DeltaPicOrderAlwaysZeroFlag;
+ public uint FrameMbsOnlyFlag;
+ // Picture size in macroblocks; H264Decoder multiplies both by 16 to get pixels.
+ public uint PicWidthInMbs;
+ public uint PicHeightInMbs;
+ public uint BlockLayout; // Not supported on T210
+ public uint EntropyCodingModeFlag;
+ public uint PicOrderPresentFlag;
+ public uint NumRefIdxL0ActiveMinus1;
+ public uint NumRefIdxL1ActiveMinus1;
+ public uint DeblockingFilterControlPresentFlag;
+ public uint RedundantPicCntPresentFlag;
+ public uint Transform8x8ModeFlag;
+ public uint LumaPitch;
+ public uint ChromaPitch;
+ public uint Unknown94;
+ public uint LumaSecondFieldOffset;
+ public uint Unknown9C;
+ public uint UnknownA0;
+ public uint ChromaSecondFieldOffset;
+ public uint UnknownA8;
+ public uint UnknownAC;
+ // Packed bitfield, decoded by the properties below:
+ // bit 0 MbAdaptiveFrameFieldFlag bit 1 Direct8x8InferenceFlag
+ // bit 2 WeightedPredFlag bit 3 ConstrainedIntraPredFlag
+ // bit 4 IsReference bit 5 FieldPicFlag
+ // bit 6 BottomFieldFlag bits 8-11 Log2MaxFrameNumMinus4
+ // bits 12-13 ChromaFormatIdc bits 14-15 PicOrderCntType
+ // bits 16-21 PicInitQpMinus26 (signed) bits 22-26 ChromaQpIndexOffset (signed)
+ // bits 27-31 SecondChromaQpIndexOffset (signed)
+ // bits 32-33 WeightedBipredIdc bits 34-40 LumaOutputSurfaceIndex
+ // bits 41-45 ChromaOutputSurfaceIndex bits 46-61 FrameNum
+ public ulong Flags;
+ public Array2<int> FieldOrderCnt;
+ public Array16<ReferenceFrame> RefFrames;
+ public Array6<Array16<byte>> ScalingLists4x4;
+ public Array2<Array64<byte>> ScalingLists8x8;
+ public byte MvcextNumInterViewRefsL0;
+ public byte MvcextNumInterViewRefsL1;
+ public ushort Padding2A2;
+ public uint Unknown2A4;
+ public uint Unknown2A8;
+ public uint Unknown2AC;
+ public Array16<byte> MvcextViewRefMasksL0;
+ public Array16<byte> MvcextViewRefMasksL1;
+ // Second bitfield; only bit 1 (QpprimeYZeroTransformBypassFlag) is decoded here.
+ public uint Flags2;
+ public Array10<uint> Unknown2D4;
+
+ public bool MbAdaptiveFrameFieldFlag => (Flags & (1 << 0)) != 0;
+ public bool Direct8x8InferenceFlag => (Flags & (1 << 1)) != 0;
+ public bool WeightedPredFlag => (Flags & (1 << 2)) != 0;
+ public bool ConstrainedIntraPredFlag => (Flags & (1 << 3)) != 0;
+ public bool IsReference => (Flags & (1 << 4)) != 0;
+ public bool FieldPicFlag => (Flags & (1 << 5)) != 0;
+ public bool BottomFieldFlag => (Flags & (1 << 6)) != 0;
+ public uint Log2MaxFrameNumMinus4 => (uint)(Flags >> 8) & 0xf;
+ public ushort ChromaFormatIdc => (ushort)((Flags >> 12) & 3);
+ public uint PicOrderCntType => (uint)(Flags >> 14) & 3;
+ public int PicInitQpMinus26 => ExtractSx(Flags, 16, 6);
+ public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5);
+ public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5);
+ public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3;
+ // The masks allow values up to 0x7f and 0x1f respectively, which is more
+ // than the 17 entries of the surface offset register arrays indexed with them.
+ public uint LumaOutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f;
+ public uint ChromaOutputSurfaceIndex => (uint)(Flags >> 41) & 0x1f;
+ // The cast to ushort keeps the low 16 bits of the shifted value.
+ public ushort FrameNum => (ushort)(Flags >> 46);
+ public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0;
+
+ // Extracts a signed bitfield of the given length whose least significant bit
+ // is at position lsb. The left shift moves the top bit of the field to bit 63;
+ // the right shift, done on a signed value, then moves the field down to
+ // bit 0 while replicating its sign bit.
+ private static int ExtractSx(ulong packed, int lsb, int length)
+ {
+ return (int)((long)packed << (64 - (lsb + length)) >> (64 - length));
+ }
+
+ // Builds the decoder-facing picture description. Fields with no counterpart
+ // in this structure are set to fixed values (NumRefFrames, the slice group
+ // fields, FmoAsoEnable and FrameType to zero/false, ScalingMatrixPresent to true).
+ public H264PictureInfo Convert()
+ {
+ return new H264PictureInfo()
+ {
+ FieldOrderCnt = FieldOrderCnt,
+ IsReference = IsReference,
+ ChromaFormatIdc = ChromaFormatIdc,
+ FrameNum = FrameNum,
+ FieldPicFlag = FieldPicFlag,
+ BottomFieldFlag = BottomFieldFlag,
+ NumRefFrames = 0,
+ MbAdaptiveFrameFieldFlag = MbAdaptiveFrameFieldFlag,
+ ConstrainedIntraPredFlag = ConstrainedIntraPredFlag,
+ WeightedPredFlag = WeightedPredFlag,
+ WeightedBipredIdc = WeightedBipredIdc,
+ FrameMbsOnlyFlag = FrameMbsOnlyFlag != 0,
+ Transform8x8ModeFlag = Transform8x8ModeFlag != 0,
+ ChromaQpIndexOffset = ChromaQpIndexOffset,
+ SecondChromaQpIndexOffset = SecondChromaQpIndexOffset,
+ PicInitQpMinus26 = PicInitQpMinus26,
+ NumRefIdxL0ActiveMinus1 = NumRefIdxL0ActiveMinus1,
+ NumRefIdxL1ActiveMinus1 = NumRefIdxL1ActiveMinus1,
+ Log2MaxFrameNumMinus4 = Log2MaxFrameNumMinus4,
+ PicOrderCntType = PicOrderCntType,
+ Log2MaxPicOrderCntLsbMinus4 = Log2MaxPicOrderCntLsbMinus4,
+ DeltaPicOrderAlwaysZeroFlag = DeltaPicOrderAlwaysZeroFlag != 0,
+ Direct8x8InferenceFlag = Direct8x8InferenceFlag,
+ EntropyCodingModeFlag = EntropyCodingModeFlag != 0,
+ PicOrderPresentFlag = PicOrderPresentFlag != 0,
+ DeblockingFilterControlPresentFlag = DeblockingFilterControlPresentFlag != 0,
+ RedundantPicCntPresentFlag = RedundantPicCntPresentFlag != 0,
+ NumSliceGroupsMinus1 = 0,
+ SliceGroupMapType = 0,
+ SliceGroupChangeRateMinus1 = 0,
+ FmoAsoEnable = false,
+ ScalingMatrixPresent = true,
+ ScalingLists4x4 = ScalingLists4x4,
+ ScalingLists8x8 = ScalingLists8x8,
+ FrameType = 0,
+ PicWidthInMbsMinus1 = PicWidthInMbs - 1,
+ // The height in macroblocks is halved when FrameMbsOnlyFlag is zero.
+ PicHeightInMapUnitsMinus1 = (PicHeightInMbs >> (FrameMbsOnlyFlag != 0 ? 0 : 1)) - 1,
+ QpprimeYZeroTransformBypassFlag = QpprimeYZeroTransformBypassFlag
+ };
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs
new file mode 100644
index 00000000..5db311ae
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Nvdec.Types.H264
+{
+ // One entry of the PictureInfo.RefFrames array (four uints, 16 bytes).
+ // The meaning of the fields is not known; each name carries the byte offset
+ // of the field inside the entry. The struct exists to keep the layout of
+ // PictureInfo correct.
+ struct ReferenceFrame
+ {
+ public uint Unknown0;
+ public uint Unknown4;
+ public uint Unknown8;
+ public uint UnknownC;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs
new file mode 100644
index 00000000..661e6cdd
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs
@@ -0,0 +1,72 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ // VP9 symbol counters, built from the decoder's Vp9BackwardUpdates.
+ // NOTE(review): presumably this is the layout written to guest memory at
+ // SetVp9BackwardUpdatesOffset - confirm against the VP9 decoder glue code.
+ // The order and size of the fields define the layout, so fields must not
+ // be added, removed or reordered.
+ struct BackwardUpdates
+ {
+ public Array7<Array3<Array2<uint>>> InterModeCounts;
+ public Array4<Array10<uint>> YModeCounts;
+ public Array10<Array10<uint>> UvModeCounts;
+ public Array16<Array4<uint>> PartitionCounts;
+ public Array4<Array3<uint>> SwitchableInterpsCount;
+ public Array4<Array2<uint>> IntraInterCount;
+ public Array5<Array2<uint>> CompInterCount;
+ public Array5<Array2<Array2<uint>>> SingleRefCount;
+ public Array5<Array2<uint>> CompRefCount;
+ public Array2<Array4<uint>> Tx32x32;
+ public Array2<Array3<uint>> Tx16x16;
+ public Array2<Array2<uint>> Tx8x8;
+ public Array3<Array2<uint>> MbSkipCount;
+ public Array4<uint> Joints;
+ public Array2<Array2<uint>> Sign;
+ public Array2<Array11<uint>> Classes;
+ public Array2<Array2<uint>> Class0;
+ public Array2<Array10<Array2<uint>>> Bits;
+ public Array2<Array2<Array4<uint>>> Class0Fp;
+ public Array2<Array4<uint>> Fp;
+ public Array2<Array2<uint>> Class0Hp;
+ public Array2<Array2<uint>> Hp;
+ public Array4<Array2<Array2<Array6<Array6<Array4<uint>>>>>> CoefCounts;
+ public Array4<Array2<Array2<Array6<Array6<uint>>>>> EobCounts;
+
+ // Builds the structure from the counters collected by the decoder.
+ // Every field except InterModeCounts is a direct copy of its counterpart.
+ public BackwardUpdates(ref Vp9BackwardUpdates counts)
+ {
+ // Assigned first so that the field can be indexed in the loop below.
+ InterModeCounts = new Array7<Array3<Array2<uint>>>();
+
+ // The four per-mode counters of each context are rearranged into three
+ // pairs. Each pair holds the count of one mode and the sum of the counts
+ // of the modes that remain after it:
+ // [0] = { mode 2, modes 0 + 1 + 3 }
+ // [1] = { mode 0, modes 1 + 3 }
+ // [2] = { mode 1, mode 3 }
+ // NOTE(review): this looks like per-node branch counts of a binary
+ // decision tree over the inter modes - confirm.
+ for (int i = 0; i < 7; i++)
+ {
+ InterModeCounts[i][0][0] = counts.InterMode[i][2];
+ InterModeCounts[i][0][1] = counts.InterMode[i][0] + counts.InterMode[i][1] + counts.InterMode[i][3];
+ InterModeCounts[i][1][0] = counts.InterMode[i][0];
+ InterModeCounts[i][1][1] = counts.InterMode[i][1] + counts.InterMode[i][3];
+ InterModeCounts[i][2][0] = counts.InterMode[i][1];
+ InterModeCounts[i][2][1] = counts.InterMode[i][3];
+ }
+
+ YModeCounts = counts.YMode;
+ UvModeCounts = counts.UvMode;
+ PartitionCounts = counts.Partition;
+ SwitchableInterpsCount = counts.SwitchableInterp;
+ IntraInterCount = counts.IntraInter;
+ CompInterCount = counts.CompInter;
+ SingleRefCount = counts.SingleRef;
+ CompRefCount = counts.CompRef;
+ Tx32x32 = counts.Tx32x32;
+ Tx16x16 = counts.Tx16x16;
+ Tx8x8 = counts.Tx8x8;
+ MbSkipCount = counts.Skip;
+ Joints = counts.Joints;
+ Sign = counts.Sign;
+ Classes = counts.Classes;
+ Class0 = counts.Class0;
+ Bits = counts.Bits;
+ Class0Fp = counts.Class0Fp;
+ Fp = counts.Fp;
+ Class0Hp = counts.Class0Hp;
+ Hp = counts.Hp;
+ CoefCounts = counts.Coef;
+ EobCounts = counts.EobBranch;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs
new file mode 100644
index 00000000..bc848454
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs
@@ -0,0 +1,139 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct EntropyProbs // Probability tables in a split/padded field layout; Convert() copies them into a Vp9EntropyProbs.
+ {
+ public Array10<Array10<Array8<byte>>> KfYModeProbE0ToE7; // Entries 0..7 of each 9-entry KfYModeProb row (entry 8 lives in KfYModeProbE8).
+ public Array10<Array10<byte>> KfYModeProbE8; // Entry 8 of each KfYModeProb row.
+ public Array3<byte> Padding384; // Not read by Convert. NOTE(review): suffix appears to be the hex byte offset if fields are packed sequentially - confirm.
+ public Array7<byte> SegTreeProbs; // Copied whole to fc.SegTreeProb.
+ public Array3<byte> SegPredProbs; // Copied whole to fc.SegPredProb.
+ public Array15<byte> Padding391; // Not read by Convert.
+ public Array10<Array8<byte>> KfUvModeProbE0ToE7; // Entries 0..7 of each 9-entry KfUvModeProb row.
+ public Array10<byte> KfUvModeProbE8; // Entry 8 of each KfUvModeProb row.
+ public Array6<byte> Padding3FA; // Not read by Convert.
+ public Array7<Array4<byte>> InterModeProb; // 4 bytes stored per row; Convert copies only the first 3.
+ public Array4<byte> IntraInterProb;
+ public Array10<Array8<byte>> UvModeProbE0ToE7; // Entries 0..7 of each 9-entry UvModeProb row.
+ public Array2<Array1<byte>> Tx8x8Prob;
+ public Array2<Array2<byte>> Tx16x16Prob;
+ public Array2<Array3<byte>> Tx32x32Prob;
+ public Array4<byte> YModeProbE8; // Entry 8 of each YModeProb row.
+ public Array4<Array8<byte>> YModeProbE0ToE7; // Entries 0..7 of each 9-entry YModeProb row.
+ public Array16<Array4<byte>> KfPartitionProb; // 4 bytes stored per row; Convert copies only the first 3.
+ public Array16<Array4<byte>> PartitionProb; // 4 bytes stored per row; Convert copies only the first 3.
+ public Array10<byte> UvModeProbE8; // Entry 8 of each UvModeProb row.
+ public Array4<Array2<byte>> SwitchableInterpProb;
+ public Array5<byte> CompInterProb;
+ public Array4<byte> SkipProbs; // 4 bytes stored; Convert copies only indices 0..2.
+ public Array3<byte> Joints;
+ public Array2<byte> Sign;
+ public Array2<Array1<byte>> Class0;
+ public Array2<Array3<byte>> Fp;
+ public Array2<byte> Class0Hp;
+ public Array2<byte> Hp;
+ public Array2<Array10<byte>> Classes;
+ public Array2<Array2<Array3<byte>>> Class0Fp;
+ public Array2<Array10<byte>> Bits;
+ public Array5<Array2<byte>> SingleRefProb;
+ public Array5<byte> CompRefProb;
+ public Array17<byte> Padding58F; // Not read by Convert.
+ public Array4<Array2<Array2<Array6<Array6<Array4<byte>>>>>> CoefProbs; // Innermost dimension stores 4 bytes; Convert copies only the first 3.
+
+ public void Convert(ref Vp9EntropyProbs fc) // Fills fc from this struct: merges the split E0ToE7/E8 tables, skips padding bytes, copies the rest as-is.
+ {
+ for (int i = 0; i < 10; i++)
+ {
+ for (int j = 0; j < 10; j++)
+ {
+ for (int k = 0; k < 9; k++) // 9 entries per destination row.
+ {
+ fc.KfYModeProb[i][j][k] = k < 8 ? KfYModeProbE0ToE7[i][j][k] : KfYModeProbE8[i][j]; // k 0..7 from the E0ToE7 table, k == 8 from the E8 table.
+ }
+ }
+ }
+
+ fc.SegTreeProb = SegTreeProbs;
+ fc.SegPredProb = SegPredProbs;
+
+ for (int i = 0; i < 7; i++)
+ {
+ for (int j = 0; j < 3; j++) // Source rows hold 4 bytes; the 4th is not copied.
+ {
+ fc.InterModeProb[i][j] = InterModeProb[i][j];
+ }
+ }
+
+ fc.IntraInterProb = IntraInterProb;
+
+ for (int i = 0; i < 10; i++)
+ {
+ for (int j = 0; j < 9; j++) // Both UV tables share the same 10x9 shape, so they are merged in one pass.
+ {
+ fc.KfUvModeProb[i][j] = j < 8 ? KfUvModeProbE0ToE7[i][j] : KfUvModeProbE8[i];
+ fc.UvModeProb[i][j] = j < 8 ? UvModeProbE0ToE7[i][j] : UvModeProbE8[i];
+ }
+ }
+
+ fc.Tx8x8Prob = Tx8x8Prob;
+ fc.Tx16x16Prob = Tx16x16Prob;
+ fc.Tx32x32Prob = Tx32x32Prob;
+
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 9; j++)
+ {
+ fc.YModeProb[i][j] = j < 8 ? YModeProbE0ToE7[i][j] : YModeProbE8[i]; // Same E0ToE7/E8 merge as above.
+ }
+ }
+
+ for (int i = 0; i < 16; i++)
+ {
+ for (int j = 0; j < 3; j++) // Source rows hold 4 bytes; the 4th is not copied.
+ {
+ fc.KfPartitionProb[i][j] = KfPartitionProb[i][j];
+ fc.PartitionProb[i][j] = PartitionProb[i][j];
+ }
+ }
+
+ fc.SwitchableInterpProb = SwitchableInterpProb;
+ fc.CompInterProb = CompInterProb;
+ fc.SkipProb[0] = SkipProbs[0]; // Only 3 of the 4 stored skip bytes are copied (indices 0..2).
+ fc.SkipProb[1] = SkipProbs[1];
+ fc.SkipProb[2] = SkipProbs[2];
+ fc.Joints = Joints;
+ fc.Sign = Sign;
+ fc.Class0 = Class0;
+ fc.Fp = Fp;
+ fc.Class0Hp = Class0Hp;
+ fc.Hp = Hp;
+ fc.Classes = Classes;
+ fc.Class0Fp = Class0Fp;
+ fc.Bits = Bits;
+ fc.SingleRefProb = SingleRefProb;
+ fc.CompRefProb = CompRefProb;
+
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 2; j++)
+ {
+ for (int k = 0; k < 2; k++)
+ {
+ for (int l = 0; l < 6; l++)
+ {
+ for (int m = 0; m < 6; m++)
+ {
+ for (int n = 0; n < 3; n++) // Innermost source dimension is 4 wide; only the first 3 bytes are copied.
+ {
+ fc.CoefProbs[i][j][k][l][m][n] = CoefProbs[i][j][k][l][m][n];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs
new file mode 100644
index 00000000..88f1ac20
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ enum FrameFlags : uint // Single-bit flags stored in PictureInfo.Flags and tested there with HasFlag.
+ {
+ IsKeyFrame = 1 << 0, // Bit 0.
+ LastFrameIsKeyFrame = 1 << 1, // Bit 1.
+ FrameSizeChanged = 1 << 2, // Bit 2.
+ ErrorResilientMode = 1 << 3, // Bit 3.
+ LastShowFrame = 1 << 4, // Bit 4.
+ IntraOnly = 1 << 5 // Bit 5.
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs
new file mode 100644
index 00000000..70988b48
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct FrameSize // Four 16-bit values; PictureInfo embeds one each for the last, golden, alt and current frame.
+ {
+ public ushort Width;
+ public ushort Height;
+ public ushort LumaPitch; // Presumably the row pitch of the luma plane; units are not shown here - TODO confirm.
+ public ushort ChromaPitch; // Presumably the row pitch of the chroma plane; units are not shown here - TODO confirm.
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs
new file mode 100644
index 00000000..3a3d4762
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct FrameStats // Fourteen consecutive 32-bit status/counter words; the Unknown* fields have no established meaning here.
+ {
+ public uint Unknown0; // NOTE(review): Unknown* suffixes match the hex byte offset if the fields are packed sequentially - confirm.
+ public uint Unknown4;
+ public uint Pass2CycleCount;
+ public uint ErrorStatus;
+ public uint FrameStatusIntraCnt;
+ public uint FrameStatusInterCnt;
+ public uint FrameStatusSkipCtuCount;
+ public uint FrameStatusFwdMvxCnt;
+ public uint FrameStatusFwdMvyCnt;
+ public uint FrameStatusBwdMvxCnt;
+ public uint FrameStatusBwdMvyCnt;
+ public uint ErrorCtbPos;
+ public uint ErrorSlicePos;
+ public uint Unknown34;
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs
new file mode 100644
index 00000000..d8d5ab20
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs
@@ -0,0 +1,11 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct LoopFilter // Loop filter delta settings embedded in PictureInfo as the Lf field.
+ {
+ public byte ModeRefDeltaEnabled; // Byte flag; PictureInfo.Convert treats any non-zero value as true.
+ public Array4<sbyte> RefDeltas; // Passed through unchanged by PictureInfo.Convert.
+ public Array2<sbyte> ModeDeltas; // Passed through unchanged by PictureInfo.Convert.
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs
new file mode 100644
index 00000000..f1f9e2f1
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs
@@ -0,0 +1,85 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct PictureInfo // Raw per-picture parameter block; Convert() translates it into a Vp9PictureInfo.
+ {
+ public Array12<uint> Unknown0; // NOTE(review): Unknown*/Reserved*/Padding* suffixes match the hex byte offset if fields are packed sequentially - confirm.
+ public uint BitstreamSize;
+ public uint IsEncrypted;
+ public uint Unknown38;
+ public uint Reserved3C;
+ public uint BlockLayout; // Not supported on T210
+ public uint WorkBufferSizeShr8;
+ public FrameSize LastFrameSize;
+ public FrameSize GoldenFrameSize;
+ public FrameSize AltFrameSize;
+ public FrameSize CurrentFrameSize;
+ public FrameFlags Flags; // Bit flags; decoded into booleans by Convert.
+ public Array4<sbyte> RefFrameSignBias;
+ public byte FirstLevel; // Not read by Convert.
+ public byte SharpnessLevel; // Not read by Convert.
+ public byte BaseQIndex;
+ public byte YDcDeltaQ;
+ public byte UvAcDeltaQ;
+ public byte UvDcDeltaQ;
+ public byte Lossless; // Byte flag; non-zero means true in Convert.
+ public byte TxMode;
+ public byte AllowHighPrecisionMv; // Byte flag; non-zero means true in Convert.
+ public byte InterpFilter;
+ public byte ReferenceMode;
+ public sbyte CompFixedRef;
+ public Array2<sbyte> CompVarRef;
+ public byte Log2TileCols;
+ public byte Log2TileRows;
+ public Segmentation Seg;
+ public LoopFilter Lf;
+ public byte PaddingEB; // Not read by Convert.
+ public uint WorkBufferSizeShr8New; // Not supported on T210
+ public uint SurfaceParams; // Not supported on T210
+ public uint UnknownF4;
+ public uint UnknownF8;
+ public uint UnknownFC;
+
+ public uint BitDepth => (SurfaceParams >> 1) & 0xf; // 4-bit field taken from bits 1..4 of SurfaceParams.
+
+ public Vp9PictureInfo Convert() // Builds a Vp9PictureInfo from the raw fields; byte flags become bools, arrays are passed through.
+ {
+ return new Vp9PictureInfo()
+ {
+ IsKeyFrame = Flags.HasFlag(FrameFlags.IsKeyFrame),
+ IntraOnly = Flags.HasFlag(FrameFlags.IntraOnly),
+ UsePrevInFindMvRefs = // True only when LastShowFrame is set and none of the four flags negated below are set.
+ !Flags.HasFlag(FrameFlags.ErrorResilientMode) &&
+ !Flags.HasFlag(FrameFlags.FrameSizeChanged) &&
+ !Flags.HasFlag(FrameFlags.IntraOnly) &&
+ Flags.HasFlag(FrameFlags.LastShowFrame) &&
+ !Flags.HasFlag(FrameFlags.LastFrameIsKeyFrame),
+ RefFrameSignBias = RefFrameSignBias,
+ BaseQIndex = BaseQIndex,
+ YDcDeltaQ = YDcDeltaQ, // NOTE(review): the three *DeltaQ sources are unsigned bytes; confirm the target does not expect sign-extended deltas.
+ UvDcDeltaQ = UvDcDeltaQ,
+ UvAcDeltaQ = UvAcDeltaQ,
+ Lossless = Lossless != 0,
+ TransformMode = TxMode,
+ AllowHighPrecisionMv = AllowHighPrecisionMv != 0,
+ InterpFilter = InterpFilter,
+ ReferenceMode = ReferenceMode,
+ CompFixedRef = CompFixedRef,
+ CompVarRef = CompVarRef,
+ Log2TileCols = Log2TileCols,
+ Log2TileRows = Log2TileRows,
+ SegmentEnabled = Seg.Enabled != 0,
+ SegmentMapUpdate = Seg.UpdateMap != 0,
+ SegmentMapTemporalUpdate = Seg.TemporalUpdate != 0,
+ SegmentAbsDelta = Seg.AbsDelta, // Passed as the raw byte, unlike the other Seg flags which are compared against 0.
+ SegmentFeatureEnable = Seg.FeatureMask,
+ SegmentFeatureData = Seg.FeatureData,
+ ModeRefDeltaEnabled = Lf.ModeRefDeltaEnabled != 0,
+ RefDeltas = Lf.RefDeltas,
+ ModeDeltas = Lf.ModeDeltas
+ };
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs
new file mode 100644
index 00000000..ed62293d
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs
@@ -0,0 +1,14 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct Segmentation // Segmentation parameters embedded in PictureInfo as the Seg field.
+ {
+ public byte Enabled; // Byte flag; PictureInfo.Convert treats any non-zero value as true.
+ public byte UpdateMap; // Byte flag; non-zero means true in PictureInfo.Convert.
+ public byte TemporalUpdate; // Byte flag; non-zero means true in PictureInfo.Convert.
+ public byte AbsDelta; // Passed to Vp9PictureInfo.SegmentAbsDelta as the raw byte.
+ public Array8<uint> FeatureMask; // Assigned to Vp9PictureInfo.SegmentFeatureEnable.
+ public Array8<Array4<short>> FeatureData; // Assigned to Vp9PictureInfo.SegmentFeatureData.
+ }
+}
diff --git a/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs
deleted file mode 100644
index db2d39e5..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- class BitStreamWriter
- {
- private const int BufferSize = 8;
-
- private Stream _baseStream;
-
- private int _buffer;
- private int _bufferPos;
-
- public BitStreamWriter(Stream baseStream)
- {
- _baseStream = baseStream;
- }
-
- public void WriteBit(bool value)
- {
- WriteBits(value ? 1 : 0, 1);
- }
-
- public void WriteBits(int value, int valueSize)
- {
- int valuePos = 0;
-
- int remaining = valueSize;
-
- while (remaining > 0)
- {
- int copySize = remaining;
-
- int free = GetFreeBufferBits();
-
- if (copySize > free)
- {
- copySize = free;
- }
-
- int mask = (1 << copySize) - 1;
-
- int srcShift = (valueSize - valuePos) - copySize;
- int dstShift = (BufferSize - _bufferPos) - copySize;
-
- _buffer |= ((value >> srcShift) & mask) << dstShift;
-
- valuePos += copySize;
- _bufferPos += copySize;
- remaining -= copySize;
- }
- }
-
- private int GetFreeBufferBits()
- {
- if (_bufferPos == BufferSize)
- {
- Flush();
- }
-
- return BufferSize - _bufferPos;
- }
-
- public void Flush()
- {
- if (_bufferPos != 0)
- {
- _baseStream.WriteByte((byte)_buffer);
-
- _buffer = 0;
- _bufferPos = 0;
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs b/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs
deleted file mode 100644
index 4f17d8d1..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs
+++ /dev/null
@@ -1,17 +0,0 @@
-using System;
-
-namespace Ryujinx.Graphics.VDec
-{
- static class DecoderHelper
- {
- public static byte[] Combine(byte[] arr0, byte[] arr1)
- {
- byte[] output = new byte[arr0.Length + arr1.Length];
-
- Buffer.BlockCopy(arr0, 0, output, 0, arr0.Length);
- Buffer.BlockCopy(arr1, 0, output, arr0.Length, arr1.Length);
-
- return output;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs
deleted file mode 100644
index ccd01f0d..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs
+++ /dev/null
@@ -1,168 +0,0 @@
-using FFmpeg.AutoGen;
-using System;
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.VDec
-{
- static unsafe class FFmpegWrapper
- {
- private static AVCodec* _codec;
- private static AVCodecContext* _context;
- private static AVFrame* _frame;
- private static SwsContext* _scalerCtx;
-
- private static int _scalerWidth;
- private static int _scalerHeight;
-
- public static bool IsInitialized { get; private set; }
-
- public static void H264Initialize()
- {
- EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_H264);
- }
-
- public static void Vp9Initialize()
- {
- EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_VP9);
- }
-
- private static void EnsureCodecInitialized(AVCodecID codecId)
- {
- if (IsInitialized)
- {
- Uninitialize();
- }
-
- _codec = ffmpeg.avcodec_find_decoder(codecId);
- _context = ffmpeg.avcodec_alloc_context3(_codec);
- _frame = ffmpeg.av_frame_alloc();
-
- ffmpeg.avcodec_open2(_context, _codec, null);
-
- IsInitialized = true;
- }
-
- public static int DecodeFrame(byte[] data)
- {
- if (!IsInitialized)
- {
- throw new InvalidOperationException("Tried to use uninitialized codec!");
- }
-
- AVPacket packet;
-
- ffmpeg.av_init_packet(&packet);
-
- fixed (byte* ptr = data)
- {
- packet.data = ptr;
- packet.size = data.Length;
-
- ffmpeg.avcodec_send_packet(_context, &packet);
- }
-
- return ffmpeg.avcodec_receive_frame(_context, _frame);
- }
-
- public static FFmpegFrame GetFrame()
- {
- if (!IsInitialized)
- {
- throw new InvalidOperationException("Tried to use uninitialized codec!");
- }
-
- AVFrame managedFrame = Marshal.PtrToStructure<AVFrame>((IntPtr)_frame);
-
- byte*[] data = managedFrame.data.ToArray();
-
- return new FFmpegFrame()
- {
- Width = managedFrame.width,
- Height = managedFrame.height,
-
- LumaPtr = data[0],
- ChromaBPtr = data[1],
- ChromaRPtr = data[2]
- };
- }
-
- public static FFmpegFrame GetFrameRgba()
- {
- if (!IsInitialized)
- {
- throw new InvalidOperationException("Tried to use uninitialized codec!");
- }
-
- AVFrame managedFrame = Marshal.PtrToStructure<AVFrame>((IntPtr)_frame);
-
- EnsureScalerSetup(managedFrame.width, managedFrame.height);
-
- byte*[] data = managedFrame.data.ToArray();
-
- int[] lineSizes = managedFrame.linesize.ToArray();
-
- byte[] dst = new byte[managedFrame.width * managedFrame.height * 4];
-
- fixed (byte* ptr = dst)
- {
- byte*[] dstData = new byte*[] { ptr };
-
- int[] dstLineSizes = new int[] { managedFrame.width * 4 };
-
- ffmpeg.sws_scale(_scalerCtx, data, lineSizes, 0, managedFrame.height, dstData, dstLineSizes);
- }
-
- return new FFmpegFrame()
- {
- Width = managedFrame.width,
- Height = managedFrame.height,
-
- Data = dst
- };
- }
-
- private static void EnsureScalerSetup(int width, int height)
- {
- if (width == 0 || height == 0)
- {
- return;
- }
-
- if (_scalerCtx == null || _scalerWidth != width || _scalerHeight != height)
- {
- FreeScaler();
-
- _scalerCtx = ffmpeg.sws_getContext(
- width, height, AVPixelFormat.AV_PIX_FMT_YUV420P,
- width, height, AVPixelFormat.AV_PIX_FMT_RGBA, 0, null, null, null);
-
- _scalerWidth = width;
- _scalerHeight = height;
- }
- }
-
- public static void Uninitialize()
- {
- if (IsInitialized)
- {
- ffmpeg.av_frame_unref(_frame);
- ffmpeg.av_free(_frame);
- ffmpeg.avcodec_close(_context);
-
- FreeScaler();
-
- IsInitialized = false;
- }
- }
-
- private static void FreeScaler()
- {
- if (_scalerCtx != null)
- {
- ffmpeg.sws_freeContext(_scalerCtx);
-
- _scalerCtx = null;
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs
deleted file mode 100644
index 535a70c9..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- unsafe struct FFmpegFrame
- {
- public int Width;
- public int Height;
-
- public byte* LumaPtr;
- public byte* ChromaBPtr;
- public byte* ChromaRPtr;
-
- public byte[] Data;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs
deleted file mode 100644
index b4fad59b..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs
+++ /dev/null
@@ -1,79 +0,0 @@
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- class H264BitStreamWriter : BitStreamWriter
- {
- public H264BitStreamWriter(Stream baseStream) : base(baseStream) { }
-
- public void WriteU(int value, int valueSize)
- {
- WriteBits(value, valueSize);
- }
-
- public void WriteSe(int value)
- {
- WriteExpGolombCodedInt(value);
- }
-
- public void WriteUe(int value)
- {
- WriteExpGolombCodedUInt((uint)value);
- }
-
- public void End()
- {
- WriteBit(true);
-
- Flush();
- }
-
- private void WriteExpGolombCodedInt(int value)
- {
- int sign = value <= 0 ? 0 : 1;
-
- if (value < 0)
- {
- value = -value;
- }
-
- value = (value << 1) - sign;
-
- WriteExpGolombCodedUInt((uint)value);
- }
-
- private void WriteExpGolombCodedUInt(uint value)
- {
- int size = 32 - CountLeadingZeros((int)value + 1);
-
- WriteBits(1, size);
-
- value -= (1u << (size - 1)) - 1;
-
- WriteBits((int)value, size - 1);
- }
-
- private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
-
- private static int CountLeadingZeros(int value)
- {
- if (value == 0)
- {
- return 32;
- }
-
- int nibbleIdx = 32;
- int preCount, count = 0;
-
- do
- {
- nibbleIdx -= 4;
- preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111];
- count += preCount;
- }
- while (preCount == 4);
-
- return count;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs
deleted file mode 100644
index 24c7e0b9..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs
+++ /dev/null
@@ -1,238 +0,0 @@
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- class H264Decoder
- {
- private int _log2MaxPicOrderCntLsbMinus4;
- private bool _deltaPicOrderAlwaysZeroFlag;
- private bool _frameMbsOnlyFlag;
- private int _picWidthInMbs;
- private int _picHeightInMapUnits;
- private bool _entropyCodingModeFlag;
- private bool _bottomFieldPicOrderInFramePresentFlag;
- private int _numRefIdxL0DefaultActiveMinus1;
- private int _numRefIdxL1DefaultActiveMinus1;
- private bool _deblockingFilterControlPresentFlag;
- private bool _redundantPicCntPresentFlag;
- private bool _transform8x8ModeFlag;
- private bool _mbAdaptiveFrameFieldFlag;
- private bool _direct8x8InferenceFlag;
- private bool _weightedPredFlag;
- private bool _constrainedIntraPredFlag;
- private bool _fieldPicFlag;
- private bool _bottomFieldFlag;
- private int _log2MaxFrameNumMinus4;
- private int _chromaFormatIdc;
- private int _picOrderCntType;
- private int _picInitQpMinus26;
- private int _chromaQpIndexOffset;
- private int _chromaQpIndexOffset2;
- private int _weightedBipredIdc;
- private int _frameNumber;
- private byte[] _scalingMatrix4;
- private byte[] _scalingMatrix8;
-
- public void Decode(H264ParameterSets Params, H264Matrices matrices, byte[] frameData)
- {
- _log2MaxPicOrderCntLsbMinus4 = Params.Log2MaxPicOrderCntLsbMinus4;
- _deltaPicOrderAlwaysZeroFlag = Params.DeltaPicOrderAlwaysZeroFlag;
- _frameMbsOnlyFlag = Params.FrameMbsOnlyFlag;
- _picWidthInMbs = Params.PicWidthInMbs;
- _picHeightInMapUnits = Params.PicHeightInMapUnits;
- _entropyCodingModeFlag = Params.EntropyCodingModeFlag;
- _bottomFieldPicOrderInFramePresentFlag = Params.BottomFieldPicOrderInFramePresentFlag;
- _numRefIdxL0DefaultActiveMinus1 = Params.NumRefIdxL0DefaultActiveMinus1;
- _numRefIdxL1DefaultActiveMinus1 = Params.NumRefIdxL1DefaultActiveMinus1;
- _deblockingFilterControlPresentFlag = Params.DeblockingFilterControlPresentFlag;
- _redundantPicCntPresentFlag = Params.RedundantPicCntPresentFlag;
- _transform8x8ModeFlag = Params.Transform8x8ModeFlag;
-
- _mbAdaptiveFrameFieldFlag = ((Params.Flags >> 0) & 1) != 0;
- _direct8x8InferenceFlag = ((Params.Flags >> 1) & 1) != 0;
- _weightedPredFlag = ((Params.Flags >> 2) & 1) != 0;
- _constrainedIntraPredFlag = ((Params.Flags >> 3) & 1) != 0;
- _fieldPicFlag = ((Params.Flags >> 5) & 1) != 0;
- _bottomFieldFlag = ((Params.Flags >> 6) & 1) != 0;
-
- _log2MaxFrameNumMinus4 = (int)(Params.Flags >> 8) & 0xf;
- _chromaFormatIdc = (int)(Params.Flags >> 12) & 0x3;
- _picOrderCntType = (int)(Params.Flags >> 14) & 0x3;
- _picInitQpMinus26 = (int)(Params.Flags >> 16) & 0x3f;
- _chromaQpIndexOffset = (int)(Params.Flags >> 22) & 0x1f;
- _chromaQpIndexOffset2 = (int)(Params.Flags >> 27) & 0x1f;
- _weightedBipredIdc = (int)(Params.Flags >> 32) & 0x3;
- _frameNumber = (int)(Params.Flags >> 46) & 0x1ffff;
-
- _picInitQpMinus26 = (_picInitQpMinus26 << 26) >> 26;
- _chromaQpIndexOffset = (_chromaQpIndexOffset << 27) >> 27;
- _chromaQpIndexOffset2 = (_chromaQpIndexOffset2 << 27) >> 27;
-
- _scalingMatrix4 = matrices.ScalingMatrix4;
- _scalingMatrix8 = matrices.ScalingMatrix8;
-
- if (FFmpegWrapper.IsInitialized)
- {
- FFmpegWrapper.DecodeFrame(frameData);
- }
- else
- {
- FFmpegWrapper.H264Initialize();
-
- FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(EncodeHeader(), frameData));
- }
- }
-
- private byte[] EncodeHeader()
- {
- using (MemoryStream data = new MemoryStream())
- {
- H264BitStreamWriter writer = new H264BitStreamWriter(data);
-
- // Sequence Parameter Set.
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(7, 5);
- writer.WriteU(100, 8);
- writer.WriteU(0, 8);
- writer.WriteU(31, 8);
- writer.WriteUe(0);
- writer.WriteUe(_chromaFormatIdc);
-
- if (_chromaFormatIdc == 3)
- {
- writer.WriteBit(false);
- }
-
- writer.WriteUe(0);
- writer.WriteUe(0);
- writer.WriteBit(false);
- writer.WriteBit(false); //Scaling matrix present flag
-
- writer.WriteUe(_log2MaxFrameNumMinus4);
- writer.WriteUe(_picOrderCntType);
-
- if (_picOrderCntType == 0)
- {
- writer.WriteUe(_log2MaxPicOrderCntLsbMinus4);
- }
- else if (_picOrderCntType == 1)
- {
- writer.WriteBit(_deltaPicOrderAlwaysZeroFlag);
-
- writer.WriteSe(0);
- writer.WriteSe(0);
- writer.WriteUe(0);
- }
-
- int picHeightInMbs = _picHeightInMapUnits / (_frameMbsOnlyFlag ? 1 : 2);
-
- writer.WriteUe(16);
- writer.WriteBit(false);
- writer.WriteUe(_picWidthInMbs - 1);
- writer.WriteUe(picHeightInMbs - 1);
- writer.WriteBit(_frameMbsOnlyFlag);
-
- if (!_frameMbsOnlyFlag)
- {
- writer.WriteBit(_mbAdaptiveFrameFieldFlag);
- }
-
- writer.WriteBit(_direct8x8InferenceFlag);
- writer.WriteBit(false); //Frame cropping flag
- writer.WriteBit(false); //VUI parameter present flag
-
- writer.End();
-
- // Picture Parameter Set.
- writer.WriteU(1, 24);
- writer.WriteU(0, 1);
- writer.WriteU(3, 2);
- writer.WriteU(8, 5);
-
- writer.WriteUe(0);
- writer.WriteUe(0);
-
- writer.WriteBit(_entropyCodingModeFlag);
- writer.WriteBit(false);
- writer.WriteUe(0);
- writer.WriteUe(_numRefIdxL0DefaultActiveMinus1);
- writer.WriteUe(_numRefIdxL1DefaultActiveMinus1);
- writer.WriteBit(_weightedPredFlag);
- writer.WriteU(_weightedBipredIdc, 2);
- writer.WriteSe(_picInitQpMinus26);
- writer.WriteSe(0);
- writer.WriteSe(_chromaQpIndexOffset);
- writer.WriteBit(_deblockingFilterControlPresentFlag);
- writer.WriteBit(_constrainedIntraPredFlag);
- writer.WriteBit(_redundantPicCntPresentFlag);
- writer.WriteBit(_transform8x8ModeFlag);
-
- writer.WriteBit(true);
-
- for (int index = 0; index < 6; index++)
- {
- writer.WriteBit(true);
-
- WriteScalingList(writer, _scalingMatrix4, index * 16, 16);
- }
-
- if (_transform8x8ModeFlag)
- {
- for (int index = 0; index < 2; index++)
- {
- writer.WriteBit(true);
-
- WriteScalingList(writer, _scalingMatrix8, index * 64, 64);
- }
- }
-
- writer.WriteSe(_chromaQpIndexOffset2);
-
- writer.End();
-
- return data.ToArray();
- }
- }
-
- // ZigZag LUTs from libavcodec.
- private static readonly byte[] ZigZagDirect = new byte[]
- {
- 0, 1, 8, 16, 9, 2, 3, 10,
- 17, 24, 32, 25, 18, 11, 4, 5,
- 12, 19, 26, 33, 40, 48, 41, 34,
- 27, 20, 13, 6, 7, 14, 21, 28,
- 35, 42, 49, 56, 57, 50, 43, 36,
- 29, 22, 15, 23, 30, 37, 44, 51,
- 58, 59, 52, 45, 38, 31, 39, 46,
- 53, 60, 61, 54, 47, 55, 62, 63
- };
-
- private static readonly byte[] ZigZagScan = new byte[]
- {
- 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
- 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
- 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
- 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
- };
-
- private static void WriteScalingList(H264BitStreamWriter writer, byte[] list, int start, int count)
- {
- byte[] scan = count == 16 ? ZigZagScan : ZigZagDirect;
-
- int lastScale = 8;
-
- for (int index = 0; index < count; index++)
- {
- byte value = list[start + scan[index]];
-
- int deltaScale = value - lastScale;
-
- writer.WriteSe(deltaScale);
-
- lastScale = value;
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs
deleted file mode 100644
index a1524214..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- struct H264Matrices
- {
- public byte[] ScalingMatrix4;
- public byte[] ScalingMatrix8;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs b/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs
deleted file mode 100644
index f242f0f2..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs
+++ /dev/null
@@ -1,34 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.VDec
-{
- [StructLayout(LayoutKind.Sequential, Pack = 4)]
- struct H264ParameterSets
- {
- public int Log2MaxPicOrderCntLsbMinus4;
- public bool DeltaPicOrderAlwaysZeroFlag;
- public bool FrameMbsOnlyFlag;
- public int PicWidthInMbs;
- public int PicHeightInMapUnits;
- public int Reserved6C;
- public bool EntropyCodingModeFlag;
- public bool BottomFieldPicOrderInFramePresentFlag;
- public int NumRefIdxL0DefaultActiveMinus1;
- public int NumRefIdxL1DefaultActiveMinus1;
- public bool DeblockingFilterControlPresentFlag;
- public bool RedundantPicCntPresentFlag;
- public bool Transform8x8ModeFlag;
- public int Unknown8C;
- public int Unknown90;
- public int Reserved94;
- public int Unknown98;
- public int Reserved9C;
- public int ReservedA0;
- public int UnknownA4;
- public int ReservedA8;
- public int UnknownAC;
- public long Flags;
- public int FrameNumber;
- public int FrameNumber2;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs
deleted file mode 100644
index f031919d..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- enum VideoCodec
- {
- H264 = 3,
- Vp8 = 5,
- H265 = 7,
- Vp9 = 9
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs
deleted file mode 100644
index 9afc9485..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs
+++ /dev/null
@@ -1,281 +0,0 @@
-using Ryujinx.Graphics.Gpu;
-using Ryujinx.Graphics.Gpu.Memory;
-using Ryujinx.Graphics.Vic;
-using System;
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.VDec
-{
- unsafe class VideoDecoder
- {
- private H264Decoder _h264Decoder;
- private Vp9Decoder _vp9Decoder;
-
- private VideoCodec _currentVideoCodec;
-
- private ulong _decoderContextAddress;
- private ulong _frameDataAddress;
- private ulong _vpxCurrLumaAddress;
- private ulong _vpxRef0LumaAddress;
- private ulong _vpxRef1LumaAddress;
- private ulong _vpxRef2LumaAddress;
- private ulong _vpxCurrChromaAddress;
- private ulong _vpxRef0ChromaAddress;
- private ulong _vpxRef1ChromaAddress;
- private ulong _vpxRef2ChromaAddress;
- private ulong _vpxProbTablesAddress;
-
- public VideoDecoder()
- {
- _h264Decoder = new H264Decoder();
- _vp9Decoder = new Vp9Decoder();
- }
-
- public void Process(GpuContext gpu, int methodOffset, int[] arguments)
- {
- VideoDecoderMeth method = (VideoDecoderMeth)methodOffset;
-
- switch (method)
- {
- case VideoDecoderMeth.SetVideoCodec: SetVideoCodec(arguments); break;
- case VideoDecoderMeth.Execute: Execute(gpu); break;
- case VideoDecoderMeth.SetDecoderCtxAddr: SetDecoderCtxAddr(arguments); break;
- case VideoDecoderMeth.SetFrameDataAddr: SetFrameDataAddr(arguments); break;
- case VideoDecoderMeth.SetVpxCurrLumaAddr: SetVpxCurrLumaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef0LumaAddr: SetVpxRef0LumaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef1LumaAddr: SetVpxRef1LumaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef2LumaAddr: SetVpxRef2LumaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxCurrChromaAddr: SetVpxCurrChromaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef0ChromaAddr: SetVpxRef0ChromaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef1ChromaAddr: SetVpxRef1ChromaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxRef2ChromaAddr: SetVpxRef2ChromaAddr(arguments); break;
- case VideoDecoderMeth.SetVpxProbTablesAddr: SetVpxProbTablesAddr(arguments); break;
- }
- }
-
- private void SetVideoCodec(int[] arguments)
- {
- _currentVideoCodec = (VideoCodec)arguments[0];
- }
-
- private void Execute(GpuContext gpu)
- {
- if (_currentVideoCodec == VideoCodec.H264)
- {
- int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x48);
-
- H264ParameterSets Params = gpu.MemoryAccessor.Read<H264ParameterSets>(_decoderContextAddress + 0x58);
-
- H264Matrices matrices = new H264Matrices()
- {
- ScalingMatrix4 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x1c0, 6 * 16),
- ScalingMatrix8 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x220, 2 * 64)
- };
-
- byte[] frameData = gpu.MemoryAccessor.ReadBytes(_frameDataAddress, frameDataSize);
-
- _h264Decoder.Decode(Params, matrices, frameData);
- }
- else if (_currentVideoCodec == VideoCodec.Vp9)
- {
- int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x30);
-
- Vp9FrameKeys keys = new Vp9FrameKeys()
- {
- CurrKey = (long)gpu.MemoryManager.Translate(_vpxCurrLumaAddress),
- Ref0Key = (long)gpu.MemoryManager.Translate(_vpxRef0LumaAddress),
- Ref1Key = (long)gpu.MemoryManager.Translate(_vpxRef1LumaAddress),
- Ref2Key = (long)gpu.MemoryManager.Translate(_vpxRef2LumaAddress)
- };
-
- Vp9FrameHeader header = ReadStruct<Vp9FrameHeader>(gpu.MemoryAccessor, _decoderContextAddress + 0x48);
-
- Vp9ProbabilityTables probs = new Vp9ProbabilityTables()
- {
- SegmentationTreeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x387, 0x7),
- SegmentationPredProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x38e, 0x3),
- Tx8x8Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x470, 0x2),
- Tx16x16Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x472, 0x4),
- Tx32x32Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x476, 0x6),
- CoefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x5a0, 0x900),
- SkipProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x537, 0x3),
- InterModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x400, 0x1c),
- InterpFilterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x52a, 0x8),
- IsInterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x41c, 0x4),
- CompModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x532, 0x5),
- SingleRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x580, 0xa),
- CompRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x58a, 0x5),
- YModeProbs0 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x480, 0x20),
- YModeProbs1 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x47c, 0x4),
- PartitionProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x4e0, 0x40),
- MvJointProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53b, 0x3),
- MvSignProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53e, 0x3),
- MvClassProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54c, 0x14),
- MvClass0BitProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x540, 0x3),
- MvBitsProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x56c, 0x14),
- MvClass0FrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x560, 0xc),
- MvFrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x542, 0x6),
- MvClass0HpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x548, 0x2),
- MvHpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54a, 0x2)
- };
-
- byte[] frameData = gpu.MemoryAccessor.ReadBytes(_frameDataAddress, frameDataSize);
-
- _vp9Decoder.Decode(keys, header, probs, frameData);
- }
- else
- {
- ThrowUnimplementedCodec();
- }
- }
-
- private T ReadStruct<T>(MemoryAccessor accessor, ulong address) where T : struct
- {
- byte[] data = accessor.ReadBytes(address, Marshal.SizeOf<T>());
-
- unsafe
- {
- fixed (byte* ptr = data)
- {
- return Marshal.PtrToStructure<T>((IntPtr)ptr);
- }
- }
- }
-
- private void SetDecoderCtxAddr(int[] arguments)
- {
- _decoderContextAddress = GetAddress(arguments);
- }
-
- private void SetFrameDataAddr(int[] arguments)
- {
- _frameDataAddress = GetAddress(arguments);
- }
-
- private void SetVpxCurrLumaAddr(int[] arguments)
- {
- _vpxCurrLumaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef0LumaAddr(int[] arguments)
- {
- _vpxRef0LumaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef1LumaAddr(int[] arguments)
- {
- _vpxRef1LumaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef2LumaAddr(int[] arguments)
- {
- _vpxRef2LumaAddress = GetAddress(arguments);
- }
-
- private void SetVpxCurrChromaAddr(int[] arguments)
- {
- _vpxCurrChromaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef0ChromaAddr(int[] arguments)
- {
- _vpxRef0ChromaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef1ChromaAddr(int[] arguments)
- {
- _vpxRef1ChromaAddress = GetAddress(arguments);
- }
-
- private void SetVpxRef2ChromaAddr(int[] arguments)
- {
- _vpxRef2ChromaAddress = GetAddress(arguments);
- }
-
- private void SetVpxProbTablesAddr(int[] arguments)
- {
- _vpxProbTablesAddress = GetAddress(arguments);
- }
-
- private static ulong GetAddress(int[] arguments)
- {
- return (ulong)(uint)arguments[0] << 8;
- }
-
- internal void CopyPlanes(GpuContext gpu, SurfaceOutputConfig outputConfig)
- {
- switch (outputConfig.PixelFormat)
- {
- case SurfacePixelFormat.Rgba8: CopyPlanesRgba8 (gpu, outputConfig); break;
- case SurfacePixelFormat.Yuv420P: CopyPlanesYuv420P(gpu, outputConfig); break;
-
- default: ThrowUnimplementedPixelFormat(outputConfig.PixelFormat); break;
- }
- }
-
- private void CopyPlanesRgba8(GpuContext gpu, SurfaceOutputConfig outputConfig)
- {
- FFmpegFrame frame = FFmpegWrapper.GetFrameRgba();
-
- if ((frame.Width | frame.Height) == 0)
- {
- return;
- }
-
- throw new NotImplementedException();
- }
-
- private void CopyPlanesYuv420P(GpuContext gpu, SurfaceOutputConfig outputConfig)
- {
- FFmpegFrame frame = FFmpegWrapper.GetFrame();
-
- if ((frame.Width | frame.Height) == 0)
- {
- return;
- }
-
- int halfSrcWidth = frame.Width / 2;
-
- int halfWidth = frame.Width / 2;
- int halfHeight = frame.Height / 2;
-
- int alignedWidth = (outputConfig.SurfaceWidth + 0xff) & ~0xff;
-
- for (int y = 0; y < frame.Height; y++)
- {
- int src = y * frame.Width;
- int dst = y * alignedWidth;
-
- int size = frame.Width;
-
- for (int offset = 0; offset < size; offset++)
- {
- gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceLumaAddress + (ulong)dst + (ulong)offset, *(frame.LumaPtr + src + offset));
- }
- }
-
- // Copy chroma data from both channels with interleaving.
- for (int y = 0; y < halfHeight; y++)
- {
- int src = y * halfSrcWidth;
- int dst = y * alignedWidth;
-
- for (int x = 0; x < halfWidth; x++)
- {
- gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 0, *(frame.ChromaBPtr + src + x));
- gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 1, *(frame.ChromaRPtr + src + x));
- }
- }
- }
-
- private void ThrowUnimplementedCodec()
- {
- throw new NotImplementedException($"Codec \"{_currentVideoCodec}\" is not supported!");
- }
-
- private void ThrowUnimplementedPixelFormat(SurfacePixelFormat pixelFormat)
- {
- throw new NotImplementedException($"Pixel format \"{pixelFormat}\" is not supported!");
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs
deleted file mode 100644
index 12286386..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs
+++ /dev/null
@@ -1,19 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- enum VideoDecoderMeth
- {
- SetVideoCodec = 0x80,
- Execute = 0xc0,
- SetDecoderCtxAddr = 0x101,
- SetFrameDataAddr = 0x102,
- SetVpxRef0LumaAddr = 0x10c,
- SetVpxRef1LumaAddr = 0x10d,
- SetVpxRef2LumaAddr = 0x10e,
- SetVpxCurrLumaAddr = 0x10f,
- SetVpxRef0ChromaAddr = 0x11d,
- SetVpxRef1ChromaAddr = 0x11e,
- SetVpxRef2ChromaAddr = 0x11f,
- SetVpxCurrChromaAddr = 0x120,
- SetVpxProbTablesAddr = 0x170
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs
deleted file mode 100644
index b20a40be..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs
+++ /dev/null
@@ -1,879 +0,0 @@
-using System.Collections.Generic;
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- class Vp9Decoder
- {
- private const int DiffUpdateProbability = 252;
-
- private const int FrameSyncCode = 0x498342;
-
- private static readonly int[] MapLut = new int[]
- {
- 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63,
- 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78,
- 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93,
- 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
- 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122,
- 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137,
- 138, 139, 140, 141, 142, 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152,
- 153, 154, 155, 156, 157, 11, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
- 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13,
- 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196,
- 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211,
- 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226,
- 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
- 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19
- };
-
- private byte[] DefaultTx8x8Probs = new byte[] { 100, 66 };
- private byte[] DefaultTx16x16Probs = new byte[] { 20, 152, 15, 101 };
- private byte[] DefaultTx32x32Probs = new byte[] { 3, 136, 37, 5, 52, 13 };
-
- private byte[] _defaultCoefProbs = new byte[]
- {
- 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0,
- 17, 82, 140, 0, 8, 66, 114, 0, 2, 44, 76, 0, 1, 19, 32, 0,
- 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, 7, 75, 127, 0,
- 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0,
- 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0,
- 102, 148, 228, 0, 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0,
- 2, 39, 75, 0, 1, 15, 29, 0, 156, 57, 233, 0, 119, 57, 212, 0,
- 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 81, 0, 3, 12, 31, 0,
- 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0,
- 8, 93, 157, 0, 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0,
- 41, 151, 213, 0, 27, 123, 193, 0, 3, 82, 144, 0, 1, 58, 105, 0,
- 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, 23, 126, 198, 0,
- 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0,
- 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0,
- 1, 33, 65, 0, 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0,
- 61, 49, 166, 0, 28, 36, 114, 0, 12, 25, 76, 0, 3, 16, 42, 0,
- 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0,
- 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0,
- 89, 163, 230, 0, 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0,
- 2, 42, 81, 0, 1, 17, 33, 0, 108, 167, 237, 0, 55, 133, 222, 0,
- 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, 1, 19, 38, 0,
- 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0,
- 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0,
- 66, 58, 182, 0, 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0,
- 229, 99, 249, 0, 143, 111, 235, 0, 46, 109, 192, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, 94, 146, 224, 0,
- 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0,
- 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0,
- 1, 41, 79, 0, 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0,
- 10, 104, 178, 0, 2, 73, 133, 0, 1, 44, 85, 0, 1, 22, 47, 0,
- 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, 3, 61, 124, 0,
- 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0,
- 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0,
- 125, 34, 187, 0, 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 37, 109, 153, 0, 51, 102, 147, 0,
- 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, 1, 19, 29, 0,
- 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0,
- 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0,
- 2, 93, 148, 0, 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0,
- 29, 176, 217, 0, 12, 145, 201, 0, 3, 101, 156, 0, 1, 69, 111, 0,
- 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, 25, 154, 215, 0,
- 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0,
- 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0,
- 14, 117, 177, 0, 5, 90, 141, 0, 2, 61, 95, 0, 1, 37, 57, 0,
- 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, 1, 60, 104, 0,
- 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0,
- 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0,
- 32, 186, 224, 0, 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0,
- 1, 31, 55, 0, 1, 12, 22, 0, 57, 192, 227, 0, 20, 143, 204, 0,
- 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, 1, 19, 32, 0,
- 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0,
- 49, 107, 178, 0, 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0,
- 71, 172, 217, 0, 44, 141, 209, 0, 15, 102, 173, 0, 6, 76, 133, 0,
- 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, 31, 148, 216, 0,
- 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0,
- 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0,
- 1, 38, 69, 0, 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0,
- 7, 107, 177, 0, 2, 70, 124, 0, 1, 42, 73, 0, 1, 18, 34, 0,
- 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0,
- 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0,
- 75, 183, 239, 0, 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0,
- 1, 44, 76, 0, 1, 17, 28, 0, 73, 185, 240, 0, 27, 159, 222, 0,
- 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, 1, 17, 29, 0,
- 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0,
- 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0,
- 4, 113, 180, 0, 1, 76, 129, 0, 1, 46, 80, 0, 1, 23, 41, 0,
- 7, 27, 153, 0, 5, 30, 95, 0, 1, 16, 30, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, 57, 75, 124, 0,
- 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0,
- 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0,
- 1, 38, 60, 0, 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0,
- 2, 75, 117, 0, 1, 50, 81, 0, 1, 31, 51, 0, 1, 14, 23, 0,
- 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, 1, 51, 86, 0,
- 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0,
- 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0,
- 19, 55, 240, 0, 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 41, 166, 207, 0, 104, 153, 199, 0,
- 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, 1, 36, 52, 0,
- 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0,
- 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0,
- 1, 86, 142, 0, 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0,
- 20, 190, 215, 0, 4, 135, 192, 0, 1, 84, 139, 0, 1, 53, 91, 0,
- 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, 2, 137, 192, 0,
- 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0,
- 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0,
- 46, 102, 164, 0, 15, 80, 128, 0, 2, 49, 76, 0, 1, 18, 28, 0,
- 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, 3, 69, 109, 0,
- 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0,
- 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0,
- 47, 199, 217, 0, 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0,
- 1, 36, 62, 0, 1, 15, 26, 0, 26, 219, 229, 0, 5, 155, 207, 0,
- 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, 1, 16, 28, 0,
- 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0,
- 63, 142, 204, 0, 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0,
- 85, 181, 230, 0, 32, 146, 209, 0, 7, 100, 164, 0, 3, 71, 121, 0,
- 1, 45, 77, 0, 1, 18, 30, 0, 65, 187, 230, 0, 20, 148, 207, 0,
- 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0,
- 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0,
- 1, 39, 66, 0, 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0,
- 1, 98, 160, 0, 1, 67, 117, 0, 1, 41, 74, 0, 1, 17, 31, 0,
- 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0,
- 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0,
- 50, 127, 154, 0, 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0,
- 1, 35, 54, 0, 1, 13, 20, 0, 40, 142, 167, 0, 17, 110, 157, 0,
- 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, 1, 11, 17, 0,
- 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0,
- 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0,
- 1, 83, 128, 0, 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0,
- 36, 41, 235, 0, 29, 36, 193, 0, 10, 27, 111, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, 177, 162, 215, 0,
- 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0,
- 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0,
- 1, 38, 65, 0, 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0,
- 2, 86, 140, 0, 1, 56, 97, 0, 1, 36, 61, 0, 1, 16, 27, 0,
- 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, 1, 57, 99, 0,
- 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0,
- 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0,
- 181, 21, 201, 0, 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 47, 106, 172, 0, 95, 104, 173, 0,
- 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, 1, 17, 23, 0,
- 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0,
- 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0,
- 3, 93, 146, 0, 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0,
- 49, 186, 223, 0, 17, 148, 204, 0, 1, 96, 142, 0, 1, 53, 83, 0,
- 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, 2, 136, 180, 0,
- 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0,
- 197, 13, 247, 0, 82, 17, 222, 0, 25, 17, 162, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0,
- 176, 177, 234, 0, 104, 158, 220, 0, 66, 128, 186, 0, 55, 90, 137, 0,
- 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, 2, 65, 125, 0,
- 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0,
- 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0,
- 84, 220, 246, 0, 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0,
- 1, 55, 77, 0, 1, 60, 79, 0, 43, 243, 240, 0, 8, 180, 217, 0,
- 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, 1, 16, 6, 0
- };
-
- private byte[] _defaultSkipProbs = new byte[] { 192, 128, 64 };
-
- private byte[] _defaultInterModeProbs = new byte[]
- {
- 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, 66, 0,
- 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0
- };
-
- private byte[] _defaultInterpFilterProbs = new byte[]
- {
- 235, 162, 36, 255, 34, 3, 149, 144
- };
-
- private byte[] _defaultIsInterProbs = new byte[] { 9, 102, 187, 225 };
-
- private byte[] _defaultCompModeProbs = new byte[] { 239, 183, 119, 96, 41 };
-
- private byte[] _defaultSingleRefProbs = new byte[]
- {
- 33, 16, 77, 74, 142, 142, 172, 170, 238, 247
- };
-
- private byte[] _defaultCompRefProbs = new byte[] { 50, 126, 123, 221, 226 };
-
- private byte[] _defaultYModeProbs0 = new byte[]
- {
- 65, 32, 18, 144, 162, 194, 41, 51, 132, 68, 18, 165, 217, 196, 45, 40,
- 173, 80, 19, 176, 240, 193, 64, 35, 221, 135, 38, 194, 248, 121, 96, 85
- };
-
- private byte[] _defaultYModeProbs1 = new byte[] { 98, 78, 46, 29 };
-
- private byte[] _defaultPartitionProbs = new byte[]
- {
- 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
- 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
- 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
- 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0
- };
-
- private byte[] _defaultMvJointProbs = new byte[] { 32, 64, 96 };
-
- private byte[] _defaultMvSignProbs = new byte[] { 128, 128 };
-
- private byte[] _defaultMvClassProbs = new byte[]
- {
- 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, 216, 128, 176, 160, 176, 176,
- 192, 198, 198, 208
- };
-
- private byte[] _defaultMvClass0BitProbs = new byte[] { 216, 208 };
-
- private byte[] _defaultMvBitsProbs = new byte[]
- {
- 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, 136, 140, 148, 160, 176, 192,
- 224, 234, 234, 240
- };
-
- private byte[] _defaultMvClass0FrProbs = new byte[]
- {
- 128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64
- };
-
- private byte[] _defaultMvFrProbs = new byte[] { 64, 96, 64, 64, 96, 64 };
-
- private byte[] _defaultMvClass0HpProbs = new byte[] { 160, 160 };
-
- private byte[] _defaultMvHpProbs = new byte[] { 128, 128 };
-
- private sbyte[] _loopFilterRefDeltas;
- private sbyte[] _loopFilterModeDeltas;
-
- private LinkedList<int> _frameSlotByLastUse;
-
- private Dictionary<long, LinkedListNode<int>> _cachedRefFrames;
-
- public Vp9Decoder()
- {
- _loopFilterRefDeltas = new sbyte[4];
- _loopFilterModeDeltas = new sbyte[2];
-
- _frameSlotByLastUse = new LinkedList<int>();
-
- for (int slot = 0; slot < 8; slot++)
- {
- _frameSlotByLastUse.AddFirst(slot);
- }
-
- _cachedRefFrames = new Dictionary<long, LinkedListNode<int>>();
- }
-
- public void Decode(
- Vp9FrameKeys keys,
- Vp9FrameHeader header,
- Vp9ProbabilityTables probs,
- byte[] frameData)
- {
- bool isKeyFrame = ((header.Flags >> 0) & 1) != 0;
- bool lastIsKeyFrame = ((header.Flags >> 1) & 1) != 0;
- bool frameSizeChanged = ((header.Flags >> 2) & 1) != 0;
- bool errorResilientMode = ((header.Flags >> 3) & 1) != 0;
- bool lastShowFrame = ((header.Flags >> 4) & 1) != 0;
- bool isFrameIntra = ((header.Flags >> 5) & 1) != 0;
-
- bool showFrame = !isFrameIntra;
-
- // Write compressed header.
- byte[] compressedHeaderData;
-
- using (MemoryStream compressedHeader = new MemoryStream())
- {
- VpxRangeEncoder writer = new VpxRangeEncoder(compressedHeader);
-
- if (!header.Lossless)
- {
- if ((uint)header.TxMode >= 3)
- {
- writer.Write(3, 2);
- writer.Write(header.TxMode == 4);
- }
- else
- {
- writer.Write(header.TxMode, 2);
- }
- }
-
- if (header.TxMode == 4)
- {
- WriteProbabilityUpdate(writer, probs.Tx8x8Probs, DefaultTx8x8Probs);
- WriteProbabilityUpdate(writer, probs.Tx16x16Probs, DefaultTx16x16Probs);
- WriteProbabilityUpdate(writer, probs.Tx32x32Probs, DefaultTx32x32Probs);
- }
-
- WriteCoefProbabilityUpdate(writer, header.TxMode, probs.CoefProbs, _defaultCoefProbs);
-
- WriteProbabilityUpdate(writer, probs.SkipProbs, _defaultSkipProbs);
-
- if (!isFrameIntra)
- {
- WriteProbabilityUpdateAligned4(writer, probs.InterModeProbs, _defaultInterModeProbs);
-
- if (header.RawInterpolationFilter == 4)
- {
- WriteProbabilityUpdate(writer, probs.InterpFilterProbs, _defaultInterpFilterProbs);
- }
-
- WriteProbabilityUpdate(writer, probs.IsInterProbs, _defaultIsInterProbs);
-
- if ((header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[2] & 1) ||
- (header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[3] & 1))
- {
- if ((uint)header.CompPredMode >= 1)
- {
- writer.Write(1, 1);
- writer.Write(header.CompPredMode == 2);
- }
- else
- {
- writer.Write(0, 1);
- }
- }
-
- if (header.CompPredMode == 2)
- {
- WriteProbabilityUpdate(writer, probs.CompModeProbs, _defaultCompModeProbs);
- }
-
- if (header.CompPredMode != 1)
- {
- WriteProbabilityUpdate(writer, probs.SingleRefProbs, _defaultSingleRefProbs);
- }
-
- if (header.CompPredMode != 0)
- {
- WriteProbabilityUpdate(writer, probs.CompRefProbs, _defaultCompRefProbs);
- }
-
- for (int index = 0; index < 4; index++)
- {
- int i = index * 8;
- int j = index;
-
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 0], _defaultYModeProbs0[i + 0]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 1], _defaultYModeProbs0[i + 1]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 2], _defaultYModeProbs0[i + 2]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 3], _defaultYModeProbs0[i + 3]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 4], _defaultYModeProbs0[i + 4]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 5], _defaultYModeProbs0[i + 5]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 6], _defaultYModeProbs0[i + 6]);
- WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 7], _defaultYModeProbs0[i + 7]);
- WriteProbabilityUpdate(writer, probs.YModeProbs1[j + 0], _defaultYModeProbs1[j + 0]);
- }
-
- WriteProbabilityUpdateAligned4(writer, probs.PartitionProbs, _defaultPartitionProbs);
-
- for (int i = 0; i < 3; i++)
- {
- WriteMvProbabilityUpdate(writer, probs.MvJointProbs[i], _defaultMvJointProbs[i]);
- }
-
- for (int i = 0; i < 2; i++)
- {
- WriteMvProbabilityUpdate(writer, probs.MvSignProbs[i], _defaultMvSignProbs[i]);
-
- for (int j = 0; j < 10; j++)
- {
- int index = i * 10 + j;
-
- WriteMvProbabilityUpdate(writer, probs.MvClassProbs[index], _defaultMvClassProbs[index]);
- }
-
- WriteMvProbabilityUpdate(writer, probs.MvClass0BitProbs[i], _defaultMvClass0BitProbs[i]);
-
- for (int j = 0; j < 10; j++)
- {
- int index = i * 10 + j;
-
- WriteMvProbabilityUpdate(writer, probs.MvBitsProbs[index], _defaultMvBitsProbs[index]);
- }
- }
-
- for (int i = 0; i < 2; i++)
- {
- for (int j = 0; j < 2; j++)
- {
- for (int k = 0; k < 3; k++)
- {
- int index = i * 2 * 3 + j * 3 + k;
-
- WriteMvProbabilityUpdate(writer, probs.MvClass0FrProbs[index], _defaultMvClass0FrProbs[index]);
- }
- }
-
- for (int j = 0; j < 3; j++)
- {
- int index = i * 3 + j;
-
- WriteMvProbabilityUpdate(writer, probs.MvFrProbs[index], _defaultMvFrProbs[index]);
- }
- }
-
- if (header.AllowHighPrecisionMv)
- {
- for (int index = 0; index < 2; index++)
- {
- WriteMvProbabilityUpdate(writer, probs.MvClass0HpProbs[index], _defaultMvClass0HpProbs[index]);
- WriteMvProbabilityUpdate(writer, probs.MvHpProbs[index], _defaultMvHpProbs[index]);
- }
- }
- }
-
- writer.End();
-
- compressedHeaderData = compressedHeader.ToArray();
- }
-
- // Write uncompressed header.
- using (MemoryStream encodedHeader = new MemoryStream())
- {
- VpxBitStreamWriter writer = new VpxBitStreamWriter(encodedHeader);
-
- writer.WriteU(2, 2); //Frame marker.
- writer.WriteU(0, 2); //Profile.
- writer.WriteBit(false); //Show existing frame.
- writer.WriteBit(!isKeyFrame);
- writer.WriteBit(showFrame);
- writer.WriteBit(errorResilientMode);
-
- if (isKeyFrame)
- {
- writer.WriteU(FrameSyncCode, 24);
- writer.WriteU(0, 3); //Color space.
- writer.WriteU(0, 1); //Color range.
- writer.WriteU(header.CurrentFrame.Width - 1, 16);
- writer.WriteU(header.CurrentFrame.Height - 1, 16);
- writer.WriteBit(false); //Render and frame size different.
-
- _cachedRefFrames.Clear();
-
- // On key frames, all frame slots are set to the current frame,
- // so the value of the selected slot doesn't really matter.
- GetNewFrameSlot(keys.CurrKey);
- }
- else
- {
- if (!showFrame)
- {
- writer.WriteBit(isFrameIntra);
- }
-
- if (!errorResilientMode)
- {
- writer.WriteU(0, 2); //Reset frame context.
- }
-
- int refreshFrameFlags = 1 << GetNewFrameSlot(keys.CurrKey);
-
- if (isFrameIntra)
- {
- writer.WriteU(FrameSyncCode, 24);
- writer.WriteU(refreshFrameFlags, 8);
- writer.WriteU(header.CurrentFrame.Width - 1, 16);
- writer.WriteU(header.CurrentFrame.Height - 1, 16);
- writer.WriteBit(false); //Render and frame size different.
- }
- else
- {
- writer.WriteU(refreshFrameFlags, 8);
-
- int[] refFrameIndex = new int[]
- {
- GetFrameSlot(keys.Ref0Key),
- GetFrameSlot(keys.Ref1Key),
- GetFrameSlot(keys.Ref2Key)
- };
-
- byte[] refFrameSignBias = header.RefFrameSignBias;
-
- for (int index = 1; index < 4; index++)
- {
- writer.WriteU(refFrameIndex[index - 1], 3);
- writer.WriteU(refFrameSignBias[index], 1);
- }
-
- writer.WriteBit(true); //Frame size with refs.
- writer.WriteBit(false); //Render and frame size different.
- writer.WriteBit(header.AllowHighPrecisionMv);
- writer.WriteBit(header.RawInterpolationFilter == 4);
-
- if (header.RawInterpolationFilter != 4)
- {
- writer.WriteU(header.RawInterpolationFilter, 2);
- }
- }
- }
-
- if (!errorResilientMode)
- {
- writer.WriteBit(false); //Refresh frame context.
- writer.WriteBit(true); //Frame parallel decoding mode.
- }
-
- writer.WriteU(0, 2); //Frame context index.
-
- writer.WriteU(header.LoopFilterLevel, 6);
- writer.WriteU(header.LoopFilterSharpness, 3);
- writer.WriteBit(header.LoopFilterDeltaEnabled);
-
- if (header.LoopFilterDeltaEnabled)
- {
- bool[] updateLoopFilterRefDeltas = new bool[4];
- bool[] updateLoopFilterModeDeltas = new bool[2];
-
- bool loopFilterDeltaUpdate = false;
-
- for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++)
- {
- sbyte old = _loopFilterRefDeltas[index];
- sbyte New = header.LoopFilterRefDeltas[index];
-
- loopFilterDeltaUpdate |= (updateLoopFilterRefDeltas[index] = old != New);
- }
-
- for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++)
- {
- sbyte old = _loopFilterModeDeltas[index];
- sbyte New = header.LoopFilterModeDeltas[index];
-
- loopFilterDeltaUpdate |= (updateLoopFilterModeDeltas[index] = old != New);
- }
-
- writer.WriteBit(loopFilterDeltaUpdate);
-
- if (loopFilterDeltaUpdate)
- {
- for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++)
- {
- writer.WriteBit(updateLoopFilterRefDeltas[index]);
-
- if (updateLoopFilterRefDeltas[index])
- {
- writer.WriteS(header.LoopFilterRefDeltas[index], 6);
- }
- }
-
- for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++)
- {
- writer.WriteBit(updateLoopFilterModeDeltas[index]);
-
- if (updateLoopFilterModeDeltas[index])
- {
- writer.WriteS(header.LoopFilterModeDeltas[index], 6);
- }
- }
- }
- }
-
- writer.WriteU(header.BaseQIndex, 8);
-
- writer.WriteDeltaQ(header.DeltaQYDc);
- writer.WriteDeltaQ(header.DeltaQUvDc);
- writer.WriteDeltaQ(header.DeltaQUvAc);
-
- writer.WriteBit(false); //Segmentation enabled (TODO).
-
- int minTileColsLog2 = CalcMinLog2TileCols(header.CurrentFrame.Width);
- int maxTileColsLog2 = CalcMaxLog2TileCols(header.CurrentFrame.Width);
-
- int tileColsLog2Diff = header.TileColsLog2 - minTileColsLog2;
-
- int tileColsLog2IncMask = (1 << tileColsLog2Diff) - 1;
-
- // If it's less than the maximum, we need to add an extra 0 on the bitstream
- // to indicate that it should stop reading.
- if (header.TileColsLog2 < maxTileColsLog2)
- {
- writer.WriteU(tileColsLog2IncMask << 1, tileColsLog2Diff + 1);
- }
- else
- {
- writer.WriteU(tileColsLog2IncMask, tileColsLog2Diff);
- }
-
- bool tileRowsLog2IsNonZero = header.TileRowsLog2 != 0;
-
- writer.WriteBit(tileRowsLog2IsNonZero);
-
- if (tileRowsLog2IsNonZero)
- {
- writer.WriteBit(header.TileRowsLog2 > 1);
- }
-
- writer.WriteU(compressedHeaderData.Length, 16);
-
- writer.Flush();
-
- encodedHeader.Write(compressedHeaderData, 0, compressedHeaderData.Length);
-
- if (!FFmpegWrapper.IsInitialized)
- {
- FFmpegWrapper.Vp9Initialize();
- }
-
- FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(encodedHeader.ToArray(), frameData));
- }
-
- _loopFilterRefDeltas = header.LoopFilterRefDeltas;
- _loopFilterModeDeltas = header.LoopFilterModeDeltas;
- }
-
- private int GetNewFrameSlot(long key)
- {
-     // Take the node at the tail of the use-ordered list and move it to the head.
-     LinkedListNode<int> recycled = _frameSlotByLastUse.Last;
-
-     _frameSlotByLastUse.RemoveLast();
-     _frameSlotByLastUse.AddFirst(recycled);
-
-     int slot = recycled.Value;
-
-     // Remember which node now backs this key.
-     _cachedRefFrames[key] = recycled;
-
-     return slot;
- }
-
- private int GetFrameSlot(long key)
- {
-     if (!_cachedRefFrames.TryGetValue(key, out LinkedListNode<int> cached))
-     {
-         // Reference frame was lost.
-         // The original author left open what should happen here; slot 0 is returned.
-         return 0;
-     }
-
-     // Mark the slot as most recently used by moving its node to the head.
-     _frameSlotByLastUse.Remove(cached);
-     _frameSlotByLastUse.AddFirst(cached);
-
-     return cached.Value;
- }
-
- private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte[] New, byte[] old)
- {
-     // Emit one update per entry, pairing New[i] with old[i].
-     int i = 0;
-
-     while (i < New.Length)
-     {
-         WriteProbabilityUpdate(writer, New[i], old[i]);
-
-         i++;
-     }
- }
-
- private void WriteCoefProbabilityUpdate(VpxRangeEncoder writer, int txMode, byte[] New, byte[] old)
- {
-     // Note: There's 1 byte added on each packet for alignment,
-     // this byte is ignored when doing updates.
-     // A block is 2 * 2 * 6 * 6 entries of 4 bytes; only the first 3 bytes of an entry are used.
-     const int entriesPerBlock = 2 * 2 * 6 * 6;
-     const int entryStride = 4;
-     const int blockStride = entriesPerBlock * entryStride;
-
-     bool EntryDiffers(int at)
-     {
-         return New[at + 0] != old[at + 0] ||
-                New[at + 1] != old[at + 1] ||
-                New[at + 2] != old[at + 2];
-     }
-
-     bool BlockChanged(int start)
-     {
-         for (int entry = 0; entry < entriesPerBlock; entry++)
-         {
-             if (EntryDiffers(start + entry * entryStride))
-             {
-                 return true;
-             }
-         }
-
-         return false;
-     }
-
-     for (int blockIndex = 0; blockIndex < 4; blockIndex++)
-     {
-         int start = blockIndex * blockStride;
-
-         bool changed = BlockChanged(start);
-
-         writer.Write(changed);
-
-         if (changed)
-         {
-             for (int entry = 0; entry < entriesPerBlock; entry++)
-             {
-                 // Recover the two innermost 6x6 indices from the flat entry number.
-                 int k = (entry / 6) % 6;
-                 int l = entry % 6;
-
-                 // Entries with k == 0 and l >= 3 are skipped (not written).
-                 if (k == 0 && l >= 3)
-                 {
-                     continue;
-                 }
-
-                 int at = start + entry * entryStride;
-
-                 for (int component = 0; component < 3; component++)
-                 {
-                     WriteProbabilityUpdate(writer, New[at + component], old[at + component]);
-                 }
-             }
-         }
-
-         // Blocks past the one matching txMode are not written.
-         if (blockIndex == txMode)
-         {
-             break;
-         }
-     }
- }
-
- private void WriteProbabilityUpdateAligned4(VpxRangeEncoder writer, byte[] New, byte[] old)
- {
-     // Entries are 4 bytes apart; only the first 3 bytes of each entry are written.
-     const int stride = 4;
-     const int used = 3;
-
-     for (int start = 0; start < New.Length; start += stride)
-     {
-         for (int lane = 0; lane < used; lane++)
-         {
-             WriteProbabilityUpdate(writer, New[start + lane], old[start + lane]);
-         }
-     }
- }
-
- private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old)
- {
-     // An unchanged probability is signalled with a single "no update" flag.
-     if (New == old)
-     {
-         writer.Write(false, DiffUpdateProbability);
-
-         return;
-     }
-
-     // Changed: write the "update" flag followed by the coded delta.
-     writer.Write(true, DiffUpdateProbability);
-
-     WriteProbabilityDelta(writer, New, old);
- }
-
- private void WriteProbabilityDelta(VpxRangeEncoder writer, int New, int old)
- {
-     // Remap the (new, old) pair to a table index, then write it with the sub-exponential code.
-     EncodeTermSubExp(writer, RemapProbability(New, old));
- }
-
- private int RemapProbability(int New, int old)
- {
-     // Work on zero-based values.
-     int newValue = New - 1;
-     int oldValue = old - 1;
-
-     // Values in the upper half are mirrored (0xff - 1 - v) before recentering.
-     int recentered = oldValue * 2 <= 0xff
-         ? RecenterNonNeg(newValue, oldValue)
-         : RecenterNonNeg(0xff - 1 - newValue, 0xff - 1 - oldValue);
-
-     return MapLut[recentered - 1];
- }
-
- private int RecenterNonNeg(int New, int old)
- {
-     // Values more than twice the old one are returned unchanged.
-     if (New > old * 2)
-     {
-         return New;
-     }
-
-     // Otherwise interleave: values at or above old map to even results, values below map to odd ones.
-     return New >= old
-         ? (New - old) * 2
-         : (old - New) * 2 - 1;
- }
-
- private void EncodeTermSubExp(VpxRangeEncoder writer, int value)
- {
-     // Each WriteLessThan call emits one prefix bit, so the calls must run in this order.
-     if (WriteLessThan(writer, value, 16))
-     {
-         writer.Write(value, 4);
-
-         return;
-     }
-
-     if (WriteLessThan(writer, value, 32))
-     {
-         writer.Write(value - 16, 4);
-
-         return;
-     }
-
-     if (WriteLessThan(writer, value, 64))
-     {
-         writer.Write(value - 32, 5);
-
-         return;
-     }
-
-     // Remaining values are written with 7 bits, plus one extra bit above the threshold.
-     const int uniformBits = 8;
-     const int threshold = (1 << uniformBits) - 191;
-
-     int rest = value - 64;
-     int excess = rest - threshold;
-
-     if (excess < 0)
-     {
-         writer.Write(rest, uniformBits - 1);
-
-         return;
-     }
-
-     writer.Write(excess / 2 + threshold, uniformBits - 1);
-     writer.Write(excess & 1, 1);
- }
-
- private bool WriteLessThan(VpxRangeEncoder writer, int value, int test)
- {
-     // The bit written is the negation of the result: true when value >= test.
-     writer.Write(value >= test);
-
-     return value < test;
- }
-
- private void WriteMvProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old)
- {
-     if (New == old)
-     {
-         writer.Write(false, DiffUpdateProbability);
-
-         return;
-     }
-
-     writer.Write(true, DiffUpdateProbability);
-
-     // The new probability is written as 7 bits, dropping its lowest bit.
-     writer.Write(New >> 1, 7);
- }
-
- private static int CalcMinLog2TileCols(int frameWidth)
- {
-     // Frame width in 64-pixel columns, rounded up.
-     int sb64Cols = (frameWidth + 63) / 64;
-
-     // Smallest log2 such that (64 << log2) is not below the column count.
-     int log2 = 0;
-
-     for (; (64 << log2) < sb64Cols; log2++)
-     {
-     }
-
-     return log2;
- }
-
- private static int CalcMaxLog2TileCols(int frameWidth)
- {
-     // Frame width in 64-pixel columns, rounded up.
-     int sb64Cols = (frameWidth + 63) / 64;
-
-     // Find the first log2 (starting at 1) where the shifted column count drops below 4.
-     int log2 = 1;
-
-     for (; (sb64Cols >> log2) >= 4; log2++)
-     {
-     }
-
-     return log2 - 1;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs
deleted file mode 100644
index bdba6de5..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs
+++ /dev/null
@@ -1,79 +0,0 @@
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.VDec
-{
- // Frame dimensions stored as four consecutive 16-bit fields (sequential layout, 2-byte packing).
- // The field order defines the marshalled layout and must not be changed.
- [StructLayout(LayoutKind.Sequential, Pack = 2)]
- struct Vp9FrameDimensions
- {
- public short Width;
- public short Height;
- public short SubsamplingX; //? NOTE(review): meaning was left unconfirmed by the original author
- public short SubsamplingY; //? NOTE(review): meaning was left unconfirmed by the original author
- }
-
- // VP9 frame header fields, marshalled sequentially with 1-byte packing.
- // The field order, sizes and MarshalAs attributes define the binary layout and must not be changed.
- [StructLayout(LayoutKind.Sequential, Pack = 1)]
- struct Vp9FrameHeader
- {
- // Dimensions of the three reference frames.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)]
- public Vp9FrameDimensions[] RefFrames;
-
- public Vp9FrameDimensions CurrentFrame;
-
- public int Flags;
-
- // Four entries; the header writer emits indices 1 to 3, one per reference frame.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
- public byte[] RefFrameSignBias;
-
- public byte LoopFilterLevel;
- public byte LoopFilterSharpness;
-
- public byte BaseQIndex;
- public sbyte DeltaQYDc;
- public sbyte DeltaQUvDc;
- public sbyte DeltaQUvAc;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool Lossless;
-
- public byte TxMode;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool AllowHighPrecisionMv;
-
- // The header writer treats the value 4 specially: it is signalled with a single bit
- // instead of the 2-bit filter value.
- public byte RawInterpolationFilter;
- public byte CompPredMode;
- public byte FixCompRef;
- public byte VarCompRef0;
- public byte VarCompRef1;
-
- public byte TileColsLog2;
- public byte TileRowsLog2;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool SegmentationEnabled;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool SegmentationUpdate;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool SegmentationTemporalUpdate;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool SegmentationAbsOrDeltaUpdate;
-
- // 8 * 4 flags, each marshalled as one byte.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4, ArraySubType = UnmanagedType.I1)]
- public bool[] FeatureEnabled;
-
- // 8 * 4 values, parallel in size to FeatureEnabled.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4)]
- public short[] FeatureData;
-
- [MarshalAs(UnmanagedType.I1)]
- public bool LoopFilterDeltaEnabled;
-
- // Compared against the previous frame's values to decide which deltas are re-sent.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)]
- public sbyte[] LoopFilterRefDeltas;
-
- // Compared against the previous frame's values to decide which deltas are re-sent.
- [MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)]
- public sbyte[] LoopFilterModeDeltas;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs
deleted file mode 100644
index dfc31ea3..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- // Keys identifying the current frame and its three reference frames.
- // The decoder uses the reference keys to look up cached frame slots.
- struct Vp9FrameKeys
- {
- public long CurrKey;
- public long Ref0Key;
- public long Ref1Key;
- public long Ref2Key;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs
deleted file mode 100644
index 5a6dd0cf..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-namespace Ryujinx.Graphics.VDec
-{
- // Groups the VP9 probability tables, each held as a raw byte array.
- // NOTE(review): the size and internal layout of each array are not visible here; confirm against the code that fills them.
- struct Vp9ProbabilityTables
- {
- public byte[] SegmentationTreeProbs;
- public byte[] SegmentationPredProbs;
- public byte[] Tx8x8Probs;
- public byte[] Tx16x16Probs;
- public byte[] Tx32x32Probs;
- public byte[] CoefProbs;
- public byte[] SkipProbs;
- public byte[] InterModeProbs;
- public byte[] InterpFilterProbs;
- public byte[] IsInterProbs;
- public byte[] CompModeProbs;
- public byte[] SingleRefProbs;
- public byte[] CompRefProbs;
- public byte[] YModeProbs0;
- public byte[] YModeProbs1;
- public byte[] PartitionProbs;
- public byte[] MvJointProbs;
- public byte[] MvSignProbs;
- public byte[] MvClassProbs;
- public byte[] MvClass0BitProbs;
- public byte[] MvBitsProbs;
- public byte[] MvClass0FrProbs;
- public byte[] MvFrProbs;
- public byte[] MvClass0HpProbs;
- public byte[] MvHpProbs;
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs
deleted file mode 100644
index 97ada333..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- // Bit stream writer with helpers for the unsigned, signed and delta-Q fields of a VP9 header.
- class VpxBitStreamWriter : BitStreamWriter
- {
-     public VpxBitStreamWriter(Stream baseStream) : base(baseStream) { }
-
-     // Writes value as an unsigned field of valueSize bits.
-     public void WriteU(int value, int valueSize)
-     {
-         WriteBits(value, valueSize);
-     }
-
-     // Writes a signed field as the magnitude in valueSize bits followed by
-     // one sign bit (1 = negative), valueSize + 1 bits in total.
-     public void WriteS(int value, int valueSize)
-     {
-         bool sign = value < 0;
-
-         if (sign)
-         {
-             value = -value;
-         }
-
-         WriteBits((value << 1) | (sign ? 1 : 0), valueSize + 1);
-     }
-
-     // Writes a quantizer delta: a "delta coded" flag, then the delta itself when it is non-zero.
-     public void WriteDeltaQ(int value)
-     {
-         bool deltaCoded = value != 0;
-
-         WriteBit(deltaCoded);
-
-         if (deltaCoded)
-         {
-             // The delta is a signed field (4 magnitude bits plus a sign bit).
-             // Writing only 4 raw bits dropped the sign bit and misaligned everything after it.
-             WriteS(value, 4);
-         }
-     }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs
deleted file mode 100644
index c854c9d9..00000000
--- a/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs
+++ /dev/null
@@ -1,134 +0,0 @@
-using System.IO;
-
-namespace Ryujinx.Graphics.VDec
-{
- // Boolean range encoder that writes its output bytes to a stream.
- // The stream must support seeking and reading, because carry propagation
- // goes back and rewrites bytes that were already written.
- class VpxRangeEncoder
- {
- // Probability used by Write(bool); probabilities are scaled so that 256 would be certainty (see the >> 8 in Write).
- private const int HalfProbability = 128;
-
- // Indexed by the current range value; gives the left shift applied to renormalize the range.
- private static readonly int[] NormLut = new int[]
- {
- 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
-
- private Stream _baseStream;
-
- // Encoder state: pending low value, current range, and the (negative) count of bits until the next output byte.
- private uint _lowValue;
- private uint _range;
- private int _count;
-
- // Initializes the encoder state and immediately writes one false bit.
- public VpxRangeEncoder(Stream baseStream)
- {
- _baseStream = baseStream;
-
- _range = 0xff;
- _count = -24;
-
- Write(false);
- }
-
- // Writes the 8 bits of value, most significant bit first.
- public void WriteByte(byte value)
- {
- Write(value, 8);
- }
-
- // Writes the low valueSize bits of value, most significant bit first, each with half probability.
- public void Write(int value, int valueSize)
- {
- for (int bit = valueSize - 1; bit >= 0; bit--)
- {
- Write(((value >> bit) & 1) != 0);
- }
- }
-
- // Writes a single bit with half probability.
- public void Write(bool bit)
- {
- Write(bit, HalfProbability);
- }
-
- // Encodes one bit; the range is split at 1 + ((range - 1) * probability >> 8).
- // A false bit keeps the lower part of the range, a true bit the upper part.
- public void Write(bool bit, int probability)
- {
- uint range = _range;
-
- uint split = 1 + (((range - 1) * (uint)probability) >> 8);
-
- range = split;
-
- if (bit)
- {
- _lowValue += split;
- range = _range - split;
- }
-
- // Renormalize so the range occupies the top of its 8-bit window again.
- int shift = NormLut[range];
-
- range <<= shift;
- _count += shift;
-
- // A non-negative count means a full output byte is ready.
- if (_count >= 0)
- {
- int offset = shift - _count;
-
- // Carry out of the low value: propagate it into the bytes already written.
- if (((_lowValue << (offset - 1)) >> 31) != 0)
- {
- long currentPos = _baseStream.Position;
-
- _baseStream.Seek(-1, SeekOrigin.Current);
-
- // Walk backwards over 0xff bytes, turning each into 0 ...
- while (_baseStream.Position >= 0 && PeekByte() == 0xff)
- {
- _baseStream.WriteByte(0);
-
- _baseStream.Seek(-2, SeekOrigin.Current);
- }
-
- // ... then increment the first byte that is not 0xff.
- _baseStream.WriteByte((byte)(PeekByte() + 1));
-
- _baseStream.Seek(currentPos, SeekOrigin.Begin);
- }
-
- _baseStream.WriteByte((byte)(_lowValue >> (24 - offset)));
-
- _lowValue <<= offset;
- shift = _count;
- _lowValue &= 0xffffff;
- _count -= 8;
- }
-
- _lowValue <<= shift;
-
- _range = range;
- }
-
- // Reads the byte at the current position without advancing the stream.
- private byte PeekByte()
- {
- byte value = (byte)_baseStream.ReadByte();
-
- _baseStream.Seek(-1, SeekOrigin.Current);
-
- return value;
- }
-
- // Writes 32 false bits; presumably this flushes the bits still pending in the encoder state.
- public void End()
- {
- for (int index = 0; index < 32; index++)
- {
- Write(false);
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs b/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs
deleted file mode 100644
index 4957e6b6..00000000
--- a/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs
+++ /dev/null
@@ -1,69 +0,0 @@
-using Ryujinx.Graphics.Gpu.Memory;
-using System;
-
-namespace Ryujinx.Graphics.Vic
-{
- // Reads consecutive bit fields from memory, fetching 64 bits at a time through a MemoryAccessor.
- class StructUnpacker
- {
- private MemoryAccessor _vmm;
-
- // Address of the next 64-bit word to fetch.
- private ulong _position;
-
- // Current 64-bit word and the index of the next unread bit in it (64 means the word is used up).
- private ulong _buffer;
- private int _buffPos;
-
- public StructUnpacker(MemoryAccessor vmm, ulong position)
- {
- _vmm = vmm;
- _position = position;
-
- // Start "empty" so the first Read fetches a word.
- _buffPos = 64;
- }
-
- // Reads the next "bits" bits (0 to 32) and returns them as an integer.
- // Throws ArgumentOutOfRangeException when bits is negative or above 32.
- public int Read(int bits)
- {
- if ((uint)bits > 32)
- {
- throw new ArgumentOutOfRangeException(nameof(bits));
- }
-
- int value = 0;
-
- while (bits > 0)
- {
- RefillBufferIfNeeded();
-
- int readBits = bits;
-
- int maxReadBits = 64 - _buffPos;
-
- // Take only what is left in the current word; the loop fetches the rest from the next one.
- if (readBits > maxReadBits)
- {
- readBits = maxReadBits;
- }
-
- // NOTE(review): when a field straddles two words, the bits read first end up as the
- // high bits of the result, while bits inside a word are taken starting from the low end.
- // Confirm this is the intended order for straddling fields.
- value <<= readBits;
-
- value |= (int)(_buffer >> _buffPos) & (int)(0xffffffff >> (32 - readBits));
-
- _buffPos += readBits;
-
- bits -= readBits;
- }
-
- return value;
- }
-
- // Fetches the next 64-bit word once the current one has been fully consumed.
- private void RefillBufferIfNeeded()
- {
- if (_buffPos >= 64)
- {
- _buffer = _vmm.ReadUInt64(_position);
-
- _position += 8;
-
- _buffPos = 0;
- }
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs
deleted file mode 100644
index bcb01e70..00000000
--- a/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-namespace Ryujinx.Graphics.Vic
-{
- // Describes an output surface: pixel format, size, GOB block height and the addresses of its planes.
- struct SurfaceOutputConfig
- {
- public SurfacePixelFormat PixelFormat;
-
- public int SurfaceWidth;
- public int SurfaceHeight;
- public int GobBlockHeight;
-
- public ulong SurfaceLumaAddress;
- public ulong SurfaceChromaUAddress;
- public ulong SurfaceChromaVAddress;
-
- // Copies each argument into the matching field.
- public SurfaceOutputConfig(
- SurfacePixelFormat pixelFormat,
- int surfaceWidth,
- int surfaceHeight,
- int gobBlockHeight,
- ulong outputSurfaceLumaAddress,
- ulong outputSurfaceChromaUAddress,
- ulong outputSurfaceChromaVAddress)
- {
- PixelFormat = pixelFormat;
- SurfaceWidth = surfaceWidth;
- SurfaceHeight = surfaceHeight;
- GobBlockHeight = gobBlockHeight;
- SurfaceLumaAddress = outputSurfaceLumaAddress;
- SurfaceChromaUAddress = outputSurfaceChromaUAddress;
- SurfaceChromaVAddress = outputSurfaceChromaVAddress;
- }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs
deleted file mode 100644
index 8dabd094..00000000
--- a/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace Ryujinx.Graphics.Vic
-{
- // Output pixel formats; the numeric value is the 7-bit format field read from the config struct.
- enum SurfacePixelFormat
- {
- Rgba8 = 0x1f,
- Yuv420P = 0x44
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs
deleted file mode 100644
index 39e18fa6..00000000
--- a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs
+++ /dev/null
@@ -1,94 +0,0 @@
-using Ryujinx.Graphics.Gpu;
-using Ryujinx.Graphics.VDec;
-
-namespace Ryujinx.Graphics.Vic
-{
- // Handles video image composer methods: stores the configured addresses and, on Execute,
- // reads the output surface description and asks the video decoder to copy its planes.
- class VideoImageComposer
- {
-     private ulong _configStructAddress;
-     private ulong _outputSurfaceLumaAddress;
-     private ulong _outputSurfaceChromaUAddress;
-     private ulong _outputSurfaceChromaVAddress;
-
-     private VideoDecoder _vdec;
-
-     public VideoImageComposer(VideoDecoder vdec)
-     {
-         _vdec = vdec;
-     }
-
-     // Dispatches one method call; offsets without a handler are ignored.
-     public void Process(GpuContext gpu, int methodOffset, int[] arguments)
-     {
-         VideoImageComposerMeth method = (VideoImageComposerMeth)methodOffset;
-
-         if (method == VideoImageComposerMeth.Execute)
-         {
-             Execute(gpu);
-         }
-         else if (method == VideoImageComposerMeth.SetConfigStructOffset)
-         {
-             _configStructAddress = GetAddress(arguments);
-         }
-         else if (method == VideoImageComposerMeth.SetOutputSurfaceLumaOffset)
-         {
-             _outputSurfaceLumaAddress = GetAddress(arguments);
-         }
-         else if (method == VideoImageComposerMeth.SetOutputSurfaceChromaUOffset)
-         {
-             _outputSurfaceChromaUAddress = GetAddress(arguments);
-         }
-         else if (method == VideoImageComposerMeth.SetOutputSurfaceChromaVOffset)
-         {
-             _outputSurfaceChromaVAddress = GetAddress(arguments);
-         }
-     }
-
-     // Reads the surface description at config + 0x20 and copies the decoded planes to it.
-     private void Execute(GpuContext gpu)
-     {
-         StructUnpacker reader = new StructUnpacker(gpu.MemoryAccessor, _configStructAddress + 0x20);
-
-         SurfacePixelFormat format = (SurfacePixelFormat)reader.Read(7);
-
-         // Fields that are not used still have to be read to advance the bit position.
-         reader.Read(2);  // Chroma location (horizontal).
-         reader.Read(2);  // Chroma location (vertical).
-         reader.Read(4);  // Block linear kind.
-
-         int heightLog2 = reader.Read(4);
-
-         reader.Read(3);  // Reserved.
-         reader.Read(10); // Reserved.
-
-         // Width and height are stored minus one.
-         int width = reader.Read(14) + 1;
-         int height = reader.Read(14) + 1;
-
-         SurfaceOutputConfig config = new SurfaceOutputConfig(
-             format,
-             width,
-             height,
-             1 << heightLog2,
-             _outputSurfaceLumaAddress,
-             _outputSurfaceChromaUAddress,
-             _outputSurfaceChromaVAddress);
-
-         _vdec.CopyPlanes(gpu, config);
-     }
-
-     // The first argument holds the address shifted right by 8 bits.
-     private static ulong GetAddress(int[] arguments)
-     {
-         uint raw = (uint)arguments[0];
-
-         return (ulong)raw << 8;
-     }
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs
deleted file mode 100644
index b30cabea..00000000
--- a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs
+++ /dev/null
@@ -1,12 +0,0 @@
-namespace Ryujinx.Graphics.Vic
-{
- // Method offsets understood by the video image composer.
- // VideoImageComposer.Process handles every value except SetControlParams, which it ignores.
- enum VideoImageComposerMeth
- {
- Execute = 0xc0,
- SetControlParams = 0x1c1,
- SetConfigStructOffset = 0x1c2,
- SetOutputSurfaceLumaOffset = 0x1c8,
- SetOutputSurfaceChromaUOffset = 0x1c9,
- SetOutputSurfaceChromaVOffset = 0x1ca
- }
-} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs
new file mode 100644
index 00000000..f05555c6
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs
@@ -0,0 +1,92 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+using Ryujinx.Graphics.Nvdec.Types.Vp9;
+using Ryujinx.Graphics.Nvdec.Vp9;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using static Ryujinx.Graphics.Nvdec.MemoryExtensions;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// Decodes VP9 frames described by the NVDEC registers and writes the results back to guest memory.
+ /// </summary>
+ static class Vp9Decoder
+ {
+     // Bytes reserved per 8x8 block in the motion vector buffers.
+     private const int MvEntrySize = 16;
+
+     private static Decoder _decoder = new Decoder();
+
+     /// <summary>
+     /// Decodes one frame: reads the picture info, entropy tables and bitstream, runs the decoder,
+     /// then writes the decoded surface, the motion vectors and the backward update counts.
+     /// </summary>
+     /// <param name="device">Device notified when a frame was decoded</param>
+     /// <param name="rm">Provides the GPU memory manager and the surface cache</param>
+     /// <param name="state">Registers holding the offsets of all inputs and outputs</param>
+     public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state)
+     {
+         PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetPictureInfoOffset);
+         EntropyProbs entropy = rm.Gmm.DeviceRead<EntropyProbs>(state.SetVp9EntropyProbsOffset);
+
+         ISurface Rent(uint lumaOffset, uint chromaOffset, FrameSize size)
+         {
+             return rm.Cache.Get(_decoder, CodecId.Vp9, lumaOffset, chromaOffset, size.Width, size.Height);
+         }
+
+         // Index 3 is the surface being decoded; indices 0 to 2 are the references.
+         uint currentLumaOffset = state.SetSurfaceLumaOffset[3];
+         uint currentChromaOffset = state.SetSurfaceChromaOffset[3];
+
+         ISurface lastSurface = Rent(state.SetSurfaceLumaOffset[0], state.SetSurfaceChromaOffset[0], pictureInfo.LastFrameSize);
+         ISurface goldenSurface = Rent(state.SetSurfaceLumaOffset[1], state.SetSurfaceChromaOffset[1], pictureInfo.GoldenFrameSize);
+         ISurface altSurface = Rent(state.SetSurfaceLumaOffset[2], state.SetSurfaceChromaOffset[2], pictureInfo.AltFrameSize);
+         ISurface currentSurface = Rent(currentLumaOffset, currentChromaOffset, pictureInfo.CurrentFrameSize);
+
+         try
+         {
+             Vp9PictureInfo info = pictureInfo.Convert();
+
+             info.LastReference = lastSurface;
+             info.GoldenReference = goldenSurface;
+             info.AltReference = altSurface;
+
+             entropy.Convert(ref info.Entropy);
+
+             ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetBitstreamOffset, (int)pictureInfo.BitstreamSize);
+
+             ReadOnlySpan<Vp9MvRef> mvsIn = ReadOnlySpan<Vp9MvRef>.Empty;
+
+             if (info.UsePrevInFindMvRefs)
+             {
+                 mvsIn = GetMvsInput(rm.Gmm, pictureInfo.CurrentFrameSize, state.SetVp9LastFrameMvsOffset);
+             }
+
+             using var mvsRegion = rm.Gmm.GetWritableRegion(
+                 ExtendOffset(state.SetVp9CurrFrameMvsOffset),
+                 GetMvsBufferSize(pictureInfo.CurrentFrameSize));
+
+             Span<Vp9MvRef> mvsOut = MemoryMarshal.Cast<byte, Vp9MvRef>(mvsRegion.Memory.Span);
+
+             if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut))
+             {
+                 SurfaceWriter.Write(rm.Gmm, currentSurface, currentLumaOffset, currentChromaOffset);
+
+                 device.OnFrameDecoded(CodecId.Vp9, currentLumaOffset, currentChromaOffset);
+             }
+
+             WriteBackwardUpdates(rm.Gmm, state.SetVp9BackwardUpdatesOffset, ref info.BackwardUpdateCounts);
+         }
+         finally
+         {
+             // Always hand the surfaces back, even when reading or decoding throws,
+             // so the cache entries are not left rented forever.
+             rm.Cache.Put(lastSurface);
+             rm.Cache.Put(goldenSurface);
+             rm.Cache.Put(altSurface);
+             rm.Cache.Put(currentSurface);
+         }
+     }
+
+     // Size in bytes of a motion vector buffer for a frame of the given size (one entry per 8x8 block).
+     private static int GetMvsBufferSize(FrameSize size)
+     {
+         int miCols = BitUtils.DivRoundUp(size.Width, 8);
+         int miRows = BitUtils.DivRoundUp(size.Height, 8);
+
+         return miRows * miCols * MvEntrySize;
+     }
+
+     // Returns a read-only view over the motion vector buffer at the given offset.
+     private static ReadOnlySpan<Vp9MvRef> GetMvsInput(MemoryManager gmm, FrameSize size, uint offset)
+     {
+         return MemoryMarshal.Cast<byte, Vp9MvRef>(gmm.DeviceGetSpan(offset, GetMvsBufferSize(size)));
+     }
+
+     // Writes the backward update counts to guest memory at the given offset.
+     private static void WriteBackwardUpdates(MemoryManager gmm, uint offset, ref Vp9BackwardUpdates counts)
+     {
+         using var backwardUpdatesRegion = gmm.GetWritableRegion(ExtendOffset(offset), Unsafe.SizeOf<BackwardUpdates>());
+
+         ref var backwardUpdates = ref MemoryMarshal.Cast<byte, BackwardUpdates>(backwardUpdatesRegion.Memory.Span)[0];
+
+         backwardUpdates = new BackwardUpdates(ref counts);
+     }
+ }
+}
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs
index 2ad57d59..ed046fb5 100644
--- a/Ryujinx.Graphics.Texture/LayoutConverter.cs
+++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs
@@ -9,6 +9,90 @@ namespace Ryujinx.Graphics.Texture
{
private const int HostStrideAlignment = 4;
+ /// <summary>
+ /// Converts block linear texture data into a caller-provided linear buffer.
+ /// </summary>
+ /// <remarks>
+ /// The copy uses raw pointers without bounds checks, so <paramref name="dst"/> and
+ /// <paramref name="data"/> must be large enough for the given dimensions.
+ /// </remarks>
+ /// <param name="dst">Destination buffer that receives the linear rows</param>
+ /// <param name="width">Width of the image in pixels</param>
+ /// <param name="height">Height of the image in rows</param>
+ /// <param name="stride">Distance in bytes between the start of two destination rows</param>
+ /// <param name="bytesPerPixel">Bytes per pixel; must be 1, 2, 4, 8, 12 or 16</param>
+ /// <param name="gobBlocksInY">Value passed through to the block linear layout calculator</param>
+ /// <param name="data">Block linear source data</param>
+ /// <exception cref="NotSupportedException">Thrown when <paramref name="bytesPerPixel"/> is not supported</exception>
+ public static void ConvertBlockLinearToLinear(
+     Span<byte> dst,
+     int width,
+     int height,
+     int stride,
+     int bytesPerPixel,
+     int gobBlocksInY,
+     ReadOnlySpan<byte> data)
+ {
+     // Row sizes in bytes that can be copied in 16 and 64 byte chunks.
+     int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16);
+     int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);
+
+     // First pixel that has to be copied individually.
+     int xStart = strideTrunc / bytesPerPixel;
+
+     // Padding bytes at the end of each destination row.
+     int outStrideGap = stride - width * bytesPerPixel;
+
+     int alignment = GobStride / bytesPerPixel;
+
+     int wAligned = BitUtils.AlignUp(width, alignment);
+
+     BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);
+
+     unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
+     {
+         fixed (byte* outputPtr = output, dataPtr = data)
+         {
+             byte* outPtr = outputPtr;
+
+             for (int y = 0; y < height; y++)
+             {
+                 layoutConverter.SetY(y);
+
+                 // 64 bytes at a time: four 16-byte pieces gathered from the block linear layout.
+                 for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
+                 {
+                     byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset64(x);
+                     byte* offset2 = offset + 0x20;
+                     byte* offset3 = offset + 0x100;
+                     byte* offset4 = offset + 0x120;
+
+                     Vector128<byte> value = *(Vector128<byte>*)offset;
+                     Vector128<byte> value2 = *(Vector128<byte>*)offset2;
+                     Vector128<byte> value3 = *(Vector128<byte>*)offset3;
+                     Vector128<byte> value4 = *(Vector128<byte>*)offset4;
+
+                     *(Vector128<byte>*)outPtr = value;
+                     *(Vector128<byte>*)(outPtr + 16) = value2;
+                     *(Vector128<byte>*)(outPtr + 32) = value3;
+                     *(Vector128<byte>*)(outPtr + 48) = value4;
+                 }
+
+                 // Remaining 16-byte chunks.
+                 for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
+                 {
+                     byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset16(x);
+
+                     *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
+                 }
+
+                 // Remaining pixels, one at a time.
+                 for (int x = xStart; x < width; x++, outPtr += bytesPerPixel)
+                 {
+                     byte* offset = dataPtr + layoutConverter.GetOffset(x);
+
+                     *(T*)outPtr = *(T*)offset;
+                 }
+
+                 outPtr += outStrideGap;
+             }
+         }
+         return true;
+     }
+
+     // The switch expression needs a result, hence the dummy bool.
+     bool _ = bytesPerPixel switch
+     {
+         1 => Convert<byte>(dst, data),
+         2 => Convert<ushort>(dst, data),
+         4 => Convert<uint>(dst, data),
+         8 => Convert<ulong>(dst, data),
+         12 => Convert<Bpp12Pixel>(dst, data),
+         16 => Convert<Vector128<byte>>(dst, data),
+         _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
+     };
+ }
+
public static Span<byte> ConvertBlockLinearToLinear(
int width,
int height,
@@ -190,6 +274,90 @@ namespace Ryujinx.Graphics.Texture
return output;
}
+ /// <summary>
+ /// Converts linear texture data into a caller-provided block linear buffer.
+ /// </summary>
+ /// <remarks>
+ /// The copy uses raw pointers without bounds checks, so <paramref name="dst"/> and
+ /// <paramref name="data"/> must be large enough for the given dimensions.
+ /// </remarks>
+ /// <param name="dst">Destination buffer that receives the block linear data</param>
+ /// <param name="width">Width of the image in pixels</param>
+ /// <param name="height">Height of the image in rows</param>
+ /// <param name="stride">Distance in bytes between the start of two source rows</param>
+ /// <param name="bytesPerPixel">Bytes per pixel; must be 1, 2, 4, 8, 12 or 16</param>
+ /// <param name="gobBlocksInY">Value passed through to the block linear layout calculator</param>
+ /// <param name="data">Linear source data</param>
+ /// <exception cref="NotSupportedException">Thrown when <paramref name="bytesPerPixel"/> is not supported</exception>
+ public static void ConvertLinearToBlockLinear(
+     Span<byte> dst,
+     int width,
+     int height,
+     int stride,
+     int bytesPerPixel,
+     int gobBlocksInY,
+     ReadOnlySpan<byte> data)
+ {
+     // Row sizes in bytes that can be copied in 16 and 64 byte chunks.
+     int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16);
+     int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);
+
+     // First pixel that has to be copied individually.
+     int xStart = strideTrunc / bytesPerPixel;
+
+     // Padding bytes at the end of each source row.
+     int inStrideGap = stride - width * bytesPerPixel;
+
+     int alignment = GobStride / bytesPerPixel;
+
+     int wAligned = BitUtils.AlignUp(width, alignment);
+
+     BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);
+
+     unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
+     {
+         fixed (byte* outputPtr = output, dataPtr = data)
+         {
+             byte* inPtr = dataPtr;
+
+             for (int y = 0; y < height; y++)
+             {
+                 layoutConverter.SetY(y);
+
+                 // 64 bytes at a time: four 16-byte pieces scattered into the block linear layout.
+                 for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
+                 {
+                     byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x);
+                     byte* offset2 = offset + 0x20;
+                     byte* offset3 = offset + 0x100;
+                     byte* offset4 = offset + 0x120;
+
+                     Vector128<byte> value = *(Vector128<byte>*)inPtr;
+                     Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
+                     Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
+                     Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);
+
+                     *(Vector128<byte>*)offset = value;
+                     *(Vector128<byte>*)offset2 = value2;
+                     *(Vector128<byte>*)offset3 = value3;
+                     *(Vector128<byte>*)offset4 = value4;
+                 }
+
+                 // Remaining 16-byte chunks.
+                 for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
+                 {
+                     byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x);
+
+                     *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
+                 }
+
+                 // Remaining pixels, one at a time.
+                 for (int x = xStart; x < width; x++, inPtr += bytesPerPixel)
+                 {
+                     byte* offset = outputPtr + layoutConverter.GetOffset(x);
+
+                     *(T*)offset = *(T*)inPtr;
+                 }
+
+                 inPtr += inStrideGap;
+             }
+         }
+         return true;
+     }
+
+     // The switch expression needs a result, hence the dummy bool.
+     bool _ = bytesPerPixel switch
+     {
+         1 => Convert<byte>(dst, data),
+         2 => Convert<ushort>(dst, data),
+         4 => Convert<uint>(dst, data),
+         8 => Convert<ulong>(dst, data),
+         12 => Convert<Bpp12Pixel>(dst, data),
+         16 => Convert<Vector128<byte>>(dst, data),
+         _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
+     };
+ }
+
public static Span<byte> ConvertLinearToBlockLinear(
int width,
int height,
diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
index 6d283954..dd4b6e7f 100644
--- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs
+++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs
@@ -94,6 +94,19 @@ namespace Ryujinx.Graphics.Texture
}
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public int GetOffsetWithLineOffset64(int x)
+ {
+     // Linear layouts add x to the precomputed row part; otherwise the layout converter does the work.
+     return _isLinear
+         ? x + _yPart
+         : _layoutConverter.GetOffsetWithLineOffset64(x);
+ }
+
public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
{
if (_isLinear)
diff --git a/Ryujinx.Graphics.Vic/Blender.cs b/Ryujinx.Graphics.Vic/Blender.cs
new file mode 100644
index 00000000..f00b9093
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Blender.cs
@@ -0,0 +1,157 @@
+using Ryujinx.Graphics.Vic.Image;
+using Ryujinx.Graphics.Vic.Types;
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace Ryujinx.Graphics.Vic
+{
+ static class Blender
+ {
+ /// <summary>
+ /// Applies the slot color matrix and soft clamp to every pixel of the source surface,
+ /// and stores the result on the destination surface. Alpha is copied unmodified on this path.
+ /// </summary>
+ /// <param name="dst">Surface that receives the result (its size drives the loops)</param>
+ /// <param name="src">Surface to read the pixels from</param>
+ /// <param name="slot">Slot structure with the color matrix and clamp configuration</param>
+ public static void BlendOne(Surface dst, Surface src, ref SlotStruct slot)
+ {
+ // The vectorized implementation processes 4 pixels at a time,
+ // so it can only be used when the width is a multiple of 4.
+ bool canVectorize = Sse41.IsSupported && (dst.Width & 3) == 0;
+
+ if (canVectorize)
+ {
+ BlendOneSse41(dst, src, ref slot);
+ }
+ else
+ {
+ for (int row = 0; row < dst.Height; row++)
+ {
+ for (int col = 0; col < dst.Width; col++)
+ {
+ int srcR = src.GetR(col, row);
+ int srcG = src.GetG(col, row);
+ int srcB = src.GetB(col, row);
+
+ MatrixMultiply(ref slot.ColorMatrixStruct, srcR, srcG, srcB, out int red, out int green, out int blue);
+
+ red = Math.Clamp(red, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh);
+ green = Math.Clamp(green, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh);
+ blue = Math.Clamp(blue, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh);
+
+ dst.SetR(col, row, (ushort)red);
+ dst.SetG(col, row, (ushort)green);
+ dst.SetB(col, row, (ushort)blue);
+ dst.SetA(col, row, src.GetA(col, row));
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// SSE4.1 implementation of <see cref="BlendOne"/>, processing 4 pixels per iteration.
+ /// The destination width must be a multiple of 4.
+ /// </summary>
+ /// <param name="dst">Surface that receives the result (its size drives the loops)</param>
+ /// <param name="src">Surface to read the pixels from</param>
+ /// <param name="slot">Slot structure with the color matrix and clamp configuration</param>
+ private unsafe static void BlendOneSse41(Surface dst, Surface src, ref SlotStruct slot)
+ {
+ Debug.Assert((dst.Width & 3) == 0);
+
+ ref MatrixStruct mtx = ref slot.ColorMatrixStruct;
+
+ // Multiplier placed on the alpha lane of the third column: after the two right shifts
+ // done by MatrixMultiplySse41 (by MatrixRShift and then by 8), alpha comes out unchanged.
+ int one = 1 << (mtx.MatrixRShift + 8);
+
+ // Each vector holds one column of the matrix, the 4th lane is used for alpha.
+ Vector128<int> col1 = Vector128.Create(mtx.MatrixCoeff00, mtx.MatrixCoeff10, mtx.MatrixCoeff20, 0);
+ Vector128<int> col2 = Vector128.Create(mtx.MatrixCoeff01, mtx.MatrixCoeff11, mtx.MatrixCoeff21, 0);
+ Vector128<int> col3 = Vector128.Create(mtx.MatrixCoeff02, mtx.MatrixCoeff12, mtx.MatrixCoeff22, one);
+ Vector128<int> col4 = Vector128.Create(mtx.MatrixCoeff03, mtx.MatrixCoeff13, mtx.MatrixCoeff23, 0);
+ Vector128<int> rShift = Vector128.CreateScalar(mtx.MatrixRShift);
+ Vector128<ushort> clMin = Vector128.Create((ushort)slot.SlotConfig.SoftClampLow);
+ Vector128<ushort> clMax = Vector128.Create((ushort)slot.SlotConfig.SoftClampHigh);
+
+ fixed (Pixel* srcPtr = src.Data, dstPtr = dst.Data)
+ {
+ Pixel* ip = srcPtr;
+ Pixel* op = dstPtr;
+
+ // Rows advance by each surface's own width, columns are bounded by the destination width.
+ for (int y = 0; y < dst.Height; y++, ip += src.Width, op += dst.Width)
+ {
+ for (int x = 0; x < dst.Width; x += 4)
+ {
+ // Load 4 pixels, widening the four 16-bit components of each one to 32 bits.
+ Vector128<int> pixel1 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x));
+ Vector128<int> pixel2 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 1));
+ Vector128<int> pixel3 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 2));
+ Vector128<int> pixel4 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 3));
+
+ Vector128<ushort> pixel12, pixel34;
+
+ if (mtx.MatrixEnable)
+ {
+ // Transform each pixel, then narrow pairs of pixels back to 16 bits with unsigned saturation.
+ pixel12 = Sse41.PackUnsignedSaturate(
+ MatrixMultiplySse41(pixel1, col1, col2, col3, col4, rShift),
+ MatrixMultiplySse41(pixel2, col1, col2, col3, col4, rShift));
+ pixel34 = Sse41.PackUnsignedSaturate(
+ MatrixMultiplySse41(pixel3, col1, col2, col3, col4, rShift),
+ MatrixMultiplySse41(pixel4, col1, col2, col3, col4, rShift));
+ }
+ else
+ {
+ pixel12 = Sse41.PackUnsignedSaturate(pixel1, pixel2);
+ pixel34 = Sse41.PackUnsignedSaturate(pixel3, pixel4);
+ }
+
+ // Soft clamp. Note that this is applied to all 8 lanes, so alpha is clamped here too.
+ pixel12 = Sse41.Min(pixel12, clMax);
+ pixel34 = Sse41.Min(pixel34, clMax);
+ pixel12 = Sse41.Max(pixel12, clMin);
+ pixel34 = Sse41.Max(pixel34, clMin);
+
+ // Each store writes 2 pixels (16 bytes).
+ Sse2.Store((ushort*)(op + (uint)x + 0), pixel12);
+ Sse2.Store((ushort*)(op + (uint)x + 2), pixel34);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Multiplies a color by the 3x4 color matrix, or passes it through when the matrix is disabled.
+ /// The weighted sum is shifted right by the matrix shift amount, the 4th column
+ /// is added as offset, and the result is shifted right by 8.
+ /// </summary>
+ /// <param name="mtx">Color matrix structure</param>
+ /// <param name="x">First input component</param>
+ /// <param name="y">Second input component</param>
+ /// <param name="z">Third input component</param>
+ /// <param name="r">First output component</param>
+ /// <param name="g">Second output component</param>
+ /// <param name="b">Third output component</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void MatrixMultiply(ref MatrixStruct mtx, int x, int y, int z, out int r, out int g, out int b)
+ {
+ if (!mtx.MatrixEnable)
+ {
+ r = x;
+ g = y;
+ b = z;
+
+ return;
+ }
+
+ int sumR = x * mtx.MatrixCoeff00 + y * mtx.MatrixCoeff01 + z * mtx.MatrixCoeff02;
+ int sumG = x * mtx.MatrixCoeff10 + y * mtx.MatrixCoeff11 + z * mtx.MatrixCoeff12;
+ int sumB = x * mtx.MatrixCoeff20 + y * mtx.MatrixCoeff21 + z * mtx.MatrixCoeff22;
+
+ r = ((sumR >> mtx.MatrixRShift) + mtx.MatrixCoeff03) >> 8;
+ g = ((sumG >> mtx.MatrixRShift) + mtx.MatrixCoeff13) >> 8;
+ b = ((sumB >> mtx.MatrixRShift) + mtx.MatrixCoeff23) >> 8;
+ }
+
+ /// <summary>
+ /// Vectorized matrix multiplication of a single pixel.
+ /// Lanes 0 to 2 of the result hold the transformed color, lane 3 is computed from
+ /// the 4th input lane and the 4th lane of <paramref name="col3"/>.
+ /// </summary>
+ /// <param name="pixel">Pixel with one component per 32-bit lane</param>
+ /// <param name="col1">Coefficients multiplied by the first component</param>
+ /// <param name="col2">Coefficients multiplied by the second component</param>
+ /// <param name="col3">Coefficients multiplied by the third component (and by the 4th on lane 3)</param>
+ /// <param name="col4">Offsets added after the first shift</param>
+ /// <param name="rShift">Right shift amount applied to the sum of products</param>
+ /// <returns>The transformed pixel</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<int> MatrixMultiplySse41(
+ Vector128<int> pixel,
+ Vector128<int> col1,
+ Vector128<int> col2,
+ Vector128<int> col3,
+ Vector128<int> col4,
+ Vector128<int> rShift)
+ {
+ // Broadcast the components. The last shuffle yields (c2, c2, c2, c3),
+ // so lane 3 carries the 4th component rather than the third one.
+ Vector128<int> first = Sse2.Shuffle(pixel, 0);
+ Vector128<int> second = Sse2.Shuffle(pixel, 0x55);
+ Vector128<int> third = Sse2.Shuffle(pixel, 0xea);
+
+ Vector128<int> prod1 = Sse41.MultiplyLow(col1, first);
+ Vector128<int> prod2 = Sse41.MultiplyLow(col2, second);
+ Vector128<int> prod3 = Sse41.MultiplyLow(col3, third);
+
+ Vector128<int> sum = Sse2.Add(prod3, Sse2.Add(prod1, prod2));
+
+ Vector128<int> withOffset = Sse2.Add(Sse2.ShiftRightArithmetic(sum, rShift), col4);
+
+ return Sse2.ShiftRightArithmetic(withOffset, 8);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/BufferPool.cs b/Ryujinx.Graphics.Vic/Image/BufferPool.cs
new file mode 100644
index 00000000..932d3dc9
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/BufferPool.cs
@@ -0,0 +1,103 @@
+using System;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ /// <summary>
+ /// Small fixed capacity pool of reusable arrays.
+ /// Buffers are identified by the slot index returned when renting, which must be
+ /// passed back to <see cref="Return(int)"/> once the buffer is no longer needed.
+ /// </summary>
+ /// <typeparam name="T">Type of the elements on the pooled arrays</typeparam>
+ class BufferPool<T>
+ {
+ /// <summary>
+ /// Maximum number of buffers on the pool.
+ /// </summary>
+ private const int MaxBuffers = 4;
+
+ /// <summary>
+ /// Maximum size of a buffer that can be added on the pool.
+ /// If the required buffer is larger than this, it won't be
+ /// added to the pool to avoid long term high memory usage.
+ /// </summary>
+ private const int MaxBufferSize = 2048 * 1280;
+
+ private struct PoolItem
+ {
+ public bool InUse;
+ public T[] Buffer;
+ }
+
+ private readonly PoolItem[] _pool = new PoolItem[MaxBuffers];
+
+ /// <summary>
+ /// Rents a buffer with the exact size requested.
+ /// </summary>
+ /// <param name="length">Size of the buffer</param>
+ /// <param name="buffer">Span of the requested size</param>
+ /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
+ public int Rent(int length, out Span<T> buffer)
+ {
+ int index = RentMinimum(length, out T[] bufferArray);
+
+ buffer = new Span<T>(bufferArray).Slice(0, length);
+
+ return index;
+ }
+
+ /// <summary>
+ /// Rents a buffer with a size greater than or equal to the requested size.
+ /// The contents of the returned buffer are not cleared.
+ /// </summary>
+ /// <param name="length">Size of the buffer</param>
+ /// <param name="buffer">Array with a length greater than or equal to the requested length</param>
+ /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
+ public int RentMinimum(int length, out T[] buffer)
+ {
+ // The unsigned comparison also sends negative lengths to this path,
+ // where the allocation itself rejects them.
+ if ((uint)length > MaxBufferSize)
+ {
+ buffer = new T[length];
+ return -1;
+ }
+
+ // Try to find a buffer that is larger or the same size of the requested one.
+ // This will avoid an allocation.
+ for (int i = 0; i < MaxBuffers; i++)
+ {
+ ref PoolItem item = ref _pool[i];
+
+ if (!item.InUse && item.Buffer != null && item.Buffer.Length >= length)
+ {
+ buffer = item.Buffer;
+ item.InUse = true;
+ return i;
+ }
+ }
+
+ buffer = new T[length];
+
+ // Try to add the new buffer to the pool.
+ // Slots that hold no buffer at all are preferred, so that an idle buffer is only
+ // replaced when the pool is full. Otherwise the first idle slot is replaced.
+ int target = -1;
+
+ for (int i = 0; i < MaxBuffers; i++)
+ {
+ ref PoolItem item = ref _pool[i];
+
+ if (item.InUse)
+ {
+ continue;
+ }
+
+ if (item.Buffer == null)
+ {
+ target = i;
+ break;
+ }
+
+ if (target < 0)
+ {
+ target = i;
+ }
+ }
+
+ if (target >= 0)
+ {
+ ref PoolItem item = ref _pool[target];
+
+ item.Buffer = buffer;
+ item.InUse = true;
+ }
+
+ return target;
+ }
+
+ /// <summary>
+ /// Returns a buffer rented with <see cref="Rent(int, out Span{T})"/> or
+ /// <see cref="RentMinimum(int, out T[])"/> to the pool.
+ /// </summary>
+ /// <param name="index">Index of the buffer on the pool, negative values are ignored</param>
+ public void Return(int index)
+ {
+ if (index < 0)
+ {
+ return;
+ }
+
+ _pool[index].InUse = false;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/InputSurface.cs b/Ryujinx.Graphics.Vic/Image/InputSurface.cs
new file mode 100644
index 00000000..de003194
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/InputSurface.cs
@@ -0,0 +1,17 @@
+using System;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ /// <summary>
+ /// Raw planes of a surface read from memory, along with their dimensions.
+ /// </summary>
+ ref struct InputSurface
+ {
+ // Plane read from the luma offset.
+ public ReadOnlySpan<byte> Buffer0;
+ // Plane read from the chroma U offset (holds 2 bytes per element when the surface has exactly 2 planes).
+ public ReadOnlySpan<byte> Buffer1;
+ // Plane read from the chroma V offset, only filled for surfaces with more than 2 planes.
+ public ReadOnlySpan<byte> Buffer2;
+
+ // Luma plane dimensions.
+ public int Width;
+ public int Height;
+
+ // Chroma plane dimensions.
+ public int UvWidth;
+ public int UvHeight;
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/Pixel.cs b/Ryujinx.Graphics.Vic/Image/Pixel.cs
new file mode 100644
index 00000000..35f25d16
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/Pixel.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Vic.Image
+{
+ /// <summary>
+ /// Pixel of an intermediate surface, with four 16-bit components (8 bytes in total).
+ /// The surface reader and writer on this project convert 8-bit samples to and from
+ /// this format by shifting them by 2 bits.
+ /// </summary>
+ struct Pixel
+ {
+ public ushort R;
+ public ushort G;
+ public ushort B;
+ public ushort A;
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/Surface.cs b/Ryujinx.Graphics.Vic/Image/Surface.cs
new file mode 100644
index 00000000..03767f8a
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/Surface.cs
@@ -0,0 +1,46 @@
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ /// <summary>
+ /// Intermediate surface backed by a pixel array rented from a pool.
+ /// Disposing the surface returns the array to the pool.
+ /// </summary>
+ struct Surface : IDisposable
+ {
+ private readonly int _bufferIndex;
+
+ private readonly BufferPool<Pixel> _pool;
+
+ /// <summary>
+ /// Pixel storage, in row-major order with <see cref="Width"/> pixels per row.
+ /// The array may be longer than Width * Height.
+ /// </summary>
+ public Pixel[] Data { get; }
+
+ public int Width { get; }
+ public int Height { get; }
+
+ /// <summary>
+ /// Creates a new surface, renting the storage from the specified pool.
+ /// </summary>
+ /// <param name="pool">Pool to rent the pixel array from</param>
+ /// <param name="width">Width of the surface in pixels</param>
+ /// <param name="height">Height of the surface in pixels</param>
+ public Surface(BufferPool<Pixel> pool, int width, int height)
+ {
+ int index = pool.RentMinimum(width * height, out Pixel[] pixels);
+
+ _bufferIndex = index;
+ _pool = pool;
+ Data = pixels;
+ Width = width;
+ Height = height;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private int IndexOf(int x, int y) => y * Width + x;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ushort GetR(int x, int y) => Data[IndexOf(x, y)].R;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ushort GetG(int x, int y) => Data[IndexOf(x, y)].G;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ushort GetB(int x, int y) => Data[IndexOf(x, y)].B;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public ushort GetA(int x, int y) => Data[IndexOf(x, y)].A;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetR(int x, int y, ushort value) => Data[IndexOf(x, y)].R = value;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetG(int x, int y, ushort value) => Data[IndexOf(x, y)].G = value;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetB(int x, int y, ushort value) => Data[IndexOf(x, y)].B = value;
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void SetA(int x, int y, ushort value) => Data[IndexOf(x, y)].A = value;
+
+ /// <summary>
+ /// Returns the pixel array to the pool it was rented from.
+ /// </summary>
+ public void Dispose() => _pool.Return(_bufferIndex);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs b/Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs
new file mode 100644
index 00000000..10cdefe2
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs
@@ -0,0 +1,33 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Texture;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ /// <summary>
+ /// Helpers shared by the surface reader and writer.
+ /// </summary>
+ static class SurfaceCommon
+ {
+ /// <summary>
+ /// Gets the size in bytes of a line, aligned up to 256 bytes.
+ /// </summary>
+ public static int GetPitch(int width, int bytesPerPixel) => BitUtils.AlignUp(width * bytesPerPixel, 256);
+
+ /// <summary>
+ /// Gets the total size in bytes of a block linear surface with the given dimensions.
+ /// </summary>
+ public static int GetBlockLinearSize(int width, int height, int bytesPerPixel, int gobBlocksInY)
+ {
+ var sizeInfo = SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, gobBlocksInY, 1, 1);
+
+ return sizeInfo.TotalSize;
+ }
+
+ /// <summary>
+ /// Converts a 32-bit surface offset into an address by shifting it left by 8 bits.
+ /// </summary>
+ public static ulong ExtendOffset(uint offset) => (ulong)offset << 8;
+
+ /// <summary>
+ /// Converts an 8-bit sample into the intermediate format by shifting it left by 2 bits.
+ /// </summary>
+ public static ushort Upsample(byte value) => (ushort)(value << 2);
+
+ /// <summary>
+ /// Converts an intermediate sample back to 8 bits by shifting it right by 2 bits.
+ /// Bits above the low 8 of the shifted value are discarded.
+ /// </summary>
+ public static byte Downsample(ushort value) => (byte)(value >> 2);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs b/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs
new file mode 100644
index 00000000..ab591cd0
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs
@@ -0,0 +1,253 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Vic.Types;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ static class SurfaceReader
+ {
+ /// <summary>
+ /// Reads a slot surface from memory into a new intermediate surface.
+ /// For unsupported pixel formats an error is logged, and a surface with the luma
+ /// dimensions is returned without being filled.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="config">Slot surface configuration</param>
+ /// <param name="offsets">Offsets of the surface planes</param>
+ /// <returns>The surface that was read</returns>
+ public static Surface Read(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets)
+ {
+ if (config.SlotPixelFormat == PixelFormat.Y8___V8U8_N420)
+ {
+ return ReadNv12(rm, ref config, ref offsets);
+ }
+
+ Logger.PrintError(LogClass.Vic, $"Unsupported pixel format \"{config.SlotPixelFormat}\".");
+
+ int lumaWidth = config.SlotLumaWidth + 1;
+ int lumaHeight = config.SlotLumaHeight + 1;
+
+ return new Surface(rm.SurfacePool, lumaWidth, lumaHeight);
+ }
+
+ /// <summary>
+ /// Reads a surface with one luma plane and one interleaved 2 bytes per element chroma plane.
+ /// On the output, R receives the luma sample, G and B receive the first and second byte of
+ /// the chroma pair (shared by 2x2 luma samples), all shifted left by 2 bits, and A is opaque.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="config">Slot surface configuration</param>
+ /// <param name="offsets">Offsets of the surface planes</param>
+ /// <returns>The surface that was read</returns>
+ private unsafe static Surface ReadNv12(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets)
+ {
+ InputSurface input = ReadSurface(rm.Gmm, ref config, ref offsets, 1, 2);
+
+ int width = input.Width;
+ int height = input.Height;
+
+ int yStride = GetPitch(width, 1);
+ int uvStride = GetPitch(input.UvWidth, 2);
+
+ Surface output = new Surface(rm.SurfacePool, width, height);
+
+ if (Sse41.IsSupported)
+ {
+ // Reorders each group of 4 bytes from (Y, 0, C0, C1) to (Y, C0, C1, 0).
+ Vector128<byte> shufMask = Vector128.Create(
+ (byte)0, (byte)2, (byte)3, (byte)1,
+ (byte)4, (byte)6, (byte)7, (byte)5,
+ (byte)8, (byte)10, (byte)11, (byte)9,
+ (byte)12, (byte)14, (byte)15, (byte)13);
+ // Sets the alpha component (highest 16 bits of each pixel) to 0xff before the upsample shift.
+ Vector128<short> alphaMask = Vector128.Create(0xffUL << 48).AsInt16();
+
+ int yStrideGap = yStride - width;
+
+ int widthTrunc = width & ~0xf;
+
+ fixed (Pixel* dstPtr = output.Data)
+ {
+ Pixel* op = dstPtr;
+
+ fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
+ {
+ byte* i0p = src0Ptr;
+
+ for (int y = 0; y < height; y++)
+ {
+ // Every chroma line is shared by two luma lines.
+ byte* i1p = src1Ptr + (y >> 1) * uvStride;
+
+ int x = 0;
+
+ // 16 pixels per iteration: 16 luma bytes and 8 chroma pairs (16 bytes).
+ for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
+ {
+ Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
+ Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);
+
+ Vector128<byte> uv = Sse2.LoadVector128(i1p);
+
+ // Duplicate each chroma pair, as it is shared by two horizontally adjacent pixels.
+ Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
+ Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());
+
+ Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
+ Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
+ Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
+ Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);
+
+ rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
+ rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
+ rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
+ rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();
+
+ // Widen the 8-bit components to 16 bits, each vector now holds 2 pixels.
+ Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
+ Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
+ Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
+ Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
+ Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
+ Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
+ Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
+ Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));
+
+ rgba16_0 = Sse2.Or(rgba16_0, alphaMask);
+ rgba16_1 = Sse2.Or(rgba16_1, alphaMask);
+ rgba16_2 = Sse2.Or(rgba16_2, alphaMask);
+ rgba16_3 = Sse2.Or(rgba16_3, alphaMask);
+ rgba16_4 = Sse2.Or(rgba16_4, alphaMask);
+ rgba16_5 = Sse2.Or(rgba16_5, alphaMask);
+ rgba16_6 = Sse2.Or(rgba16_6, alphaMask);
+ rgba16_7 = Sse2.Or(rgba16_7, alphaMask);
+
+ // Same operation as Upsample, applied to all the components at once.
+ rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
+ rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
+ rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
+ rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2);
+ rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2);
+ rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2);
+ rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2);
+ rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2);
+
+ Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
+ Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
+ Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
+ Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
+ Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
+ Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
+ Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
+ Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
+ }
+
+ // Remaining pixels of the line. This always starts on an even X (multiple of 16).
+ for (; x < width; x++)
+ {
+ Pixel* px = op + (uint)x;
+
+ px->R = Upsample(*i0p++);
+ px->G = Upsample(*i1p);
+ px->B = Upsample(*(i1p + 1));
+ px->A = 0x3ff;
+
+ // Pixels 2n and 2n + 1 share the same chroma pair (like on the non-vectorized path),
+ // so only move to the next pair after the odd pixel was written.
+ if ((x & 1) != 0)
+ {
+ i1p += 2;
+ }
+ }
+
+ op += width;
+ i0p += yStrideGap;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int y = 0; y < height; y++)
+ {
+ int uvBase = (y >> 1) * uvStride;
+
+ for (int x = 0; x < width; x++)
+ {
+ output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));
+
+ // Byte offset of the chroma pair shared by pixels (x & ~1) and (x | 1).
+ int uvOffs = uvBase + (x & ~1);
+
+ output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
+ output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
+ output.SetA(x, y, 0x3ff);
+ }
+ }
+ }
+
+ return output;
+ }
+
+ /// <summary>
+ /// Copies the high 64 bits of the vector to its low 64 bits (the high half is kept as is).
+ /// </summary>
+ /// <param name="value">Input vector</param>
+ /// <returns>Vector with both halves equal to the high half of the input</returns>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<byte> HighToLow(Vector128<byte> value)
+ {
+ Vector128<float> bits = value.AsSingle();
+
+ return Sse.MoveHighToLow(bits, bits).AsByte();
+ }
+
+ /// <summary>
+ /// Reads the planes of a slot surface from memory.
+ /// </summary>
+ /// <param name="gmm">Memory manager used to read the planes</param>
+ /// <param name="config">Slot surface configuration (dimensions and memory layout)</param>
+ /// <param name="offsets">Offsets of the surface planes</param>
+ /// <param name="bytesPerPixel">Bytes per element of the first plane</param>
+ /// <param name="planes">Number of planes to read (up to 3)</param>
+ /// <returns>The planes that were read, and their dimensions</returns>
+ private static InputSurface ReadSurface(
+ MemoryManager gmm,
+ ref SlotSurfaceConfig config,
+ ref PlaneOffsets offsets,
+ int bytesPerPixel,
+ int planes)
+ {
+ bool isLinear = config.SlotBlkKind == 0;
+
+ int gobsInY = 1 << config.SlotBlkHeight;
+
+ // The dimensions are stored minus one.
+ int lumaWidth = config.SlotLumaWidth + 1;
+ int lumaHeight = config.SlotLumaHeight + 1;
+
+ int chromaWidth = config.SlotChromaWidth + 1;
+ int chromaHeight = config.SlotChromaHeight + 1;
+
+ InputSurface result = new InputSurface
+ {
+ Width = lumaWidth,
+ Height = lumaHeight,
+ UvWidth = chromaWidth,
+ UvHeight = chromaHeight
+ };
+
+ if (planes >= 1)
+ {
+ result.Buffer0 = ReadBuffer(gmm, offsets.LumaOffset, isLinear, lumaWidth, lumaHeight, bytesPerPixel, gobsInY);
+ }
+
+ if (planes >= 2)
+ {
+ // With exactly 2 planes, the second one has 2 bytes per element.
+ int chromaBpp = planes == 2 ? 2 : 1;
+
+ result.Buffer1 = ReadBuffer(gmm, offsets.ChromaUOffset, isLinear, chromaWidth, chromaHeight, chromaBpp, gobsInY);
+ }
+
+ if (planes >= 3)
+ {
+ result.Buffer2 = ReadBuffer(gmm, offsets.ChromaVOffset, isLinear, chromaWidth, chromaHeight, 1, gobsInY);
+ }
+
+ return result;
+ }
+
+ /// <summary>
+ /// Reads a plane from memory, converting it to linear if it is block linear.
+ /// The returned data has a line size aligned to 256 bytes.
+ /// </summary>
+ /// <param name="gmm">Memory manager used to read the plane</param>
+ /// <param name="offset">Offset of the plane</param>
+ /// <param name="linear">True if the plane is linear in memory, false for block linear</param>
+ /// <param name="width">Width of the plane</param>
+ /// <param name="height">Height of the plane</param>
+ /// <param name="bytesPerPixel">Bytes per element of the plane</param>
+ /// <param name="gobBlocksInY">Block height for the block linear layout</param>
+ /// <returns>The plane data in linear layout</returns>
+ private static ReadOnlySpan<byte> ReadBuffer(
+ MemoryManager gmm,
+ uint offset,
+ bool linear,
+ int width,
+ int height,
+ int bytesPerPixel,
+ int gobBlocksInY)
+ {
+ int pitch = GetPitch(width, bytesPerPixel);
+
+ if (!linear)
+ {
+ return ReadBuffer(gmm, offset, width, height, pitch, bytesPerPixel, gobBlocksInY);
+ }
+
+ return gmm.GetSpan(ExtendOffset(offset), pitch * height);
+ }
+
+ /// <summary>
+ /// Reads a block linear plane from memory and converts it to linear on a newly allocated buffer.
+ /// </summary>
+ /// <param name="gmm">Memory manager used to read the plane</param>
+ /// <param name="offset">Offset of the plane</param>
+ /// <param name="width">Width of the plane</param>
+ /// <param name="height">Height of the plane</param>
+ /// <param name="dstStride">Line size in bytes of the linear output</param>
+ /// <param name="bytesPerPixel">Bytes per element of the plane</param>
+ /// <param name="gobBlocksInY">Block height for the block linear layout</param>
+ /// <returns>The plane data in linear layout</returns>
+ private static ReadOnlySpan<byte> ReadBuffer(
+ MemoryManager gmm,
+ uint offset,
+ int width,
+ int height,
+ int dstStride,
+ int bytesPerPixel,
+ int gobBlocksInY)
+ {
+ int blockLinearSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);
+
+ ReadOnlySpan<byte> blockLinearData = gmm.GetSpan(ExtendOffset(offset), blockLinearSize);
+
+ Span<byte> linearData = new byte[dstStride * height];
+
+ LayoutConverter.ConvertBlockLinearToLinear(linearData, width, height, dstStride, bytesPerPixel, gobBlocksInY, blockLinearData);
+
+ return linearData;
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs
new file mode 100644
index 00000000..cab1ec80
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs
@@ -0,0 +1,361 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Vic.Types;
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
+
+namespace Ryujinx.Graphics.Vic.Image
+{
+ class SurfaceWriter
+ {
+ /// <summary>
+ /// Writes an intermediate surface to memory using the output pixel format.
+ /// An error is logged and nothing is written for unsupported pixel formats.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="input">Surface to be written</param>
+ /// <param name="config">Output surface configuration</param>
+ /// <param name="offsets">Offsets of the output planes</param>
+ public static void Write(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
+ {
+ if (config.OutPixelFormat == PixelFormat.A8B8G8R8)
+ {
+ WriteA8B8G8R8(rm, input, ref config, ref offsets);
+ }
+ else if (config.OutPixelFormat == PixelFormat.Y8___V8U8_N420)
+ {
+ WriteNv12(rm, input, ref config, ref offsets);
+ }
+ else
+ {
+ Logger.PrintError(LogClass.Vic, $"Unsupported pixel format \"{config.OutPixelFormat}\".");
+ }
+ }
+
+ /// <summary>
+ /// Writes the surface as 4 bytes per pixel, with the R, G, B and A components stored in this order.
+ /// Each 16-bit component is shifted right by 2 bits and narrowed to 8 bits.
+ /// The dimensions of the input surface are used, the output configuration only supplies the memory layout.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="input">Surface to be written</param>
+ /// <param name="config">Output surface configuration</param>
+ /// <param name="offsets">Offsets of the output planes (only the luma offset is used)</param>
+ private unsafe static void WriteA8B8G8R8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
+ {
+ int width = input.Width;
+ int height = input.Height;
+ int stride = GetPitch(width, 4);
+
+ int dstIndex = rm.BufferPool.Rent(height * stride, out Span<byte> dst);
+
+ if (Sse2.IsSupported)
+ {
+ int widthTrunc = width & ~7;
+ // Padding bytes at the end of each output line, they are skipped without being written.
+ int strideGap = stride - width * 4;
+
+ fixed (Pixel* srcPtr = input.Data)
+ {
+ Pixel* ip = srcPtr;
+
+ fixed (byte* dstPtr = dst)
+ {
+ byte* op = dstPtr;
+
+ for (int y = 0; y < height; y++, ip += input.Width)
+ {
+ int x = 0;
+
+ // 8 pixels per iteration, each vector load holds 2 pixels.
+ for (; x < widthTrunc; x += 8)
+ {
+ Vector128<ushort> pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x));
+ Vector128<ushort> pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2));
+ Vector128<ushort> pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4));
+ Vector128<ushort> pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6));
+
+ // Same shift as Downsample, done per 16-bit lane.
+ pixel12 = Sse2.ShiftRightLogical(pixel12, 2);
+ pixel34 = Sse2.ShiftRightLogical(pixel34, 2);
+ pixel56 = Sse2.ShiftRightLogical(pixel56, 2);
+ pixel78 = Sse2.ShiftRightLogical(pixel78, 2);
+
+ // Narrow to bytes. Unlike the cast on Downsample, this saturates values above 255.
+ Vector128<byte> pixel1234 = Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16());
+ Vector128<byte> pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16());
+
+ Sse2.Store(op + 0x00, pixel1234);
+ Sse2.Store(op + 0x10, pixel5678);
+
+ op += 0x20;
+ }
+
+ // Remaining pixels of the line.
+ for (; x < width; x++)
+ {
+ Pixel* px = ip + (uint)x;
+
+ *(op + 0) = Downsample(px->R);
+ *(op + 1) = Downsample(px->G);
+ *(op + 2) = Downsample(px->B);
+ *(op + 3) = Downsample(px->A);
+
+ op += 4;
+ }
+
+ op += strideGap;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int y = 0; y < height; y++)
+ {
+ int baseOffs = y * stride;
+
+ for (int x = 0; x < width; x++)
+ {
+ int offs = baseOffs + x * 4;
+
+ dst[offs + 0] = Downsample(input.GetR(x, y));
+ dst[offs + 1] = Downsample(input.GetG(x, y));
+ dst[offs + 2] = Downsample(input.GetB(x, y));
+ dst[offs + 3] = Downsample(input.GetA(x, y));
+ }
+ }
+ }
+
+ bool outLinear = config.OutBlkKind == 0;
+
+ int gobBlocksInY = 1 << config.OutBlkHeight;
+
+ WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY);
+
+ rm.BufferPool.Return(dstIndex);
+ }
+
+ /// <summary>
+ /// Writes the surface as one luma plane and one interleaved 2 bytes per element chroma plane.
+ /// Luma is taken from the R component of every pixel. Chroma is taken from the G and B components
+ /// of the pixels on even columns of even lines. All components are shifted right by 2 bits.
+ /// The planes use the output dimensions, but only the region that is also covered by the input is filled.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="input">Surface to be written</param>
+ /// <param name="config">Output surface configuration</param>
+ /// <param name="offsets">Offsets of the output planes</param>
+ private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
+ {
+ int gobBlocksInY = 1 << config.OutBlkHeight;
+
+ bool outLinear = config.OutBlkKind == 0;
+
+ int width = Math.Min(config.OutLumaWidth + 1, input.Width);
+ int height = Math.Min(config.OutLumaHeight + 1, input.Height);
+ int yStride = GetPitch(config.OutLumaWidth + 1, 1);
+
+ int dstYIndex = rm.BufferPool.Rent((config.OutLumaHeight + 1) * yStride, out Span<byte> dstY);
+
+ if (Sse41.IsSupported)
+ {
+ // Keeps only the lowest 16 bits (R component) of each pixel.
+ Vector128<ushort> mask = Vector128.Create(0xffffUL).AsUInt16();
+
+ int widthTrunc = width & ~0xf;
+ int strideGap = yStride - width;
+
+ fixed (Pixel* srcPtr = input.Data)
+ {
+ Pixel* ip = srcPtr;
+
+ fixed (byte* dstPtr = dstY)
+ {
+ byte* op = dstPtr;
+
+ for (int y = 0; y < height; y++, ip += input.Width)
+ {
+ int x = 0;
+
+ // 16 pixels per iteration, each vector load holds 2 pixels.
+ for (; x < widthTrunc; x += 16)
+ {
+ byte* baseOffset = (byte*)(ip + (ulong)(uint)x);
+
+ Vector128<ushort> pixelp1 = Sse2.LoadVector128((ushort*)baseOffset);
+ Vector128<ushort> pixelp2 = Sse2.LoadVector128((ushort*)(baseOffset + 0x10));
+ Vector128<ushort> pixelp3 = Sse2.LoadVector128((ushort*)(baseOffset + 0x20));
+ Vector128<ushort> pixelp4 = Sse2.LoadVector128((ushort*)(baseOffset + 0x30));
+ Vector128<ushort> pixelp5 = Sse2.LoadVector128((ushort*)(baseOffset + 0x40));
+ Vector128<ushort> pixelp6 = Sse2.LoadVector128((ushort*)(baseOffset + 0x50));
+ Vector128<ushort> pixelp7 = Sse2.LoadVector128((ushort*)(baseOffset + 0x60));
+ Vector128<ushort> pixelp8 = Sse2.LoadVector128((ushort*)(baseOffset + 0x70));
+
+ pixelp1 = Sse2.And(pixelp1, mask);
+ pixelp2 = Sse2.And(pixelp2, mask);
+ pixelp3 = Sse2.And(pixelp3, mask);
+ pixelp4 = Sse2.And(pixelp4, mask);
+ pixelp5 = Sse2.And(pixelp5, mask);
+ pixelp6 = Sse2.And(pixelp6, mask);
+ pixelp7 = Sse2.And(pixelp7, mask);
+ pixelp8 = Sse2.And(pixelp8, mask);
+
+ // Two rounds of 32 to 16 bits packing compact the 16 R values into 2 vectors.
+ Vector128<ushort> pixelq1 = Sse41.PackUnsignedSaturate(pixelp1.AsInt32(), pixelp2.AsInt32());
+ Vector128<ushort> pixelq2 = Sse41.PackUnsignedSaturate(pixelp3.AsInt32(), pixelp4.AsInt32());
+ Vector128<ushort> pixelq3 = Sse41.PackUnsignedSaturate(pixelp5.AsInt32(), pixelp6.AsInt32());
+ Vector128<ushort> pixelq4 = Sse41.PackUnsignedSaturate(pixelp7.AsInt32(), pixelp8.AsInt32());
+
+ pixelq1 = Sse41.PackUnsignedSaturate(pixelq1.AsInt32(), pixelq2.AsInt32());
+ pixelq2 = Sse41.PackUnsignedSaturate(pixelq3.AsInt32(), pixelq4.AsInt32());
+
+ pixelq1 = Sse2.ShiftRightLogical(pixelq1, 2);
+ pixelq2 = Sse2.ShiftRightLogical(pixelq2, 2);
+
+ Vector128<byte> pixel = Sse2.PackUnsignedSaturate(pixelq1.AsInt16(), pixelq2.AsInt16());
+
+ Sse2.Store(op, pixel);
+
+ op += 0x10;
+ }
+
+ // Remaining pixels of the line.
+ for (; x < width; x++)
+ {
+ Pixel* px = ip + (uint)x;
+
+ *op++ = Downsample(px->R);
+ }
+
+ op += strideGap;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int y = 0; y < height; y++)
+ {
+ for (int x = 0; x < width; x++)
+ {
+ dstY[y * yStride + x] = Downsample(input.GetR(x, y));
+ }
+ }
+ }
+
+ WriteBuffer(
+ rm,
+ dstY,
+ offsets.LumaOffset,
+ outLinear,
+ config.OutLumaWidth + 1,
+ config.OutLumaHeight + 1,
+ 1,
+ gobBlocksInY);
+
+ rm.BufferPool.Return(dstYIndex);
+
+ int uvWidth = Math.Min(config.OutChromaWidth + 1, (width + 1) >> 1);
+ int uvHeight = Math.Min(config.OutChromaHeight + 1, (height + 1) >> 1);
+ int uvStride = GetPitch(config.OutChromaWidth + 1, 2);
+
+ int dstUvIndex = rm.BufferPool.Rent((config.OutChromaHeight + 1) * uvStride, out Span<byte> dstUv);
+
+ if (Sse2.IsSupported)
+ {
+ int widthTrunc = uvWidth & ~7;
+ int strideGap = uvStride - uvWidth * 2;
+
+ fixed (Pixel* srcPtr = input.Data)
+ {
+ Pixel* ip = srcPtr;
+
+ fixed (byte* dstPtr = dstUv)
+ {
+ byte* op = dstPtr;
+
+ // Each chroma line is taken from every other input line.
+ for (int y = 0; y < uvHeight; y++, ip += input.Width * 2)
+ {
+ int x = 0;
+
+ // 8 chroma pairs per iteration, taken from every other pixel (16 bytes apart).
+ for (; x < widthTrunc; x += 8)
+ {
+ byte* baseOffset = (byte*)ip + (ulong)(uint)x * 16;
+
+ // The 32 bits at byte offset 2 of a pixel are its G and B components.
+ Vector128<uint> pixel1 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x02));
+ Vector128<uint> pixel2 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x12));
+ Vector128<uint> pixel3 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x22));
+ Vector128<uint> pixel4 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x32));
+ Vector128<uint> pixel5 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x42));
+ Vector128<uint> pixel6 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x52));
+ Vector128<uint> pixel7 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x62));
+ Vector128<uint> pixel8 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x72));
+
+ Vector128<uint> pixel12 = Sse2.UnpackLow(pixel1, pixel2);
+ Vector128<uint> pixel34 = Sse2.UnpackLow(pixel3, pixel4);
+ Vector128<uint> pixel56 = Sse2.UnpackLow(pixel5, pixel6);
+ Vector128<uint> pixel78 = Sse2.UnpackLow(pixel7, pixel8);
+
+ Vector128<ushort> pixel1234 = Sse2.UnpackLow(pixel12.AsUInt64(), pixel34.AsUInt64()).AsUInt16();
+ Vector128<ushort> pixel5678 = Sse2.UnpackLow(pixel56.AsUInt64(), pixel78.AsUInt64()).AsUInt16();
+
+ // The shift must be done per 16-bit lane. With 64-bit lanes, the 2 lowest bits of each
+ // component would end up on the highest bits of the previous one, and the saturating
+ // pack below would then turn that component into 0 or 255.
+ pixel1234 = Sse2.ShiftRightLogical(pixel1234, 2);
+ pixel5678 = Sse2.ShiftRightLogical(pixel5678, 2);
+
+ Vector128<byte> pixel = Sse2.PackUnsignedSaturate(pixel1234.AsInt16(), pixel5678.AsInt16());
+
+ Sse2.Store(op, pixel);
+
+ op += 0x10;
+ }
+
+ // Remaining chroma pairs of the line.
+ for (; x < uvWidth; x++)
+ {
+ Pixel* px = ip + (uint)(x << 1);
+
+ *op++ = Downsample(px->G);
+ *op++ = Downsample(px->B);
+ }
+
+ op += strideGap;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (int y = 0; y < uvHeight; y++)
+ {
+ for (int x = 0; x < uvWidth; x++)
+ {
+ int xx = x << 1;
+ int yy = y << 1;
+
+ int uvOffs = y * uvStride + xx;
+
+ dstUv[uvOffs + 0] = Downsample(input.GetG(xx, yy));
+ dstUv[uvOffs + 1] = Downsample(input.GetB(xx, yy));
+ }
+ }
+ }
+
+ WriteBuffer(
+ rm,
+ dstUv,
+ offsets.ChromaUOffset,
+ outLinear,
+ config.OutChromaWidth + 1,
+ config.OutChromaHeight + 1, 2,
+ gobBlocksInY);
+
+ rm.BufferPool.Return(dstUvIndex);
+ }
+
+ /// <summary>
+ /// Writes a linear plane to memory, converting it to block linear first if needed.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="src">Plane data in linear layout</param>
+ /// <param name="offset">Offset of the plane</param>
+ /// <param name="linear">True to write the data as is, false to convert it to block linear</param>
+ /// <param name="width">Width of the plane</param>
+ /// <param name="height">Height of the plane</param>
+ /// <param name="bytesPerPixel">Bytes per element of the plane</param>
+ /// <param name="gobBlocksInY">Block height for the block linear layout</param>
+ private static void WriteBuffer(
+ ResourceManager rm,
+ ReadOnlySpan<byte> src,
+ uint offset,
+ bool linear,
+ int width,
+ int height,
+ int bytesPerPixel,
+ int gobBlocksInY)
+ {
+ if (!linear)
+ {
+ WriteBuffer(rm, src, offset, width, height, bytesPerPixel, gobBlocksInY);
+ }
+ else
+ {
+ rm.Gmm.Write(ExtendOffset(offset), src);
+ }
+ }
+
+ /// <summary>
+ /// Converts a linear plane to block linear using a pooled buffer, and writes it to memory.
+ /// </summary>
+ /// <param name="rm">Resource manager with the memory manager and pools</param>
+ /// <param name="src">Plane data in linear layout, with lines aligned to 256 bytes</param>
+ /// <param name="offset">Offset of the plane</param>
+ /// <param name="width">Width of the plane</param>
+ /// <param name="height">Height of the plane</param>
+ /// <param name="bytesPerPixel">Bytes per element of the plane</param>
+ /// <param name="gobBlocksInY">Block height for the block linear layout</param>
+ private static void WriteBuffer(
+ ResourceManager rm,
+ ReadOnlySpan<byte> src,
+ uint offset,
+ int width,
+ int height,
+ int bytesPerPixel,
+ int gobBlocksInY)
+ {
+ int blockLinearSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);
+ int linearStride = GetPitch(width, bytesPerPixel);
+
+ int poolIndex = rm.BufferPool.Rent(blockLinearSize, out Span<byte> blockLinearData);
+
+ LayoutConverter.ConvertLinearToBlockLinear(blockLinearData, width, height, linearStride, bytesPerPixel, gobBlocksInY, src);
+
+ rm.Gmm.Write(ExtendOffset(offset), blockLinearData);
+
+ rm.BufferPool.Return(poolIndex);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/ResourceManager.cs b/Ryujinx.Graphics.Vic/ResourceManager.cs
new file mode 100644
index 00000000..036b30b6
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/ResourceManager.cs
@@ -0,0 +1,19 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Vic.Image;
+
+namespace Ryujinx.Graphics.Vic
+{
+ /// <summary>
+ /// Groups the memory manager and the buffer pools used by the surface reader and writer.
+ /// </summary>
+ struct ResourceManager
+ {
+ public MemoryManager Gmm { get; }
+ public BufferPool<Pixel> SurfacePool { get; }
+ public BufferPool<byte> BufferPool { get; }
+
+ /// <summary>
+ /// Creates a new resource manager.
+ /// </summary>
+ /// <param name="gmm">Memory manager used to read and write surfaces</param>
+ /// <param name="surfacePool">Pool of pixel arrays for intermediate surfaces</param>
+ /// <param name="bufferPool">Pool of byte arrays for temporary plane data</param>
+ public ResourceManager(MemoryManager gmm, BufferPool<Pixel> surfacePool, BufferPool<byte> bufferPool)
+ {
+ (Gmm, SurfacePool, BufferPool) = (gmm, surfacePool, bufferPool);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj b/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj
new file mode 100644
index 00000000..f072fa36
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj
@@ -0,0 +1,23 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ </PropertyGroup>
+
+ <!-- Unsafe code is enabled on both configurations, it is needed by the pointer based (unsafe) methods of this project. -->
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Host1x\Ryujinx.Graphics.Host1x.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs b/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs
new file mode 100644
index 00000000..06d0f006
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs
@@ -0,0 +1,39 @@
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Bitfield extraction helpers.
+ /// The bit position is always wrapped to the size of the value (32 or 64 bits),
+ /// and fields can be at most 32 bits long.
+ /// </summary>
+ static class BitfieldExtensions
+ {
+ /// <summary>
+ /// Builds a mask with the lowest <paramref name="length"/> bits set (for lengths from 1 to 32).
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static int LowMask(int length)
+ {
+ return (int)(uint.MaxValue >> (32 - length));
+ }
+
+ /// <summary>
+ /// Tests a single bit of a 32-bit value.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool Extract(this int value, int lsb)
+ {
+ int bit = (value >> (lsb & 0x1f)) & 1;
+
+ return bit != 0;
+ }
+
+ /// <summary>
+ /// Extracts an unsigned field from a 32-bit value.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Extract(this int value, int lsb, int length)
+ {
+ int shifted = value >> (lsb & 0x1f);
+
+ return shifted & LowMask(length);
+ }
+
+ /// <summary>
+ /// Tests a single bit of a 64-bit value.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool Extract(this long value, int lsb)
+ {
+ int bit = (int)(value >> (lsb & 0x3f)) & 1;
+
+ return bit != 0;
+ }
+
+ /// <summary>
+ /// Extracts an unsigned field from a 64-bit value.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int Extract(this long value, int lsb, int length)
+ {
+ int shifted = (int)(value >> (lsb & 0x3f));
+
+ return shifted & LowMask(length);
+ }
+
+ /// <summary>
+ /// Extracts a sign extended field from a 64-bit value.
+ /// </summary>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int ExtractSx(this long value, int lsb, int length)
+ {
+ int start = lsb & 0x3f;
+
+ // Move the field to the top of the value, then shift it back down with sign extension.
+ long aligned = value << (64 - (start + length));
+
+ return (int)(aligned >> (64 - length));
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs b/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs
new file mode 100644
index 00000000..fc5d315e
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs
@@ -0,0 +1,27 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words; embedded in <see cref="SlotStruct"/> as its blending parameters.
+ /// </summary>
+ /// <remarks>
+ /// Every property reads its bits through the <see cref="BitfieldExtensions"/> Extract helpers.
+ /// Those helpers mask the bit position with 0x3f, so positions of 64 and above used with <c>_word1</c>
+ /// address bit (position - 64) of that word.
+ /// Nothing in this type assigns the words.
+ /// NOTE(review): presumably instances are produced by reinterpreting raw memory, confirm against the callers.
+ /// </remarks>
+ struct BlendingSlotStruct
+ {
+ // Bits 0..63 of the structure.
+ private long _word0;
+ // Bits 64..127 of the structure.
+ private long _word1;
+
+ public int AlphaK1 => _word0.Extract(0, 10);
+ public int AlphaK2 => _word0.Extract(16, 10);
+ public int SrcFactCMatchSelect => _word0.Extract(32, 3);
+ public int DstFactCMatchSelect => _word0.Extract(36, 3);
+ public int SrcFactAMatchSelect => _word0.Extract(40, 3);
+ public int DstFactAMatchSelect => _word0.Extract(44, 3);
+ public int OverrideR => _word1.Extract(66, 10);
+ public int OverrideG => _word1.Extract(76, 10);
+ public int OverrideB => _word1.Extract(86, 10);
+ public int OverrideA => _word1.Extract(96, 10);
+ public bool UseOverrideR => _word1.Extract(108);
+ public bool UseOverrideG => _word1.Extract(109);
+ public bool UseOverrideB => _word1.Extract(110);
+ public bool UseOverrideA => _word1.Extract(111);
+ public bool MaskR => _word1.Extract(112);
+ public bool MaskG => _word1.Extract(113);
+ public bool MaskB => _word1.Extract(114);
+ public bool MaskA => _word1.Extract(115);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs b/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs
new file mode 100644
index 00000000..21b7b598
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words describing two rectangles (ClearRect0 and ClearRect1).
+ /// </summary>
+ /// <remarks>
+ /// Each edge is a 14-bit unsigned field placed on a 16-bit boundary.
+ /// Bit positions of 64 and above wrap inside <c>_word1</c>, because the Extract helper masks the position with 0x3f.
+ /// </remarks>
+ struct ClearRectStruct
+ {
+ // Bits 0..63: the four edges of rectangle 0.
+ private long _word0;
+ // Bits 64..127: the four edges of rectangle 1.
+ private long _word1;
+
+ public int ClearRect0Left => _word0.Extract(0, 14);
+ public int ClearRect0Right => _word0.Extract(16, 14);
+ public int ClearRect0Top => _word0.Extract(32, 14);
+ public int ClearRect0Bottom => _word0.Extract(48, 14);
+ public int ClearRect1Left => _word1.Extract(64, 14);
+ public int ClearRect1Right => _word1.Extract(80, 14);
+ public int ClearRect1Top => _word1.Extract(96, 14);
+ public int ClearRect1Bottom => _word1.Extract(112, 14);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs b/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs
new file mode 100644
index 00000000..f1e6287a
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs
@@ -0,0 +1,14 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Top-level configuration block: pipe, output and output surface settings, an output color matrix,
+ /// four clear rectangle pairs and eight input slots.
+ /// </summary>
+ /// <remarks>
+ /// VicDevice.Execute reads this structure as a whole from GPU memory, so the field order and sizes
+ /// define the expected memory layout and must not be rearranged.
+ /// </remarks>
+ struct ConfigStruct
+ {
+ public PipeConfig PipeConfig;
+ public OutputConfig OutputConfig;
+ public OutputSurfaceConfig OutputSurfaceConfig;
+ public MatrixStruct OutColorMatrix;
+ public Array4<ClearRectStruct> ClearRectStruct;
+ public Array8<SlotStruct> SlotStruct;
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs b/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs
new file mode 100644
index 00000000..df5e647e
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs
@@ -0,0 +1,17 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words holding the luma key parameters of a slot.
+ /// </summary>
+ /// <remarks>
+ /// All fields are read unsigned through the Extract helpers.
+ /// Bit positions of 64 and above wrap inside <c>_word1</c>, because the helper masks the position with 0x3f.
+ /// </remarks>
+ struct LumaKeyStruct
+ {
+ // Bits 0..63: coefficients 0 to 2 (20 bits each) and the 4-bit shift.
+ private long _word0;
+ // Bits 64..127: coefficient 3, the lower/upper key bounds and the enable bit.
+ private long _word1;
+
+ public int LumaCoeff0 => _word0.Extract(0, 20);
+ public int LumaCoeff1 => _word0.Extract(20, 20);
+ public int LumaCoeff2 => _word0.Extract(40, 20);
+ public int LumaRShift => _word0.Extract(60, 4);
+ public int LumaCoeff3 => _word1.Extract(64, 20);
+ public int LumaKeyLower => _word1.Extract(84, 10);
+ public int LumaKeyUpper => _word1.Extract(94, 10);
+ public bool LumaKeyEnabled => _word1.Extract(104);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs b/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs
new file mode 100644
index 00000000..b9dcd8ff
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs
@@ -0,0 +1,25 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over four 64-bit words holding twelve matrix coefficients, a shift amount and an enable bit.
+ /// </summary>
+ /// <remarks>
+ /// Coefficients are 20-bit fields read with ExtractSx, so they are sign extended to <see cref="int"/>.
+ /// <see cref="MatrixRShift"/> and <see cref="MatrixEnable"/> are read unsigned with Extract.
+ /// Each word stores three coefficients; bit positions of 64 and above wrap inside their word,
+ /// because the helpers mask the position with 0x3f.
+ /// NOTE(review): the MatrixCoeffXY naming presumably means row X, column Y, confirm against the consumer of this matrix.
+ /// </remarks>
+ struct MatrixStruct
+ {
+ // Bits 0..63: coefficients 00, 10, 20 and the 4-bit shift.
+ private long _word0;
+ // Bits 64..127: coefficients 01, 11, 21 and the enable bit (top bit of the word).
+ private long _word1;
+ // Bits 128..191: coefficients 02, 12, 22.
+ private long _word2;
+ // Bits 192..255: coefficients 03, 13, 23.
+ private long _word3;
+
+ public int MatrixCoeff00 => _word0.ExtractSx(0, 20);
+ public int MatrixCoeff10 => _word0.ExtractSx(20, 20);
+ public int MatrixCoeff20 => _word0.ExtractSx(40, 20);
+ public int MatrixRShift => _word0.Extract(60, 4);
+ public int MatrixCoeff01 => _word1.ExtractSx(64, 20);
+ public int MatrixCoeff11 => _word1.ExtractSx(84, 20);
+ public int MatrixCoeff21 => _word1.ExtractSx(104, 20);
+ public bool MatrixEnable => _word1.Extract(127);
+ public int MatrixCoeff02 => _word2.ExtractSx(128, 20);
+ public int MatrixCoeff12 => _word2.ExtractSx(148, 20);
+ public int MatrixCoeff22 => _word2.ExtractSx(168, 20);
+ public int MatrixCoeff03 => _word3.ExtractSx(192, 20);
+ public int MatrixCoeff13 => _word3.ExtractSx(212, 20);
+ public int MatrixCoeff23 => _word3.ExtractSx(232, 20);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/OutputConfig.cs b/Ryujinx.Graphics.Vic/Types/OutputConfig.cs
new file mode 100644
index 00000000..8ab46fe5
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/OutputConfig.cs
@@ -0,0 +1,23 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words holding the output settings:
+ /// alpha fill, background color, regamma mode, flip/transpose flags and the target rectangle.
+ /// </summary>
+ /// <remarks>
+ /// All fields are read unsigned through the Extract helpers.
+ /// Bit positions of 64 and above wrap inside <c>_word1</c>, because the helper masks the position with 0x3f.
+ /// </remarks>
+ struct OutputConfig
+ {
+ // Bits 0..63: alpha fill, background color, regamma mode and the flip/transpose flags.
+ private long _word0;
+ // Bits 64..127: the four target rectangle edges, 14 bits each on 16-bit boundaries.
+ private long _word1;
+
+ public int AlphaFillMode => _word0.Extract(0, 3);
+ public int AlphaFillSlot => _word0.Extract(3, 3);
+ public int BackgroundAlpha => _word0.Extract(6, 10);
+ public int BackgroundR => _word0.Extract(16, 10);
+ public int BackgroundG => _word0.Extract(26, 10);
+ public int BackgroundB => _word0.Extract(36, 10);
+ public int RegammaMode => _word0.Extract(46, 2);
+ public bool OutputFlipX => _word0.Extract(48);
+ public bool OutputFlipY => _word0.Extract(49);
+ public bool OutputTranspose => _word0.Extract(50);
+ public int TargetRectLeft => _word1.Extract(64, 14);
+ public int TargetRectRight => _word1.Extract(80, 14);
+ public int TargetRectTop => _word1.Extract(96, 14);
+ public int TargetRectBottom => _word1.Extract(112, 14);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs b/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs
new file mode 100644
index 00000000..9068cbbe
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs
@@ -0,0 +1,20 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words describing the output surface: pixel format,
+ /// chroma location, block kind/height and the surface, luma and chroma dimensions.
+ /// </summary>
+ /// <remarks>
+ /// VicDevice.Execute adds 1 to <see cref="OutSurfaceWidth"/> and <see cref="OutSurfaceHeight"/> when it creates
+ /// the output surface, so the stored values are presumably "size minus one" (confirm for the luma/chroma fields).
+ /// Bit positions of 64 and above wrap inside <c>_word1</c>, because the helper masks the position with 0x3f.
+ /// </remarks>
+ struct OutputSurfaceConfig
+ {
+ // Bits 0..63: format, chroma location, block layout and surface width/height.
+ private long _word0;
+ // Bits 64..127: luma and chroma plane width/height.
+ private long _word1;
+
+ // The 7-bit field is cast directly to the enum, so its raw value must match the enum's numbering.
+ public PixelFormat OutPixelFormat => (PixelFormat)_word0.Extract(0, 7);
+ public int OutChromaLocHoriz => _word0.Extract(7, 2);
+ public int OutChromaLocVert => _word0.Extract(9, 2);
+ public int OutBlkKind => _word0.Extract(11, 4);
+ public int OutBlkHeight => _word0.Extract(15, 4);
+ public int OutSurfaceWidth => _word0.Extract(32, 14);
+ public int OutSurfaceHeight => _word0.Extract(46, 14);
+ public int OutLumaWidth => _word1.Extract(64, 14);
+ public int OutLumaHeight => _word1.Extract(78, 14);
+ public int OutChromaWidth => _word1.Extract(96, 14);
+ public int OutChromaHeight => _word1.Extract(110, 14);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/PipeConfig.cs b/Ryujinx.Graphics.Vic/Types/PipeConfig.cs
new file mode 100644
index 00000000..72d8cc99
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/PipeConfig.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words holding the horizontal and vertical downsample values.
+ /// </summary>
+ struct PipeConfig
+ {
+ // Bits 0..63: both 11-bit downsample fields live here.
+ private long _word0;
+ // Bits 64..127: no property reads this word; it only keeps the structure 16 bytes long.
+ private long _word1;
+
+ public int DownsampleHoriz => _word0.Extract(0, 11);
+ public int DownsampleVert => _word0.Extract(16, 11);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/PixelFormat.cs b/Ryujinx.Graphics.Vic/Types/PixelFormat.cs
new file mode 100644
index 00000000..72dc7899
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/PixelFormat.cs
@@ -0,0 +1,81 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Pixel formats selectable for input slots and for the output surface.
+ /// </summary>
+ /// <remarks>
+ /// Members have no explicit values: numbering starts at 0 and follows declaration order.
+ /// OutputSurfaceConfig.OutPixelFormat and SlotSurfaceConfig.SlotPixelFormat cast a raw 7-bit field to this enum,
+ /// so inserting, removing or reordering members changes how those fields are interpreted.
+ /// </remarks>
+ enum PixelFormat
+ {
+ A8,
+ L8,
+ A4L4,
+ L4A4,
+ R8,
+ A8L8,
+ L8A8,
+ R8G8,
+ G8R8,
+ B5G6R5,
+ R5G6B5,
+ B6G5R5,
+ R5G5B6,
+ A1B5G5R5,
+ A1R5G5B5,
+ B5G5R5A1,
+ R5G5B5A1,
+ A5B5G5R1,
+ A5R1G5B5,
+ B5G5R1A5,
+ R1G5B5A5,
+ X1B5G5R5,
+ X1R5G5B5,
+ B5G5R5X1,
+ R5G5B5X1,
+ A4B4G4R4,
+ A4R4G4B4,
+ B4G4R4A4,
+ R4G4B4A4,
+ B8_G8_R8,
+ R8_G8_B8,
+ A8B8G8R8,
+ A8R8G8B8,
+ B8G8R8A8,
+ R8G8B8A8,
+ X8B8G8R8,
+ X8R8G8B8,
+ B8G8R8X8,
+ R8G8B8X8,
+ A2B10G10R10,
+ A2R10G10B10,
+ B10G10R10A2,
+ R10G10B10A2,
+ A4P4,
+ P4A4,
+ // NOTE(review): this name breaks the pattern of its neighbours (A4P4, P4A4, A8P8) and looks like a typo for P8A8; confirm before renaming.
+ P8A845,
+ A8P8,
+ P8,
+ P1,
+ U8V8,
+ V8U8,
+ A8Y8U8V8,
+ V8U8Y8A8,
+ Y8_U8_V8,
+ Y8_V8_U8,
+ U8_V8_Y8,
+ V8_U8_Y8,
+ Y8_U8__Y8_V8,
+ Y8_V8__Y8_U8,
+ U8_Y8__V8_Y8,
+ V8_Y8__U8_Y8,
+ Y8___U8V8_N444,
+ Y8___V8U8_N444,
+ Y8___U8V8_N422,
+ Y8___V8U8_N422,
+ Y8___U8V8_N422R,
+ Y8___V8U8_N422R,
+ Y8___U8V8_N420,
+ Y8___V8U8_N420,
+ Y8___U8___V8_N444,
+ Y8___U8___V8_N422,
+ Y8___U8___V8_N422R,
+ Y8___U8___V8_N420,
+ U8,
+ V8
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/SlotConfig.cs b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs
new file mode 100644
index 00000000..183ee4ac
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs
@@ -0,0 +1,63 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over eight 64-bit words holding the per-slot processing settings.
+ /// </summary>
+ /// <remarks>
+ /// All fields are read unsigned through the Extract helpers.
+ /// The bit positions passed to the helpers are written relative to the start of the whole structure; the helper masks
+ /// the position with 0x3f, so a position N used with <c>_wordK</c> addresses bit (N - 64 * K) of that word.
+ /// VicDevice.Execute skips any slot whose <see cref="SlotEnable"/> is false.
+ /// </remarks>
+ struct SlotConfig
+ {
+ // Bits 0..63: enable flags, frame format, filter lengths, panoramic and detail filter clamp.
+ private long _word0;
+ // Bits 64..127: noise/detail filter values, deinterlace mode, motion weight, noise IIR and light level.
+ private long _word1;
+ // Bits 128..191: soft clamp, alpha, stereo, clip, clear rect mask, degamma and decompress enable.
+ private long _word2;
+ // Bits 192..255: decompress CTB count and ZBC color.
+ private long _word3;
+ // Bits 256..319: source rectangle left/right (30 bits each).
+ private long _word4;
+ // Bits 320..383: source rectangle top/bottom (30 bits each).
+ private long _word5;
+ // Bits 384..447: destination rectangle edges (14 bits each).
+ private long _word6;
+ // Bits 448..511: not read by any property; keeps the structure 64 bytes long.
+ private long _word7;
+
+ public bool SlotEnable => _word0.Extract(0);
+ public bool DeNoise => _word0.Extract(1);
+ public bool AdvancedDenoise => _word0.Extract(2);
+ public bool CadenceDetect => _word0.Extract(3);
+ public bool MotionMap => _word0.Extract(4);
+ public bool MMapCombine => _word0.Extract(5);
+ public bool IsEven => _word0.Extract(6);
+ public bool ChromaEven => _word0.Extract(7);
+ public bool CurrentFieldEnable => _word0.Extract(8);
+ public bool PrevFieldEnable => _word0.Extract(9);
+ public bool NextFieldEnable => _word0.Extract(10);
+ public bool NextNrFieldEnable => _word0.Extract(11);
+ public bool CurMotionFieldEnable => _word0.Extract(12);
+ public bool PrevMotionFieldEnable => _word0.Extract(13);
+ public bool PpMotionFieldEnable => _word0.Extract(14);
+ public bool CombMotionFieldEnable => _word0.Extract(15);
+ public int FrameFormat => _word0.Extract(16, 4);
+ public int FilterLengthY => _word0.Extract(20, 2);
+ public int FilterLengthX => _word0.Extract(22, 2);
+ public int Panoramic => _word0.Extract(24, 12);
+ public int DetailFltClamp => _word0.Extract(58, 6);
+ public int FilterNoise => _word1.Extract(64, 10);
+ public int FilterDetail => _word1.Extract(74, 10);
+ public int ChromaNoise => _word1.Extract(84, 10);
+ public int ChromaDetail => _word1.Extract(94, 10);
+ public int DeinterlaceMode => _word1.Extract(104, 4);
+ public int MotionAccumWeight => _word1.Extract(108, 3);
+ public int NoiseIir => _word1.Extract(111, 11);
+ public int LightLevel => _word1.Extract(122, 4);
+ public int SoftClampLow => _word2.Extract(128, 10);
+ public int SoftClampHigh => _word2.Extract(138, 10);
+ public int PlanarAlpha => _word2.Extract(160, 10);
+ public bool ConstantAlpha => _word2.Extract(170);
+ public int StereoInterleave => _word2.Extract(171, 3);
+ public bool ClipEnabled => _word2.Extract(174);
+ public int ClearRectMask => _word2.Extract(175, 8);
+ public int DegammaMode => _word2.Extract(183, 2);
+ public bool DecompressEnable => _word2.Extract(186);
+ public int DecompressCtbCount => _word3.Extract(192, 8);
+ // Full 32-bit field: the result can be negative when the top bit is set, since the return type is int.
+ public int DecompressZbcColor => _word3.Extract(200, 32);
+ public int SourceRectLeft => _word4.Extract(256, 30);
+ public int SourceRectRight => _word4.Extract(288, 30);
+ public int SourceRectTop => _word5.Extract(320, 30);
+ public int SourceRectBottom => _word5.Extract(352, 30);
+ public int DstRectLeft => _word6.Extract(384, 14);
+ public int DstRectRight => _word6.Extract(400, 14);
+ public int DstRectTop => _word6.Extract(416, 14);
+ public int DstRectBottom => _word6.Extract(432, 14);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/SlotStruct.cs b/Ryujinx.Graphics.Vic/Types/SlotStruct.cs
new file mode 100644
index 00000000..96c6cce5
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/SlotStruct.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Everything describing one input slot: its processing settings, surface description, luma key,
+ /// color and gamut matrices, and blending parameters.
+ /// </summary>
+ /// <remarks>
+ /// ConfigStruct holds eight of these, and the whole configuration is read from memory as one block,
+ /// so the field order and sizes are part of the layout and must not be rearranged.
+ /// </remarks>
+ struct SlotStruct
+ {
+ public SlotConfig SlotConfig;
+ public SlotSurfaceConfig SlotSurfaceConfig;
+ public LumaKeyStruct LumaKeyStruct;
+ public MatrixStruct ColorMatrixStruct;
+ public MatrixStruct GamutMatrixStruct;
+ public BlendingSlotStruct BlendingSlotStruct;
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs b/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs
new file mode 100644
index 00000000..7396afa1
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Graphics.Vic.Types
+{
+ /// <summary>
+ /// Read-only bit-field view over two 64-bit words describing the surface of an input slot: pixel format,
+ /// chroma location, block kind/height, cache width and the surface, luma and chroma dimensions.
+ /// </summary>
+ /// <remarks>
+ /// Mirrors the field positions of OutputSurfaceConfig, with the extra <see cref="SlotCacheWidth"/> field.
+ /// Bit positions of 64 and above wrap inside <c>_word1</c>, because the helper masks the position with 0x3f.
+ /// NOTE(review): width/height values are presumably stored as "size minus one", as for the output surface; confirm in SurfaceReader.
+ /// </remarks>
+ struct SlotSurfaceConfig
+ {
+ // Bits 0..63: format, chroma location, block layout, cache width and surface width/height.
+ private long _word0;
+ // Bits 64..127: luma and chroma plane width/height.
+ private long _word1;
+
+ // The 7-bit field is cast directly to the enum, so its raw value must match the enum's numbering.
+ public PixelFormat SlotPixelFormat => (PixelFormat)_word0.Extract(0, 7);
+ public int SlotChromaLocHoriz => _word0.Extract(7, 2);
+ public int SlotChromaLocVert => _word0.Extract(9, 2);
+ public int SlotBlkKind => _word0.Extract(11, 4);
+ public int SlotBlkHeight => _word0.Extract(15, 4);
+ public int SlotCacheWidth => _word0.Extract(19, 3);
+ public int SlotSurfaceWidth => _word0.Extract(32, 14);
+ public int SlotSurfaceHeight => _word0.Extract(46, 14);
+ public int SlotLumaWidth => _word1.Extract(64, 14);
+ public int SlotLumaHeight => _word1.Extract(78, 14);
+ public int SlotChromaWidth => _word1.Extract(96, 14);
+ public int SlotChromaHeight => _word1.Extract(110, 14);
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/VicDevice.cs b/Ryujinx.Graphics.Vic/VicDevice.cs
new file mode 100644
index 00000000..db4fe513
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/VicDevice.cs
@@ -0,0 +1,97 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Vic.Image;
+using Ryujinx.Graphics.Vic.Types;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Vic
+{
+ /// <summary>
+ /// Device state wrapper that reacts to writes on the Execute register by composing the enabled input slots
+ /// into the output surface.
+ /// </summary>
+ public class VicDevice : IDeviceState
+ {
+ private readonly MemoryManager _gmm;
+ private readonly ResourceManager _rm;
+ private readonly DeviceState<VicRegisters> _state;
+
+ private PlaneOffsets _overrideOffsets;
+ private bool _hasOverride;
+
+ /// <summary>
+ /// Creates the device and hooks the Execute register write callback.
+ /// </summary>
+ /// <param name="gmm">Memory manager used to read the configuration and surfaces</param>
+ public VicDevice(MemoryManager gmm)
+ {
+ _gmm = gmm;
+ _rm = new ResourceManager(gmm, new BufferPool<Pixel>(), new BufferPool<byte>());
+
+ var callbacks = new Dictionary<string, RwCallback>();
+
+ callbacks[nameof(VicRegisters.Execute)] = new RwCallback(Execute, null);
+
+ _state = new DeviceState<VicRegisters>(callbacks);
+ }
+
+ /// <summary>
+ /// Overrides all input surfaces with a custom surface.
+ /// </summary>
+ /// <param name="lumaOffset">Offset of the luma plane or packed data for this surface</param>
+ /// <param name="chromaUOffset">Offset of the U chroma plane (for planar formats) or both chroma planes (for semiplanar formats)</param>
+ /// <param name="chromaVOffset">Offset of the V chroma plane for planar formats</param>
+ public void SetSurfaceOverride(uint lumaOffset, uint chromaUOffset, uint chromaVOffset)
+ {
+ _overrideOffsets = new PlaneOffsets
+ {
+ LumaOffset = lumaOffset,
+ ChromaUOffset = chromaUOffset,
+ ChromaVOffset = chromaVOffset
+ };
+
+ _hasOverride = true;
+ }
+
+ /// <summary>
+ /// Disables overriding input surfaces.
+ /// </summary>
+ /// <remarks>
+ /// Surface overrides are disabled by default.
+ /// Call this if you previously called <see cref="SetSurfaceOverride(uint, uint, uint)"/> and wish to disable it.
+ /// </remarks>
+ public void DisableSurfaceOverride()
+ {
+ _hasOverride = false;
+ }
+
+ /// <summary>
+ /// Reads a register; forwarded to the underlying device state.
+ /// </summary>
+ /// <param name="offset">Register offset</param>
+ /// <returns>Value returned by the device state</returns>
+ public int Read(int offset)
+ {
+ return _state.Read(offset);
+ }
+
+ /// <summary>
+ /// Writes a register; forwarded to the underlying device state.
+ /// </summary>
+ /// <param name="offset">Register offset</param>
+ /// <param name="data">Value to write</param>
+ public void Write(int offset, int data)
+ {
+ _state.Write(offset, data);
+ }
+
+ /// <summary>
+ /// Callback for writes to the Execute register: reads the configuration structure, blends every enabled slot
+ /// into a freshly created output surface, then writes that surface out.
+ /// </summary>
+ /// <param name="data">Written value (unused)</param>
+ private void Execute(int data)
+ {
+ ConfigStruct config = ReadIndirect<ConfigStruct>(_state.State.SetConfigStructOffset);
+
+ // The configuration stores both output dimensions with 1 subtracted.
+ int outputWidth = config.OutputSurfaceConfig.OutSurfaceWidth + 1;
+ int outputHeight = config.OutputSurfaceConfig.OutSurfaceHeight + 1;
+
+ using (Surface output = new Surface(_rm.SurfacePool, outputWidth, outputHeight))
+ {
+ for (int slotIndex = 0; slotIndex < config.SlotStruct.Length; slotIndex++)
+ {
+ ref SlotStruct slot = ref config.SlotStruct[slotIndex];
+
+ if (slot.SlotConfig.SlotEnable)
+ {
+ // Start from the first surface registered for this slot; a caller supplied override replaces it.
+ PlaneOffsets offsets = _state.State.SetSurfacexSlotx[slotIndex][0];
+
+ if (_hasOverride)
+ {
+ offsets = _overrideOffsets;
+ }
+
+ using (Surface src = SurfaceReader.Read(_rm, ref slot.SlotSurfaceConfig, ref offsets))
+ {
+ Blender.BlendOne(output, src, ref slot);
+ }
+ }
+ }
+
+ SurfaceWriter.Write(_rm, output, ref config.OutputSurfaceConfig, ref _state.State.SetOutputSurface);
+ }
+ }
+
+ /// <summary>
+ /// Reads a structure from memory at an address given as a register value.
+ /// </summary>
+ /// <typeparam name="T">Type of the structure to read</typeparam>
+ /// <param name="offset">Register value; it is shifted left by 8 to form the address</param>
+ /// <returns>The structure read from memory</returns>
+ private T ReadIndirect<T>(uint offset) where T : unmanaged
+ {
+ ulong address = (ulong)offset << 8;
+
+ return _gmm.Read<T>(address);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Vic/VicRegisters.cs b/Ryujinx.Graphics.Vic/VicRegisters.cs
new file mode 100644
index 00000000..71dc9eed
--- /dev/null
+++ b/Ryujinx.Graphics.Vic/VicRegisters.cs
@@ -0,0 +1,47 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Vic
+{
+ /// <summary>
+ /// Offsets of the planes of one surface: luma, chroma U and chroma V.
+ /// </summary>
+ /// <remarks>
+ /// VicDevice shifts register offsets left by 8 before using them as addresses for the configuration structure.
+ /// NOTE(review): presumably the same scaling applies to these offsets, confirm in SurfaceReader/SurfaceWriter.
+ /// </remarks>
+ struct PlaneOffsets
+ {
+ public uint LumaOffset;
+ public uint ChromaUOffset;
+ public uint ChromaVOffset;
+ }
+
+ /// <summary>
+ /// Register file layout used as the type argument of DeviceState in VicDevice.
+ /// </summary>
+ /// <remarks>
+ /// The position of each field inside the structure is what matters, so fields must not be added, removed or reordered.
+ /// VicDevice registers a write callback under the name of the <see cref="Execute"/> field.
+ /// NOTE(review): the ReservedXXX names appear to encode the hexadecimal byte offset where each gap starts
+ /// (for example 64 uints of Reserved0 end at 0x100, and the gap after Nop is Reserved104); confirm before relying on it.
+ /// </remarks>
+ struct VicRegisters
+ {
+ public Array64<uint> Reserved0;
+ public uint Nop;
+ public Array15<uint> Reserved104;
+ public uint PmTrigger;
+ public Array47<uint> Reserved144;
+ public uint SetApplicationID;
+ public uint SetWatchdogTimer;
+ public Array14<uint> Reserved208;
+ public uint SemaphoreA;
+ public uint SemaphoreB;
+ public uint SemaphoreC;
+ public uint CtxSaveArea;
+ public uint CtxSwitch;
+ public Array43<uint> Reserved254;
+ public uint Execute;
+ public uint SemaphoreD;
+ public Array62<uint> Reserved308;
+ // Indexed as [slot][surface] by VicDevice.Execute, which only reads surface 0 of each slot.
+ public Array8<Array8<PlaneOffsets>> SetSurfacexSlotx;
+ public uint SetPictureIndex;
+ public uint SetControlParams;
+ // Read by VicDevice.Execute and shifted left by 8 to locate the ConfigStruct in memory.
+ public uint SetConfigStructOffset;
+ public uint SetFilterStructOffset;
+ public uint SetPaletteOffset;
+ public uint SetHistOffset;
+ public uint SetContextId;
+ public uint SetFceUcodeSize;
+ // Passed by reference to SurfaceWriter.Write as the destination of the composed output.
+ public PlaneOffsets SetOutputSurface;
+ public uint SetFceUcodeOffset;
+ public Array4<uint> Reserved730;
+ public Array8<uint> SetSlotContextId;
+ public Array8<uint> SetCompTagBufferOffset;
+ public Array8<uint> SetHistoryBufferOffset;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/H264PictureInfo.cs b/Ryujinx.Graphics.Video/H264PictureInfo.cs
new file mode 100644
index 00000000..3b2c2fff
--- /dev/null
+++ b/Ryujinx.Graphics.Video/H264PictureInfo.cs
@@ -0,0 +1,47 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Picture parameters passed by reference to <see cref="IH264Decoder.Decode"/>.
+ /// </summary>
+ /// <remarks>
+ /// Plain data container with no behavior of its own.
+ /// NOTE(review): the field names appear to follow the H.264 syntax element names (sequence/picture parameter sets
+ /// and slice header); confirm against the code that fills this structure.
+ /// </remarks>
+ public struct H264PictureInfo
+ {
+ public Array2<int> FieldOrderCnt;
+ public bool IsReference;
+ public ushort ChromaFormatIdc;
+ public ushort FrameNum;
+ public bool FieldPicFlag;
+ public bool BottomFieldFlag;
+ public uint NumRefFrames;
+ public bool MbAdaptiveFrameFieldFlag;
+ public bool ConstrainedIntraPredFlag;
+ public bool WeightedPredFlag;
+ public uint WeightedBipredIdc;
+ public bool FrameMbsOnlyFlag;
+ public bool Transform8x8ModeFlag;
+ public int ChromaQpIndexOffset;
+ public int SecondChromaQpIndexOffset;
+ public int PicInitQpMinus26;
+ public uint NumRefIdxL0ActiveMinus1;
+ public uint NumRefIdxL1ActiveMinus1;
+ public uint Log2MaxFrameNumMinus4;
+ public uint PicOrderCntType;
+ public uint Log2MaxPicOrderCntLsbMinus4;
+ public bool DeltaPicOrderAlwaysZeroFlag;
+ public bool Direct8x8InferenceFlag;
+ public bool EntropyCodingModeFlag;
+ public bool PicOrderPresentFlag;
+ public bool DeblockingFilterControlPresentFlag;
+ public bool RedundantPicCntPresentFlag;
+ public uint NumSliceGroupsMinus1;
+ public uint SliceGroupMapType;
+ public uint SliceGroupChangeRateMinus1;
+ // TODO: Slice group map
+ public bool FmoAsoEnable;
+ public bool ScalingMatrixPresent;
+ // Six lists of 16 bytes each.
+ public Array6<Array16<byte>> ScalingLists4x4;
+ // Two lists of 64 bytes each.
+ public Array2<Array64<byte>> ScalingLists8x8;
+ public uint FrameType;
+ public uint PicWidthInMbsMinus1;
+ public uint PicHeightInMapUnitsMinus1;
+ public bool QpprimeYZeroTransformBypassFlag;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/IDecoder.cs b/Ryujinx.Graphics.Video/IDecoder.cs
new file mode 100644
index 00000000..5957af08
--- /dev/null
+++ b/Ryujinx.Graphics.Video/IDecoder.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Base contract shared by the video decoders; disposable.
+ /// </summary>
+ public interface IDecoder : IDisposable
+ {
+ /// <summary>
+ /// Indicates whether the implementation reports itself as hardware accelerated.
+ /// </summary>
+ bool IsHardwareAccelerated { get; }
+
+ /// <summary>
+ /// Creates a surface for this decoder.
+ /// </summary>
+ /// <param name="width">Requested surface width</param>
+ /// <param name="height">Requested surface height</param>
+ /// <returns>The new surface; it is disposable, so the caller is presumably responsible for disposing it</returns>
+ ISurface CreateSurface(int width, int height);
+ }
+}
diff --git a/Ryujinx.Graphics.Video/IH264Decoder.cs b/Ryujinx.Graphics.Video/IH264Decoder.cs
new file mode 100644
index 00000000..127b9412
--- /dev/null
+++ b/Ryujinx.Graphics.Video/IH264Decoder.cs
@@ -0,0 +1,9 @@
+using System;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Decoder contract for H.264 bitstreams.
+ /// </summary>
+ public interface IH264Decoder : IDecoder
+ {
+ /// <summary>
+ /// Decodes one picture.
+ /// </summary>
+ /// <param name="pictureInfo">Picture parameters, passed by reference</param>
+ /// <param name="output">Surface that receives the decoded picture</param>
+ /// <param name="bitstream">Encoded data for the picture</param>
+ /// <returns>NOTE(review): presumably true on success, confirm against the implementations</returns>
+ bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream);
+ }
+}
diff --git a/Ryujinx.Graphics.Video/ISurface.cs b/Ryujinx.Graphics.Video/ISurface.cs
new file mode 100644
index 00000000..fb66f31a
--- /dev/null
+++ b/Ryujinx.Graphics.Video/ISurface.cs
@@ -0,0 +1,18 @@
+using System;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// A decoded picture exposed as three planes (Y, U and V) plus its dimensions; disposable.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): Width/Height/Stride presumably describe the Y plane and UvWidth/UvHeight/UvStride the two chroma planes,
+ /// with strides expressed in bytes; confirm against the implementations.
+ /// </remarks>
+ public interface ISurface : IDisposable
+ {
+ Plane YPlane { get; }
+ Plane UPlane { get; }
+ Plane VPlane { get; }
+
+ int Width { get; }
+ int Height { get; }
+ int Stride { get; }
+ int UvWidth { get; }
+ int UvHeight { get; }
+ int UvStride { get; }
+ }
+}
diff --git a/Ryujinx.Graphics.Video/IVp9Decoder.cs b/Ryujinx.Graphics.Video/IVp9Decoder.cs
new file mode 100644
index 00000000..ac79bc42
--- /dev/null
+++ b/Ryujinx.Graphics.Video/IVp9Decoder.cs
@@ -0,0 +1,14 @@
+using System;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Decoder contract for VP9 bitstreams.
+ /// </summary>
+ public interface IVp9Decoder : IDecoder
+ {
+ /// <summary>
+ /// Decodes one picture.
+ /// </summary>
+ /// <param name="pictureInfo">Picture parameters, passed by reference (the type is a ref struct, so it cannot be boxed or stored on the heap)</param>
+ /// <param name="output">Surface that receives the decoded picture</param>
+ /// <param name="bitstream">Encoded data for the picture</param>
+ /// <param name="mvsIn">Read-only motion vector references supplied to the decoder</param>
+ /// <param name="mvsOut">Writable span for the motion vector references produced by the decoder</param>
+ /// <returns>NOTE(review): presumably true on success, confirm against the implementations</returns>
+ bool Decode(
+ ref Vp9PictureInfo pictureInfo,
+ ISurface output,
+ ReadOnlySpan<byte> bitstream,
+ ReadOnlySpan<Vp9MvRef> mvsIn,
+ Span<Vp9MvRef> mvsOut);
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Plane.cs b/Ryujinx.Graphics.Video/Plane.cs
new file mode 100644
index 00000000..c0aca59c
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Plane.cs
@@ -0,0 +1,42 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Immutable description of a block of plane data: a start pointer and a length.
+ /// </summary>
+ /// <remarks>
+ /// Two planes are equal when both the pointer and the length match; the pointed-to data is never compared.
+ /// The struct is declared readonly so that calls through <c>in</c> parameters or readonly fields do not copy it.
+ /// NOTE(review): the length is presumably expressed in bytes, confirm against the producers of this type.
+ /// </remarks>
+ public readonly struct Plane : IEquatable<Plane>
+ {
+ /// <summary>
+ /// Start of the plane data.
+ /// </summary>
+ public IntPtr Pointer { get; }
+
+ /// <summary>
+ /// Length of the plane data.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Creates a new plane description.
+ /// </summary>
+ /// <param name="pointer">Start of the plane data</param>
+ /// <param name="length">Length of the plane data</param>
+ public Plane(IntPtr pointer, int length)
+ {
+ Pointer = pointer;
+ Length = length;
+ }
+
+ /// <summary>
+ /// Compares against any object; only another <see cref="Plane"/> with the same pointer and length is equal.
+ /// </summary>
+ public override bool Equals(object obj)
+ {
+ return obj is Plane other && Equals(other);
+ }
+
+ /// <summary>
+ /// Compares the pointer and the length of both planes.
+ /// </summary>
+ /// <param name="other">Plane to compare with</param>
+ /// <returns>True if both members match</returns>
+ public bool Equals(Plane other)
+ {
+ return Pointer == other.Pointer && Length == other.Length;
+ }
+
+ /// <summary>
+ /// Combines the hashes of the pointer and the length, consistently with <see cref="Equals(Plane)"/>.
+ /// </summary>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(Pointer, Length);
+ }
+
+ public static bool operator ==(Plane left, Plane right)
+ {
+ return left.Equals(right);
+ }
+
+ public static bool operator !=(Plane left, Plane right)
+ {
+ return !(left == right);
+ }
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj b/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj
new file mode 100644
index 00000000..6710726c
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj
@@ -0,0 +1,11 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <!-- Shared video types (decoder interfaces, surfaces, picture info); the only dependency is Ryujinx.Common. -->
+ <PropertyGroup>
+ <TargetFramework>netcoreapp3.1</TargetFramework>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs b/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs
new file mode 100644
index 00000000..a3aa4de7
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs
@@ -0,0 +1,32 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Fixed-size tables of 32-bit unsigned counters, carried by <see cref="Vp9PictureInfo"/> as its BackwardUpdateCounts field.
+ /// </summary>
+ /// <remarks>
+ /// Every leaf element is a <see cref="uint"/>; the nested array types fix the dimensions of each table.
+ /// NOTE(review): presumably these are the symbol counts used for VP9 backward probability adaptation, and the layout
+ /// may have to match an external structure; confirm before changing any dimension or the field order.
+ /// </remarks>
+ public struct Vp9BackwardUpdates
+ {
+ public Array4<Array10<uint>> YMode;
+ public Array10<Array10<uint>> UvMode;
+ public Array16<Array4<uint>> Partition;
+ public Array4<Array2<Array2<Array6<Array6<Array4<uint>>>>>> Coef;
+ public Array4<Array2<Array2<Array6<Array6<uint>>>>> EobBranch;
+ public Array4<Array3<uint>> SwitchableInterp;
+ public Array7<Array4<uint>> InterMode;
+ public Array4<Array2<uint>> IntraInter;
+ public Array5<Array2<uint>> CompInter;
+ public Array5<Array2<Array2<uint>>> SingleRef;
+ public Array5<Array2<uint>> CompRef;
+ public Array2<Array4<uint>> Tx32x32;
+ public Array2<Array3<uint>> Tx16x16;
+ public Array2<Array2<uint>> Tx8x8;
+ public Array3<Array2<uint>> Skip;
+ public Array4<uint> Joints;
+ public Array2<Array2<uint>> Sign;
+ public Array2<Array11<uint>> Classes;
+ public Array2<Array2<uint>> Class0;
+ public Array2<Array10<Array2<uint>>> Bits;
+ public Array2<Array2<Array4<uint>>> Class0Fp;
+ public Array2<Array4<uint>> Fp;
+ public Array2<Array2<uint>> Class0Hp;
+ public Array2<Array2<uint>> Hp;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs b/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs
new file mode 100644
index 00000000..10b997a5
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs
@@ -0,0 +1,36 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Fixed-size tables of byte-sized values, carried by <see cref="Vp9PictureInfo"/> as its Entropy field.
+ /// </summary>
+ /// <remarks>
+ /// Every leaf element is a <see cref="byte"/>; the nested array types fix the dimensions of each table.
+ /// NOTE(review): presumably these are the VP9 entropy coding probabilities, and the layout may have to match
+ /// an external structure; confirm before changing any dimension or the field order.
+ /// </remarks>
+ public struct Vp9EntropyProbs
+ {
+ public Array10<Array10<Array9<byte>>> KfYModeProb;
+ public Array7<byte> SegTreeProb;
+ public Array3<byte> SegPredProb;
+ public Array10<Array9<byte>> KfUvModeProb;
+ public Array4<Array9<byte>> YModeProb;
+ public Array10<Array9<byte>> UvModeProb;
+ public Array16<Array3<byte>> KfPartitionProb;
+ public Array16<Array3<byte>> PartitionProb;
+ public Array4<Array2<Array2<Array6<Array6<Array3<byte>>>>>> CoefProbs;
+ public Array4<Array2<byte>> SwitchableInterpProb;
+ public Array7<Array3<byte>> InterModeProb;
+ public Array4<byte> IntraInterProb;
+ public Array5<byte> CompInterProb;
+ public Array5<Array2<byte>> SingleRefProb;
+ public Array5<byte> CompRefProb;
+ public Array2<Array3<byte>> Tx32x32Prob;
+ public Array2<Array2<byte>> Tx16x16Prob;
+ public Array2<Array1<byte>> Tx8x8Prob;
+ public Array3<byte> SkipProb;
+ public Array3<byte> Joints;
+ public Array2<byte> Sign;
+ public Array2<Array10<byte>> Classes;
+ public Array2<Array1<byte>> Class0;
+ public Array2<Array10<byte>> Bits;
+ public Array2<Array2<Array3<byte>>> Class0Fp;
+ public Array2<Array3<byte>> Fp;
+ public Array2<byte> Class0Hp;
+ public Array2<byte> Hp;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Vp9Mv.cs b/Ryujinx.Graphics.Video/Vp9Mv.cs
new file mode 100644
index 00000000..9de41058
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Vp9Mv.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// A motion vector stored as two signed 16-bit components, row first.
+ /// </summary>
+ /// <remarks>
+ /// Embedded in <see cref="Vp9MvRef"/>, whose layout must not change, so the field order and types here are fixed as well.
+ /// </remarks>
+ public struct Vp9Mv
+ {
+ public short Row;
+ public short Col;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Vp9MvRef.cs b/Ryujinx.Graphics.Video/Vp9MvRef.cs
new file mode 100644
index 00000000..6f2d8e81
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Vp9MvRef.cs
@@ -0,0 +1,11 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Two motion vectors together with two reference frame values; exchanged with the VP9 decoder
+ /// through the mvsIn and mvsOut spans of <see cref="IVp9Decoder.Decode"/>.
+ /// </summary>
+ // This must match the structure used by NVDEC, do not modify.
+ public struct Vp9MvRef
+ {
+ public Array2<Vp9Mv> Mvs;
+ public Array2<int> RefFrames;
+ }
+}
diff --git a/Ryujinx.Graphics.Video/Vp9PictureInfo.cs b/Ryujinx.Graphics.Video/Vp9PictureInfo.cs
new file mode 100644
index 00000000..a5cc2b45
--- /dev/null
+++ b/Ryujinx.Graphics.Video/Vp9PictureInfo.cs
@@ -0,0 +1,39 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Video
+{
+ /// <summary>
+ /// Picture parameters passed by reference to <see cref="IVp9Decoder.Decode"/>: the three reference surfaces,
+ /// frame-level settings, segmentation and loop-filter deltas, entropy tables and backward update counters.
+ /// </summary>
+ /// <remarks>
+ /// Declared as a ref struct, so instances can only live on the stack and cannot be boxed or captured.
+ /// NOTE(review): the field names appear to follow the VP9 frame header terminology; confirm against the code that fills them.
+ /// </remarks>
+ public ref struct Vp9PictureInfo
+ {
+ public ISurface LastReference;
+ public ISurface GoldenReference;
+ public ISurface AltReference;
+ public bool IsKeyFrame;
+ public bool IntraOnly;
+ public Array4<sbyte> RefFrameSignBias;
+ public int BaseQIndex;
+ public int YDcDeltaQ;
+ public int UvDcDeltaQ;
+ public int UvAcDeltaQ;
+ public bool Lossless;
+ public int TransformMode;
+ public bool AllowHighPrecisionMv;
+ public int InterpFilter;
+ public int ReferenceMode;
+ public sbyte CompFixedRef;
+ public Array2<sbyte> CompVarRef;
+ public int Log2TileCols;
+ public int Log2TileRows;
+ public bool SegmentEnabled;
+ public bool SegmentMapUpdate;
+ public bool SegmentMapTemporalUpdate;
+ public int SegmentAbsDelta;
+ public Array8<uint> SegmentFeatureEnable;
+ public Array8<Array4<short>> SegmentFeatureData;
+ public bool ModeRefDeltaEnabled;
+ public bool UsePrevInFindMvRefs;
+ public Array4<sbyte> RefDeltas;
+ public Array2<sbyte> ModeDeltas;
+ public Vp9EntropyProbs Entropy;
+ public Vp9BackwardUpdates BackwardUpdateCounts;
+ }
+}
diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs
index b3af3290..f302e98a 100644
--- a/Ryujinx.HLE/HOS/Horizon.cs
+++ b/Ryujinx.HLE/HOS/Horizon.cs
@@ -293,8 +293,6 @@ namespace Ryujinx.HLE.HOS
KernelContext.ThreadCounter.Wait();
KernelContext.Dispose();
-
- Device.Unload();
}
}
}
diff --git a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
index ce7314f4..d6cc85e9 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
@@ -60,6 +60,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv
NvDeviceFile deviceFile = (NvDeviceFile)constructor.Invoke(new object[] { context });
+ deviceFile.Path = path;
+
return _deviceFileIdRegistry.Add(deviceFile);
}
else
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
index fe3ae652..e426945d 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
@@ -14,6 +14,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
public readonly ServiceCtx Context;
public readonly KProcess Owner;
+ public string Path;
+
public NvDeviceFile(ServiceCtx context)
{
Context = context;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs
index 208bec3b..70c9a47b 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs
@@ -1,10 +1,10 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gpu.Memory;
-using Ryujinx.HLE.HOS.Services.Nv.Types;
using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostAsGpu;
using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel.Types;
using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl;
using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvMap;
+using Ryujinx.HLE.HOS.Services.Nv.Types;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@@ -130,28 +130,56 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel
private NvInternalResult Submit(Span<byte> arguments)
{
- int headerSize = Unsafe.SizeOf<SubmitArguments>();
- SubmitArguments submitHeader = MemoryMarshal.Cast<byte, SubmitArguments>(arguments)[0];
- Span<CommandBuffer> commandBufferEntries = MemoryMarshal.Cast<byte, CommandBuffer>(arguments.Slice(headerSize)).Slice(0, submitHeader.CmdBufsCount);
- MemoryManager gmm = NvHostAsGpuDeviceFile.GetAddressSpaceContext(Context).Gmm;
-
- foreach (CommandBuffer commandBufferEntry in commandBufferEntries)
+ // The argument buffer is consumed front to back: each GetSpanAndSkip call returns a typed view of the next
+ // entries and advances 'arguments' past them, so the order of these calls defines the expected layout.
+ // The returned spans alias the argument buffer, so writes to 'fences' below are visible to the caller.
+ SubmitArguments submitHeader = GetSpanAndSkip<SubmitArguments>(ref arguments, 1)[0];
+ Span<CommandBuffer> commandBuffers = GetSpanAndSkip<CommandBuffer>(ref arguments, submitHeader.CmdBufsCount);
+ Span<Reloc> relocs = GetSpanAndSkip<Reloc>(ref arguments, submitHeader.RelocsCount);
+ Span<uint> relocShifts = GetSpanAndSkip<uint>(ref arguments, submitHeader.RelocsCount);
+ Span<SyncptIncr> syncptIncrs = GetSpanAndSkip<SyncptIncr>(ref arguments, submitHeader.SyncptIncrsCount);
+ Span<SyncptIncr> waitChecks = GetSpanAndSkip<SyncptIncr>(ref arguments, submitHeader.SyncptIncrsCount); // ?
+ Span<Fence> fences = GetSpanAndSkip<Fence>(ref arguments, submitHeader.FencesCount);
+
+ lock (_device)
{
- NvMapHandle map = NvMapDeviceFile.GetMapFromHandle(Owner, commandBufferEntry.MemoryId);
+ // For each requested increment, raise the syncpoint maximum and store the returned value in the matching fence.
+ // NOTE(review): 'fences' holds FencesCount entries but is indexed up to SyncptIncrsCount here; this throws
+ // if FencesCount is smaller. Confirm that callers always provide at least as many fences as increments.
+ for (int i = 0; i < syncptIncrs.Length; i++)
+ {
+ SyncptIncr syncptIncr = syncptIncrs[i];
- int[] commandBufferData = new int[commandBufferEntry.WordsCount];
+ uint id = syncptIncr.Id;
- for (int offset = 0; offset < commandBufferData.Length; offset++)
- {
- commandBufferData[offset] = _memory.Read<int>((ulong)(map.Address + commandBufferEntry.Offset + offset * 4));
+ fences[i].Id = id;
+ fences[i].Thresh = Context.Device.System.HostSyncpoint.IncrementSyncpointMax(id, syncptIncr.Incrs);
}
- // TODO: Submit command to engines.
+ // Each command buffer is WordsCount 32-bit words located at Offset inside the mapped handle; it is
+ // viewed in place as ints and handed to Host1x.
+ foreach (CommandBuffer commandBuffer in commandBuffers)
+ {
+ NvMapHandle map = NvMapDeviceFile.GetMapFromHandle(Owner, commandBuffer.Mem);
+
+ var data = _memory.GetSpan((ulong)map.Address + commandBuffer.Offset, commandBuffer.WordsCount * 4);
+
+ _device.Host1x.Submit(MemoryMarshal.Cast<byte, int>(data));
+ }
}
+ // Raise the first fence's syncpoint maximum by one more, then submit a single extra command word built from
+ // the value 4 in the top four bits and the fence id in the low bits.
+ // NOTE(review): presumably this word is a Host1x syncpoint increment that signals the fence once the work above is done;
+ // confirm against the Host1x opcode definitions. Also note that this runs outside the lock and that fences[0] is
+ // accessed unconditionally, so an empty 'fences' span throws here.
+ fences[0].Thresh = Context.Device.System.HostSyncpoint.IncrementSyncpointMax(fences[0].Id, 1);
+
+ Span<int> tmpCmdBuff = stackalloc int[1];
+
+ tmpCmdBuff[0] = (4 << 28) | (int)fences[0].Id;
+
+ _device.Host1x.Submit(tmpCmdBuff);
+
return NvInternalResult.Success;
}
+ /// <summary>
+ /// Takes the next <paramref name="count"/> elements of type <typeparamref name="T"/> from the front of a byte span
+ /// and advances the span past them.
+ /// </summary>
+ /// <typeparam name="T">Element type to reinterpret the bytes as</typeparam>
+ /// <param name="arguments">Remaining bytes; on return it starts right after the consumed elements</param>
+ /// <param name="count">Number of elements to take</param>
+ /// <returns>A view over the consumed elements; it aliases the original buffer, no data is copied</returns>
+ private Span<T> GetSpanAndSkip<T>(ref Span<byte> arguments, int count) where T : unmanaged
+ {
+ // Reinterpret everything that is left first, so an oversized count is rejected by the element-level slice.
+ Span<T> remaining = MemoryMarshal.Cast<byte, T>(arguments);
+ Span<T> taken = remaining.Slice(0, count);
+
+ int consumedBytes = Unsafe.SizeOf<T>() * count;
+
+ arguments = arguments.Slice(consumedBytes);
+
+ return taken;
+ }
+
private NvInternalResult GetSyncpoint(ref GetParameterArguments arguments)
{
if (arguments.Parameter >= MaxModuleSyncpoint)
@@ -248,9 +276,13 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel
{
if (map.DmaMapAddress != 0)
{
- gmm.Free((ulong)map.DmaMapAddress, (uint)map.Size);
+ // FIXME:
+ // To make unmapping work, we need separate address space per channel.
+ // Right now NVDEC and VIC share the GPU address space which is not correct at all.
+
+ // gmm.Free((ulong)map.DmaMapAddress, (uint)map.Size);
- map.DmaMapAddress = 0;
+ // map.DmaMapAddress = 0;
}
}
}
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs
index bb2fd1cc..7ef7e39e 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs
@@ -5,17 +5,40 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel.Types
[StructLayout(LayoutKind.Sequential)]
struct CommandBuffer
{
- public int MemoryId;
- public int Offset;
+ public int Mem; // Handle that Submit passes to NvMapDeviceFile.GetMapFromHandle to find the backing map.
+ public uint Offset; // Added to the map's address by Submit to locate the first command word.
public int WordsCount;
}
[StructLayout(LayoutKind.Sequential)]
+ struct Reloc // One relocation entry; Submit reads RelocsCount of these from the argument buffer.
+ {
+ public int CmdbufMem; // NOTE(review): presumably a map handle for the command buffer being patched - confirm.
+ public int CmdbufOffset; // NOTE(review): presumably an offset into that command buffer - confirm.
+ public int Target; // NOTE(review): presumably a map handle for the relocation target - confirm.
+ public int TargetOffset; // NOTE(review): presumably an offset into the target - confirm.
+ }
+
+ [StructLayout(LayoutKind.Sequential)]
+ struct SyncptIncr // One syncpoint increment request read by Submit.
+ {
+ public uint Id; // Syncpoint id; Submit passes it to IncrementSyncpointMax and copies it into the matching Fence.
+ public uint Incrs; // Amount Submit adds to that syncpoint's max value.
+ }
+
+ [StructLayout(LayoutKind.Sequential)]
+ struct Fence // Output entry that Submit fills in for the caller.
+ {
+ public uint Id; // Syncpoint id, copied from the corresponding SyncptIncr.
+ public uint Thresh; // Value returned by IncrementSyncpointMax for that syncpoint.
+ }
+
+ [StructLayout(LayoutKind.Sequential)]
struct SubmitArguments
{
public int CmdBufsCount;
public int RelocsCount;
public int SyncptIncrsCount;
- public int WaitchecksCount;
+ public int FencesCount; // NOTE(review): appears to be the element count Submit uses for the trailing Fence array - confirm.
}
}
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs
index ff56fbf5..aa730b57 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs
@@ -1,6 +1,5 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gpu.Synchronization;
-using Ryujinx.HLE.HOS.Kernel.Threading;
using System;
using System.Threading;
@@ -172,6 +171,11 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
return (uint)Interlocked.Increment(ref _counterMax[id]);
}
+ public uint IncrementSyncpointMax(uint id, uint incrs) // Atomically adds `incrs` to the max counter of syncpoint `id` and returns the updated value.
+ {
+ return (uint)Interlocked.Add(ref _counterMax[id], (int)incrs); // Interlocked.Add returns the sum after the addition; `id` is not range-checked here.
+ }
+
public bool IsSyncpointExpired(uint id, uint threshold)
{
return MinCompare(id, _counterMin[id], _counterMax[id], (int)threshold);
diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj
index b05a9087..01e20792 100644
--- a/Ryujinx.HLE/Ryujinx.HLE.csproj
+++ b/Ryujinx.HLE/Ryujinx.HLE.csproj
@@ -47,6 +47,9 @@
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
<ProjectReference Include="..\Ryujinx.Cpu\Ryujinx.Cpu.csproj" />
<ProjectReference Include="..\Ryujinx.Debugger\Ryujinx.Debugger.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Host1x\Ryujinx.Graphics.Host1x.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Nvdec\Ryujinx.Graphics.Nvdec.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Vic\Ryujinx.Graphics.Vic.csproj" />
<ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" />
<ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
<ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs
index 5713bd9e..9defe25d 100644
--- a/Ryujinx.HLE/Switch.cs
+++ b/Ryujinx.HLE/Switch.cs
@@ -3,6 +3,9 @@ using Ryujinx.Audio;
using Ryujinx.Configuration;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu;
+using Ryujinx.Graphics.Host1x;
+using Ryujinx.Graphics.Nvdec;
+using Ryujinx.Graphics.Vic;
using Ryujinx.HLE.FileSystem;
using Ryujinx.HLE.FileSystem.Content;
using Ryujinx.HLE.HOS;
@@ -11,7 +14,6 @@ using Ryujinx.HLE.HOS.Services.Hid;
using Ryujinx.HLE.HOS.SystemState;
using Ryujinx.Memory;
using System;
-using System.Threading;
namespace Ryujinx.HLE
{
@@ -23,6 +25,8 @@ namespace Ryujinx.HLE
public GpuContext Gpu { get; private set; }
+ internal Host1xDevice Host1x { get; }
+
public VirtualFileSystem FileSystem { get; private set; }
public Horizon System { get; private set; }
@@ -53,6 +57,27 @@ namespace Ryujinx.HLE
Gpu = new GpuContext(renderer);
+ Host1x = new Host1xDevice(Gpu.Synchronization);
+ var nvdec = new NvdecDevice(Gpu.MemoryManager);
+ var vic = new VicDevice(Gpu.MemoryManager);
+ Host1x.RegisterDevice(ClassId.Nvdec, nvdec);
+ Host1x.RegisterDevice(ClassId.Vic, vic);
+
+ nvdec.FrameDecoded += (FrameDecodedEventArgs e) =>
+ {
+ // FIXME:
+ // Figure out what is causing frame ordering issues on H264.
+ // For now this is needed as workaround.
+ if (e.CodecId == CodecId.H264)
+ {
+ vic.SetSurfaceOverride(e.LumaOffset, e.ChromaOffset, 0);
+ }
+ else
+ {
+ vic.DisableSurfaceOverride();
+ }
+ };
+
FileSystem = fileSystem;
System = new Horizon(this, contentManager);
@@ -136,13 +161,6 @@ namespace Ryujinx.HLE
Gpu.Window.Present(swapBuffersCallback);
}
- internal void Unload()
- {
- FileSystem.Unload();
-
- Memory.Dispose();
- }
-
public void DisposeGpu()
{
Gpu.Dispose();
@@ -158,7 +176,10 @@ namespace Ryujinx.HLE
if (disposing)
{
System.Dispose();
+ Host1x.Dispose();
AudioOut.Dispose();
+ FileSystem.Unload();
+ Memory.Dispose();
}
}
}
diff --git a/Ryujinx.Memory/MemoryBlock.cs b/Ryujinx.Memory/MemoryBlock.cs
index 850fb115..37439a8a 100644
--- a/Ryujinx.Memory/MemoryBlock.cs
+++ b/Ryujinx.Memory/MemoryBlock.cs
@@ -135,13 +135,13 @@ namespace Ryujinx.Memory
public void Copy(ulong srcOffset, ulong dstOffset, ulong size)
{
const int MaxChunkSize = 1 << 30;
-
+
for (ulong offset = 0; offset < size; offset += MaxChunkSize)
{
int copySize = (int)Math.Min(MaxChunkSize, size - offset);
Write(dstOffset + offset, GetSpan(srcOffset + offset, copySize));
- }
+ }
}
/// <summary>
@@ -225,7 +225,7 @@ namespace Ryujinx.Memory
}
/// <summary>
- /// Gets the span of a given memory block region.
+ /// Gets the <see cref="System.Span{T}"/> of a given memory block region.
/// </summary>
/// <param name="offset">Start offset of the memory region</param>
/// <param name="size">Size in bytes of the region</param>
@@ -239,6 +239,20 @@ namespace Ryujinx.Memory
}
/// <summary>
+ /// Gets the <see cref="System.Memory{T}"/> of a given memory block region.
+ /// </summary>
+ /// <param name="offset">Start offset of the memory region</param>
+ /// <param name="size">Size in bytes of the region</param>
+ /// <returns>Memory of the memory region</returns>
+ /// <exception cref="ObjectDisposedException">Thrown when the memory block has already been disposed</exception>
+ /// <exception cref="ArgumentOutOfRangeException">Thrown when either <paramref name="offset"/> or <paramref name="size"/> is out of range</exception>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public unsafe Memory<byte> GetMemory(ulong offset, int size)
+ {
+ return new NativeMemoryManager<byte>((byte*)GetPointer(offset, size), size).Memory; // Allocates a new wrapper on every call; the wrapper's Dispose is a no-op, so the returned Memory does not own the region.
+ }
+
+ /// <summary>
/// Adds a 64-bits offset to a native pointer.
/// </summary>
/// <param name="pointer">Native pointer</param>
diff --git a/Ryujinx.Memory/NativeMemoryManager.cs b/Ryujinx.Memory/NativeMemoryManager.cs
new file mode 100644
index 00000000..ef599dd4
--- /dev/null
+++ b/Ryujinx.Memory/NativeMemoryManager.cs
@@ -0,0 +1,42 @@
+using System;
+using System.Buffers;
+
+namespace Ryujinx.Memory
+{
+ unsafe class NativeMemoryManager<T> : MemoryManager<T> where T : unmanaged // Exposes a raw pointer/length pair through the MemoryManager<T> API without taking ownership of the memory.
+ {
+ private readonly T* _pointer; // Start of the wrapped region.
+ private readonly int _length; // Length of the region in elements of T (used as the Span<T> length in GetSpan).
+
+ public NativeMemoryManager(T* pointer, int length) // Stores the pointer and length as given; nothing is validated or copied.
+ {
+ _pointer = pointer;
+ _length = length;
+ }
+
+ public override Span<T> GetSpan() // Returns a span covering the whole wrapped region.
+ {
+ return new Span<T>((void*)_pointer, _length);
+ }
+
+ public override MemoryHandle Pin(int elementIndex = 0) // Returns a handle to the element at `elementIndex`; no actual pinning is performed.
+ {
+ if ((uint)elementIndex >= _length) // The unsigned cast makes negative indices fail this check as well.
+ {
+ throw new ArgumentOutOfRangeException(nameof(elementIndex));
+ }
+
+ return new MemoryHandle((void*)(_pointer + elementIndex));
+ }
+
+ public override void Unpin()
+ {
+ // No need to do anything as the pointer already points to native memory, which is not GC tracked.
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Nothing to dispose, MemoryBlock still owns the memory.
+ }
+ }
+}
diff --git a/Ryujinx.sln b/Ryujinx.sln
index 5ea6934e..d5e85c2a 100644
--- a/Ryujinx.sln
+++ b/Ryujinx.sln
@@ -44,6 +44,18 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Memory.Tests", "Ryu
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Cpu", "Ryujinx.Cpu\Ryujinx.Cpu.csproj", "{3DF35E3D-D844-4399-A9A1-A9E923264C17}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Device", "Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj", "{C3002C3C-7B09-4FE7-894A-372EDA22FC6E}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Host1x", "Ryujinx.Graphics.Host1x\Ryujinx.Graphics.Host1x.csproj", "{C35F1536-7DE5-4F9D-9604-B5B4E1561947}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.Vp9", "Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj", "{B9AECA11-E248-4886-A10B-81B631CAAF29}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Vic", "Ryujinx.Graphics.Vic\Ryujinx.Graphics.Vic.csproj", "{81BB2C11-9408-4EA3-822E-42987AF54429}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.H264", "Ryujinx.Graphics.Nvdec.H264\Ryujinx.Graphics.Nvdec.H264.csproj", "{990F9601-343E-46CB-8529-B498FA761A92}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Video", "Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj", "{FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -196,6 +208,54 @@ Global
{3DF35E3D-D844-4399-A9A1-A9E923264C17}.Profile Release|Any CPU.Build.0 = Release|Any CPU
{3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.Build.0 = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.Build.0 = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.Build.0 = Release|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {990F9601-343E-46CB-8529-B498FA761A92}.Release|Any CPU.Build.0 = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Release|Any CPU.Build.0 = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/Ryujinx/Ryujinx.csproj b/Ryujinx/Ryujinx.csproj
index cf672131..c6a83cf7 100644
--- a/Ryujinx/Ryujinx.csproj
+++ b/Ryujinx/Ryujinx.csproj
@@ -83,6 +83,7 @@
<PackageReference Include="GLWidget" Version="1.0.2" />
<PackageReference Include="GtkSharp" Version="3.22.25.56" />
<PackageReference Include="GtkSharp.Dependencies" Version="1.1.0" Condition="'$(RuntimeIdentifier)' != 'linux-x64' AND '$(RuntimeIdentifier)' != 'osx-x64'" />
+ <PackageReference Include="Ryujinx.Graphics.Nvdec.Dependencies" Version="4.3.0" Condition="'$(RuntimeIdentifier)' != 'linux-x64' AND '$(RuntimeIdentifier)' != 'osx-x64'" />
<PackageReference Include="OpenTK.NetStandard" Version="1.0.5.12" />
</ItemGroup>
@@ -100,6 +101,9 @@
<None Update="Config.json" Condition="'$(Configuration)' == 'Debug' OR '$(Configuration)' == 'Profile Debug'">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
+ <None Update="THIRDPARTY.md">
+ <CopyToOutputDirectory>Always</CopyToOutputDirectory>
+ </None>
</ItemGroup>
</Project>
diff --git a/Ryujinx/THIRDPARTY.md b/Ryujinx/THIRDPARTY.md
new file mode 100644
index 00000000..94b7ec37
--- /dev/null
+++ b/Ryujinx/THIRDPARTY.md
@@ -0,0 +1,203 @@
+# ffmpeg (LGPLv3)
+```
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
+
+```
+
+# libvpx (BSD)
+```
+Copyright (c) 2010, The WebM Project authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the name of Google, nor the WebM Project, nor the names
+ of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+``` \ No newline at end of file