1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
|
using Ryujinx.Graphics.GAL;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Memory
{
/// <summary>
/// Type of backing memory.
/// In ascending order of priority when merging multiple buffer backing states.
/// </summary>
internal enum BufferBackingType
{
HostMemory,
DeviceMemory,
DeviceMemoryWithFlush
}
/// <summary>
/// Keeps track of buffer usage to decide what memory heap that buffer memory is placed on.
/// Dedicated GPUs prefer certain types of resources to be device local,
/// and if we need data to be read back, we might prefer that they're in host memory.
///
/// The measurements recorded here compare to a set of heruristics (thresholds and conditions)
/// that appear to produce good performance in most software.
/// </summary>
internal struct BufferBackingState
{
private const int DeviceLocalSizeThreshold = 256 * 1024; // 256kb
private const int SetCountThreshold = 100;
private const int WriteCountThreshold = 50;
private const int FlushCountThreshold = 5;
private const int DeviceLocalForceExpiry = 100;
public readonly bool IsDeviceLocal => _activeType != BufferBackingType.HostMemory;
private readonly SystemMemoryType _systemMemoryType;
private BufferBackingType _activeType;
private BufferBackingType _desiredType;
private bool _canSwap;
private int _setCount;
private int _writeCount;
private int _flushCount;
private int _flushTemp;
private int _lastFlushWrite;
private int _deviceLocalForceCount;
private readonly int _size;
/// <summary>
/// Initialize the buffer backing state for a given parent buffer.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="parent">Parent buffer</param>
/// <param name="stage">Initial buffer stage</param>
/// <param name="baseBuffers">Buffers to inherit state from</param>
public BufferBackingState(GpuContext context, Buffer parent, BufferStage stage, IEnumerable<Buffer> baseBuffers = null)
{
_size = (int)parent.Size;
_systemMemoryType = context.Capabilities.MemoryType;
// Backend managed is always auto, unified memory is always host.
_desiredType = BufferBackingType.HostMemory;
_canSwap = _systemMemoryType != SystemMemoryType.BackendManaged && _systemMemoryType != SystemMemoryType.UnifiedMemory;
if (_canSwap)
{
// Might want to start certain buffers as being device local,
// and the usage might also lock those buffers into being device local.
BufferStage storageFlags = stage & BufferStage.StorageMask;
if (parent.Size > DeviceLocalSizeThreshold && baseBuffers == null)
{
_desiredType = BufferBackingType.DeviceMemory;
}
if (storageFlags != 0)
{
// Storage buffer bindings may require special treatment.
var rawStage = stage & BufferStage.StageMask;
if (rawStage == BufferStage.Fragment)
{
// Fragment read should start device local.
_desiredType = BufferBackingType.DeviceMemory;
if (storageFlags != BufferStage.StorageRead)
{
// Fragment write should stay device local until the use doesn't happen anymore.
_deviceLocalForceCount = DeviceLocalForceExpiry;
}
}
// TODO: Might be nice to force atomic access to be device local for any stage.
}
if (baseBuffers != null)
{
foreach (Buffer buffer in baseBuffers)
{
CombineState(buffer.BackingState);
}
}
}
}
/// <summary>
/// Combine buffer backing types, selecting the one with highest priority.
/// </summary>
/// <param name="left">First buffer backing type</param>
/// <param name="right">Second buffer backing type</param>
/// <returns>Combined buffer backing type</returns>
private static BufferBackingType CombineTypes(BufferBackingType left, BufferBackingType right)
{
return (BufferBackingType)Math.Max((int)left, (int)right);
}
/// <summary>
/// Combine the state from the given buffer backing state with this one,
/// so that the state isn't lost when migrating buffers.
/// </summary>
/// <param name="oldState">Buffer state to combine into this state</param>
private void CombineState(BufferBackingState oldState)
{
_setCount += oldState._setCount;
_writeCount += oldState._writeCount;
_flushCount += oldState._flushCount;
_flushTemp += oldState._flushTemp;
_lastFlushWrite = -1;
_deviceLocalForceCount = Math.Max(_deviceLocalForceCount, oldState._deviceLocalForceCount);
_canSwap &= oldState._canSwap;
_desiredType = CombineTypes(_desiredType, oldState._desiredType);
}
/// <summary>
/// Get the buffer access for the desired backing type, and record that type as now being active.
/// </summary>
/// <param name="parent">Parent buffer</param>
/// <returns>Buffer access</returns>
public BufferAccess SwitchAccess(Buffer parent)
{
BufferAccess access = parent.SparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default;
bool isBackendManaged = _systemMemoryType == SystemMemoryType.BackendManaged;
if (!isBackendManaged)
{
switch (_desiredType)
{
case BufferBackingType.HostMemory:
access |= BufferAccess.HostMemory;
break;
case BufferBackingType.DeviceMemory:
access |= BufferAccess.DeviceMemory;
break;
case BufferBackingType.DeviceMemoryWithFlush:
access |= BufferAccess.DeviceMemoryMapped;
break;
}
}
_activeType = _desiredType;
return access;
}
/// <summary>
/// Record when data has been uploaded to the buffer.
/// </summary>
public void RecordSet()
{
_setCount++;
ConsiderUseCounts();
}
/// <summary>
/// Record when data has been flushed from the buffer.
/// </summary>
public void RecordFlush()
{
if (_lastFlushWrite != _writeCount)
{
// If it's on the same page as the last flush, ignore it.
_lastFlushWrite = _writeCount;
_flushCount++;
}
}
/// <summary>
/// Determine if the buffer backing should be changed.
/// </summary>
/// <returns>True if the desired backing type is different from the current type</returns>
public readonly bool ShouldChangeBacking()
{
return _desiredType != _activeType;
}
/// <summary>
/// Determine if the buffer backing should be changed, considering a new use with the given buffer stage.
/// </summary>
/// <param name="stage">Buffer stage for the use</param>
/// <returns>True if the desired backing type is different from the current type</returns>
public bool ShouldChangeBacking(BufferStage stage)
{
if (!_canSwap)
{
return false;
}
BufferStage storageFlags = stage & BufferStage.StorageMask;
if (storageFlags != 0)
{
if (storageFlags != BufferStage.StorageRead)
{
// Storage write.
_writeCount++;
var rawStage = stage & BufferStage.StageMask;
if (rawStage == BufferStage.Fragment)
{
// Switch to device memory, swap back only if this use disappears.
_desiredType = CombineTypes(_desiredType, BufferBackingType.DeviceMemory);
_deviceLocalForceCount = DeviceLocalForceExpiry;
// TODO: Might be nice to force atomic access to be device local for any stage.
}
}
ConsiderUseCounts();
}
return _desiredType != _activeType;
}
/// <summary>
/// Evaluate the current counts to determine what the buffer's desired backing type is.
/// This method depends on heuristics devised by testing a variety of software.
/// </summary>
private void ConsiderUseCounts()
{
if (_canSwap)
{
if (_writeCount >= WriteCountThreshold || _setCount >= SetCountThreshold || _flushCount >= FlushCountThreshold)
{
if (_deviceLocalForceCount > 0 && --_deviceLocalForceCount != 0)
{
// Some buffer usage demanded that the buffer stay device local.
// The desired type was selected when this counter was set.
}
else if (_flushCount > 0 || _flushTemp-- > 0)
{
// Buffers that flush should ideally be mapped in host address space for easy copies.
// If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages).
// If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached.
_desiredType = _size > DeviceLocalSizeThreshold ? BufferBackingType.DeviceMemoryWithFlush : BufferBackingType.HostMemory;
}
else if (_writeCount >= WriteCountThreshold)
{
// Buffers that are written often should ideally be in the device local heap. (Storage buffers)
_desiredType = BufferBackingType.DeviceMemory;
}
else if (_setCount > SetCountThreshold)
{
// Buffers that have their data set often should ideally be host mapped. (Constant buffers)
_desiredType = BufferBackingType.HostMemory;
}
// It's harder for a buffer that is flushed to revert to another type of mapping.
if (_flushCount > 0)
{
_flushTemp = 1000;
}
_lastFlushWrite = -1;
_flushCount = 0;
_writeCount = 0;
_setCount = 0;
}
}
}
}
}
|