aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorReinUsesLisp <reinuseslisp@airmail.cc>2019-11-13 00:25:52 -0300
committerReinUsesLisp <reinuseslisp@airmail.cc>2019-11-22 21:28:48 -0300
commit287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 (patch)
treefeb26b3520031dfff59e7cf8e85018ab888cc2fa /src
parentdbeb52387979c7e28c0acb03dfc1468146947104 (diff)
gl_shader_cache: Specialize local memory size for compute shaders
Local memory size in compute shaders was stubbed with an arbitary size. This commit specializes local memory size from guest GPU parameters.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/kepler_compute.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h16
6 files changed, 32 insertions, 21 deletions
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index bd49c6627c..c526287b7a 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -178,7 +178,12 @@ public:
BitField<24, 5, u32> gpr_alloc;
};
- INSERT_PADDING_WORDS(0x11);
+ union {
+ BitField<0, 20, u32> local_crs_alloc;
+ BitField<24, 5, u32> sass_version;
+ };
+
+ INSERT_PADDING_WORDS(0x10);
} launch_description{};
struct {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ebfe52e6dc..d890076f8f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z, launch_desc.shared_alloc);
+ launch_desc.block_dim_z, launch_desc.shared_alloc,
+ launch_desc.local_pos_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 982c4e23aa..b23a982d76 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
source += fmt::format("shared uint smem[{}];",
Common::AlignUp(variant.shared_memory_size, 4) / 4);
}
+
+ if (variant.local_memory_size > 0) {
+ source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
+ Common::AlignUp(variant.local_memory_size, 4) / 4);
+ }
}
source += '\n';
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fb2ba09056..fe016c05c3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -510,10 +510,14 @@ private:
}
void DeclareLocalMemory() {
- // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
- // specialization time.
- const u64 local_memory_size =
- stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
+ if (stage == ProgramType::Compute) {
+ code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
+ code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
+ code.AddLine("#endif");
+ return;
+ }
+
+ const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
@@ -851,9 +855,6 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
return {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
@@ -1228,9 +1229,6 @@ private:
}
target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d2bb8502a5..5ebcbbbbae 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -52,11 +52,11 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
-constexpr u32 NativeVersion = 8;
+constexpr u32 NativeVersion = 9;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
-static_assert(sizeof(ProgramVariant) == 32);
+static_assert(sizeof(ProgramVariant) == 36);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 6f8e51364f..28689f6c7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -64,10 +64,10 @@ struct ProgramVariant final {
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
/// Compute constructor.
- explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
- u32 shared_memory_size) noexcept
+ explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
+ u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
- shared_memory_size{shared_memory_size} {}
+ shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
BaseBindings base_bindings{};
@@ -78,12 +78,13 @@ struct ProgramVariant final {
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
+ u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
- shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
- rhs.block_x, rhs.block_y, rhs.block_z,
- rhs.shared_memory_size);
+ shared_memory_size, local_memory_size) ==
+ std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
+ rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
@@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
- (static_cast<std::size_t>(variant.shared_memory_size) << 16);
+ (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
+ (static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};