From a0c697124ced080f58866825e2e323e8682bbd7f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Jan 2023 10:01:25 -0500
Subject: Video_core: Address feedback

---
 src/common/range_map.h                             |   6 +-
 .../backend/glasm/emit_glasm_context_get_set.cpp   |  18 ++
 .../backend/glsl/emit_glsl_context_get_set.cpp     |   6 +
 .../backend/spirv/emit_spirv_context_get_set.cpp   |   4 +
 .../backend/spirv/spirv_emit_context.cpp           |   3 +
 .../backend/spirv/spirv_emit_context.h             |   1 +
 src/shader_recompiler/frontend/ir/attribute.cpp    |   2 +
 src/shader_recompiler/frontend/ir/attribute.h      |   1 +
 .../ir_opt/constant_propagation_pass.cpp           |   3 +
 src/shader_recompiler/shader_info.h                |   1 +
 src/video_core/engines/draw_manager.cpp            |  13 +-
 src/video_core/engines/draw_manager.h              |   2 +-
 src/video_core/engines/maxwell_3d.cpp              |  13 +-
 src/video_core/engines/maxwell_3d.h                |  30 +-
 src/video_core/macro/macro.cpp                     |   2 +-
 src/video_core/macro/macro_hle.cpp                 | 356 ++++++++++++++-------
 src/video_core/renderer_opengl/gl_rasterizer.cpp   |  18 +-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp   |  27 +-
 .../renderer_vulkan/vk_state_tracker.cpp           |   2 +-
 src/video_core/shader_environment.cpp              |   8 +-
 20 files changed, 346 insertions(+), 170 deletions(-)

(limited to 'src')

diff --git a/src/common/range_map.h b/src/common/range_map.h
index 051e713a7b..79c7ef5474 100644
--- a/src/common/range_map.h
+++ b/src/common/range_map.h
@@ -60,7 +60,7 @@ private:
     using ConstIteratorType = typename MapType::const_iterator;
 
     size_t ContinousSizeInternal(KeyT address) const {
-        const auto it = GetFirstElemnentBeforeOrOn(address);
+        const auto it = GetFirstElementBeforeOrOn(address);
         if (it == container.end() || it->second == null_value) {
             return 0;
         }
@@ -72,14 +72,14 @@ private:
     }
 
     ValueT GetValueInternal(KeyT address) const {
-        const auto it = GetFirstElemnentBeforeOrOn(address);
+        const auto it = GetFirstElementBeforeOrOn(address);
         if (it == container.end()) {
             return null_value;
         }
         return it->second;
     }
 
-    ConstIteratorType GetFirstElemnentBeforeOrOn(KeyT address) const {
+    ConstIteratorType GetFirstElementBeforeOrOn(KeyT address) const {
         auto it = container.lower_bound(address);
         if (it == container.begin()) {
             return it;
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index f0bd84ab2d..c7d7d5fefb 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -137,6 +137,15 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
     case IR::Attribute::VertexId:
         ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
         break;
+    case IR::Attribute::BaseInstance:
+        ctx.Add("MOV.F {}.x,{}.baseInstance;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::BaseVertex:
+        ctx.Add("MOV.F {}.x,{}.baseVertex;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::DrawID:
+        ctx.Add("MOV.F {}.x,{}.draw.id;", inst, ctx.attrib_name);
+        break;
     case IR::Attribute::FrontFace:
         ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
         break;
@@ -156,6 +165,15 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, S
     case IR::Attribute::VertexId:
         ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
         break;
+    case IR::Attribute::BaseInstance:
+        ctx.Add("MOV.S {}.x,{}.baseInstance;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::BaseVertex:
+        ctx.Add("MOV.S {}.x,{}.baseVertex;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::DrawID:
+        ctx.Add("MOV.S {}.x,{}.draw.id;", inst, ctx.attrib_name);
+        break;
     default:
         throw NotImplementedException("Get U32 attribute {}", attr);
     }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 25106da672..2e369ed723 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -240,6 +240,9 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     case IR::Attribute::BaseVertex:
         ctx.AddF32("{}=itof(gl_BaseVertex);", inst);
         break;
+    case IR::Attribute::DrawID:
+        ctx.AddF32("{}=itof(gl_DrawID);", inst);
+        break;
     default:
         throw NotImplementedException("Get attribute {}", attr);
     }
@@ -262,6 +265,9 @@ void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, s
     case IR::Attribute::BaseVertex:
         ctx.AddU32("{}=uint(gl_BaseVertex);", inst);
         break;
+    case IR::Attribute::DrawID:
+        ctx.AddU32("{}=uint(gl_DrawID);", inst);
+        break;
     default:
         throw NotImplementedException("Get U32 attribute {}", attr);
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index e4802bf9e7..db9c94ce8e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -343,6 +343,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
     case IR::Attribute::BaseVertex:
         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
+    case IR::Attribute::DrawID:
+        return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.draw_index));
     case IR::Attribute::FrontFace:
         return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
                             ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
@@ -388,6 +390,8 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
         return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
     case IR::Attribute::BaseVertex:
         return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
+    case IR::Attribute::DrawID:
+        return ctx.OpLoad(ctx.U32[1], ctx.draw_index);
     default:
         throw NotImplementedException("Read U32 attribute {}", attr);
     }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 563a5fc49a..ecb2db4940 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1402,6 +1402,9 @@ void EmitContext::DefineInputs(const IR::Program& program) {
     } else if (loads[IR::Attribute::BaseVertex]) {
         base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
     }
+    if (loads[IR::Attribute::DrawID]) {
+        draw_index = DefineInput(*this, U32[1], true, spv::BuiltIn::DrawIndex);
+    }
     if (loads[IR::Attribute::FrontFace]) {
         front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
     }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index dde45b4bc4..4414a51696 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -218,6 +218,7 @@ public:
     Id base_instance{};
     Id vertex_id{};
     Id vertex_index{};
+    Id draw_index{};
     Id base_vertex{};
     Id front_face{};
     Id point_coord{};
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
index 73e189a895..1bf9db9353 100644
--- a/src/shader_recompiler/frontend/ir/attribute.cpp
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -450,6 +450,8 @@ std::string NameOf(Attribute attribute) {
         return "BaseInstance";
     case Attribute::BaseVertex:
         return "BaseVertex";
+    case Attribute::DrawID:
+        return "DrawID";
     }
     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
 }
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
index 364d8a9124..5f039b6f65 100644
--- a/src/shader_recompiler/frontend/ir/attribute.h
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -223,6 +223,7 @@ enum class Attribute : u64 {
     // Implementation attributes
     BaseInstance = 256,
     BaseVertex = 257,
+    DrawID = 258,
 };
 
 constexpr size_t NUM_GENERICS = 32;
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 5275b2c8b6..4d81e9336e 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -518,6 +518,7 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
             case IR::Attribute::VertexId:
             case IR::Attribute::BaseVertex:
             case IR::Attribute::BaseInstance:
+            case IR::Attribute::DrawID:
                 break;
             default:
                 return;
@@ -665,6 +666,8 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
             return IR::Attribute::BaseInstance;
         case ReplaceConstant::BaseVertex:
             return IR::Attribute::BaseVertex;
+        case ReplaceConstant::DrawID:
+            return IR::Attribute::DrawID;
         default:
             throw NotImplementedException("Not implemented replacement variable {}", *replacement);
         }
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index ea0f483441..44236b6b1c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -19,6 +19,7 @@ namespace Shader {
 enum class ReplaceConstant : u32 {
     BaseInstance,
     BaseVertex,
+    DrawID,
 };
 
 enum class TextureType : u32 {
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index feea89c0e1..2437121ce8 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
 void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
     draw_state.topology = topology;
 
-    ProcessDrawIndirect(true);
+    ProcessDrawIndirect();
 }
 
 void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
@@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs
     draw_state.index_buffer.first = index_first;
     draw_state.index_buffer.count = index_count;
 
-    ProcessDrawIndirect(true);
+    ProcessDrawIndirect();
 }
 
 void DrawManager::SetInlineIndexBuffer(u32 index) {
@@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
     }
 }
 
-void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
-    LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology,
-              draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count);
+void DrawManager::ProcessDrawIndirect() {
+    LOG_TRACE(
+        HW_GPU,
+        "called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
+        draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
+        indirect_state.buffer_size, indirect_state.max_draw_counts);
 
     UpdateTopology();
 
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 49a4fca48a..58d1b2d59f 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -85,7 +85,7 @@ private:
 
     void ProcessDraw(bool draw_indexed, u32 instance_count);
 
-    void ProcessDrawIndirect(bool draw_indexed);
+    void ProcessDrawIndirect();
 
     Maxwell3D* maxwell3d{};
     State draw_state{};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 943a69935d..fbfd1ddd24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -220,9 +220,6 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
 }
 
 void Maxwell3D::RefreshParametersImpl() {
-    if (!Settings::IsGPULevelHigh()) {
-        return;
-    }
     size_t current_index = 0;
     for (auto& segment : macro_segments) {
         if (segment.first == 0) {
@@ -448,9 +445,11 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15:
         ProcessCBMultiData(base_start, amount);
         break;
-    case MAXWELL3D_REG_INDEX(inline_data):
+    case MAXWELL3D_REG_INDEX(inline_data): {
+        ASSERT(methods_pending == amount);
         upload_state.ProcessData(base_start, amount);
         return;
+    }
     default:
         for (u32 i = 0; i < amount; i++) {
             CallMethod(method, base_start[i], methods_pending - i <= 1);
@@ -537,7 +536,7 @@ void Maxwell3D::ProcessQueryGet() {
 void Maxwell3D::ProcessQueryCondition() {
     const GPUVAddr condition_address{regs.render_enable.Address()};
     switch (regs.render_enable_override) {
-    case Regs::RenderEnable::Override::AlwaysRender: {
+    case Regs::RenderEnable::Override::AlwaysRender:
         execute_on = true;
         break;
     case Regs::RenderEnable::Override::NeverRender:
@@ -586,7 +585,6 @@ void Maxwell3D::ProcessQueryCondition() {
         break;
     }
     }
-    }
 }
 
 void Maxwell3D::ProcessCounterReset() {
@@ -685,7 +683,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
     return regs.reg_array[method];
 }
 
-void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
+void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
+                                               HLEReplacementAttributeType name) {
     const u64 key = (static_cast<u64>(bank) << 32) | offset;
     replace_table.emplace(key, name);
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a2dff03500..0b2fd29289 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1218,12 +1218,12 @@ public:
 
         struct Window {
             union {
-                u32 raw_1;
+                u32 raw_x;
                 BitField<0, 16, u32> x_min;
                 BitField<16, 16, u32> x_max;
             };
             union {
-                u32 raw_2;
+                u32 raw_y;
                 BitField<0, 16, u32> y_min;
                 BitField<16, 16, u32> y_max;
             };
@@ -3031,14 +3031,15 @@ public:
 
     EngineHint engine_state{EngineHint::None};
 
-    enum class HLEReplaceName : u32 {
+    enum class HLEReplacementAttributeType : u32 {
         BaseVertex = 0x0,
         BaseInstance = 0x1,
+        DrawID = 0x2,
     };
 
-    void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
+    void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
 
-    std::unordered_map<u64, HLEReplaceName> replace_table;
+    std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
 
     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
     static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -3087,9 +3088,7 @@ public:
     std::unique_ptr<DrawManager> draw_manager;
     friend class DrawManager;
 
-    std::vector<u8> inline_index_draw_indexes;
-
-    GPUVAddr getMacroAddress(size_t index) const {
+    GPUVAddr GetMacroAddress(size_t index) const {
         return macro_addresses[index];
     }
 
@@ -3100,7 +3099,7 @@ public:
         RefreshParametersImpl();
     }
 
-    bool AnyParametersDirty() {
+    bool AnyParametersDirty() const {
         return current_macro_dirty;
     }
 
@@ -3114,6 +3113,10 @@ public:
     /// Handles a write to the CB_BIND register.
     void ProcessCBBind(size_t stage_index);
 
+    /// Handles a write to the CB_DATA[i] register.
+    void ProcessCBData(u32 value);
+    void ProcessCBMultiData(const u32* start_base, u32 amount);
+
 private:
     void InitializeRegisterDefaults();
 
@@ -3165,10 +3168,6 @@ private:
     /// Handles writes to syncing register.
     void ProcessSyncPoint();
 
-    /// Handles a write to the CB_DATA[i] register.
-    void ProcessCBData(u32 value);
-    void ProcessCBMultiData(const u32* start_base, u32 amount);
-
     /// Returns a query's value or an empty object if the value will be deferred through a cache.
     std::optional<u64> GetQueryResult();
 
@@ -3196,11 +3195,6 @@ private:
 
     bool execute_on{true};
 
-    std::array<bool, Regs::NUM_REGS> draw_command{};
-    std::vector<u32> deferred_draw_method;
-    enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
-    DrawMode draw_mode{DrawMode::General};
-    bool draw_indexed{};
     std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
     std::vector<GPUVAddr> macro_addresses;
     bool current_macro_dirty{};
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 49c47dafee..a96e8648c9 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -23,7 +23,7 @@
 #include "video_core/macro/macro_jit_x64.h"
 #endif
 
-MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192));
+MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro HLE", MP_RGB(128, 192, 192));
 
 namespace Tegra {
 
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index c08b4abb38..a5476e7952 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -1,5 +1,5 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <array>
 #include <vector>
@@ -15,28 +15,28 @@
 
 namespace Tegra {
 
-using Maxwell = Engines::Maxwell3D;
+using Maxwell3D = Engines::Maxwell3D;
 
 namespace {
 
-bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {
+bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) {
     switch (topology) {
-    case Maxwell::Regs::PrimitiveTopology::Points:
-    case Maxwell::Regs::PrimitiveTopology::Lines:
-    case Maxwell::Regs::PrimitiveTopology::LineLoop:
-    case Maxwell::Regs::PrimitiveTopology::LineStrip:
-    case Maxwell::Regs::PrimitiveTopology::Triangles:
-    case Maxwell::Regs::PrimitiveTopology::TriangleStrip:
-    case Maxwell::Regs::PrimitiveTopology::TriangleFan:
-    case Maxwell::Regs::PrimitiveTopology::LinesAdjacency:
-    case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency:
-    case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency:
-    case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency:
-    case Maxwell::Regs::PrimitiveTopology::Patches:
+    case Maxwell3D::Regs::PrimitiveTopology::Points:
+    case Maxwell3D::Regs::PrimitiveTopology::Lines:
+    case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
+    case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
+    case Maxwell3D::Regs::PrimitiveTopology::Triangles:
+    case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
+    case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
+    case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
+    case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
+    case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
+    case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
+    case Maxwell3D::Regs::PrimitiveTopology::Patches:
         return true;
-    case Maxwell::Regs::PrimitiveTopology::Quads:
-    case Maxwell::Regs::PrimitiveTopology::QuadStrip:
-    case Maxwell::Regs::PrimitiveTopology::Polygon:
+    case Maxwell3D::Regs::PrimitiveTopology::Quads:
+    case Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
+    case Maxwell3D::Regs::PrimitiveTopology::Polygon:
     default:
         return false;
     }
@@ -44,34 +44,55 @@ bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {
 
 class HLEMacroImpl : public CachedMacro {
 public:
-    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
+    explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
 
 protected:
-    Engines::Maxwell3D& maxwell3d;
+    Maxwell3D& maxwell3d;
 };
 
-class HLE_771BB18C62444DA0 final : public HLEMacroImpl {
+class HLE_DrawArrays final : public HLEMacroImpl {
 public:
-    explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
-        const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
+
+        auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
+        maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
+                                          maxwell3d.regs.global_base_instance_index, 1);
+    }
+};
+
+class HLE_DrawIndexed final : public HLEMacroImpl {
+public:
+    explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+
+    void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
+        maxwell3d.RefreshParameters();
+        maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
+        maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
+        maxwell3d.regs.index_buffer.format =
+            static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
-        maxwell3d.draw_manager->DrawIndex(
-            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
-                                                                            0x3ffffff),
-            parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
+
+        auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
+        maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
+                                          maxwell3d.regs.global_base_vertex_index,
+                                          maxwell3d.regs.global_base_instance_index, 1);
     }
 };
 
+/*
+ * @note: this macro has a normal and an extended version; only the extended version registers
+ * the HLE replacement attribute for the base instance (bank 0, offset 0x640) around the draw.
+ */
+template <bool extended>
 class HLE_DrawArraysIndirect final : public HLEMacroImpl {
 public:
-    explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false)
-        : HLEMacroImpl(maxwell3d_), extended(extended_) {}
+    explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
-        auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
+        auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
         if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
             Fallback(parameters);
             return;
@@ -81,20 +102,21 @@ public:
         params.is_indexed = false;
         params.include_count = false;
         params.count_start_address = 0;
-        params.indirect_start_address = maxwell3d.getMacroAddress(1);
+        params.indirect_start_address = maxwell3d.GetMacroAddress(1);
         params.buffer_size = 4 * sizeof(u32);
         params.max_draw_counts = 1;
         params.stride = 0;
 
-        if (extended) {
-            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
         }
 
         maxwell3d.draw_manager->DrawArrayIndirect(topology);
 
-        if (extended) {
-            maxwell3d.engine_state = Maxwell::EngineHint::None;
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::None;
             maxwell3d.replace_table.clear();
         }
     }
@@ -103,14 +125,14 @@ private:
     void Fallback(const std::vector<u32>& parameters) {
         SCOPE_EXIT({
             if (extended) {
-                maxwell3d.engine_state = Maxwell::EngineHint::None;
+                maxwell3d.engine_state = Maxwell3D::EngineHint::None;
                 maxwell3d.replace_table.clear();
             }
         });
         maxwell3d.RefreshParameters();
         const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
 
-        auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
+        auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
         const u32 vertex_first = parameters[3];
         const u32 vertex_count = parameters[1];
 
@@ -122,31 +144,35 @@ private:
         }
 
         const u32 base_instance = parameters[4];
-        if (extended) {
+        if constexpr (extended) {
             maxwell3d.regs.global_base_instance_index = base_instance;
-            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
+            maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
         }
 
         maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
                                           instance_count);
 
-        if (extended) {
+        if constexpr (extended) {
             maxwell3d.regs.global_base_instance_index = 0;
-            maxwell3d.engine_state = Maxwell::EngineHint::None;
+            maxwell3d.engine_state = Maxwell3D::EngineHint::None;
             maxwell3d.replace_table.clear();
         }
     }
-
-    bool extended;
 };
 
+/*
+ * @note: this macro has a normal and an extended version; only the extended version registers
+ * the HLE replacement attributes for base vertex (0x640) and base instance (0x644) in bank 0.
+ */
+template <bool extended>
 class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
 public:
-    explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
-        auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
+        auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
         if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
             Fallback(parameters);
             return;
@@ -159,24 +185,30 @@ public:
         maxwell3d.regs.global_base_vertex_index = element_base;
         maxwell3d.regs.global_base_instance_index = base_instance;
         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
-        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
-        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+        }
         auto& params = maxwell3d.draw_manager->GetIndirectParams();
         params.is_indexed = true;
         params.include_count = false;
         params.count_start_address = 0;
-        params.indirect_start_address = maxwell3d.getMacroAddress(1);
+        params.indirect_start_address = maxwell3d.GetMacroAddress(1);
         params.buffer_size = 5 * sizeof(u32);
         params.max_draw_counts = 1;
         params.stride = 0;
         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
-        maxwell3d.engine_state = Maxwell::EngineHint::None;
-        maxwell3d.replace_table.clear();
         maxwell3d.regs.vertex_id_base = 0x0;
         maxwell3d.regs.global_base_vertex_index = 0x0;
         maxwell3d.regs.global_base_instance_index = 0x0;
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+            maxwell3d.replace_table.clear();
+        }
     }
 
 private:
@@ -189,31 +221,37 @@ private:
         maxwell3d.regs.global_base_vertex_index = element_base;
         maxwell3d.regs.global_base_instance_index = base_instance;
         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
-        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
-        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+        }
 
         maxwell3d.draw_manager->DrawIndex(
-            static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
-            parameters[3], parameters[1], element_base, base_instance, instance_count);
+            static_cast<Tegra::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), parameters[3],
+            parameters[1], element_base, base_instance, instance_count);
 
         maxwell3d.regs.vertex_id_base = 0x0;
         maxwell3d.regs.global_base_vertex_index = 0x0;
         maxwell3d.regs.global_base_instance_index = 0x0;
-        maxwell3d.engine_state = Maxwell::EngineHint::None;
-        maxwell3d.replace_table.clear();
+        if constexpr (extended) {
+            maxwell3d.engine_state = Maxwell3D::EngineHint::None;
+            maxwell3d.replace_table.clear();
+        }
     }
 };
 
 class HLE_MultiLayerClear final : public HLEMacroImpl {
 public:
-    explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
         ASSERT(parameters.size() == 1);
 
-        const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
+        const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
         const u32 rt_index = clear_params.RT;
         const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
         ASSERT(clear_params.layer == 0);
@@ -225,11 +263,10 @@ public:
 
 class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
 public:
-    explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_)
-        : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
-        const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
+        const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
         if (!IsTopologySafe(topology)) {
             Fallback(parameters);
             return;
@@ -253,27 +290,30 @@ public:
         auto& params = maxwell3d.draw_manager->GetIndirectParams();
         params.is_indexed = true;
         params.include_count = true;
-        params.count_start_address = maxwell3d.getMacroAddress(4);
-        params.indirect_start_address = maxwell3d.getMacroAddress(5);
+        params.count_start_address = maxwell3d.GetMacroAddress(4);
+        params.indirect_start_address = maxwell3d.GetMacroAddress(5);
         params.buffer_size = stride * draw_count;
         params.max_draw_counts = draw_count;
         params.stride = stride;
         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
-        maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-        maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
-        maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
+        maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+        maxwell3d.SetHLEReplacementAttributeType(
+            0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+        maxwell3d.SetHLEReplacementAttributeType(
+            0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+        maxwell3d.SetHLEReplacementAttributeType(0, 0x648,
+                                                 Maxwell3D::HLEReplacementAttributeType::DrawID);
         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
-        maxwell3d.engine_state = Maxwell::EngineHint::None;
+        maxwell3d.engine_state = Maxwell3D::EngineHint::None;
         maxwell3d.replace_table.clear();
     }
 
 private:
     void Fallback(const std::vector<u32>& parameters) {
         SCOPE_EXIT({
-            // Clean everything.
             // Clean everything.
             maxwell3d.regs.vertex_id_base = 0x0;
-            maxwell3d.engine_state = Maxwell::EngineHint::None;
+            maxwell3d.engine_state = Maxwell3D::EngineHint::None;
             maxwell3d.replace_table.clear();
         });
         maxwell3d.RefreshParameters();
@@ -283,7 +323,7 @@ private:
             // Nothing to do.
             return;
         }
-        const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
+        const auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
         const u32 padding = parameters[3];
         const std::size_t max_draws = parameters[4];
 
@@ -297,9 +337,13 @@ private:
             const u32 base_vertex = parameters[base + 3];
             const u32 base_instance = parameters[base + 4];
             maxwell3d.regs.vertex_id_base = base_vertex;
-            maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
-            maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
-            maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
+            maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro;
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex);
+            maxwell3d.SetHLEReplacementAttributeType(
+                0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance);
+            maxwell3d.CallMethod(0x8e3, 0x648, true);
+            maxwell3d.CallMethod(0x8e4, static_cast<u32>(index), true);
             maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
             maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
                                               base_vertex, base_instance, parameters[base + 1]);
@@ -309,7 +353,7 @@ private:
 
 class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
 public:
-    explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
@@ -325,7 +369,7 @@ public:
 
 class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
 public:
-    explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
@@ -341,7 +385,7 @@ public:
 
 class HLE_BindShader final : public HLEMacroImpl {
 public:
-    explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
@@ -371,7 +415,7 @@ public:
 
 class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
 public:
-    explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+    explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
 
     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
         maxwell3d.RefreshParameters();
@@ -384,60 +428,156 @@ public:
     }
 };
 
+template <size_t base_size>
+class HLE_ClearConstBuffer final : public HLEMacroImpl {
+public:
+    explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+
+    void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
+        maxwell3d.RefreshParameters();
+        static constexpr std::array<u32, base_size> zeroes{};
+        auto& regs = maxwell3d.regs;
+        regs.const_buffer.size = static_cast<u32>(base_size);
+        regs.const_buffer.address_high = parameters[0];
+        regs.const_buffer.address_low = parameters[1];
+        regs.const_buffer.offset = 0;
+        maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4);
+    }
+};
+
+class HLE_ClearMemory final : public HLEMacroImpl {
+public:
+    explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+
+    void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
+        maxwell3d.RefreshParameters();
+
+        const u32 needed_memory = parameters[2] / sizeof(u32);
+        if (needed_memory > zero_memory.size()) {
+            zero_memory.resize(needed_memory, 0);
+        }
+        auto& regs = maxwell3d.regs;
+        regs.upload.line_length_in = parameters[2];
+        regs.upload.line_count = 1;
+        regs.upload.dest.address_high = parameters[0];
+        regs.upload.dest.address_low = parameters[1];
+        maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
+        maxwell3d.CallMultiMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
+                                  zero_memory.data(), needed_memory, needed_memory);
+    }
+
+private:
+    std::vector<u32> zero_memory;
+};
+
+class HLE_TransformFeedbackSetup final : public HLEMacroImpl {
+public:
+    explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
+
+    void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
+        maxwell3d.RefreshParameters();
+
+        auto& regs = maxwell3d.regs;
+        regs.transform_feedback_enabled = 1;
+        regs.transform_feedback.buffers[0].start_offset = 0;
+        regs.transform_feedback.buffers[1].start_offset = 0;
+        regs.transform_feedback.buffers[2].start_offset = 0;
+        regs.transform_feedback.buffers[3].start_offset = 0;
+
+        regs.upload.line_length_in = 4;
+        regs.upload.line_count = 1;
+        regs.upload.dest.address_high = parameters[0];
+        regs.upload.dest.address_low = parameters[1];
+        maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true);
+        maxwell3d.CallMethod(static_cast<size_t>(MAXWELL3D_REG_INDEX(inline_data)),
+                             regs.transform_feedback.controls[0].stride, true);
+    }
+};
+
 } // Anonymous namespace
 
-HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
-    builders.emplace(0x771BB18C62444DA0ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
-                             return std::make_unique<HLE_771BB18C62444DA0>(maxwell3d__);
+HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
+    builders.emplace(0xDD6A7FA92A7D2674ULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawArrays>(maxwell3d__);
                          }));
     builders.emplace(0x0D61FC9FAAC9FCADULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
-                             return std::make_unique<HLE_DrawArraysIndirect>(maxwell3d__);
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawArraysIndirect<false>>(maxwell3d__);
                          }));
     builders.emplace(0x8A4D173EB99A8603ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
-                             return std::make_unique<HLE_DrawArraysIndirect>(maxwell3d__, true);
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
+                         }));
+    builders.emplace(0x2DB33AADB741839CULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
+                         }));
+    builders.emplace(0x771BB18C62444DA0ULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawIndexedIndirect<false>>(maxwell3d__);
                          }));
     builders.emplace(0x0217920100488FF7ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
-                             return std::make_unique<HLE_DrawIndexedIndirect>(maxwell3d__);
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_DrawIndexedIndirect<true>>(maxwell3d__);
                          }));
     builders.emplace(0x3F5E74B9C9A50164ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_MultiDrawIndexedIndirectCount>(
                                  maxwell3d__);
                          }));
     builders.emplace(0xEAD26C3E2109B06BULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_MultiLayerClear>(maxwell3d__);
                          }));
     builders.emplace(0xC713C83D8F63CCF3ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d__);
                          }));
     builders.emplace(0xD7333D26E0A93EDEULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d__);
                          }));
     builders.emplace(0xEB29B2A09AA06D38ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_BindShader>(maxwell3d__);
                          }));
     builders.emplace(0xDB1341DBEB4C8AF7ULL,
-                     std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
-                         [](Engines::Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
                              return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d__);
                          }));
+    builders.emplace(0x6C97861D891EDf7EULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_ClearConstBuffer<0x5F00>>(maxwell3d__);
+                         }));
+    builders.emplace(0xD246FDDF3A6173D7ULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_ClearConstBuffer<0x7000>>(maxwell3d__);
+                         }));
+    builders.emplace(0xEE4D0004BEC8ECF4ULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_ClearMemory>(maxwell3d__);
+                         }));
+    builders.emplace(0xFC0CF27F5FFAA661ULL,
+                     std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
+                         [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
+                             return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__);
+                         }));
 }
 
 HLEMacro::~HLEMacro() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ed7558073f..7d48af8e17 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -357,21 +357,21 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
     if (addr == 0 || size == 0) {
         return;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+    if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.DownloadMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+    if ((True(which & VideoCommon::CacheType::QueryCache))) {
         query_cache.FlushRegion(addr, size);
     }
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
-    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+    if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         if (buffer_cache.IsRegionGpuModified(addr, size)) {
             return true;
@@ -380,7 +380,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
     if (!Settings::IsGPULevelHigh()) {
         return false;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         return texture_cache.IsRegionGpuModified(addr, size);
     }
@@ -392,18 +392,18 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
     if (addr == 0 || size == 0) {
         return;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
-    if (bool(which & VideoCommon::CacheType::BufferCache)) {
+    if (True(which & VideoCommon::CacheType::BufferCache)) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.WriteMemory(addr, size);
     }
-    if (bool(which & VideoCommon::CacheType::ShaderCache)) {
+    if (True(which & VideoCommon::CacheType::ShaderCache)) {
         shader_cache.InvalidateRegion(addr, size);
     }
-    if (bool(which & VideoCommon::CacheType::QueryCache)) {
+    if (True(which & VideoCommon::CacheType::QueryCache)) {
         query_cache.InvalidateRegion(addr, size);
     }
 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fc746fe2c8..242bf9602a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -431,21 +431,21 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
     if (addr == 0 || size == 0) {
         return;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.DownloadMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+    if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.DownloadMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+    if ((True(which & VideoCommon::CacheType::QueryCache))) {
         query_cache.FlushRegion(addr, size);
     }
 }
 
 bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
-    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+    if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         if (buffer_cache.IsRegionGpuModified(addr, size)) {
             return true;
@@ -454,7 +454,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
     if (!Settings::IsGPULevelHigh()) {
         return false;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         return texture_cache.IsRegionGpuModified(addr, size);
     }
@@ -465,18 +465,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
     if (addr == 0 || size == 0) {
         return;
     }
-    if (bool(which & VideoCommon::CacheType::TextureCache)) {
+    if (True(which & VideoCommon::CacheType::TextureCache)) {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::BufferCache))) {
+    if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.WriteMemory(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::QueryCache))) {
+    if ((True(which & VideoCommon::CacheType::QueryCache))) {
         query_cache.InvalidateRegion(addr, size);
     }
-    if ((bool(which & VideoCommon::CacheType::ShaderCache))) {
+    if ((True(which & VideoCommon::CacheType::ShaderCache))) {
         pipeline_cache.InvalidateRegion(addr, size);
     }
 }
@@ -1050,7 +1050,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re
     constexpr size_t POINT = 0;
     constexpr size_t LINE = 1;
     constexpr size_t POLYGON = 2;
-    constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
+    static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
         POINT,   // Points
         LINE,    // Lines
         LINE,    // LineLoop
@@ -1159,13 +1159,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
 }
 
 void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
-    if (!regs.logic_op.enable) {
-        return;
-    }
     if (!state_tracker.TouchLogicOp()) {
         return;
     }
-    auto op = static_cast<VkLogicOp>(static_cast<u32>(regs.logic_op.op) - 0x1500);
+    const auto op_value = static_cast<u32>(regs.logic_op.op);
+    auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
+                                                      : VK_LOGIC_OP_NO_OP;
     scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index e5cf974722..d56558a830 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -251,4 +251,4 @@ void StateTracker::InvalidateState() {
 StateTracker::StateTracker()
     : flags{&default_flags}, default_flags{}, invalidation_flags{MakeInvalidationFlags()} {}
 
-} // namespace Vulkan
\ No newline at end of file
+} // namespace Vulkan
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index c347282459..574760f808 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -351,12 +351,14 @@ std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffe
     if (it == maxwell3d->replace_table.end()) {
         return std::nullopt;
     }
-    const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
+    const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) {
         switch (name) {
-        case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
+        case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex:
             return Shader::ReplaceConstant::BaseVertex;
-        case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
+        case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance:
             return Shader::ReplaceConstant::BaseInstance;
+        case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::DrawID:
+            return Shader::ReplaceConstant::DrawID;
         default:
             UNREACHABLE();
         }
-- 
cgit v1.2.3-70-g09d2