From d29fe60dba888e5364ba6cb562fd45af262cf34c Mon Sep 17 00:00:00 2001
From: yuzubot <yuzu@yuzu-emu.org>
Date: Sat, 22 May 2021 12:02:37 +0000
Subject: "Merge Tagged PR 5896"

---
 src/video_core/engines/shader_bytecode.h           |   4 +
 .../renderer_opengl/gl_arb_decompiler.cpp          |  82 +++++---
 .../renderer_opengl/gl_shader_decompiler.cpp       |  62 +++++-
 .../renderer_vulkan/vk_shader_decompiler.cpp       | 115 ++++++++---
 src/video_core/shader/control_flow.cpp             | 159 ++++++++++-----
 src/video_core/shader/control_flow.h               |  13 +-
 src/video_core/shader/decode.cpp                   | 223 ++++++++++++++-------
 src/video_core/shader/decode/other.cpp             |  11 +
 src/video_core/shader/node.h                       |  15 +-
 src/video_core/shader/node_helper.cpp              |   5 +
 src/video_core/shader/node_helper.h                |   3 +
 src/video_core/shader/shader_ir.h                  |  95 ++++++---
 12 files changed, 574 insertions(+), 213 deletions(-)

(limited to 'src')

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8b45f1b623..5d659dcaff 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1785,6 +1785,8 @@ public:
         SSY,
         SYNC,
         BRK,
+        CAL,
+        RET,
         DEPBAR,
         VOTE,
         VOTE_VTG,
@@ -2108,6 +2110,8 @@ private:
             INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
             INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
             INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
+            INST("111000100110----", Id::CAL, Type::Flow, "CAL"),
+            INST("111000110010----", Id::RET, Type::Flow, "RET"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
             INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
             INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index 3e4d88c302..e986474788 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -491,6 +491,9 @@ private:
     const Registry& registry;
     const ShaderType stage;
 
+    std::shared_ptr<ShaderFunctionIR> context_func;
+    u32 ast_var_base{};
+
     std::size_t num_temporaries = 0;
     std::size_t max_temporaries = 0;
 
@@ -807,13 +810,33 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
     : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
     DefineGlobalMemory();
 
+    context_func = ir.GetMainFunction();
+    ast_var_base = 0;
+
     AddLine("TEMP RC;");
     AddLine("TEMP FSWZA[4];");
     AddLine("TEMP FSWZB[4];");
-    if (ir.IsDecompiled()) {
+    InitializeVariables();
+    AddLine("main:");
+    if (context_func->IsDecompiled()) {
         DecompileAST();
     } else {
         DecompileBranchMode();
+        AddLine("RET;");
+    }
+
+    const auto& subfunctions = ir.GetSubFunctions();
+    auto it = subfunctions.begin();
+    while (it != subfunctions.end()) {
+        context_func = *it;
+        AddLine("func_{}:", context_func->GetId());
+        if (context_func->IsDecompiled()) {
+            DecompileAST();
+        } else {
+            DecompileBranchMode();
+            AddLine("RET;");
+        }
+        it++;
     }
     AddLine("END");
 
@@ -1060,41 +1083,38 @@ void ARBDecompiler::InitializeVariables() {
 }
 
 void ARBDecompiler::DecompileAST() {
-    const u32 num_flow_variables = ir.GetASTNumVariables();
+    const u32 num_flow_variables = context_func->GetASTNumVariables();
     for (u32 i = 0; i < num_flow_variables; ++i) {
-        AddLine("TEMP F{};", i);
+        AddLine("TEMP F{};", i + ast_var_base);
     }
     for (u32 i = 0; i < num_flow_variables; ++i) {
-        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
+        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i + ast_var_base);
     }
 
-    InitializeVariables();
-
-    VisitAST(ir.GetASTProgram());
+    VisitAST(context_func->GetASTProgram());
+    ast_var_base += num_flow_variables;
 }
 
 void ARBDecompiler::DecompileBranchMode() {
     static constexpr u32 FLOW_STACK_SIZE = 20;
-    if (!ir.IsFlowStackDisabled()) {
+    if (!context_func->IsFlowStackDisabled()) {
         AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
         AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
         AddLine("TEMP SSY_TOP;");
         AddLine("TEMP PBK_TOP;");
     }
 
-    AddLine("TEMP PC;");
+    AddLine("TEMP PC{};", context_func->GetId());
 
-    if (!ir.IsFlowStackDisabled()) {
+    if (!context_func->IsFlowStackDisabled()) {
         AddLine("MOV.U SSY_TOP.x, 0;");
         AddLine("MOV.U PBK_TOP.x, 0;");
     }
 
-    InitializeVariables();
-
-    const auto basic_block_end = ir.GetBasicBlocks().end();
-    auto basic_block_it = ir.GetBasicBlocks().begin();
+    const auto basic_block_end = context_func->GetBasicBlocks().end();
+    auto basic_block_it = context_func->GetBasicBlocks().begin();
     const u32 first_address = basic_block_it->first;
-    AddLine("MOV.U PC.x, {};", first_address);
+    AddLine("MOV.U PC{}.x, {};", context_func->GetId(), first_address);
 
     AddLine("REP;");
 
@@ -1103,7 +1123,7 @@ void ARBDecompiler::DecompileBranchMode() {
         const auto& [address, bb] = *basic_block_it;
         ++num_blocks;
 
-        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
+        AddLine("SEQ.S.CC RC.x, PC{}.x, {};", context_func->GetId(), address);
         AddLine("IF NE.x;");
 
         VisitBlock(bb);
@@ -1114,7 +1134,7 @@ void ARBDecompiler::DecompileBranchMode() {
             const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
             if (!op || op->GetCode() != OperationCode::Branch) {
                 const u32 next_address = basic_block_it->first;
-                AddLine("MOV.U PC.x, {};", next_address);
+                AddLine("MOV.U PC{}.x, {};", context_func->GetId(), next_address);
                 AddLine("CONT;");
             }
         }
@@ -1152,7 +1172,8 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
     } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
         VisitBlock(decoded->nodes);
     } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
-        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
+        AddLine("MOV.U F{}, {};", var_set->index + ast_var_base,
+                VisitExpression(var_set->condition));
         ResetTemporaries();
     } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
         const std::string condition = VisitExpression(do_while->condition);
@@ -1172,7 +1193,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
             ResetTemporaries();
         }
         if (ast_return->kills) {
-            AddLine("KIL TR;");
+            if (stage == ShaderType::Fragment) {
+                AddLine("KIL TR;");
+            } else {
+                AddLine("RET;");
+            }
         } else {
             Exit();
         }
@@ -1219,7 +1244,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) {
         return Visit(ir.GetConditionCode(expr->cc));
     }
     if (const auto expr = std::get_if<ExprVar>(&*node)) {
-        return fmt::format("F{}.x", expr->var_index);
+        return fmt::format("F{}.x", expr->var_index + ast_var_base);
     }
     if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
         return expr->value ? "0xffffffff" : "0";
@@ -1406,6 +1431,11 @@ std::string ARBDecompiler::Visit(const Node& node) {
         return {};
     }
 
+    if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+        AddLine("CAL func_{};", func_call->GetFuncId());
+        return {};
+    }
+
     if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
         // Uncommenting this will generate invalid code. GLASM lacks comments.
         // AddLine("// {}", cmt->GetText());
@@ -1479,7 +1509,7 @@ std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
 }
 
 void ARBDecompiler::Exit() {
-    if (stage != ShaderType::Fragment) {
+    if (!context_func->IsMain() || stage != ShaderType::Fragment) {
         AddLine("RET;");
         return;
     }
@@ -2021,13 +2051,13 @@ std::string ARBDecompiler::ImageStore(Operation operation) {
 
 std::string ARBDecompiler::Branch(Operation operation) {
     const auto target = std::get<ImmediateNode>(*operation[0]);
-    AddLine("MOV.U PC.x, {};", target.GetValue());
+    AddLine("MOV.U PC{}.x, {};", context_func->GetId(), target.GetValue());
     AddLine("CONT;");
     return {};
 }
 
 std::string ARBDecompiler::BranchIndirect(Operation operation) {
-    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
+    AddLine("MOV.U PC{}.x, {};", context_func->GetId(), Visit(operation[0]));
     AddLine("CONT;");
     return {};
 }
@@ -2045,7 +2075,7 @@ std::string ARBDecompiler::PopFlowStack(Operation operation) {
     const auto stack = std::get<MetaStackClass>(operation.GetMeta());
     const std::string_view stack_name = StackName(stack);
     AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
-    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
+    AddLine("MOV.U PC{}.x, {}[{}_TOP.x].x;", context_func->GetId(), stack_name, stack_name);
     AddLine("CONT;");
     return {};
 }
@@ -2056,6 +2086,10 @@ std::string ARBDecompiler::Exit(Operation) {
 }
 
 std::string ARBDecompiler::Discard(Operation) {
+    if (stage != ShaderType::Fragment) { // every non-fragment stage takes this path
+        AddLine("RET;");                 // emit RET in place of the KIL below
+        return {};
+    }
     AddLine("KIL TR;");
     return {};
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ac78d344ca..fa6b10ac37 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -435,6 +435,27 @@ public:
         DeclareCustomVariables();
         DeclarePhysicalAttributeReader();
 
+        const auto& subfunctions = ir.GetSubFunctions();
+        auto it = subfunctions.rbegin();
+        while (it != subfunctions.rend()) {
+            context_func = *it;
+            code.AddLine("void func_{}() {{", context_func->GetId());
+            ++code.scope;
+
+            if (context_func->IsDecompiled()) {
+                DecompileAST();
+            } else {
+                DecompileBranchMode();
+            }
+
+            --code.scope;
+            code.AddLine("}}");
+
+            it++;
+        }
+
+        context_func = ir.GetMainFunction();
+
         code.AddLine("void main() {{");
         ++code.scope;
 
@@ -442,7 +463,7 @@ public:
             code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
         }
 
-        if (ir.IsDecompiled()) {
+        if (context_func->IsDecompiled()) {
             DecompileAST();
         } else {
             DecompileBranchMode();
@@ -462,13 +483,13 @@ private:
 
     void DecompileBranchMode() {
         // VM's program counter
-        const auto first_address = ir.GetBasicBlocks().begin()->first;
+        const auto first_address = context_func->GetBasicBlocks().begin()->first;
         code.AddLine("uint jmp_to = {}U;", first_address);
 
         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
         // unlikely that shaders will use 20 nested SSYs and PBKs.
         constexpr u32 FLOW_STACK_SIZE = 20;
-        if (!ir.IsFlowStackDisabled()) {
+        if (!context_func->IsFlowStackDisabled()) {
             for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
                 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
                 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -480,7 +501,7 @@ private:
 
         code.AddLine("switch (jmp_to) {{");
 
-        for (const auto& pair : ir.GetBasicBlocks()) {
+        for (const auto& pair : context_func->GetBasicBlocks()) {
             const auto& [address, bb] = pair;
             code.AddLine("case 0x{:X}U: {{", address);
             ++code.scope;
@@ -1131,6 +1152,11 @@ private:
             return {};
         }
 
+        if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+            code.AddLine("func_{}();", func_call->GetFuncId());
+            return {};
+        }
+
         if (const auto comment = std::get_if<CommentNode>(&*node)) {
             code.AddLine("// " + comment->GetText());
             return {};
@@ -2267,7 +2293,9 @@ private:
     }
 
     Expression Exit(Operation operation) {
-        PreExit();
+        if (context_func->IsMain()) {
+            PreExit();
+        }
         code.AddLine("return;");
         return {};
     }
@@ -2277,7 +2305,11 @@ private:
         // about unexecuted instructions that may follow this.
         code.AddLine("if (true) {{");
         ++code.scope;
-        code.AddLine("discard;");
+        if (stage != ShaderType::Fragment) {
+            code.AddLine("return;");
+        } else {
+            code.AddLine("discard;");
+        }
         --code.scope;
         code.AddLine("}}");
         return {};
@@ -2388,7 +2420,7 @@ private:
     }
 
     Expression Barrier(Operation) {
-        if (!ir.IsDecompiled()) {
+        if (!context_func->IsDecompiled()) {
             LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
             return {};
         }
@@ -2755,6 +2787,8 @@ private:
     const Header header;
     std::unordered_map<u8, VaryingTFB> transform_feedback;
 
+    std::shared_ptr<ShaderFunctionIR> context_func;
+
     ShaderWriter code;
 
     std::optional<u32> max_input_vertices;
@@ -2902,9 +2936,15 @@ public:
             decomp.code.scope++;
         }
         if (ast.kills) {
-            decomp.code.AddLine("discard;");
+            if (decomp.stage != ShaderType::Fragment) {
+                decomp.code.AddLine("return;");
+            } else {
+                decomp.code.AddLine("discard;");
+            }
         } else {
-            decomp.PreExit();
+            if (decomp.context_func->IsMain()) {
+                decomp.PreExit();
+            }
             decomp.code.AddLine("return;");
         }
         if (!is_true) {
@@ -2937,13 +2977,13 @@ private:
 };
 
 void GLSLDecompiler::DecompileAST() {
-    const u32 num_flow_variables = ir.GetASTNumVariables();
+    const u32 num_flow_variables = context_func->GetASTNumVariables();
     for (u32 i = 0; i < num_flow_variables; i++) {
         code.AddLine("bool {} = false;", GetFlowVariable(i));
     }
 
     ASTDecompiler decompiler{*this};
-    decompiler.Visit(ir.GetASTProgram());
+    decompiler.Visit(context_func->GetASTProgram());
 }
 
 } // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index c6846d8861..258e2f5df2 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -406,10 +406,38 @@ private:
         binding = DeclareStorageTexels(binding);
         binding = DeclareImages(binding);
 
+        const auto& subfunctions = ir.GetSubFunctions();
+
+        labels.resize(subfunctions.size() + 1);
+        other_functions.resize(subfunctions.size());
+
+        auto it = subfunctions.rbegin();
+        while (it != subfunctions.rend()) {
+            context_func = *it;
+            other_functions[context_func->GetId() - 1] =
+                OpFunction(t_void, {}, TypeFunction(t_void));
+            AddLabel();
+
+            if (context_func->IsDecompiled()) {
+                DeclareFlowVariables();
+                DecompileAST();
+            } else {
+                AllocateLabels();
+                DecompileBranchMode();
+            }
+
+            OpReturn();
+            OpFunctionEnd();
+
+            it++;
+        }
+
+        context_func = ir.GetMainFunction();
+
         const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
         AddLabel();
 
-        if (ir.IsDecompiled()) {
+        if (context_func->IsDecompiled()) {
             DeclareFlowVariables();
             DecompileAST();
         } else {
@@ -441,16 +469,18 @@ private:
     void DecompileAST();
 
     void DecompileBranchMode() {
-        const u32 first_address = ir.GetBasicBlocks().begin()->first;
-        const Id loop_label = OpLabel("loop");
-        const Id merge_label = OpLabel("merge");
+        const u32 first_address = context_func->GetBasicBlocks().begin()->first;
+        const u32 func_id = context_func->GetId();
+        const std::string func_id_msg = std::to_string(func_id);
+        const Id loop_label = OpLabel("loop_" + func_id_msg);
+        const Id merge_label = OpLabel("merge_" + func_id_msg);
         const Id dummy_label = OpLabel();
         const Id jump_label = OpLabel();
-        continue_label = OpLabel("continue");
+        continue_label = OpLabel("continue_" + func_id_msg);
 
         std::vector<Sirit::Literal> literals;
         std::vector<Id> branch_labels;
-        for (const auto& [literal, label] : labels) {
+        for (const auto& [literal, label] : labels[func_id]) {
             literals.push_back(literal);
             branch_labels.push_back(label);
         }
@@ -462,11 +492,11 @@ private:
         std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
         std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
 
-        Name(jmp_to, "jmp_to");
-        Name(ssy_flow_stack, "ssy_flow_stack");
-        Name(ssy_flow_stack_top, "ssy_flow_stack_top");
-        Name(pbk_flow_stack, "pbk_flow_stack");
-        Name(pbk_flow_stack_top, "pbk_flow_stack_top");
+        Name(jmp_to, "jmp_to_" + func_id_msg);
+        Name(ssy_flow_stack, "ssy_flow_stack_" + func_id_msg);
+        Name(ssy_flow_stack_top, "ssy_flow_stack_top_" + func_id_msg);
+        Name(pbk_flow_stack, "pbk_flow_stack_" + func_id_msg);
+        Name(pbk_flow_stack_top, "pbk_flow_stack_top_" + func_id_msg);
 
         DefinePrologue();
 
@@ -484,13 +514,14 @@ private:
         AddLabel(default_branch);
         OpReturn();
 
-        for (const auto& [address, bb] : ir.GetBasicBlocks()) {
-            AddLabel(labels.at(address));
+        for (const auto& [address, bb] : context_func->GetBasicBlocks()) {
+            AddLabel(labels[func_id].at(address));
 
             VisitBasicBlock(bb);
 
-            const auto next_it = labels.lower_bound(address + 1);
-            const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
+            const auto next_it = labels[func_id].lower_bound(address + 1);
+            const Id next_label =
+                next_it != labels[func_id].end() ? next_it->second : default_branch;
             OpBranch(next_label);
         }
 
@@ -508,9 +539,10 @@ private:
     static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
 
     void AllocateLabels() {
-        for (const auto& pair : ir.GetBasicBlocks()) {
+        const u32 func_id = context_func->GetId();
+        for (const auto& pair : context_func->GetBasicBlocks()) {
             const u32 address = pair.first;
-            labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
+            labels[func_id].emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
         }
     }
 
@@ -589,6 +621,14 @@ private:
         DeclareOutputVertex();
     }
 
+    void SafeKill() { // OpKill when the stage is Fragment, OpReturn for every other stage
+        if (stage == ShaderType::Fragment) {
+            OpKill();
+            return;
+        }
+        OpReturn();
+    }
+
     void DeclareFragment() {
         if (stage != ShaderType::Fragment) {
             return;
@@ -656,7 +696,7 @@ private:
     }
 
     void DeclareFlowVariables() {
-        for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
+        for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) {
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
             Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
             flow_variables.emplace(i, AddGlobalVariable(id));
@@ -1333,6 +1373,12 @@ private:
             return {};
         }
 
+        if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+            const u32 func_id = func_call->GetFuncId();
+            OpFunctionCall(t_void, other_functions[func_id - 1]);
+            return {};
+        }
+
         if (const auto comment = std::get_if<CommentNode>(&*node)) {
             if (device.HasDebuggingToolAttached()) {
                 // We should insert comments with OpString instead of using named variables
@@ -2124,7 +2170,7 @@ private:
 
         OpBranchConditional(condition, true_label, discard_label);
         AddLabel(discard_label);
-        OpKill();
+        SafeKill();
         AddLabel(true_label);
     }
 
@@ -2175,7 +2221,9 @@ private:
     }
 
     Expression Exit(Operation operation) {
-        PreExit();
+        if (context_func->IsMain()) {
+            PreExit();
+        }
         inside_branch = true;
         if (conditional_branch_set) {
             OpReturn();
@@ -2192,12 +2240,12 @@ private:
     Expression Discard(Operation operation) {
         inside_branch = true;
         if (conditional_branch_set) {
-            OpKill();
+            SafeKill();
         } else {
             const Id dummy = OpLabel();
             OpBranch(dummy);
             AddLabel(dummy);
-            OpKill();
+            SafeKill();
             AddLabel();
         }
         return {};
@@ -2276,7 +2324,7 @@ private:
     }
 
     Expression Barrier(Operation) {
-        if (!ir.IsDecompiled()) {
+        if (!context_func->IsDecompiled()) {
             LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
             return {};
         }
@@ -2770,6 +2818,8 @@ private:
     const Specialization& specialization;
     std::unordered_map<u8, VaryingTFB> transform_feedback;
 
+    std::shared_ptr<ShaderFunctionIR> context_func;
+
     const Id t_void = Name(TypeVoid(), "void");
 
     const Id t_bool = Name(TypeBool(), "bool");
@@ -2896,7 +2946,8 @@ private:
     Id ssy_flow_stack{};
     Id pbk_flow_stack{};
     Id continue_label{};
-    std::map<u32, Id> labels;
+    std::vector<std::map<u32, Id>> labels;
+    std::vector<Id> other_functions;
 
     bool conditional_branch_set{};
     bool inside_branch{};
@@ -3047,9 +3098,11 @@ public:
             decomp.OpBranchConditional(condition, then_label, endif_label);
             decomp.AddLabel(then_label);
             if (ast.kills) {
-                decomp.OpKill();
+                decomp.SafeKill();
             } else {
-                decomp.PreExit();
+                if (decomp.context_func->IsMain()) {
+                    decomp.PreExit();
+                }
                 decomp.OpReturn();
             }
             decomp.AddLabel(endif_label);
@@ -3058,9 +3111,11 @@ public:
             decomp.OpBranch(next_block);
             decomp.AddLabel(next_block);
             if (ast.kills) {
-                decomp.OpKill();
+                decomp.SafeKill();
             } else {
-                decomp.PreExit();
+                if (decomp.context_func->IsMain()) {
+                    decomp.PreExit();
+                }
                 decomp.OpReturn();
             }
             decomp.AddLabel(decomp.OpLabel());
@@ -3097,7 +3152,7 @@ private:
 };
 
 void SPIRVDecompiler::DecompileAST() {
-    const u32 num_flow_variables = ir.GetASTNumVariables();
+    const u32 num_flow_variables = context_func->GetASTNumVariables();
     for (u32 i = 0; i < num_flow_variables; i++) {
         const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
         Name(id, fmt::format("flow_var_{}", i));
@@ -3106,7 +3161,7 @@ void SPIRVDecompiler::DecompileAST() {
 
     DefinePrologue();
 
-    const ASTNode program = ir.GetASTProgram();
+    const ASTNode program = context_func->GetASTProgram();
     ASTDecompiler decompiler{*this};
     decompiler.Visit(program);
 
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 43d965f2fc..7c8bd7e2f1 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -7,6 +7,7 @@
 #include <set>
 #include <stack>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include "common/assert.h"
@@ -26,17 +27,29 @@ using Tegra::Shader::OpCode;
 
 constexpr s32 unassigned_branch = -2;
 
+enum class JumpLabel : u32 {
+    SSYClass = 0, // tag given to targets recorded for SSY instructions
+    PBKClass = 1, // tag given to targets recorded for PBK instructions
+};
+
+struct JumpItem {
+    JumpLabel type; // class of the instruction (SSY or PBK) that recorded this entry
+    u32 address;    // branch target computed when that instruction was parsed
+    // Two entries compare equal only when both the class and the target address match.
+    bool operator==(const JumpItem& other) const {
+        return type == other.type && address == other.address;
+    }
+};
+
 struct Query {
     u32 address{};
-    std::stack<u32> ssy_stack{};
-    std::stack<u32> pbk_stack{};
+    std::stack<JumpItem> stack{}; // one stack of tagged SSY/PBK entries
 };
 
 struct BlockStack {
     BlockStack() = default;
-    explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
-    std::stack<u32> ssy_stack{};
-    std::stack<u32> pbk_stack{};
+    explicit BlockStack(const Query& q) : stack{q.stack} {} // copies the query's stack
+    std::stack<JumpItem> stack{}; // one stack of tagged SSY/PBK entries
 };
 
 template <typename T, typename... Args>
@@ -65,20 +78,36 @@ struct BlockInfo {
     }
 };
 
+struct ProgramControl {
+    std::unordered_set<u32> found_functions{}; // every address registered so far
+    std::list<u32> pending_functions{};        // appended to on first registration
+    // Records a function address; an address that was already registered is ignored.
+    void RegisterFunction(u32 address) {
+        const bool newly_added = found_functions.insert(address).second;
+        if (!newly_added) {
+            return;
+        }
+        pending_functions.emplace_back(address);
+    }
+};
+
 struct CFGRebuildState {
-    explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
-        : program_code{program_code_}, registry{registry_}, start{start_} {}
+    explicit CFGRebuildState(ProgramControl& control_, const ProgramCode& program_code_, u32 start_,
+                             u32 base_start_, Registry& registry_)
+        : control{control_}, program_code{program_code_}, registry{registry_}, start{start_},
+          base_start{base_start_} {}
 
+    ProgramControl& control; // ParseCode registers CAL targets through this
     const ProgramCode& program_code;
     Registry& registry;
     u32 start{};
+    u32 base_start{}; // address handed to IsSchedInstruction in place of start
     std::vector<BlockInfo> block_info;
     std::list<u32> inspect_queries;
     std::list<Query> queries;
     std::unordered_map<u32, u32> registered;
     std::set<u32> labels;
-    std::map<u32, u32> ssy_labels;
-    std::map<u32, u32> pbk_labels;
+    std::map<u32, JumpItem> jump_labels; // keyed by the offset of the SSY/PBK instruction
     std::unordered_map<u32, BlockStack> stacks;
     ASTManager* manager{};
 };
@@ -153,7 +182,7 @@ template <typename Result, typename TestCallable, typename PackCallable>
 std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
                                        PackCallable pack) {
     for (; pos >= state.start; --pos) {
-        if (IsSchedInstruction(pos, state.start)) {
+        if (IsSchedInstruction(pos, state.base_start)) {
             continue;
         }
         const Instruction instr = state.program_code[pos];
@@ -262,7 +291,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
             single_branch.ignore = true;
             break;
         }
-        if (IsSchedInstruction(offset, state.start)) {
+        if (IsSchedInstruction(offset, state.base_start)) {
             offset++;
             continue;
         }
@@ -274,6 +303,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
         }
 
         switch (opcode->get().GetId()) {
+        case OpCode::Id::RET:
         case OpCode::Id::EXIT: {
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
@@ -411,13 +441,20 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
         case OpCode::Id::SSY: {
             const u32 target = offset + instr.bra.GetBranchTarget();
             insert_label(state, target);
-            state.ssy_labels.emplace(offset, target);
+            JumpItem it = {JumpLabel::SSYClass, target};
+            state.jump_labels.emplace(offset, it);
             break;
         }
         case OpCode::Id::PBK: {
             const u32 target = offset + instr.bra.GetBranchTarget();
             insert_label(state, target);
-            state.pbk_labels.emplace(offset, target);
+            JumpItem it = {JumpLabel::PBKClass, target};
+            state.jump_labels.emplace(offset, it);
+            break;
+        }
+        case OpCode::Id::CAL: {
+            const u32 target = offset + instr.bra.GetBranchTarget();
+            state.control.RegisterFunction(target);
             break;
         }
         case OpCode::Id::BRX: {
@@ -513,7 +550,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
 }
 
 bool TryQuery(CFGRebuildState& state) {
-    const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
+    const auto gather_labels = [](std::stack<JumpItem>& cc, std::map<u32, JumpItem>& labels,
                                   BlockInfo& block) {
         auto gather_start = labels.lower_bound(block.start);
         const auto gather_end = labels.upper_bound(block.end);
@@ -522,6 +559,19 @@ bool TryQuery(CFGRebuildState& state) {
             ++gather_start;
         }
     };
+    const auto pop_labels = [](JumpLabel type, SingleBranch* branch, Query& query) -> bool {
+        while (!query.stack.empty() && query.stack.top().type != type) { // discard other classes
+            query.stack.pop();
+        }
+        if (query.stack.empty()) {
+            return false; // no entry of the requested class was left on the stack
+        }
+        if (branch->address == unassigned_branch) {
+            branch->address = query.stack.top().address; // take the target from the stack top
+        }
+        query.stack.pop(); // the matching entry is consumed in either case
+        return true;
+    };
     if (state.queries.empty()) {
         return false;
     }
@@ -534,8 +584,7 @@ bool TryQuery(CFGRebuildState& state) {
     // consumes a label. Schedule new queries accordingly
     if (block.visited) {
         BlockStack& stack = state.stacks[q.address];
-        const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
-                              (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
+        const bool all_okay = (stack.stack.empty() || q.stack == stack.stack);
         state.queries.pop_front();
         return all_okay;
     }
@@ -544,8 +593,7 @@ bool TryQuery(CFGRebuildState& state) {
 
     Query q2(q);
     state.queries.pop_front();
-    gather_labels(q2.ssy_stack, state.ssy_labels, block);
-    gather_labels(q2.pbk_stack, state.pbk_labels, block);
+    gather_labels(q2.stack, state.jump_labels, block);
     if (std::holds_alternative<SingleBranch>(*block.branch)) {
         auto* branch = std::get_if<SingleBranch>(block.branch.get());
         if (!branch->condition.IsUnconditional()) {
@@ -555,16 +603,10 @@ bool TryQuery(CFGRebuildState& state) {
 
         auto& conditional_query = state.queries.emplace_back(q2);
         if (branch->is_sync) {
-            if (branch->address == unassigned_branch) {
-                branch->address = conditional_query.ssy_stack.top();
-            }
-            conditional_query.ssy_stack.pop();
+            pop_labels(JumpLabel::SSYClass, branch, conditional_query);
         }
         if (branch->is_brk) {
-            if (branch->address == unassigned_branch) {
-                branch->address = conditional_query.pbk_stack.top();
-            }
-            conditional_query.pbk_stack.pop();
+            pop_labels(JumpLabel::PBKClass, branch, conditional_query);
         }
         conditional_query.address = branch->address;
         return true;
@@ -646,25 +688,23 @@ void DecompileShader(CFGRebuildState& state) {
     state.manager->Decompile();
 }
 
-} // Anonymous namespace
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
-                                                const CompilerSettings& settings,
-                                                Registry& registry) {
-    auto result_out = std::make_unique<ShaderCharacteristics>();
+ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_code,
+                            u32 start_address, u32 base_start, const CompilerSettings& settings,
+                            Registry& registry) {
+    ShaderFunction result_out{};
     if (settings.depth == CompileDepth::BruteForce) {
-        result_out->settings.depth = CompileDepth::BruteForce;
+        result_out.settings.depth = CompileDepth::BruteForce;
         return result_out;
     }
 
-    CFGRebuildState state{program_code, start_address, registry};
+    CFGRebuildState state{control, program_code, start_address, base_start, registry};
     // Inspect Code and generate blocks
     state.labels.clear();
     state.labels.emplace(start_address);
     state.inspect_queries.push_back(state.start);
     while (!state.inspect_queries.empty()) {
         if (!TryInspectAddress(state)) {
-            result_out->settings.depth = CompileDepth::BruteForce;
+            result_out.settings.depth = CompileDepth::BruteForce;
             return result_out;
         }
     }
@@ -675,7 +715,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
 
     if (settings.depth != CompileDepth::FlowStack) {
         // Decompile Stacks
-        state.queries.push_back(Query{state.start, {}, {}});
+        state.queries.push_back(Query{state.start, {}});
         decompiled = true;
         while (!state.queries.empty()) {
             if (!TryQuery(state)) {
@@ -705,19 +745,18 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
             state.manager->ShowCurrentState("Of Shader");
             state.manager->Clear();
         } else {
-            auto characteristics = std::make_unique<ShaderCharacteristics>();
-            characteristics->start = start_address;
-            characteristics->settings.depth = settings.depth;
-            characteristics->manager = std::move(manager);
-            characteristics->end = state.block_info.back().end + 1;
-            return characteristics;
+            result_out.start = start_address;
+            result_out.settings.depth = settings.depth;
+            result_out.manager = std::move(manager);
+            result_out.end = state.block_info.back().end + 1;
+            return result_out;
         }
     }
 
-    result_out->start = start_address;
-    result_out->settings.depth =
+    result_out.start = start_address;
+    result_out.settings.depth =
         use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
-    result_out->blocks.clear();
+    result_out.blocks.clear();
     for (auto& block : state.block_info) {
         ShaderBlock new_block{};
         new_block.start = block.start;
@@ -726,20 +765,20 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
         if (!new_block.ignore_branch) {
             new_block.branch = block.branch;
         }
-        result_out->end = std::max(result_out->end, block.end);
-        result_out->blocks.push_back(new_block);
+        result_out.end = std::max(result_out.end, block.end);
+        result_out.blocks.push_back(new_block);
     }
     if (!use_flow_stack) {
-        result_out->labels = std::move(state.labels);
+        result_out.labels = std::move(state.labels);
         return result_out;
     }
 
-    auto back = result_out->blocks.begin();
+    auto back = result_out.blocks.begin();
     auto next = std::next(back);
-    while (next != result_out->blocks.end()) {
+    while (next != result_out.blocks.end()) {
         if (!state.labels.contains(next->start) && next->start == back->end + 1) {
             back->end = next->end;
-            next = result_out->blocks.erase(next);
+            next = result_out.blocks.erase(next);
             continue;
         }
         back = next;
@@ -748,4 +787,22 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
 
     return result_out;
 }
+
+} // Anonymous namespace
+
+std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address,
+                                        const CompilerSettings& settings, Registry& registry) {
+    ProgramControl control{}; // One control object is shared by every ScanFunction call below
+    auto result_out = std::make_unique<ShaderProgram>();
+    result_out->main = // Entry function: start_address is passed as both start and base
+        ScanFunction(control, program_code, start_address, start_address, settings, registry);
+    while (!control.pending_functions.empty()) { // Drain the queue of functions left to scan
+        u32 address = control.pending_functions.front(); // NOTE(review): presumably queued by ScanFunction
+        auto fun = ScanFunction(control, program_code, address, start_address, settings, registry);
+        result_out->subfunctions.emplace(address, std::move(fun)); // Keyed by function address
+        control.pending_functions.pop_front(); // Entry stays queued until its scan has finished
+    }
+    return result_out;
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37bf964928..5ef2251b95 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <list>
+#include <map>
 #include <optional>
 #include <set>
 #include <variant>
@@ -101,7 +102,7 @@ struct ShaderBlock {
     }
 };
 
-struct ShaderCharacteristics {
+struct ShaderFunction {
     std::list<ShaderBlock> blocks{};
     std::set<u32> labels{};
     u32 start{};
@@ -110,8 +111,12 @@ struct ShaderCharacteristics {
     CompilerSettings settings{};
 };
 
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
-                                                const CompilerSettings& settings,
-                                                Registry& registry);
+struct ShaderProgram { // Result type of ScanFlow
+    ShaderFunction main; ///< Function scanned from ScanFlow's start_address
+    std::map<u32, ShaderFunction> subfunctions; ///< Further functions, keyed by their address
+};
+
+std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address,
+                                        const CompilerSettings& settings, Registry& registry);
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6576d12089..355c724a3a 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -64,9 +64,52 @@ std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
 
 } // Anonymous namespace
 
+class ExprDecoder { // Visitor over Expr nodes that records what they reference in a ShaderIR
+public:
+    explicit ExprDecoder(ShaderIR& ir_) : ir(ir_) {}
+
+    void operator()(const ExprAnd& expr) { // Descends into both operands
+        Visit(expr.operand1);
+        Visit(expr.operand2);
+    }
+
+    void operator()(const ExprOr& expr) { // Descends into both operands
+        Visit(expr.operand1);
+        Visit(expr.operand2);
+    }
+
+    void operator()(const ExprNot& expr) { // Descends into the single operand
+        Visit(expr.operand1);
+    }
+
+    void operator()(const ExprPredicate& expr) {
+        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+        if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { // These two are not recorded
+            ir.used_predicates.insert(pred);
+        }
+    }
+
+    void operator()(const ExprCondCode& expr) {} // Nothing is recorded for this node type
+
+    void operator()(const ExprVar& expr) {} // Nothing is recorded for this node type
+
+    void operator()(const ExprBoolean& expr) {} // Nothing is recorded for this node type
+
+    void operator()(const ExprGprEqual& expr) {
+        ir.used_registers.insert(expr.gpr); // expr.gpr is recorded as a used register
+    }
+
+    void Visit(const Expr& node) { // Dispatches to the overload matching the held alternative
+        return std::visit(*this, *node);
+    }
+
+private:
+    ShaderIR& ir; ///< IR whose used_predicates and used_registers sets are updated
+};
+
 class ASTDecoder {
 public:
-    explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
+    explicit ASTDecoder(ShaderIR& ir_) : ir(ir_), decoder(ir_) {}
 
     void operator()(ASTProgram& ast) {
         ASTNode current = ast.nodes.GetFirst();
@@ -77,6 +120,7 @@ public:
     }
 
     void operator()(ASTIfThen& ast) {
+        decoder.Visit(ast.condition);
         ASTNode current = ast.nodes.GetFirst();
         while (current) {
             Visit(current);
@@ -96,13 +140,18 @@ public:
 
     void operator()(ASTBlockDecoded& ast) {}
 
-    void operator()(ASTVarSet& ast) {}
+    void operator()(ASTVarSet& ast) {
+        decoder.Visit(ast.condition);
+    }
 
     void operator()(ASTLabel& ast) {}
 
-    void operator()(ASTGoto& ast) {}
+    void operator()(ASTGoto& ast) {
+        decoder.Visit(ast.condition);
+    }
 
     void operator()(ASTDoWhile& ast) {
+        decoder.Visit(ast.condition);
         ASTNode current = ast.nodes.GetFirst();
         while (current) {
             Visit(current);
@@ -110,9 +159,13 @@ public:
         }
     }
 
-    void operator()(ASTReturn& ast) {}
+    void operator()(ASTReturn& ast) {
+        decoder.Visit(ast.condition);
+    }
 
-    void operator()(ASTBreak& ast) {}
+    void operator()(ASTBreak& ast) {
+        decoder.Visit(ast.condition);
+    }
 
     void Visit(ASTNode& node) {
         std::visit(*this, *node->GetInnerData());
@@ -125,77 +178,113 @@ public:
 
 private:
     ShaderIR& ir;
+    ExprDecoder decoder;
 };
 
 void ShaderIR::Decode() {
-    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-
-    decompiled = false;
-    auto info = ScanFlow(program_code, main_offset, settings, registry);
-    auto& shader_info = *info;
-    coverage_begin = shader_info.start;
-    coverage_end = shader_info.end;
-    switch (shader_info.settings.depth) {
-    case CompileDepth::FlowStack: {
-        for (const auto& block : shader_info.blocks) {
-            basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
-        }
-        break;
-    }
-    case CompileDepth::NoFlowStack: {
-        disable_flow_stack = true;
-        const auto insert_block = [this](NodeBlock& nodes, u32 label) {
-            if (label == static_cast<u32>(exit_branch)) {
-                return;
+    const auto decode_function = ([this](ShaderFunction& shader_info) { // Decodes one function
+        coverage_end = std::max<u32>(coverage_end, shader_info.end); // Keep the furthest end seen
+        switch (shader_info.settings.depth) {
+        case CompileDepth::FlowStack: {
+            for (const auto& block : shader_info.blocks) {
+                basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
             }
-            basic_blocks.insert({label, nodes});
-        };
-        const auto& blocks = shader_info.blocks;
-        NodeBlock current_block;
-        u32 current_label = static_cast<u32>(exit_branch);
-        for (const auto& block : blocks) {
-            if (shader_info.labels.contains(block.start)) {
-                insert_block(current_block, current_label);
-                current_block.clear();
-                current_label = block.start;
+            break;
+        }
+        case CompileDepth::NoFlowStack: {
+            disable_flow_stack = true;
+            const auto insert_block = [this](NodeBlock& nodes, u32 label) {
+                if (label == static_cast<u32>(exit_branch)) { // Nothing accumulated under a label yet
+                    return;
+                }
+                basic_blocks.insert({label, nodes});
+            };
+            const auto& blocks = shader_info.blocks;
+            NodeBlock current_block;
+            u32 current_label = static_cast<u32>(exit_branch);
+            for (const auto& block : blocks) {
+                if (shader_info.labels.contains(block.start)) { // A label starts a new basic block
+                    insert_block(current_block, current_label);
+                    current_block.clear();
+                    current_label = block.start;
+                }
+                if (!block.ignore_branch) {
+                    DecodeRangeInner(current_block, block.start, block.end);
+                    InsertControlFlow(current_block, block);
+                } else {
+                    DecodeRangeInner(current_block, block.start, block.end + 1);
+                }
             }
-            if (!block.ignore_branch) {
-                DecodeRangeInner(current_block, block.start, block.end);
-                InsertControlFlow(current_block, block);
-            } else {
-                DecodeRangeInner(current_block, block.start, block.end + 1);
+            insert_block(current_block, current_label);
+            break;
+        }
+        case CompileDepth::DecompileBackwards:
+        case CompileDepth::FullDecompile: {
+            program_manager = std::move(shader_info.manager);
+            disable_flow_stack = true;
+            decompiled = true;
+            ASTDecoder decoder{*this};
+            ASTNode program = program_manager.GetProgram();
+            decoder.Visit(program);
+            break;
+        }
+        default:
+            LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
+            [[fallthrough]];
+        case CompileDepth::BruteForce: {
+            const auto shader_end = static_cast<u32>(program_code.size());
+            coverage_begin = main_offset; // Brute force spans main_offset to the end of the code
+            coverage_end = shader_end;
+            for (u32 label = main_offset; label < shader_end; ++label) {
+                basic_blocks.insert({label, DecodeRange(label, label + 1)});
             }
+            break;
         }
-        insert_block(current_block, current_label);
-        break;
-    }
-    case CompileDepth::DecompileBackwards:
-    case CompileDepth::FullDecompile: {
-        program_manager = std::move(shader_info.manager);
-        disable_flow_stack = true;
-        decompiled = true;
-        ASTDecoder decoder{*this};
-        ASTNode program = GetASTProgram();
-        decoder.Visit(program);
-        break;
-    }
-    default:
-        LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
-        [[fallthrough]];
-    case CompileDepth::BruteForce: {
-        const auto shader_end = static_cast<u32>(program_code.size());
-        coverage_begin = main_offset;
-        coverage_end = shader_end;
-        for (u32 label = main_offset; label < shader_end; ++label) {
-            basic_blocks.insert({label, DecodeRange(label, label + 1)});
         }
-        break;
-    }
+        if (settings.depth != shader_info.settings.depth) {
+            LOG_WARNING(
+                HW_GPU,
+                "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
+                CompileDepthAsString(settings.depth),
+                CompileDepthAsString(shader_info.settings.depth));
+        }
+    });
+    const auto gen_function =
+        ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr<ShaderFunctionIR> {
+            std::shared_ptr<ShaderFunctionIR> result; // Takes over what decode_function filled in
+            if (decompiled) {
+                result = std::make_shared<ShaderFunctionIR>(std::move(program_manager), id,
+                                                            shader_info.start, shader_info.end);
+            } else {
+                result =
+                    std::make_shared<ShaderFunctionIR>(std::move(basic_blocks), disable_flow_stack,
+                                                       id, shader_info.start, shader_info.end);
+            }
+            decompiled = false; // Reset the per-function state before the next function is decoded
+            disable_flow_stack = false;
+            basic_blocks.clear();
+            program_manager.Clear();
+            return result;
+        });
+    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+
+    decompiled = false;
+    auto info = ScanFlow(program_code, main_offset, settings, registry);
+    u32 id_start = 1; // Subfunction ids start at 1; id 0 is given to main below
+    for (auto& pair : info->subfunctions) {
+        func_map.emplace(pair.first, id_start);
+        id_start++;
     }
-    if (settings.depth != shader_info.settings.depth) {
-        LOG_WARNING(
-            HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
-            CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
+    coverage_begin = info->main.start;
+    coverage_end = 0; // Starting value for the running maximum kept by decode_function
+    decode_function(info->main);
+    main_function = gen_function(info->main, 0);
+    subfunctions.resize(info->subfunctions.size());
+    for (auto& pair : info->subfunctions) {
+        auto& func_info = pair.second;
+        decode_function(func_info);
+        u32 id = func_map[pair.first];
+        subfunctions[id - 1] = gen_function(func_info, id); // Ids are 1-based, the vector 0-based
     }
 }
 
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 5f88537bc4..2bc596512a 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -33,6 +33,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         // With the previous preconditions, this instruction is a no-operation.
         break;
     }
+    case OpCode::Id::RET:
     case OpCode::Id::EXIT: {
         const ConditionCode cc = instr.flow_condition_code;
         UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
@@ -312,6 +313,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
         break;
     }
+    case OpCode::Id::CAL: {
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        const auto it = func_map.find(target);
+        if (it == func_map.end()) {
+            UNREACHABLE();
+            break;
+        }
+        bb.push_back(FunctionCall(it->second));
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
     }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b54d33763d..a58e7c65e4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -267,10 +267,11 @@ class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;
+class FunctionCallNode;
 
 using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
-                              LmemNode, SmemNode, GmemNode, CommentNode>;
+                              LmemNode, SmemNode, GmemNode, FunctionCallNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
@@ -494,6 +495,18 @@ private:
     std::vector<Node> code; ///< Code to execute
 };
 
+class FunctionCallNode final : public AmendNode { // Node that names a function by its id
+public:
+    explicit FunctionCallNode(u32 func_id_) : func_id{func_id_} {}
+
+    [[nodiscard]] u32 GetFuncId() const { // Returns the id passed to the constructor
+        return func_id;
+    }
+
+private:
+    u32 func_id; ///< Id of the function to call
+};
+
 /// A general purpose register
 class GprNode final {
 public:
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6a5b6940d1..cef9c26bc3 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -19,6 +19,11 @@ Node Comment(std::string text) {
     return MakeNode<CommentNode>(std::move(text));
 }
 
+/// Creates a FunctionCallNode holding func_id, the id of the function to call
+Node FunctionCall(u32 func_id) {
+    return MakeNode<FunctionCallNode>(func_id);
+}
+
 Node Immediate(u32 value) {
     return MakeNode<ImmediateNode>(value);
 }
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 1e0886185d..3f882cd25d 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -27,6 +27,9 @@ Node Conditional(Node condition, std::vector<Node> code);
 /// Creates a commentary node
 Node Comment(std::string text);
 
+/// Creates a function call node; func_id is the id of the function to call
+Node FunctionCall(u32 func_id);
+
 /// Creates an u32 immediate
 Node Immediate(u32 value);
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1cd7c14d76..94715b0699 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -26,7 +26,7 @@ namespace VideoCommon::Shader {
 
 struct ShaderBlock;
 
-constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
+constexpr u32 MAX_PROGRAM_LENGTH = 0x2000;
 
 struct ConstBuffer {
     constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
@@ -64,16 +64,68 @@ struct GlobalMemoryUsage {
     bool is_written{};
 };
 
-class ShaderIR final {
+class ShaderFunctionIR final { // One function in basic-block form or in AST form
 public:
-    explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
-                      CompilerSettings settings_, Registry& registry_);
-    ~ShaderIR();
+    explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_,
+                              u32 id_, u32 coverage_begin_, u32 coverage_end_) // Basic-block form
+        : basic_blocks{std::move(basic_blocks_)}, decompiled{false},
+          disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_},
+          coverage_end{coverage_end_} {}
+    explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_,
+                              u32 coverage_end_) // AST form: decompiled, flow stack disabled
+        : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true},
+          id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {}
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
         return basic_blocks;
     }
 
+    [[nodiscard]] bool IsFlowStackDisabled() const {
+        return disable_flow_stack;
+    }
+
+    [[nodiscard]] bool IsDecompiled() const { // True only when built through the AST constructor
+        return decompiled;
+    }
+
+    const ASTManager& GetASTManager() const {
+        return program_manager;
+    }
+
+    [[nodiscard]] ASTNode GetASTProgram() const {
+        return program_manager.GetProgram();
+    }
+
+    [[nodiscard]] u32 GetASTNumVariables() const {
+        return program_manager.GetVariables();
+    }
+
+    [[nodiscard]] bool IsMain() const { // Id 0 identifies the main function
+        return id == 0;
+    }
+
+    [[nodiscard]] u32 GetId() const {
+        return id;
+    }
+
+private:
+    std::map<u32, NodeBlock> basic_blocks; ///< Left empty by the AST constructor
+    ASTManager program_manager{true, true}; ///< Keeps this default in the basic-block form
+
+    bool decompiled{};
+    bool disable_flow_stack{};
+    u32 id{};
+
+    u32 coverage_begin{}; // NOTE(review): stored, but no accessor is visible in this class
+    u32 coverage_end{}; // NOTE(review): stored, but no accessor is visible in this class
+};
+
+class ShaderIR final {
+public:
+    explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
+                      CompilerSettings settings_, Registry& registry_);
+    ~ShaderIR();
+
     const std::set<u32>& GetRegisters() const {
         return used_registers;
     }
@@ -155,26 +207,6 @@ public:
         return header;
     }
 
-    bool IsFlowStackDisabled() const {
-        return disable_flow_stack;
-    }
-
-    bool IsDecompiled() const {
-        return decompiled;
-    }
-
-    const ASTManager& GetASTManager() const {
-        return program_manager;
-    }
-
-    ASTNode GetASTProgram() const {
-        return program_manager.GetProgram();
-    }
-
-    u32 GetASTNumVariables() const {
-        return program_manager.GetVariables();
-    }
-
     u32 ConvertAddressToNvidiaSpace(u32 address) const {
         return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
     }
@@ -190,7 +222,16 @@ public:
         return num_custom_variables;
     }
 
+    std::shared_ptr<ShaderFunctionIR> GetMainFunction() const {
+        return main_function;
+    }
+
+    const std::vector<std::shared_ptr<ShaderFunctionIR>>& GetSubFunctions() const {
+        return subfunctions;
+    }
+
 private:
+    friend class ExprDecoder;
     friend class ASTDecoder;
 
     struct SamplerInfo {
@@ -453,6 +494,10 @@ private:
     std::vector<Node> amend_code;
     u32 num_custom_variables{};
 
+    std::shared_ptr<ShaderFunctionIR> main_function;
+    std::vector<std::shared_ptr<ShaderFunctionIR>> subfunctions;
+    std::unordered_map<u32, u32> func_map;
+
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;
     std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
-- 
cgit v1.2.3-70-g09d2