aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoryuzubot <yuzu@yuzu-emu.org>2021-05-22 12:02:37 +0000
committeryuzubot <yuzu@yuzu-emu.org>2021-05-22 12:02:37 +0000
commitd29fe60dba888e5364ba6cb562fd45af262cf34c (patch)
treec8be09e098e60e124f5c5f273ca14b9860a9af6e /src
parent5068279f23c1ae628f9298ab1a811435fd99c8cd (diff)
"Merge Tagged PR 5896"
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/shader_bytecode.h4
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp82
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp62
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp115
-rw-r--r--src/video_core/shader/control_flow.cpp159
-rw-r--r--src/video_core/shader/control_flow.h13
-rw-r--r--src/video_core/shader/decode.cpp223
-rw-r--r--src/video_core/shader/decode/other.cpp11
-rw-r--r--src/video_core/shader/node.h15
-rw-r--r--src/video_core/shader/node_helper.cpp5
-rw-r--r--src/video_core/shader/node_helper.h3
-rw-r--r--src/video_core/shader/shader_ir.h95
12 files changed, 574 insertions, 213 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8b45f1b623..5d659dcaff 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1785,6 +1785,8 @@ public:
SSY,
SYNC,
BRK,
+ CAL,
+ RET,
DEPBAR,
VOTE,
VOTE_VTG,
@@ -2108,6 +2110,8 @@ private:
INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
+ INST("111000100110----", Id::CAL, Type::Flow, "CAL"),
+ INST("111000110010----", Id::RET, Type::Flow, "RET"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index 3e4d88c302..e986474788 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -491,6 +491,9 @@ private:
const Registry& registry;
const ShaderType stage;
+ std::shared_ptr<ShaderFunctionIR> context_func;
+ u32 ast_var_base{};
+
std::size_t num_temporaries = 0;
std::size_t max_temporaries = 0;
@@ -807,13 +810,33 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
DefineGlobalMemory();
+ context_func = ir.GetMainFunction();
+ ast_var_base = 0;
+
AddLine("TEMP RC;");
AddLine("TEMP FSWZA[4];");
AddLine("TEMP FSWZB[4];");
- if (ir.IsDecompiled()) {
+ InitializeVariables();
+ AddLine("main:");
+ if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
+ AddLine("RET;");
+ }
+
+ const auto& subfunctions = ir.GetSubFunctions();
+ auto it = subfunctions.begin();
+ while (it != subfunctions.end()) {
+ context_func = *it;
+ AddLine("func_{}:", context_func->GetId());
+ if (context_func->IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ AddLine("RET;");
+ }
+ it++;
}
AddLine("END");
@@ -1060,41 +1083,38 @@ void ARBDecompiler::InitializeVariables() {
}
void ARBDecompiler::DecompileAST() { // Emits context_func's AST: declares and zeroes its F flow temporaries, then visits the program.
- const u32 num_flow_variables = ir.GetASTNumVariables();
+ const u32 num_flow_variables = context_func->GetASTNumVariables(); // count is taken from the function currently being emitted
for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("TEMP F{};", i);
+ AddLine("TEMP F{};", i + ast_var_base); // offset by ast_var_base so F names do not repeat between emitted functions
}
for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
+ AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i + ast_var_base); // same offset as the declaration above
}
- InitializeVariables();
-
- VisitAST(ir.GetASTProgram());
+ VisitAST(context_func->GetASTProgram()); // visited while ast_var_base still refers to this function's range
+ ast_var_base += num_flow_variables; // the next DecompileAST call numbers its variables after this function's
}
void ARBDecompiler::DecompileBranchMode() {
static constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
+ if (!context_func->IsFlowStackDisabled()) {
AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
AddLine("TEMP SSY_TOP;");
AddLine("TEMP PBK_TOP;");
}
- AddLine("TEMP PC;");
+ AddLine("TEMP PC{};", context_func->GetId());
- if (!ir.IsFlowStackDisabled()) {
+ if (!context_func->IsFlowStackDisabled()) {
AddLine("MOV.U SSY_TOP.x, 0;");
AddLine("MOV.U PBK_TOP.x, 0;");
}
- InitializeVariables();
-
- const auto basic_block_end = ir.GetBasicBlocks().end();
- auto basic_block_it = ir.GetBasicBlocks().begin();
+ const auto basic_block_end = context_func->GetBasicBlocks().end();
+ auto basic_block_it = context_func->GetBasicBlocks().begin();
const u32 first_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", first_address);
+ AddLine("MOV.U PC{}.x, {};", context_func->GetId(), first_address);
AddLine("REP;");
@@ -1103,7 +1123,7 @@ void ARBDecompiler::DecompileBranchMode() {
const auto& [address, bb] = *basic_block_it;
++num_blocks;
- AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
+ AddLine("SEQ.S.CC RC.x, PC{}.x, {};", context_func->GetId(), address);
AddLine("IF NE.x;");
VisitBlock(bb);
@@ -1114,7 +1134,7 @@ void ARBDecompiler::DecompileBranchMode() {
const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
if (!op || op->GetCode() != OperationCode::Branch) {
const u32 next_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", next_address);
+ AddLine("MOV.U PC{}.x, {};", context_func->GetId(), next_address);
AddLine("CONT;");
}
}
@@ -1152,7 +1172,8 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
} else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
VisitBlock(decoded->nodes);
} else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
+ AddLine("MOV.U F{}, {};", var_set->index + ast_var_base,
+ VisitExpression(var_set->condition));
ResetTemporaries();
} else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
const std::string condition = VisitExpression(do_while->condition);
@@ -1172,7 +1193,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
ResetTemporaries();
}
if (ast_return->kills) {
- AddLine("KIL TR;");
+ if (stage == ShaderType::Fragment) {
+ AddLine("KIL TR;");
+ } else {
+ AddLine("RET;");
+ }
} else {
Exit();
}
@@ -1219,7 +1244,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) {
return Visit(ir.GetConditionCode(expr->cc));
}
if (const auto expr = std::get_if<ExprVar>(&*node)) {
- return fmt::format("F{}.x", expr->var_index);
+ return fmt::format("F{}.x", expr->var_index + ast_var_base);
}
if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
return expr->value ? "0xffffffff" : "0";
@@ -1406,6 +1431,11 @@ std::string ARBDecompiler::Visit(const Node& node) {
return {};
}
+ if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+ AddLine("CAL func_{};", func_call->GetFuncId());
+ return {};
+ }
+
if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
// Uncommenting this will generate invalid code. GLASM lacks comments.
// AddLine("// {}", cmt->GetText());
@@ -1479,7 +1509,7 @@ std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
}
void ARBDecompiler::Exit() {
- if (stage != ShaderType::Fragment) {
+ if (!context_func->IsMain() || stage != ShaderType::Fragment) {
AddLine("RET;");
return;
}
@@ -2021,13 +2051,13 @@ std::string ARBDecompiler::ImageStore(Operation operation) {
std::string ARBDecompiler::Branch(Operation operation) { // Emits a jump to an immediate address; always returns an empty string.
const auto target = std::get<ImmediateNode>(*operation[0]); // operand 0 must hold an ImmediateNode, otherwise std::get throws
- AddLine("MOV.U PC.x, {};", target.GetValue());
+ AddLine("MOV.U PC{}.x, {};", context_func->GetId(), target.GetValue()); // the PC temporary name carries the id of the function being emitted
AddLine("CONT;");
return {};
}
std::string ARBDecompiler::BranchIndirect(Operation operation) { // Emits a jump whose target is the visited value of operand 0; always returns an empty string.
- AddLine("MOV.U PC.x, {};", Visit(operation[0]));
+ AddLine("MOV.U PC{}.x, {};", context_func->GetId(), Visit(operation[0])); // the PC temporary name carries the id of the function being emitted
AddLine("CONT;");
return {};
}
@@ -2045,7 +2075,7 @@ std::string ARBDecompiler::PopFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const std::string_view stack_name = StackName(stack);
AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
+ AddLine("MOV.U PC{}.x, {}[{}_TOP.x].x;", context_func->GetId(), stack_name, stack_name);
AddLine("CONT;");
return {};
}
@@ -2056,6 +2086,10 @@ std::string ARBDecompiler::Exit(Operation) {
}
std::string ARBDecompiler::Discard(Operation) { // Emits KIL for fragment shaders and RET for every other stage; always returns an empty string.
+ if (stage != ShaderType::Fragment) { // non-fragment stages get a plain return instead of KIL
+ AddLine("RET;");
+ return {};
+ }
AddLine("KIL TR;");
return {};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ac78d344ca..fa6b10ac37 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -435,6 +435,27 @@ public:
DeclareCustomVariables();
DeclarePhysicalAttributeReader();
+ const auto& subfunctions = ir.GetSubFunctions();
+ auto it = subfunctions.rbegin();
+ while (it != subfunctions.rend()) {
+ context_func = *it;
+ code.AddLine("void func_{}() {{", context_func->GetId());
+ ++code.scope;
+
+ if (context_func->IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+
+ --code.scope;
+ code.AddLine("}}");
+
+ it++;
+ }
+
+ context_func = ir.GetMainFunction();
+
code.AddLine("void main() {{");
++code.scope;
@@ -442,7 +463,7 @@ public:
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
}
- if (ir.IsDecompiled()) {
+ if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
@@ -462,13 +483,13 @@ private:
void DecompileBranchMode() {
// VM's program counter
- const auto first_address = ir.GetBasicBlocks().begin()->first;
+ const auto first_address = context_func->GetBasicBlocks().begin()->first;
code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
+ if (!context_func->IsFlowStackDisabled()) {
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -480,7 +501,7 @@ private:
code.AddLine("switch (jmp_to) {{");
- for (const auto& pair : ir.GetBasicBlocks()) {
+ for (const auto& pair : context_func->GetBasicBlocks()) {
const auto& [address, bb] = pair;
code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
@@ -1131,6 +1152,11 @@ private:
return {};
}
+ if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+ code.AddLine("func_{}();", func_call->GetFuncId());
+ return {};
+ }
+
if (const auto comment = std::get_if<CommentNode>(&*node)) {
code.AddLine("// " + comment->GetText());
return {};
@@ -2267,7 +2293,9 @@ private:
}
Expression Exit(Operation operation) { // Emits a return statement for the function being generated; always returns an empty Expression.
- PreExit();
+ if (context_func->IsMain()) { // PreExit() is only called while emitting the main function
+ PreExit();
+ }
code.AddLine("return;");
return {};
}
@@ -2277,7 +2305,11 @@ private:
// about unexecuted instructions that may follow this.
code.AddLine("if (true) {{");
++code.scope;
- code.AddLine("discard;");
+ if (stage != ShaderType::Fragment) {
+ code.AddLine("return;");
+ } else {
+ code.AddLine("discard;");
+ }
--code.scope;
code.AddLine("}}");
return {};
@@ -2388,7 +2420,7 @@ private:
}
Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
+ if (!context_func->IsDecompiled()) {
LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
return {};
}
@@ -2755,6 +2787,8 @@ private:
const Header header;
std::unordered_map<u8, VaryingTFB> transform_feedback;
+ std::shared_ptr<ShaderFunctionIR> context_func;
+
ShaderWriter code;
std::optional<u32> max_input_vertices;
@@ -2902,9 +2936,15 @@ public:
decomp.code.scope++;
}
if (ast.kills) {
- decomp.code.AddLine("discard;");
+ if (decomp.stage != ShaderType::Fragment) {
+ decomp.code.AddLine("return;");
+ } else {
+ decomp.code.AddLine("discard;");
+ }
} else {
- decomp.PreExit();
+ if (decomp.context_func->IsMain()) {
+ decomp.PreExit();
+ }
decomp.code.AddLine("return;");
}
if (!is_true) {
@@ -2937,13 +2977,13 @@ private:
};
void GLSLDecompiler::DecompileAST() { // Emits context_func's AST: declares its flow variables as false, then visits the program.
- const u32 num_flow_variables = ir.GetASTNumVariables();
+ const u32 num_flow_variables = context_func->GetASTNumVariables(); // count is taken from the function currently being emitted
for (u32 i = 0; i < num_flow_variables; i++) {
code.AddLine("bool {} = false;", GetFlowVariable(i)); // one bool declaration per flow variable, written at the current emission point
}
ASTDecompiler decompiler{*this};
- decompiler.Visit(ir.GetASTProgram());
+ decompiler.Visit(context_func->GetASTProgram()); // walks the AST of the current function rather than the whole IR
}
} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index c6846d8861..258e2f5df2 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -406,10 +406,38 @@ private:
binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
+ const auto& subfunctions = ir.GetSubFunctions();
+
+ labels.resize(subfunctions.size() + 1);
+ other_functions.resize(subfunctions.size());
+
+ auto it = subfunctions.rbegin();
+ while (it != subfunctions.rend()) {
+ context_func = *it;
+ other_functions[context_func->GetId() - 1] =
+ OpFunction(t_void, {}, TypeFunction(t_void));
+ AddLabel();
+
+ if (context_func->IsDecompiled()) {
+ DeclareFlowVariables();
+ DecompileAST();
+ } else {
+ AllocateLabels();
+ DecompileBranchMode();
+ }
+
+ OpReturn();
+ OpFunctionEnd();
+
+ it++;
+ }
+
+ context_func = ir.GetMainFunction();
+
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
AddLabel();
- if (ir.IsDecompiled()) {
+ if (context_func->IsDecompiled()) {
DeclareFlowVariables();
DecompileAST();
} else {
@@ -441,16 +469,18 @@ private:
void DecompileAST();
void DecompileBranchMode() {
- const u32 first_address = ir.GetBasicBlocks().begin()->first;
- const Id loop_label = OpLabel("loop");
- const Id merge_label = OpLabel("merge");
+ const u32 first_address = context_func->GetBasicBlocks().begin()->first;
+ const u32 func_id = context_func->GetId();
+ const std::string func_id_msg = std::to_string(func_id);
+ const Id loop_label = OpLabel("loop_" + func_id_msg);
+ const Id merge_label = OpLabel("merge_" + func_id_msg);
const Id dummy_label = OpLabel();
const Id jump_label = OpLabel();
- continue_label = OpLabel("continue");
+ continue_label = OpLabel("continue_" + func_id_msg);
std::vector<Sirit::Literal> literals;
std::vector<Id> branch_labels;
- for (const auto& [literal, label] : labels) {
+ for (const auto& [literal, label] : labels[func_id]) {
literals.push_back(literal);
branch_labels.push_back(label);
}
@@ -462,11 +492,11 @@ private:
std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
- Name(jmp_to, "jmp_to");
- Name(ssy_flow_stack, "ssy_flow_stack");
- Name(ssy_flow_stack_top, "ssy_flow_stack_top");
- Name(pbk_flow_stack, "pbk_flow_stack");
- Name(pbk_flow_stack_top, "pbk_flow_stack_top");
+ Name(jmp_to, "jmp_to_" + func_id_msg);
+ Name(ssy_flow_stack, "ssy_flow_stack_" + func_id_msg);
+ Name(ssy_flow_stack_top, "ssy_flow_stack_top_" + func_id_msg);
+ Name(pbk_flow_stack, "pbk_flow_stack_" + func_id_msg);
+ Name(pbk_flow_stack_top, "pbk_flow_stack_top_" + func_id_msg);
DefinePrologue();
@@ -484,13 +514,14 @@ private:
AddLabel(default_branch);
OpReturn();
- for (const auto& [address, bb] : ir.GetBasicBlocks()) {
- AddLabel(labels.at(address));
+ for (const auto& [address, bb] : context_func->GetBasicBlocks()) {
+ AddLabel(labels[func_id].at(address));
VisitBasicBlock(bb);
- const auto next_it = labels.lower_bound(address + 1);
- const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
+ const auto next_it = labels[func_id].lower_bound(address + 1);
+ const Id next_label =
+ next_it != labels[func_id].end() ? next_it->second : default_branch;
OpBranch(next_label);
}
@@ -508,9 +539,10 @@ private:
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateLabels() { // Creates one named label per basic block of context_func and stores it in that function's label map.
- for (const auto& pair : ir.GetBasicBlocks()) {
+ const u32 func_id = context_func->GetId(); // selects which per-function map inside labels is filled
+ for (const auto& pair : context_func->GetBasicBlocks()) {
const u32 address = pair.first;
- labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
+ labels[func_id].emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); // keyed by block address; labels must already hold an entry at func_id
}
}
@@ -589,6 +621,14 @@ private:
DeclareOutputVertex();
}
+ void SafeKill() { // Emits OpKill for fragment shaders and OpReturn for every other stage.
+ if (stage != ShaderType::Fragment) { // non-fragment stages get a plain return instead of a kill
+ OpReturn();
+ return;
+ }
+ OpKill();
+ }
+
void DeclareFragment() {
if (stage != ShaderType::Fragment) {
return;
@@ -656,7 +696,7 @@ private:
}
void DeclareFlowVariables() {
- for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
+ for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
flow_variables.emplace(i, AddGlobalVariable(id));
@@ -1333,6 +1373,12 @@ private:
return {};
}
+ if (const auto func_call = std::get_if<FunctionCallNode>(&*node)) {
+ const u32 func_id = func_call->GetFuncId();
+ OpFunctionCall(t_void, other_functions[func_id - 1]);
+ return {};
+ }
+
if (const auto comment = std::get_if<CommentNode>(&*node)) {
if (device.HasDebuggingToolAttached()) {
// We should insert comments with OpString instead of using named variables
@@ -2124,7 +2170,7 @@ private:
OpBranchConditional(condition, true_label, discard_label);
AddLabel(discard_label);
- OpKill();
+ SafeKill();
AddLabel(true_label);
}
@@ -2175,7 +2221,9 @@ private:
}
Expression Exit(Operation operation) {
- PreExit();
+ if (context_func->IsMain()) {
+ PreExit();
+ }
inside_branch = true;
if (conditional_branch_set) {
OpReturn();
@@ -2192,12 +2240,12 @@ private:
Expression Discard(Operation operation) {
inside_branch = true;
if (conditional_branch_set) {
- OpKill();
+ SafeKill();
} else {
const Id dummy = OpLabel();
OpBranch(dummy);
AddLabel(dummy);
- OpKill();
+ SafeKill();
AddLabel();
}
return {};
@@ -2276,7 +2324,7 @@ private:
}
Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
+ if (!context_func->IsDecompiled()) {
LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
return {};
}
@@ -2770,6 +2818,8 @@ private:
const Specialization& specialization;
std::unordered_map<u8, VaryingTFB> transform_feedback;
+ std::shared_ptr<ShaderFunctionIR> context_func;
+
const Id t_void = Name(TypeVoid(), "void");
const Id t_bool = Name(TypeBool(), "bool");
@@ -2896,7 +2946,8 @@ private:
Id ssy_flow_stack{};
Id pbk_flow_stack{};
Id continue_label{};
- std::map<u32, Id> labels;
+ std::vector<std::map<u32, Id>> labels;
+ std::vector<Id> other_functions;
bool conditional_branch_set{};
bool inside_branch{};
@@ -3047,9 +3098,11 @@ public:
decomp.OpBranchConditional(condition, then_label, endif_label);
decomp.AddLabel(then_label);
if (ast.kills) {
- decomp.OpKill();
+ decomp.SafeKill();
} else {
- decomp.PreExit();
+ if (decomp.context_func->IsMain()) {
+ decomp.PreExit();
+ }
decomp.OpReturn();
}
decomp.AddLabel(endif_label);
@@ -3058,9 +3111,11 @@ public:
decomp.OpBranch(next_block);
decomp.AddLabel(next_block);
if (ast.kills) {
- decomp.OpKill();
+ decomp.SafeKill();
} else {
- decomp.PreExit();
+ if (decomp.context_func->IsMain()) {
+ decomp.PreExit();
+ }
decomp.OpReturn();
}
decomp.AddLabel(decomp.OpLabel());
@@ -3097,7 +3152,7 @@ private:
};
void SPIRVDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
+ const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; i++) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
Name(id, fmt::format("flow_var_{}", i));
@@ -3106,7 +3161,7 @@ void SPIRVDecompiler::DecompileAST() {
DefinePrologue();
- const ASTNode program = ir.GetASTProgram();
+ const ASTNode program = context_func->GetASTProgram();
ASTDecompiler decompiler{*this};
decompiler.Visit(program);
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 43d965f2fc..7c8bd7e2f1 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -7,6 +7,7 @@
#include <set>
#include <stack>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "common/assert.h"
@@ -26,17 +27,29 @@ using Tegra::Shader::OpCode;
constexpr s32 unassigned_branch = -2;
+enum class JumpLabel : u32 { // Tags a recorded jump target with the instruction kind that produced it.
+ SSYClass = 0, // target recorded for an SSY instruction
+ PBKClass = 1, // target recorded for a PBK instruction
+};
+
+struct JumpItem { // One recorded jump target together with its kind; stored in the unified flow stack.
+ JumpLabel type; // SSY or PBK
+ u32 address; // target address of the jump
+
+ bool operator==(const JumpItem& other) const { // equal only when both kind and address match
+ return std::tie(type, address) == std::tie(other.type, other.address);
+ }
+};
+
struct Query { // A pending control-flow query: an address plus the jump stack state that accompanies it.
u32 address{};
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
+ std::stack<JumpItem> stack{}; // single stack holding both SSY and PBK entries, each tagged by JumpItem::type
};
struct BlockStack { // Copy of a query's jump stack, stored per address in CFGRebuildState::stacks.
BlockStack() = default;
- explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
- std::stack<u32> ssy_stack{};
- std::stack<u32> pbk_stack{};
+ explicit BlockStack(const Query& q) : stack{q.stack} {} // copies the query's stack
+ std::stack<JumpItem> stack{}; // combined SSY/PBK stack
};
template <typename T, typename... Args>
@@ -65,20 +78,36 @@ struct BlockInfo {
}
};
+struct ProgramControl { // Tracks function addresses discovered while scanning a program, and which of them still await scanning.
+ std::unordered_set<u32> found_functions{}; // every address ever registered
+ std::list<u32> pending_functions{}; // registered addresses in discovery order; ScanFlow reads and pops the front
+
+ void RegisterFunction(u32 address) { // Records address as a function entry; registering the same address again does nothing.
+ if (found_functions.count(address) != 0) { // already known, so it is not queued a second time
+ return;
+ }
+ found_functions.insert(address);
+ pending_functions.emplace_back(address);
+ }
+};
+
struct CFGRebuildState { // Working state used while rebuilding the control flow of one function.
- explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
- : program_code{program_code_}, registry{registry_}, start{start_} {}
+ explicit CFGRebuildState(ProgramControl& control_, const ProgramCode& program_code_, u32 start_,
+ u32 base_start_, Registry& registry_)
+ : control{control_}, program_code{program_code_}, registry{registry_}, start{start_},
+ base_start{base_start_} {}
+ ProgramControl& control; // held by reference; CAL targets found during parsing are registered here
const ProgramCode& program_code;
Registry& registry;
u32 start{}; // address at which scanning of this function begins
+ u32 base_start{}; // passed to IsSchedInstruction; ScanFlow supplies the program start address for every function
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
std::list<Query> queries;
std::unordered_map<u32, u32> registered;
std::set<u32> labels;
- std::map<u32, u32> ssy_labels;
- std::map<u32, u32> pbk_labels;
+ std::map<u32, JumpItem> jump_labels; // offset of an SSY or PBK instruction -> the tagged target it records
std::unordered_map<u32, BlockStack> stacks;
ASTManager* manager{};
};
@@ -153,7 +182,7 @@ template <typename Result, typename TestCallable, typename PackCallable>
std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
PackCallable pack) {
for (; pos >= state.start; --pos) {
- if (IsSchedInstruction(pos, state.start)) {
+ if (IsSchedInstruction(pos, state.base_start)) {
continue;
}
const Instruction instr = state.program_code[pos];
@@ -262,7 +291,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
single_branch.ignore = true;
break;
}
- if (IsSchedInstruction(offset, state.start)) {
+ if (IsSchedInstruction(offset, state.base_start)) {
offset++;
continue;
}
@@ -274,6 +303,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
}
switch (opcode->get().GetId()) {
+ case OpCode::Id::RET:
case OpCode::Id::EXIT: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
@@ -411,13 +441,20 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
case OpCode::Id::SSY: {
const u32 target = offset + instr.bra.GetBranchTarget();
insert_label(state, target);
- state.ssy_labels.emplace(offset, target);
+ JumpItem it = {JumpLabel::SSYClass, target};
+ state.jump_labels.emplace(offset, it);
break;
}
case OpCode::Id::PBK: {
const u32 target = offset + instr.bra.GetBranchTarget();
insert_label(state, target);
- state.pbk_labels.emplace(offset, target);
+ JumpItem it = {JumpLabel::PBKClass, target};
+ state.jump_labels.emplace(offset, it);
+ break;
+ }
+ case OpCode::Id::CAL: {
+ const u32 target = offset + instr.bra.GetBranchTarget();
+ state.control.RegisterFunction(target);
break;
}
case OpCode::Id::BRX: {
@@ -513,7 +550,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
}
bool TryQuery(CFGRebuildState& state) {
- const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
+ const auto gather_labels = [](std::stack<JumpItem>& cc, std::map<u32, JumpItem>& labels,
BlockInfo& block) {
auto gather_start = labels.lower_bound(block.start);
const auto gather_end = labels.upper_bound(block.end);
@@ -522,6 +559,19 @@ bool TryQuery(CFGRebuildState& state) {
++gather_start;
}
};
+ const auto pop_labels = [](JumpLabel type, SingleBranch* branch, Query& query) -> bool { // Pops the nearest stack entry of the given kind and resolves the branch target from it; returns false if none is found.
+ while (!query.stack.empty() && query.stack.top().type != type) { // entries of the other kind above the match are dropped
+ query.stack.pop();
+ }
+ if (query.stack.empty()) {
+ return false; // NOTE(review): the call sites visible in this function discard this result, leaving branch->address unassigned - confirm that is intended
+ }
+ if (branch->address == unassigned_branch) { // an address that is already assigned is kept
+ branch->address = query.stack.top().address;
+ }
+ query.stack.pop(); // the matching entry is consumed either way
+ return true;
+ };
if (state.queries.empty()) {
return false;
}
@@ -534,8 +584,7 @@ bool TryQuery(CFGRebuildState& state) {
// consumes a label. Schedule new queries accordingly
if (block.visited) {
BlockStack& stack = state.stacks[q.address];
- const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
- (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
+ const bool all_okay = (stack.stack.empty() || q.stack == stack.stack);
state.queries.pop_front();
return all_okay;
}
@@ -544,8 +593,7 @@ bool TryQuery(CFGRebuildState& state) {
Query q2(q);
state.queries.pop_front();
- gather_labels(q2.ssy_stack, state.ssy_labels, block);
- gather_labels(q2.pbk_stack, state.pbk_labels, block);
+ gather_labels(q2.stack, state.jump_labels, block);
if (std::holds_alternative<SingleBranch>(*block.branch)) {
auto* branch = std::get_if<SingleBranch>(block.branch.get());
if (!branch->condition.IsUnconditional()) {
@@ -555,16 +603,10 @@ bool TryQuery(CFGRebuildState& state) {
auto& conditional_query = state.queries.emplace_back(q2);
if (branch->is_sync) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.ssy_stack.top();
- }
- conditional_query.ssy_stack.pop();
+ pop_labels(JumpLabel::SSYClass, branch, conditional_query);
}
if (branch->is_brk) {
- if (branch->address == unassigned_branch) {
- branch->address = conditional_query.pbk_stack.top();
- }
- conditional_query.pbk_stack.pop();
+ pop_labels(JumpLabel::PBKClass, branch, conditional_query);
}
conditional_query.address = branch->address;
return true;
@@ -646,25 +688,23 @@ void DecompileShader(CFGRebuildState& state) {
state.manager->Decompile();
}
-} // Anonymous namespace
-
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry) {
- auto result_out = std::make_unique<ShaderCharacteristics>();
+ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_code,
+ u32 start_address, u32 base_start, const CompilerSettings& settings,
+ Registry& registry) {
+ ShaderFunction result_out{};
if (settings.depth == CompileDepth::BruteForce) {
- result_out->settings.depth = CompileDepth::BruteForce;
+ result_out.settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, start_address, registry};
+ CFGRebuildState state{control, program_code, start_address, base_start, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
state.inspect_queries.push_back(state.start);
while (!state.inspect_queries.empty()) {
if (!TryInspectAddress(state)) {
- result_out->settings.depth = CompileDepth::BruteForce;
+ result_out.settings.depth = CompileDepth::BruteForce;
return result_out;
}
}
@@ -675,7 +715,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
if (settings.depth != CompileDepth::FlowStack) {
// Decompile Stacks
- state.queries.push_back(Query{state.start, {}, {}});
+ state.queries.push_back(Query{state.start, {}});
decompiled = true;
while (!state.queries.empty()) {
if (!TryQuery(state)) {
@@ -705,19 +745,18 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
state.manager->ShowCurrentState("Of Shader");
state.manager->Clear();
} else {
- auto characteristics = std::make_unique<ShaderCharacteristics>();
- characteristics->start = start_address;
- characteristics->settings.depth = settings.depth;
- characteristics->manager = std::move(manager);
- characteristics->end = state.block_info.back().end + 1;
- return characteristics;
+ result_out.start = start_address;
+ result_out.settings.depth = settings.depth;
+ result_out.manager = std::move(manager);
+ result_out.end = state.block_info.back().end + 1;
+ return result_out;
}
}
- result_out->start = start_address;
- result_out->settings.depth =
+ result_out.start = start_address;
+ result_out.settings.depth =
use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
- result_out->blocks.clear();
+ result_out.blocks.clear();
for (auto& block : state.block_info) {
ShaderBlock new_block{};
new_block.start = block.start;
@@ -726,20 +765,20 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
if (!new_block.ignore_branch) {
new_block.branch = block.branch;
}
- result_out->end = std::max(result_out->end, block.end);
- result_out->blocks.push_back(new_block);
+ result_out.end = std::max(result_out.end, block.end);
+ result_out.blocks.push_back(new_block);
}
if (!use_flow_stack) {
- result_out->labels = std::move(state.labels);
+ result_out.labels = std::move(state.labels);
return result_out;
}
- auto back = result_out->blocks.begin();
+ auto back = result_out.blocks.begin();
auto next = std::next(back);
- while (next != result_out->blocks.end()) {
+ while (next != result_out.blocks.end()) {
if (!state.labels.contains(next->start) && next->start == back->end + 1) {
back->end = next->end;
- next = result_out->blocks.erase(next);
+ next = result_out.blocks.erase(next);
continue;
}
back = next;
@@ -748,4 +787,22 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
return result_out;
}
+
+} // Anonymous namespace
+
+std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address, // Scans the main function, then every function registered through control, and returns them as one ShaderProgram.
+ const CompilerSettings& settings, Registry& registry) {
+ ProgramControl control{}; // shared by all ScanFunction calls below
+ auto result_out = std::make_unique<ShaderProgram>();
+ result_out->main =
+ ScanFunction(control, program_code, start_address, start_address, settings, registry); // for main, start and base_start are the same address
+ while (!control.pending_functions.empty()) { // ScanFunction receives control, so the queue may grow while it is drained
+ u32 address = control.pending_functions.front();
+ auto fun = ScanFunction(control, program_code, address, start_address, settings, registry); // base_start stays at the program start
+ result_out->subfunctions.emplace(address, std::move(fun)); // keyed by the function's start address
+ control.pending_functions.pop_front(); // removed only after the scan of this address has returned
+ }
+ return result_out;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37bf964928..5ef2251b95 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -5,6 +5,7 @@
#pragma once
#include <list>
+#include <map>
#include <optional>
#include <set>
#include <variant>
@@ -101,7 +102,7 @@ struct ShaderBlock {
}
};
-struct ShaderCharacteristics {
+struct ShaderFunction {
std::list<ShaderBlock> blocks{};
std::set<u32> labels{};
u32 start{};
@@ -110,8 +111,12 @@ struct ShaderCharacteristics {
CompilerSettings settings{};
};
-std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
- const CompilerSettings& settings,
- Registry& registry);
+struct ShaderProgram { // What ScanFlow returns: the scanned program split into functions
+ ShaderFunction main; // Function scanned from the program's start address
+ std::map<u32, ShaderFunction> subfunctions; // Other scanned functions, keyed by address
+};
+
+std::unique_ptr<ShaderProgram> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings, Registry& registry);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6576d12089..355c724a3a 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -64,9 +64,52 @@ std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
} // Anonymous namespace
+class ExprDecoder { // Visitor over Expr trees recording the predicates and registers they read
+public:
+ explicit ExprDecoder(ShaderIR& ir_) : ir(ir_) {}
+
+ void operator()(const ExprAnd& expr) { // Binary node: recurse into both operands
+ Visit(expr.operand1);
+ Visit(expr.operand2);
+ }
+
+ void operator()(const ExprOr& expr) { // Binary node: recurse into both operands
+ Visit(expr.operand1);
+ Visit(expr.operand2);
+ }
+
+ void operator()(const ExprNot& expr) { // Unary node: recurse into its single operand
+ Visit(expr.operand1);
+ }
+
+ void operator()(const ExprPredicate& expr) {
+ const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+ if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { // These two are never recorded
+ ir.used_predicates.insert(pred);
+ }
+ }
+
+ void operator()(const ExprCondCode& expr) {} // Nothing is recorded for this node type
+
+ void operator()(const ExprVar& expr) {} // Nothing is recorded for this node type
+
+ void operator()(const ExprBoolean& expr) {} // Nothing is recorded for this node type
+
+ void operator()(const ExprGprEqual& expr) {
+ ir.used_registers.insert(expr.gpr); // The compared register counts as used
+ }
+
+ void Visit(const Expr& node) { // Dispatches to the overload matching the node's alternative
+ return std::visit(*this, *node);
+ }
+
+private:
+ ShaderIR& ir; // IR whose used_predicates / used_registers sets are filled in
+};
+
class ASTDecoder {
public:
- explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
+ explicit ASTDecoder(ShaderIR& ir_) : ir(ir_), decoder(ir_) {}
void operator()(ASTProgram& ast) {
ASTNode current = ast.nodes.GetFirst();
@@ -77,6 +120,7 @@ public:
}
void operator()(ASTIfThen& ast) {
+ decoder.Visit(ast.condition);
ASTNode current = ast.nodes.GetFirst();
while (current) {
Visit(current);
@@ -96,13 +140,18 @@ public:
void operator()(ASTBlockDecoded& ast) {}
- void operator()(ASTVarSet& ast) {}
+ void operator()(ASTVarSet& ast) {
+ decoder.Visit(ast.condition);
+ }
void operator()(ASTLabel& ast) {}
- void operator()(ASTGoto& ast) {}
+ void operator()(ASTGoto& ast) {
+ decoder.Visit(ast.condition);
+ }
void operator()(ASTDoWhile& ast) {
+ decoder.Visit(ast.condition);
ASTNode current = ast.nodes.GetFirst();
while (current) {
Visit(current);
@@ -110,9 +159,13 @@ public:
}
}
- void operator()(ASTReturn& ast) {}
+ void operator()(ASTReturn& ast) {
+ decoder.Visit(ast.condition);
+ }
- void operator()(ASTBreak& ast) {}
+ void operator()(ASTBreak& ast) {
+ decoder.Visit(ast.condition);
+ }
void Visit(ASTNode& node) {
std::visit(*this, *node->GetInnerData());
@@ -125,77 +178,113 @@ public:
private:
ShaderIR& ir;
+ ExprDecoder decoder;
};
void ShaderIR::Decode() {
- std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-
- decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, registry);
- auto& shader_info = *info;
- coverage_begin = shader_info.start;
- coverage_end = shader_info.end;
- switch (shader_info.settings.depth) {
- case CompileDepth::FlowStack: {
- for (const auto& block : shader_info.blocks) {
- basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
- }
- break;
- }
- case CompileDepth::NoFlowStack: {
- disable_flow_stack = true;
- const auto insert_block = [this](NodeBlock& nodes, u32 label) {
- if (label == static_cast<u32>(exit_branch)) {
- return;
+ const auto decode_function = ([this](ShaderFunction& shader_info) { // Decodes one scanned function into the scratch members
+ coverage_end = std::max<u32>(coverage_end, shader_info.end); // Running maximum over every decoded function
+ switch (shader_info.settings.depth) {
+ case CompileDepth::FlowStack: {
+ for (const auto& block : shader_info.blocks) {
+ basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
 }
- basic_blocks.insert({label, nodes});
- };
- const auto& blocks = shader_info.blocks;
- NodeBlock current_block;
- u32 current_label = static_cast<u32>(exit_branch);
- for (const auto& block : blocks) {
- if (shader_info.labels.contains(block.start)) {
- insert_block(current_block, current_label);
- current_block.clear();
- current_label = block.start;
+ break;
+ }
+ case CompileDepth::NoFlowStack: {
+ disable_flow_stack = true;
+ const auto insert_block = [this](NodeBlock& nodes, u32 label) {
+ if (label == static_cast<u32>(exit_branch)) { // exit_branch marks "no block collected yet"
+ return;
+ }
+ basic_blocks.insert({label, nodes});
+ };
+ const auto& blocks = shader_info.blocks;
+ NodeBlock current_block;
+ u32 current_label = static_cast<u32>(exit_branch);
+ for (const auto& block : blocks) {
+ if (shader_info.labels.contains(block.start)) { // A labelled block starts a new basic block
+ insert_block(current_block, current_label);
+ current_block.clear();
+ current_label = block.start;
+ }
+ if (!block.ignore_branch) {
+ DecodeRangeInner(current_block, block.start, block.end);
+ InsertControlFlow(current_block, block);
+ } else {
+ DecodeRangeInner(current_block, block.start, block.end + 1);
+ }
 }
- if (!block.ignore_branch) {
- DecodeRangeInner(current_block, block.start, block.end);
- InsertControlFlow(current_block, block);
- } else {
- DecodeRangeInner(current_block, block.start, block.end + 1);
+ insert_block(current_block, current_label); // Flush the block still being collected
+ break;
+ }
+ case CompileDepth::DecompileBackwards:
+ case CompileDepth::FullDecompile: {
+ program_manager = std::move(shader_info.manager);
+ disable_flow_stack = true;
+ decompiled = true;
+ ASTDecoder decoder{*this};
+ ASTNode program = program_manager.GetProgram();
+ decoder.Visit(program);
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
+ [[fallthrough]];
+ case CompileDepth::BruteForce: {
+ const auto shader_end = static_cast<u32>(program_code.size());
+ coverage_begin = main_offset;
+ coverage_end = shader_end;
+ for (u32 label = main_offset; label < shader_end; ++label) { // One block per instruction
+ basic_blocks.insert({label, DecodeRange(label, label + 1)});
 }
+ break;
 }
- insert_block(current_block, current_label);
- break;
- }
- case CompileDepth::DecompileBackwards:
- case CompileDepth::FullDecompile: {
- program_manager = std::move(shader_info.manager);
- disable_flow_stack = true;
- decompiled = true;
- ASTDecoder decoder{*this};
- ASTNode program = GetASTProgram();
- decoder.Visit(program);
- break;
- }
- default:
- LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
- [[fallthrough]];
- case CompileDepth::BruteForce: {
- const auto shader_end = static_cast<u32>(program_code.size());
- coverage_begin = main_offset;
- coverage_end = shader_end;
- for (u32 label = main_offset; label < shader_end; ++label) {
- basic_blocks.insert({label, DecodeRange(label, label + 1)});
 }
- break;
- }
+ if (settings.depth != shader_info.settings.depth) { // The scan ended at a different depth than requested
+ LOG_WARNING(
+ HW_GPU,
+ "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
+ CompileDepthAsString(settings.depth),
+ CompileDepthAsString(shader_info.settings.depth));
+ }
+ });
+ const auto gen_function = // Packs the scratch members into a ShaderFunctionIR, then resets them
+ ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr<ShaderFunctionIR> {
+ std::shared_ptr<ShaderFunctionIR> result;
+ if (decompiled) {
+ result = std::make_shared<ShaderFunctionIR>(std::move(program_manager), id,
+ shader_info.start, shader_info.end);
+ } else {
+ result =
+ std::make_shared<ShaderFunctionIR>(std::move(basic_blocks), disable_flow_stack,
+ id, shader_info.start, shader_info.end);
+ }
+ decompiled = false; // Reset the scratch members for the next function
+ disable_flow_stack = false;
+ basic_blocks.clear();
+ program_manager.Clear();
+ return result;
+ });
+ std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+
+ decompiled = false;
+ auto info = ScanFlow(program_code, main_offset, settings, registry);
+ u32 id_start = 1; // Id 0 is used for the main function below
+ for (auto& pair : info->subfunctions) { // Ids are assigned before any function is decoded
+ func_map.emplace(pair.first, id_start);
+ id_start++;
 }
- if (settings.depth != shader_info.settings.depth) {
- LOG_WARNING(
- HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
- CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
+ coverage_begin = info->main.start;
+ coverage_end = 0; // Starting value for the running maximum kept by decode_function
+ decode_function(info->main);
+ main_function = gen_function(info->main, 0);
+ subfunctions.resize(info->subfunctions.size());
+ for (auto& pair : info->subfunctions) {
+ auto& func_info = pair.second;
+ decode_function(func_info);
+ u32 id = func_map[pair.first];
+ subfunctions[id - 1] = gen_function(func_info, id); // Ids start at 1, vector slots at 0
 }
}
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 5f88537bc4..2bc596512a 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -33,6 +33,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
// With the previous preconditions, this instruction is a no-operation.
break;
}
+ case OpCode::Id::RET:
case OpCode::Id::EXIT: {
const ConditionCode cc = instr.flow_condition_code;
UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
@@ -312,6 +313,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
+ case OpCode::Id::CAL: {
+ const u32 target = pc + instr.bra.GetBranchTarget();
+ const auto it = func_map.find(target);
+ if (it == func_map.end()) {
+ UNREACHABLE();
+ break;
+ }
+ bb.push_back(FunctionCall(it->second));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b54d33763d..a58e7c65e4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -267,10 +267,11 @@ class PatchNode;
class SmemNode;
class GmemNode;
class CommentNode;
+class FunctionCallNode;
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
- LmemNode, SmemNode, GmemNode, CommentNode>;
+ LmemNode, SmemNode, GmemNode, FunctionCallNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -494,6 +495,18 @@ private:
std::vector<Node> code; ///< Code to execute
};
+class FunctionCallNode final : public AmendNode { // Node representing a call to a function by id
+public:
+ explicit FunctionCallNode(u32 func_id_) : func_id{func_id_} {}
+
+ [[nodiscard]] u32 GetFuncId() const { // Returns the id given at construction
+ return func_id;
+ }
+
+private:
+ u32 func_id; ///< Id of the function to call
+};
+
/// A general purpose register
class GprNode final {
public:
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6a5b6940d1..cef9c26bc3 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -19,6 +19,11 @@ Node Comment(std::string text) {
return MakeNode<CommentNode>(std::move(text));
}
+/// Creates a FunctionCallNode that calls the function identified by func_id
+Node FunctionCall(u32 func_id) {
+ return MakeNode<FunctionCallNode>(func_id);
+}
+
Node Immediate(u32 value) {
return MakeNode<ImmediateNode>(value);
}
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 1e0886185d..3f882cd25d 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -27,6 +27,9 @@ Node Conditional(Node condition, std::vector<Node> code);
/// Creates a commentary node
Node Comment(std::string text);
+/// Creates a function call
+Node FunctionCall(u32 func_id);
+
/// Creates an u32 immediate
Node Immediate(u32 value);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1cd7c14d76..94715b0699 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -26,7 +26,7 @@ namespace VideoCommon::Shader {
struct ShaderBlock;
-constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
+constexpr u32 MAX_PROGRAM_LENGTH = 0x2000;
struct ConstBuffer {
constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
@@ -64,16 +64,68 @@ struct GlobalMemoryUsage {
bool is_written{};
};
-class ShaderIR final {
+class ShaderFunctionIR final { // Decoded form of one function: basic blocks or an AST
public:
- explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
- CompilerSettings settings_, Registry& registry_);
- ~ShaderIR();
+ explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_,
+ u32 id_, u32 coverage_begin_, u32 coverage_end_)
+ : basic_blocks{std::move(basic_blocks_)}, decompiled{false},
+ disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_},
+ coverage_end{coverage_end_} {} // Basic-block form: flagged as not decompiled
+ explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_,
+ u32 coverage_end_)
+ : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true},
+ id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {} // AST form: flagged as decompiled
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
}
+ [[nodiscard]] bool IsFlowStackDisabled() const {
+ return disable_flow_stack;
+ }
+
+ [[nodiscard]] bool IsDecompiled() const { // True only when built through the ASTManager constructor
+ return decompiled;
+ }
+
+ const ASTManager& GetASTManager() const {
+ return program_manager;
+ }
+
+ [[nodiscard]] ASTNode GetASTProgram() const {
+ return program_manager.GetProgram();
+ }
+
+ [[nodiscard]] u32 GetASTNumVariables() const {
+ return program_manager.GetVariables();
+ }
+
+ [[nodiscard]] bool IsMain() const { // Id 0 identifies the main function
+ return id == 0;
+ }
+
+ [[nodiscard]] u32 GetId() const {
+ return id;
+ }
+
+private:
+ std::map<u32, NodeBlock> basic_blocks; // Set only by the basic-block constructor
+ ASTManager program_manager{true, true}; // Replaced by the AST constructor's argument
+
+ bool decompiled{};
+ bool disable_flow_stack{};
+ u32 id{};
+
+ u32 coverage_begin{}; // Range given at construction; no accessor is visible in this class
+ u32 coverage_end{};
+};
+
+class ShaderIR final {
+public:
+ explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
+ CompilerSettings settings_, Registry& registry_);
+ ~ShaderIR();
+
const std::set<u32>& GetRegisters() const {
return used_registers;
}
@@ -155,26 +207,6 @@ public:
return header;
}
- bool IsFlowStackDisabled() const {
- return disable_flow_stack;
- }
-
- bool IsDecompiled() const {
- return decompiled;
- }
-
- const ASTManager& GetASTManager() const {
- return program_manager;
- }
-
- ASTNode GetASTProgram() const {
- return program_manager.GetProgram();
- }
-
- u32 GetASTNumVariables() const {
- return program_manager.GetVariables();
- }
-
u32 ConvertAddressToNvidiaSpace(u32 address) const {
return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
}
@@ -190,7 +222,16 @@ public:
return num_custom_variables;
}
+ std::shared_ptr<ShaderFunctionIR> GetMainFunction() const {
+ return main_function;
+ }
+
+ const std::vector<std::shared_ptr<ShaderFunctionIR>>& GetSubFunctions() const {
+ return subfunctions;
+ }
+
private:
+ friend class ExprDecoder;
friend class ASTDecoder;
struct SamplerInfo {
@@ -453,6 +494,10 @@ private:
std::vector<Node> amend_code;
u32 num_custom_variables{};
+ std::shared_ptr<ShaderFunctionIR> main_function;
+ std::vector<std::shared_ptr<ShaderFunctionIR>> subfunctions;
+ std::unordered_map<u32, u32> func_map;
+
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;