From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 9 Jan 2021 03:30:07 -0300 Subject: shader: Initial recompiler work --- .../frontend/maxwell/control_flow.cpp | 531 +++++++++++++++++++++ 1 file changed, 531 insertions(+) create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.cpp (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 0000000000..fc4dba8269 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -0,0 +1,531 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <optional> +#include <ranges> +#include <string> +#include <utility> + +#include <fmt/format.h> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell::Flow { + +static u32 BranchOffset(Location pc, Instruction inst) { + return pc.Offset() + inst.branch.Offset() + 8; +} + +static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { + if (pc <= block.begin || pc >= block.end) { + throw InvalidArgument("Invalid address to split={}", pc); + } + return { + Block{ + .begin{block.begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{block.id}, + .stack{block.stack}, + .cond{true}, + .branch_true{new_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }, + Block{ + .begin{pc}, + .end{block.end}, + .end_class{block.end_class}, + .id{new_id}, + .stack{std::move(block.stack)}, + .cond{block.cond}, + .branch_true{block.branch_true}, + .branch_false{block.branch_false}, + }, + }; +} + +static Token OpcodeToken(Opcode opcode) { + switch (opcode) { + case Opcode::PBK: + case Opcode::BRK: + return Token::PBK; + case Opcode::PCNT: + case Opcode::CONT: + return Token::PBK; + case Opcode::PEXIT: + case Opcode::EXIT: + return Token::PEXIT; + case Opcode::PLONGJMP: + case Opcode::LONGJMP: + return Token::PLONGJMP; + case Opcode::PRET: + case Opcode::RET: + case Opcode::CAL: + return Token::PRET; + case Opcode::SSY: + case Opcode::SYNC: + return Token::SSY; + default: + throw InvalidArgument("{}", opcode); + } +} + +static bool IsAbsoluteJump(Opcode opcode) { + switch (opcode) { + case Opcode::JCAL: + case Opcode::JMP: + case Opcode::JMX: + return true; + default: + return false; + } +} + +static bool HasFlowTest(Opcode opcode) { + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::EXIT: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::RET: + case Opcode::SYNC: + return true; + case Opcode::CAL: + case Opcode::JCAL: + return false; + default: + throw InvalidArgument("Invalid branch {}", opcode); + } +} + +static std::string Name(const Block& block) { + if (block.begin.IsVirtual()) { + return fmt::format("\"Virtual {}\"", block.id); + } else { + return fmt::format("\"{}\"", block.begin); + } +} + +void Stack::Push(Token token, Location target) { + entries.push_back({ + .token{token}, + .target{target}, + }); +} + +std::pair<Location, Stack> Stack::Pop(Token token) const { + const std::optional<Location> pc{Peek(token)}; + if (!pc) { + throw LogicError("Token could not be found"); + } + return {*pc, Remove(token)}; +} + +std::optional<Location> Stack::Peek(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + if (it == reverse_entries.end()) { + return std::nullopt; + } + return it->target; +} + +Stack Stack::Remove(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + const auto pos{std::distance(reverse_entries.begin(), it)}; + Stack result; + result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); + return result; +} + +bool Block::Contains(Location pc) const noexcept { + return pc >= begin && pc < end; +} + +Function::Function(Location start_address) + : entrypoint{start_address}, labels{Label{ + .address{start_address}, + .block_id{0}, + .stack{}, + }} {} + +CFG::CFG(Environment& env_, Location start_address) : env{env_} { + functions.emplace_back(start_address); + for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { + while (!functions[function_id].labels.empty()) { + Function& function{functions[function_id]}; + Label label{function.labels.back()}; + function.labels.pop_back(); + AnalyzeLabel(function_id, label); + } + } +} + +void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { + if (InspectVisitedBlocks(function_id, label)) { + // Label address has been visited + return; + } + // Try to find the next block + Function* function{&functions[function_id]}; + Location pc{label.address}; + const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, + [function](Location pc, u32 block_index) { + return pc < function->blocks_data[block_index].begin; + })}; + const auto next_index{std::distance(function->blocks.begin(), next)}; + const bool is_last{next == function->blocks.end()}; + Location next_pc; + BlockId next_id{UNREACHABLE_BLOCK_ID}; + if (!is_last) { + next_pc = function->blocks_data[*next].begin; + next_id = function->blocks_data[*next].id; + } + // Insert before the next block + Block block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{label.block_id}, + .stack{std::move(label.stack)}, + .cond{true}, + .branch_true{UNREACHABLE_BLOCK_ID}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }; + // Analyze instructions until it reaches an already visited block or there's a branch + bool is_branch{false}; + while (is_last || pc < next_pc) { + is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; + if (is_branch) { + break; + } + ++pc; + } + if (!is_branch) { + // If the block finished without a branch, + // it means that the next instruction is already visited, jump to it + block.end = pc; + block.cond = true; + block.branch_true = next_id; + block.branch_false = UNREACHABLE_BLOCK_ID; + } + // Function's pointer might be invalid, resolve it again + function = &functions[function_id]; + const u32 new_block_index = static_cast<u32>(function->blocks_data.size()); + function->blocks.insert(function->blocks.begin() + next_index, new_block_index); + function->blocks_data.push_back(std::move(block)); +} + +bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { + const Location pc{label.address}; + Function& function{functions[function_id]}; + const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { + return function.blocks_data[block_index].Contains(pc); + })}; + if (it == function.blocks.end()) { + // Address has not been visited + return false; + } + Block& block{function.blocks_data[*it]}; + if (block.begin == pc) { + throw LogicError("Dangling branch"); + } + const u32 first_index{*it}; + const u32 second_index{static_cast<u32>(function.blocks_data.size())}; + const std::array new_indices{first_index, second_index}; + std::array split_blocks{Split(std::move(block), pc, label.block_id)}; + function.blocks_data[*it] = std::move(split_blocks[0]); + function.blocks_data.push_back(std::move(split_blocks[1])); + function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + return true; +} + +CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { + const Instruction inst{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(inst.raw)}; + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::RET: + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::BRX: + case Opcode::JMX: + AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::RET: + block.end_class = EndClass::Return; + break; + default: + break; + } + block.end = pc; + return AnalysisState::Branch; + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::SYNC: { + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; + block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block.end = pc; + return AnalysisState::Branch; + } + case Opcode::PBK: + case Opcode::PCNT: + case Opcode::PEXIT: + case Opcode::PLONGJMP: + case Opcode::SSY: + block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + return AnalysisState::Continue; + case Opcode::EXIT: + return AnalyzeEXIT(block, function_id, pc, inst); + case Opcode::PRET: + throw NotImplementedException("PRET flow analysis"); + case Opcode::CAL: + case Opcode::JCAL: { + const bool is_absolute{IsAbsoluteJump(opcode)}; + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function into the list if it doesn't exist + if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { + functions.push_back(cal_pc); + } + // Handle CAL like a regular instruction + break; + } + default: + break; + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{true} || pred == Predicate{false}) { + return AnalysisState::Continue; + } + const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + return AnalysisState::Branch; +} + +void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, + EndClass insn_end_class, IR::Condition cond) { + if (block.begin != pc) { + // If the block doesn't start in the conditional instruction + // mark it as a label to visit it later + block.end = pc; + block.cond = true; + block.branch_true = AddLabel(block, block.stack, pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return; + } + // Impersonate the visited block with a virtual block + // Jump from this virtual to the real conditional instruction and the next instruction + Function& function{functions[function_id]}; + const BlockId conditional_block_id{++function.current_block_id}; + function.blocks.push_back(static_cast<u32>(function.blocks_data.size())); + Block& virtual_block{function.blocks_data.emplace_back(Block{ + .begin{}, // Virtual block + .end{}, + .end_class{EndClass::Branch}, + .id{block.id}, // Impersonating + .stack{block.stack}, + .cond{cond}, + .branch_true{conditional_block_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + })}; + // Set the end properties of the conditional instruction and give it a new identity + Block& conditional_block{block}; + conditional_block.end = pc; + conditional_block.end_class = insn_end_class; + conditional_block.id = conditional_block_id; + // Add a label to the instruction after the conditional instruction + const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + // Branch to the next instruction from the virtual block + virtual_block.branch_false = endif_block_id; + // And branch to it from the conditional instruction if it is a branch + if (insn_end_class == EndClass::Branch) { + conditional_block.cond = true; + conditional_block.branch_true = endif_block_id; + conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + } +} + +bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode) { + if (inst.branch.is_cbuf) { + throw NotImplementedException("Branch with constant buffer offset"); + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false}) { + return false; + } + const bool has_flow_test{HasFlowTest(opcode)}; + const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + block.cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); + block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + } else { + block.cond = true; + } + return true; +} + +void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute) { + const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); +} + +void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { + throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +} + +void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function to the function list if it doesn't exist + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + if (it == functions.end()) { + functions.emplace_back(cal_pc); + } +} + +CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, + Instruction inst) { + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { + // EXIT will never be taken + return AnalysisState::Continue; + } + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + if (block.stack.Peek(Token::PEXIT).has_value()) { + throw NotImplementedException("Conditional EXIT with PEXIT token"); + } + const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + return AnalysisState::Branch; + } + if (const std::optional<Location> exit_pc{block.stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; + block.cond = true; + block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return AnalysisState::Branch; + } + block.end = pc; + block.end_class = EndClass::Exit; + return AnalysisState::Branch; +} + +BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { + Function& function{functions[function_id]}; + if (block.begin == pc) { + return block.id; + } + const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; + if (target != function.blocks_data.end()) { + return target->id; + } + const BlockId block_id{++function.current_block_id}; + function.labels.push_back(Label{ + .address{pc}, + .block_id{block_id}, + .stack{std::move(stack)}, + }); + return block_id; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const u32 block_index : function.blocks) { + const Block& block{function.blocks_data[block_index]}; + const std::string name{Name(block)}; + const auto add_branch = [&](BlockId branch_id, bool add_label) { + const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; + dot += fmt::format("\t\t{}->", name); + if (it == function.blocks_data.end()) { + dot += fmt::format("\"Unknown label {}\"", branch_id); + } else { + dot += Name(*it); + }; + if (add_label && block.cond != true && block.cond != false) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != false) { + add_branch(block.branch_true, true); + } + if (block.cond != true) { + add_branch(block.branch_false, false); + } + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Unreachable: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format( + "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + if (functions.front().blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow -- cgit v1.2.3-70-g09d2 From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Tue, 2 Feb 2021 21:07:00 -0300 Subject: shader: SSA and dominance --- src/shader_recompiler/CMakeLists.txt | 3 + src/shader_recompiler/frontend/ir/basic_block.cpp | 51 +++++-- src/shader_recompiler/frontend/ir/basic_block.h | 20 ++- src/shader_recompiler/frontend/ir/function.cpp | 5 + src/shader_recompiler/frontend/ir/function.h | 25 ++++ .../frontend/ir/microinstruction.cpp | 22 +++ .../frontend/ir/microinstruction.h | 10 ++ src/shader_recompiler/frontend/ir/opcode.inc | 8 ++ src/shader_recompiler/frontend/ir/pred.h | 7 + src/shader_recompiler/frontend/ir/reg.h | 9 +- src/shader_recompiler/frontend/ir/value.cpp | 37 +++++ src/shader_recompiler/frontend/ir/value.h | 3 + .../frontend/maxwell/control_flow.cpp | 130 ++++++++++++++++- .../frontend/maxwell/control_flow.h | 44 +++++- src/shader_recompiler/frontend/maxwell/program.cpp | 75 +++++----- src/shader_recompiler/frontend/maxwell/program.h | 11 +- .../frontend/maxwell/termination_code.cpp | 7 + .../frontend/maxwell/termination_code.h | 1 + .../frontend/maxwell/translate/impl/impl.h | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 6 +- .../ir_opt/identity_removal_pass.cpp | 1 - src/shader_recompiler/ir_opt/passes.h | 9 ++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 155 +++++++++++++++++++++ src/shader_recompiler/main.cpp | 4 +- 24 files changed, 570 insertions(+), 77 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/function.cpp create mode 100644 src/shader_recompiler/frontend/ir/function.h create mode 100644 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index c65846bc44..36a61f21ac 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -11,6 +11,8 @@ add_executable(shader_recompiler frontend/ir/condition.h frontend/ir/flow_test.cpp frontend/ir/flow_test.h + frontend/ir/function.cpp + frontend/ir/function.h frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp @@ -51,6 +53,7 @@ add_executable(shader_recompiler ir_opt/get_set_elimination_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/passes.h + ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp main.cpp ) diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 0406726ad9..e795618fcf 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -37,6 +37,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } +void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { + imm_predecessors.push_back(immediate_predecessor); +} + u32 Block::LocationBegin() const noexcept { return location_begin; } @@ -53,6 +57,18 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept { + return imm_predecessors; +} + +static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); +} + static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, const std::map<const Inst*, size_t>& inst_to_index, const Value& arg) { @@ -60,10 +76,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return "<null>"; } if (arg.IsLabel()) { - if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) { - return fmt::format("{{Block ${}}}", it->second); - } - return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label())); + return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { @@ -115,16 +128,26 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; - for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { - const Value arg{inst.Arg(arg_index)}; - ret += arg_index != 0 ? ", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + if (op == Opcode::Phi) { + size_t val_index{0}; + for (const auto& [phi_block, phi_val] : inst.PhiOperands()) { + ret += val_index != 0 ? ", " : " "; + ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val), + BlockToIndex(block_to_index, phi_block)); + ++val_index; + } + } else { + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + ret += arg_index != 0 ? ", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); + + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3ed2eb9571..4b6b80c4b2 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -6,6 +6,8 @@ #include <initializer_list> #include <map> +#include <span> +#include <vector> #include <boost/intrusive/list.hpp> #include <boost/pool/pool_alloc.hpp> @@ -36,7 +38,11 @@ public: void AppendNewInst(Opcode op, std::initializer_list<Value> args); /// Prepends a new instruction to this basic block before the insertion point. - iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args); + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args = {}); + + /// Adds a new immediate predecessor to the basic block. + void AddImmediatePredecessor(IR::Block* immediate_predecessor); /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; @@ -44,9 +50,12 @@ public: [[nodiscard]] u32 LocationEnd() const noexcept; /// Gets a mutable reference to the instruction list for this basic block. - InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept; /// Gets an immutable reference to the instruction list for this basic block. - const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept; + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept; [[nodiscard]] bool empty() const { return instructions.empty(); @@ -115,13 +124,16 @@ private: /// End location of this block u32 location_end; - /// List of instructions in this block. + /// List of instructions in this block InstructionList instructions; /// Memory pool for instruction list boost::fast_pool_allocator<Inst, boost::default_user_allocator_malloc_free, boost::details::pool::null_mutex> instruction_alloc_pool; + + /// Block immediate predecessors + std::vector<IR::Block*> imm_predecessors; }; [[nodiscard]] std::string DumpBlock(const Block& block); diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp new file mode 100644 index 0000000000..d1fc9461d0 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.cpp @@ -0,0 +1,5 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h new file mode 100644 index 0000000000..2d4dc5b981 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +struct Function { + struct InplaceDelete { + void operator()(IR::Block* block) const noexcept { + std::destroy_at(block); + } + }; + using UniqueBlock = std::unique_ptr<IR::Block, InplaceDelete>; + + std::vector<UniqueBlock> blocks; +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 553fec3b7f..ecf76e23d8 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -30,6 +30,11 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) bool Inst::MayHaveSideEffects() const noexcept { switch (op) { + case Opcode::Branch: + case Opcode::BranchConditional: + case Opcode::Exit: + case Opcode::Return: + case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: @@ -113,6 +118,17 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } +std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept { + return phi_operands; +} + +void Inst::AddPhiOperand(Block* predecessor, const Value& value) { + if (!value.IsImmediate()) { + Use(value); + } + phi_operands.emplace_back(predecessor, value); +} + void Inst::Invalidate() { ClearArgs(); op = Opcode::Void; @@ -125,6 +141,12 @@ void Inst::ClearArgs() { } value = {}; } + for (auto& [phi_block, phi_op] : phi_operands) { + if (!phi_op.IsImmediate()) { + UndoUse(phi_op); + } + } + phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 43460b9505..7f1ed6710c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,6 +5,8 @@ #pragma once #include <array> +#include <span> +#include <vector> #include <boost/intrusive/list.hpp> @@ -15,6 +17,8 @@ namespace Shader::IR { +class Block; + constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { @@ -59,6 +63,11 @@ public: /// Set the value of a given argument index. void SetArg(size_t index, Value value); + /// Get an immutable span to the phi operands. + [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + void Invalidate(); void ClearArgs(); @@ -76,6 +85,7 @@ private: Inst* carry_inst{}; Inst* overflow_inst{}; Inst* zsco_inst{}; + std::vector<std::pair<Block*, Value>> phi_operands; u64 flags{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 371064bf3d..40759e96ab 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -5,6 +5,7 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) @@ -35,6 +36,13 @@ OPCODE(SetSFlag, Void, U1, OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +// Undefined +OPCODE(Undef1, U1, ) +OPCODE(Undef8, U8, ) +OPCODE(Undef16, U16, ) +OPCODE(Undef32, U32, ) +OPCODE(Undef64, U64, ) + // Memory operations OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index 37cc530068..daf23193f2 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -10,6 +10,13 @@ namespace Shader::IR { enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +constexpr size_t NUM_USER_PREDS = 6; +constexpr size_t NUM_PREDS = 7; + +[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { + return static_cast<size_t>(pred); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 316fc4be8d..771094eb9a 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -271,6 +271,9 @@ enum class Reg : u64 { }; static_assert(static_cast<int>(Reg::RZ) == 255); +constexpr size_t NUM_USER_REGS = 255; +constexpr size_t NUM_REGS = 256; + [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { if (reg == Reg::RZ) { // Adding or subtracting registers from RZ yields RZ @@ -290,8 +293,12 @@ static_assert(static_cast<int>(Reg::RZ) == 255); return reg + (-num); } +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast<size_t>(reg); +} + [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { - return (static_cast<size_t>(reg) / align) * align == static_cast<size_t>(reg); + return (RegIndex(reg) / align) * align == RegIndex(reg); } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 7b5b35d6c5..1e974e88c7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -115,6 +115,43 @@ u64 Value::U64() const { return imm_u64; } +bool Value::operator==(const Value& other) const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::Label: + return label == other.label; + case Type::Reg: + return reg == other.reg; + case Type::Pred: + return pred == other.pred; + case Type::Attribute: + return attribute == other.attribute; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + return imm_u16 == other.imm_u16; + case Type::U32: + return imm_u32 == other.imm_u32; + case Type::U64: + return imm_u64 == other.imm_u64; + case Type::ZSCO: + throw NotImplementedException("ZSCO comparison"); + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + void Value::ValidateAccess(IR::Type expected) const { if (type != expected) { throw LogicError("Reading {} out of {}", expected, type); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 664dacf9d7..368119921b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -48,6 +48,9 @@ public: [[nodiscard]] u32 U32() const; [[nodiscard]] u64 U64() const; + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + private: void ValidateAccess(IR::Type expected) const; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index fc4dba8269..21ee981371 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -36,6 +36,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { .cond{true}, .branch_true{new_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }, Block{ .begin{pc}, @@ -46,6 +47,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { .cond{block.cond}, .branch_true{block.branch_true}, .branch_false{block.branch_false}, + .imm_predecessors{}, }, }; } @@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string Name(const Block& block) { +static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.id); } else { @@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(Location start_address) - : entrypoint{start_address}, labels{Label{ + : entrypoint{start_address}, labels{{ .address{start_address}, .block_id{0}, .stack{}, }} {} +void Function::BuildBlocksMap() { + const size_t num_blocks{NumBlocks()}; + blocks_map.resize(num_blocks); + for (size_t block_index = 0; block_index < num_blocks; ++block_index) { + Block& block{blocks_data[block_index]}; + blocks_map[block.id] = █ + } +} + +void Function::BuildImmediatePredecessors() { + for (const Block& block : blocks_data) { + if (block.branch_true != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); + } + if (block.branch_false != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); + } + } +} + +void Function::BuildPostOrder() { + boost::container::small_vector<BlockId, 0x110> block_stack; + post_order_map.resize(NumBlocks()); + + Block& first_block{blocks_data[blocks.front()]}; + first_block.post_order_visited = true; + block_stack.push_back(first_block.id); + + const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { + if (branch_id == UNREACHABLE_BLOCK_ID) { + return false; + } + if (blocks_map[branch_id]->post_order_visited) { + return false; + } + blocks_map[branch_id]->post_order_visited = true; + + // Calling push_back twice is faster than insert on msvc + block_stack.push_back(block_id); + block_stack.push_back(branch_id); + return true; + }; + while (!block_stack.empty()) { + const Block* const block{blocks_map[block_stack.back()]}; + block_stack.pop_back(); + + if (!visit_branch(block->id, block->branch_true) && + !visit_branch(block->id, block->branch_false)) { + post_order_map[block->id] = static_cast<u32>(post_order_blocks.size()); + post_order_blocks.push_back(block->id); + } + } +} + +void Function::BuildImmediateDominators() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; + auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; + auto intersect{[this](Block* finger1, Block* finger2) { + while (finger1 != finger2) { + while (post_order_map[finger1->id] < post_order_map[finger2->id]) { + finger1 = finger1->imm_dominator; + } + while (post_order_map[finger2->id] < post_order_map[finger1->id]) { + finger2 = finger2->imm_dominator; + } + } + return finger1; + }}; + for (Block& block : blocks_data) { + block.imm_dominator = nullptr; + } + Block* const start_block{&blocks_data[blocks.front()]}; + start_block->imm_dominator = start_block; + + bool changed{true}; + while (changed) { + changed = false; + for (Block* const block : post_order_blocks | reverse_order_but_first) { + Block* new_idom{}; + for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { + new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; + } + changed |= block->imm_dominator != new_idom; + block->imm_dominator = new_idom; + } + } +} + +void Function::BuildDominanceFrontier() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; + for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { + for (Block* current : block.imm_predecessors | transform_block_id) { + while (current != block.imm_dominator) { + current->dominance_frontiers.push_back(current->id); + current = current->imm_dominator; + } + } + } +} + CFG::CFG(Environment& env_, Location start_address) : env{env_} { + VisitFunctions(start_address); + + for (Function& function : functions) { + function.BuildBlocksMap(); + function.BuildImmediatePredecessors(); + function.BuildPostOrder(); + function.BuildImmediateDominators(); + function.BuildDominanceFrontier(); + } +} + +void CFG::VisitFunctions(Location start_address) { functions.emplace_back(start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { .cond{true}, .branch_true{UNREACHABLE_BLOCK_ID}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; @@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.push_back(cal_pc); + functions.emplace_back(cal_pc); } // Handle CAL like a regular instruction break; @@ -352,6 +469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, })}; // Set the end properties of the conditional instruction and give it a new identity Block& conditional_block{block}; @@ -465,14 +583,14 @@ std::string CFG::Dot() const { dot += fmt::format("\t\tnode [style=filled];\n"); for (const u32 block_index : function.blocks) { const Block& block{function.blocks_data[block_index]}; - const std::string name{Name(block)}; + const std::string name{NameOf(block)}; const auto add_branch = [&](BlockId branch_id, bool add_label) { const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; dot += fmt::format("\t\t{}->", name); if (it == function.blocks_data.end()) { dot += fmt::format("\"Unknown label {}\"", branch_id); } else { - dot += Name(*it); + dot += NameOf(*it); }; if (add_label && block.cond != true && block.cond != false) { dot += fmt::format(" [label=\"{}\"]", block.cond); @@ -520,7 +638,7 @@ std::string CFG::Dot() const { if (functions.front().blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index b2ab0cdc35..20ada8afd9 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -70,6 +70,12 @@ struct Block { IR::Condition cond; BlockId branch_true; BlockId branch_false; + boost::container::small_vector<BlockId, 4> imm_predecessors; + boost::container::small_vector<BlockId, 8> dominance_frontiers; + union { + bool post_order_visited{false}; + Block* imm_dominator; + }; }; struct Label { @@ -81,11 +87,30 @@ struct Label { struct Function { Function(Location start_address); + void BuildBlocksMap(); + + void BuildImmediatePredecessors(); + + void BuildPostOrder(); + + void BuildImmediateDominators(); + + void BuildDominanceFrontier(); + + [[nodiscard]] size_t NumBlocks() const noexcept { + return static_cast<size_t>(current_block_id) + 1; + } + Location entrypoint; BlockId current_block_id{0}; boost::container::small_vector<Label, 16> labels; boost::container::small_vector<u32, 0x130> blocks; boost::container::small_vector<Block, 0x130> blocks_data; + // Translates from BlockId to block index + boost::container::small_vector<Block*, 0x130> blocks_map; + + boost::container::small_vector<u32, 0x130> post_order_blocks; + boost::container::small_vector<BlockId, 0x130> post_order_map; }; class CFG { @@ -97,6 +122,12 @@ class CFG { public: explicit CFG(Environment& env, Location start_address); + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + [[nodiscard]] std::string Dot() const; [[nodiscard]] std::span<const Function> Functions() const noexcept { @@ -104,20 +135,22 @@ public: } private: + void VisitFunctions(Location start_address); + void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited - [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, - Instruction inst, Opcode opcode); + bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); @@ -126,8 +159,7 @@ private: AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, - FunctionId function_id); + BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); Environment& env; boost::container::small_vector<Function, 1> functions; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 67a98ba57a..49d1f4bfb7 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,40 +8,53 @@ #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { +namespace { +void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span<IR::Block*> block_map, IR::Block* block_memory) { + const size_t num_blocks{cfg_function.blocks.size()}; + function.blocks.reserve(num_blocks); -Program::Function::~Function() { - std::ranges::for_each(blocks, &std::destroy_at<IR::Block>); -} - -Program::Program(Environment& env, const Flow::CFG& cfg) { - std::vector<IR::Block*> block_map; - functions.reserve(cfg.Functions().size()); + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - for (const Flow::Function& cfg_function : cfg.Functions()) { - Function& function{functions.emplace_back()}; + function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); + block_map[flow_block.id] = function.blocks.back().get(); + ++block_memory; + } +} - const size_t num_blocks{cfg_function.blocks.size()}; - IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)}; - function.blocks.reserve(num_blocks); +void EmitTerminationInsts(const Flow::Function& cfg_function, + std::span<IR::Block* const> block_map) { + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } +} - block_map.resize(cfg_function.blocks_data.size()); +void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + IR::Block* block_memory) { + std::vector<IR::Block*> block_map; + block_map.resize(cfg_function.blocks_data.size()); - // Visit the instructions of all blocks - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + TranslateCode(env, cfg_function, function, block_map, block_memory); + EmitTerminationInsts(cfg_function, block_map); +} +} // Anonymous namespace - IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; - ++block_memory; - function.blocks.push_back(block); - block_map[flow_block.id] = block; - } - // Now that all blocks are defined, emit the termination instructions - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } +Program::Program(Environment& env, const Flow::CFG& cfg) { + functions.reserve(cfg.Functions().size()); + for (const Flow::Function& cfg_function : cfg.Functions()) { + TranslateFunction(env, cfg_function, functions.emplace_back(), + block_alloc_pool.allocate(cfg_function.blocks.size())); + } + std::ranges::for_each(functions, Optimization::SsaRewritePass); + for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); + Optimization::Invoke(Optimization::IdentityRemovalPass, function); + // Optimization::Invoke(Optimization::VerificationPass, function); } } @@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) { std::map<const IR::Inst*, size_t> inst_to_index; std::map<const IR::Block*, size_t> block_to_index; - for (const Program::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); + for (const IR::Function& function : program.functions) { + for (const auto& block : function.blocks) { + block_to_index.emplace(block.get(), index); ++index; } } std::string ret; - for (const Program::Function& function : program.functions) { + for (const IR::Function& function : program.functions) { ret += fmt::format("Function\n"); - for (const IR::Block* const block : function.blocks) { + for (const auto& block : function.blocks) { ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } } diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 7814b2c016..36e678a9ea 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -4,13 +4,16 @@ #pragma once +#include <memory> #include <string> #include <vector> +#include <boost/container/small_vector.hpp> #include <boost/pool/pool_alloc.hpp> #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" namespace Shader::Maxwell { @@ -22,16 +25,10 @@ public: explicit Program(Environment& env, const Flow::CFG& cfg); private: - struct Function { - ~Function(); - - std::vector<IR::Block*> blocks; - }; - boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete, boost::details::pool::null_mutex> block_alloc_pool; - std::vector<Function> functions; + boost::container::small_vector<IR::Function, 1> functions; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp index a4ea5c5e38..ed5137f20c 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map, IR::IREmitter& ir) { + const auto add_immediate_predecessor = [&](Flow::BlockId label) { + block_map[label]->AddImmediatePredecessor(&ir.block); + }; if (flow_block.cond == true) { + add_immediate_predecessor(flow_block.branch_true); return ir.Branch(block_map[flow_block.branch_true]); } if (flow_block.cond == false) { + add_immediate_predecessor(flow_block.branch_false); return ir.Branch(block_map[flow_block.branch_false]); } + add_immediate_predecessor(flow_block.branch_true); + add_immediate_predecessor(flow_block.branch_false); return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], block_map[flow_block.branch_false]); } diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h index b0d667942e..04e0445340 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -11,6 +11,7 @@ namespace Shader::Maxwell { +/// Emit termination instructions and collect immediate predecessors void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bc607b0025..8be7d6ff1b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -208,7 +208,7 @@ public: void P2R_reg(u64 insn); void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); - void PBK(u64 insn); + void PBK(); void PCNT(u64 insn); void PEXIT(u64 insn); void PIXLD(u64 insn); @@ -252,7 +252,7 @@ public: void SHR_reg(u64 insn); void SHR_cbuf(u64 insn); void SHR_imm(u64 insn); - void SSY(u64 insn); + void SSY(); void ST(u64 insn); void STG(u64 insn); void STL(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c907c1ffb5..0f52696d10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) { ThrowNotImplemented(Opcode::P2R_imm); } -void TranslatorVisitor::PBK(u64) { +void TranslatorVisitor::PBK() { // PBK is a no-op } @@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) { ThrowNotImplemented(Opcode::SHR_imm); } -void TranslatorVisitor::SSY(u64) { - ThrowNotImplemented(Opcode::SSY); +void TranslatorVisitor::SSY() { + // SSY is a no-op } void TranslatorVisitor::ST(u64) { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index f9bb063fb1..7f85000878 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -28,7 +28,6 @@ void IdentityRemovalPass(IR::Block& block) { ++inst; } } - for (IR::Inst* const inst : to_invalidate) { inst->Invalidate(); } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index fe5454e9a6..83f094d735 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -5,12 +5,21 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { +template <typename Func> +void Invoke(Func&& func, IR::Function& function) { + for (const auto& block : function.blocks) { + func(*block); + } +} + void DeadCodeEliminationPass(IR::Block& block); void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); +void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 0000000000..a4b256a40e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. +// Braun M., Buchwald S., Hack S., Lei�a R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. +// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include <map> + +#include <boost/container/flat_map.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; + +struct DefTable { + [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { + return regs[IR::RegIndex(variable)]; + } + + [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)]; + } + + std::array<ValueMap, IR::NUM_USER_REGS> regs; + std::array<ValueMap, IR::NUM_USER_PREDS> preds; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::Undef32; +} + +IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::Undef1; +} + +[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { + return inst.Opcode() == IR::Opcode::Phi; +} + +class Pass { +public: + void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + current_def[variable].insert_or_assign(block, value); + } + + IR::Value ReadVariable(auto variable, IR::Block* block) { + auto& def{current_def[variable]}; + if (const auto it{def.find(block)}; it != def.end()) { + return it->second; + } + return ReadVariableRecursive(variable, block); + } + +private: + IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + IR::Value val; + if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + val = ReadVariable(variable, preds.front()); + } else { + // Break potential cycles with operandless phi + val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + WriteVariable(variable, block, val); + val = AddPhiOperands(variable, val, block); + } + WriteVariable(variable, block, val); + return val; + } + + IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + for (IR::Block* const pred : block->ImmediatePredecessors()) { + phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + for (const auto& pair : phi.Inst()->PhiOperands()) { + const IR::Value& op{pair.second}; + if (op == same || op == phi) { + // Unique value or self-reference + continue; + } + if (!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return phi; + } + same = op; + } + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + } + // Reroute all uses of phi to same and remove phi + phi.Inst()->ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + DefTable current_def; +}; +} // Anonymous namespace + +void SsaRewritePass(IR::Function& function) { + Pass pass; + for (const auto& block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + } + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 39f0bf3336..e3c9ad6e8f 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -35,12 +35,12 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str())); }); - for (int i = 0; i < 1; ++i) { + for (int i = 0; i < 300; ++i) { for (auto& env : map) { // fmt::print(stdout, "Decoding {}\n", path.string()); const Location start_address{0}; auto cfg{std::make_unique<Flow::CFG>(*env, start_address)}; - // fmt::print(stdout, "{}\n", cfg.Dot()); + // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; -- cgit v1.2.3-70-g09d2 From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: shader: Initial implementation of an AST --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 45 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 18 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 8 + .../backend/spirv/emit_spirv_control_flow.cpp | 25 - .../backend/spirv/emit_spirv_undefined.cpp | 12 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 64 +- src/shader_recompiler/frontend/ir/basic_block.h | 40 +- src/shader_recompiler/frontend/ir/condition.cpp | 14 +- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/function.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 43 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 23 +- .../frontend/ir/microinstruction.cpp | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 16 +- .../frontend/ir/structured_control_flow.cpp | 742 +++++++++++++++++++++ .../frontend/ir/structured_control_flow.h | 22 + .../frontend/maxwell/control_flow.cpp | 426 +++++------- .../frontend/maxwell/control_flow.h | 77 +-- src/shader_recompiler/frontend/maxwell/location.h | 12 +- src/shader_recompiler/frontend/maxwell/program.cpp | 69 +- src/shader_recompiler/frontend/maxwell/program.h | 2 +- .../frontend/maxwell/termination_code.cpp | 86 --- .../frontend/maxwell/termination_code.h | 17 - .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 17 +- .../frontend/maxwell/translate/translate.h | 7 +- .../ir_opt/constant_propagation_pass.cpp | 50 ++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 24 +- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 + src/shader_recompiler/main.cpp | 29 +- src/shader_recompiler/shader_info.h | 28 + 33 files changed, 1346 insertions(+), 590 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/shader_info.h (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/externals/sirit b/externals/sirit index 1f7b70730d..c374bfd9fd 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35 +Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 12fbcb37c3..27fc79e21b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -36,6 +36,8 @@ add_executable(shader_recompiler frontend/ir/program.cpp frontend/ir/program.h frontend/ir/reg.h + frontend/ir/structured_control_flow.cpp + frontend/ir/structured_control_flow.h frontend/ir/type.cpp frontend/ir/type.h frontend/ir/value.cpp @@ -51,8 +53,6 @@ add_executable(shader_recompiler frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h - frontend/maxwell/termination_code.cpp - frontend/maxwell/termination_code.h frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7c4269fad4..5022b51597 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -105,8 +105,26 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->Opcode()); } -void EmitSPIRV::EmitPhi(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +static Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.u1; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector<Id, 64> operands; + operands.reserve(num_args * 2); + for (size_t index = 0; index < num_args; ++index) { + IR::Block* const phi_block{inst->PhiBlock(index)}; + operands.push_back(ctx.Def(inst->Arg(index))); + operands.push_back(ctx.BlockLabel(phi_block)); + } + const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; + return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } void EmitSPIRV::EmitVoid(EmitContext&) {} @@ -115,6 +133,29 @@ void EmitSPIRV::EmitIdentity(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +// FIXME: Move to its own file +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), + ctx.BlockLabel(inst->Arg(2).Label())); +} + +void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()), + spv::LoopControlMask::MaskNone); +} + +void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone); +} + +void EmitSPIRV::EmitReturn(EmitContext& ctx) { + ctx.OpReturn(); +} + void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 3f4b68a7d1..9aa83b5de4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -124,18 +124,20 @@ private: void EmitInst(EmitContext& ctx, IR::Inst* inst); // Microinstruction emitters - void EmitPhi(EmitContext& ctx); + Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); void EmitBranch(EmitContext& ctx, IR::Inst* inst); void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); - void EmitExit(EmitContext& ctx); + void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst); + void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst); void EmitReturn(EmitContext& ctx); - void EmitUnreachable(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); + void EmitSetGotoVariable(EmitContext& ctx); + void EmitGetGotoVariable(EmitContext& ctx); Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx); void EmitSetAttribute(EmitContext& ctx); @@ -151,11 +153,11 @@ private: void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); - void EmitUndef1(EmitContext& ctx); - void EmitUndef8(EmitContext& ctx); - void EmitUndef16(EmitContext& ctx); - void EmitUndef32(EmitContext& ctx); - void EmitUndef64(EmitContext& ctx); + Id EmitUndefU1(EmitContext& ctx); + void EmitUndefU8(EmitContext& ctx); + void EmitUndefU16(EmitContext& ctx); + void EmitUndefU32(EmitContext& ctx); + void EmitUndefU64(EmitContext& ctx); void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index b121305ea9..1eab739edb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -22,6 +22,14 @@ void EmitSPIRV::EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 770fe113c8..66ce6c8c54 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,28 +3,3 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" - -namespace Shader::Backend::SPIRV { - -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); -} - -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), - ctx.BlockLabel(inst->Arg(2).Label())); -} - -void EmitSPIRV::EmitExit(EmitContext& ctx) { - ctx.OpReturn(); -} - -void EmitSPIRV::EmitReturn(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSPIRV::EmitUnreachable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 3850b072ce..859b60a95d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitUndef1(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.u1); } -void EmitSPIRV::EmitUndef8(EmitContext&) { +void EmitSPIRV::EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef16(EmitContext&) { +void EmitSPIRV::EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef32(EmitContext&) { +void EmitSPIRV::EmitUndefU32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef64(EmitContext&) { +void EmitSPIRV::EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index da33ff6f10..b5616f3941 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -17,6 +17,8 @@ namespace Shader::IR { Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end) : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {} + Block::~Block() = default; void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { @@ -38,8 +40,25 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { - imm_predecessors.push_back(immediate_predecessor); +void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { + branch_cond = cond; + branch_true = branch_true_; + branch_false = branch_false_; +} + +void Block::SetBranch(Block* branch) { + branch_cond = Condition{true}; + branch_true = branch; +} + +void Block::SetReturn() { + branch_cond = Condition{true}; + branch_true = nullptr; + branch_false = nullptr; +} + +bool Block::IsVirtual() const noexcept { + return location_begin == location_end; } u32 Block::LocationBegin() const noexcept { @@ -58,6 +77,12 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +void Block::AddImmediatePredecessor(Block* block) { + if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { + imm_predecessors.push_back(block); + } +} + std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept { return imm_predecessors; } @@ -70,8 +95,17 @@ static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_i return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); } +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, - const std::map<const Inst*, size_t>& inst_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return "<null>"; @@ -80,10 +114,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { - if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { - return fmt::format("%{}", it->second); - } - return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst())); + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { case Type::U1: @@ -125,14 +156,14 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& const Opcode op{inst.Opcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); if (TypeOf(op) != Type::Void) { - ret += fmt::format("%{:<5} = {}", inst_index, op); + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; + const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_index, @@ -140,10 +171,12 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += arg_str; } - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { @@ -151,9 +184,6 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += '\n'; } - - inst_to_index.emplace(&inst, inst_index); - ++inst_index; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec3ad62634..3205705e79 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,7 +11,9 @@ #include <boost/intrusive/list.hpp> +#include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -26,6 +28,7 @@ public: using const_reverse_iterator = InstructionList::const_reverse_iterator; explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end); + explicit Block(ObjectPool<Inst>& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -41,9 +44,15 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args = {}, u64 flags = 0); - /// Adds a new immediate predecessor to the basic block. - void AddImmediatePredecessor(IR::Block* immediate_predecessor); + /// Set the branches to jump to when all instructions have executed. + void SetBranches(Condition cond, Block* branch_true, Block* branch_false); + /// Set the branch to unconditionally jump to when all instructions have executed. + void SetBranch(Block* branch); + /// Mark the block as a return block. + void SetReturn(); + /// Returns true when the block does not implement any guest instructions directly. + [[nodiscard]] bool IsVirtual() const noexcept; /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; /// Gets the end location for this basic block. @@ -54,8 +63,23 @@ public: /// Gets an immutable reference to the instruction list for this basic block. [[nodiscard]] const InstructionList& Instructions() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept; + + [[nodiscard]] Condition BranchCondition() const noexcept { + return branch_cond; + } + [[nodiscard]] bool IsTerminationBlock() const noexcept { + return !branch_true && !branch_false; + } + [[nodiscard]] Block* TrueBranch() const noexcept { + return branch_true; + } + [[nodiscard]] Block* FalseBranch() const noexcept { + return branch_false; + } [[nodiscard]] bool empty() const { return instructions.empty(); @@ -129,10 +153,18 @@ private: /// List of instructions in this block InstructionList instructions; + /// Condition to choose the branch to take + Condition branch_cond{true}; + /// Block to jump into when the branch condition evaluates as true + Block* branch_true{nullptr}; + /// Block to jump into when the branch condition evaluates as false + Block* branch_false{nullptr}; /// Block immediate predecessors - std::vector<IR::Block*> imm_predecessors; + std::vector<Block*> imm_predecessors; }; +using BlockList = std::vector<Block*>; + [[nodiscard]] std::string DumpBlock(const Block& block); [[nodiscard]] std::string DumpBlock(const Block& block, diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index edff35dc77..ec1659e2bc 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -16,15 +16,13 @@ std::string NameOf(Condition condition) { ret = fmt::to_string(condition.FlowTest()); } const auto [pred, negated]{condition.Pred()}; - if (pred != Pred::PT || negated) { - if (!ret.empty()) { - ret += '&'; - } - if (negated) { - ret += '!'; - } - ret += fmt::to_string(pred); + if (!ret.empty()) { + ret += '&'; } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); return ret; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 52737025c9..16b4ae888e 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -26,7 +26,7 @@ public: explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept : Condition(FlowTest::T, pred_, pred_negated_) {} - Condition(bool value) : Condition(Pred::PT, !value) {} + explicit Condition(bool value) : Condition(Pred::PT, !value) {} auto operator<=>(const Condition&) const noexcept = default; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index bba7d1d395..fd7d564191 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -11,7 +11,7 @@ namespace Shader::IR { struct Function { - boost::container::small_vector<Block*, 16> blocks; + BlockList blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ada0be8343..30932043f9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -44,24 +44,27 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(IR::Block* label) { +void IREmitter::Branch(Block* label) { + label->AddImmediatePredecessor(block); Inst(Opcode::Branch, label); } -void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) { - Inst(Opcode::BranchConditional, cond, true_label, false_label); +void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + true_label->AddImmediatePredecessor(block); + false_label->AddImmediatePredecessor(block); + Inst(Opcode::BranchConditional, condition, true_label, false_label); } -void IREmitter::Exit() { - Inst(Opcode::Exit); +void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { + Inst(Opcode::LoopMerge, merge_block, continue_target); } -void IREmitter::Return() { - Inst(Opcode::Return); +void IREmitter::SelectionMerge(Block* merge_block) { + Inst(Opcode::SelectionMerge, merge_block); } -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); +void IREmitter::Return() { + Inst(Opcode::Return); } U32 IREmitter::GetReg(IR::Reg reg) { @@ -81,6 +84,14 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst<U1>(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } @@ -121,6 +132,20 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::Condition(IR::Condition cond) { + if (cond == IR::Condition{true}) { + return Imm1(true); + } else if (cond == IR::Condition{false}) { + return Imm1(false); + } + const FlowTest flow_test{cond.FlowTest()}; + const auto [pred, is_negated]{cond.Pred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + throw NotImplementedException("Condition {}", cond); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst<F32>(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bfd9916cca..4decb46bc1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -16,11 +16,11 @@ namespace Shader::IR { class IREmitter { public: - explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) - : block{block_}, insertion_point{insertion_point_} {} + : block{&block_}, insertion_point{insertion_point_} {} - Block& block; + Block* block; [[nodiscard]] U1 Imm1(bool value) const; [[nodiscard]] U8 Imm8(u8 value) const; @@ -31,11 +31,11 @@ public: [[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(IR::Block* label); - void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); - void Exit(); + void Branch(Block* label); + void BranchConditional(const U1& condition, Block* true_label, Block* false_label); + void LoopMerge(Block* merge_block, Block* continue_target); + void SelectionMerge(Block* merge_block); void Return(); - void Unreachable(); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -43,6 +43,9 @@ public: [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); void SetPred(IR::Pred pred, const U1& value); + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); @@ -55,6 +58,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); @@ -168,7 +173,7 @@ private: template <typename T = Value, typename... Args> T Inst(Opcode op, Args... args) { - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } @@ -184,7 +189,7 @@ private: T Inst(Opcode op, Flags<FlagType> flags, Args... args) { u64 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e7ca92039e..b4ae371bd1 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -51,9 +51,9 @@ bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: case Opcode::BranchConditional: - case Opcode::Exit: + case Opcode::LoopMerge: + case Opcode::SelectionMerge: case Opcode::Return: - case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5dc65f2dfa..ede5e20c2d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -10,15 +10,17 @@ OPCODE(Identity, Opaque, Opaq // Control flow OPCODE(Branch, Void, Label, ) OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) OPCODE(GetPred, U1, Pred, ) OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) @@ -36,11 +38,11 @@ OPCODE(WorkgroupId, U32x3, OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations OPCODE(LoadGlobalU8, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp new file mode 100644 index 0000000000..2e9ce25256 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -0,0 +1,742 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <ranges> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, + // Allow using Statement without a definition + boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + Block* code; + Node label; + Tree children; + Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return "<invalid type>"; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement, 64>& stmt_pool) + : pool{stmt_pool} { + std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; + fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildUnorderedTreeGetGotos(std::span<Block* const> blocks) { + // Assume all blocks have two branches + std::vector<Node> gotos; + gotos.reserve(blocks.size() * 2); + + const std::unordered_map labels_map{BuildLabels(blocks)}; + Tree& root{root_stmt.children}; + auto insert_point{root.begin()}; + for (Block* const block : blocks) { + ++insert_point; // Skip label + ++insert_point; // Skip set variable + root.insert(insert_point, *pool.Create(block, &root_stmt)); + + if (block->IsTerminationBlock()) { + root.insert(insert_point, *pool.Create(Return{})); + continue; + } + const Condition cond{block->BranchCondition()}; + Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; + if (cond == Condition{true} || cond == Condition{false}) { + const bool is_true{cond == Condition{true}}; + const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()}; + const Node label{labels_map.at(branch)}; + Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_stmt)); + } else { + Statement* const ident_cond{pool.Create(Identity{}, cond)}; + const Node true_label{labels_map.at(block->TrueBranch())}; + const Node false_label{labels_map.at(block->FalseBranch())}; + Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; + Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_true)); + gotos.push_back(root.insert(insert_point, *goto_false)); + } + } + return gotos; + } + + std::unordered_map<const Block*, Node> BuildLabels(std::span<Block* const> blocks) { + // TODO: Consider storing labels intrusively inside the block + std::unordered_map<const Block*, Node> labels_map; + Tree& root{root_stmt.children}; + u32 label_id{0}; + for (const Block* const block : blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + labels_map.emplace(block, root.insert(root.end(), *label)); + Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; + root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + ++label_id; + } + return labels_map; + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + // Update nested if condition + switch (label_nested_stmt->type) { + case StatementType::If: + label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool<Statement, 64>& pool; + Statement root_stmt{FunctionTag{}}; +}; + +Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<Inst>& inst_pool_, ObjectPool<Block>& block_pool_, + ObjectPool<Statement, 64>& stmt_pool_, Statement& root_stmt, + const std::function<void(IR::Block*)>& func_, BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, Block* continue_block, Block* break_block) { + Tree& tree{parent.children}; + Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IREmitter ir{*current_block}; + ir.Branch(stmt.code); + } + current_block = stmt.code; + func(stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + Block* const first_if_block{block_list.at(first_block_index)}; + IREmitter ir{*current_block}; + const U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + Block* const new_continue_block{block_pool.Create(inst_pool)}; + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + Block* const first_loop_block{block_list.at(first_block_index)}; + IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IREmitter continue_ir{*new_continue_block}; + const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const skip_block{MergeBlock(parent, stmt)}; + + IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IREmitter ir{*current_block}; + ir.Branch(continue_block); + } + } + + Block* MergeBlock(Statement& parent, Statement& stmt) { + if (Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool<Statement, 64>& stmt_pool; + ObjectPool<Inst>& inst_pool; + ObjectPool<Block>& block_pool; + const std::function<void(IR::Block*)>& func; + BlockList& block_list; +}; +} // Anonymous namespace + +BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(Block*)>& func) { + ObjectPool<Statement, 64> stmt_pool; + GotoPass goto_pass{unordered_blocks, stmt_pool}; + BlockList block_list; + TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), + func, block_list}; + return block_list; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h new file mode 100644 index 0000000000..a574c24f77 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <span> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +[[nodiscard]] BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(Block*)>& func); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 21ee981371..e766b555bd 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -17,38 +17,49 @@ #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; +} // Anonymous namespace static u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { - if (pc <= block.begin || pc >= block.end) { +static void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - return { - Block{ - .begin{block.begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{block.id}, - .stack{block.stack}, - .cond{true}, - .branch_true{new_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }, - Block{ - .begin{pc}, - .end{block.end}, - .end_class{block.end_class}, - .id{new_id}, - .stack{std::move(block.stack)}, - .cond{block.cond}, - .branch_true{block.branch_true}, - .branch_false{block.branch_false}, - .imm_predecessors{}, - }, + *new_block = Block{ + .begin{pc}, + .end{old_block->end}, + .end_class{old_block->end_class}, + .stack{old_block->stack}, + .cond{old_block->cond}, + .branch_true{old_block->branch_true}, + .branch_false{old_block->branch_false}, + .ir{nullptr}, + }; + *old_block = Block{ + .begin{old_block->begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{std::move(old_block->stack)}, + .cond{IR::Condition{true}}, + .branch_true{new_block}, + .branch_false{nullptr}, + .ir{nullptr}, }; } @@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) { static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { - return fmt::format("\"Virtual {}\"", block.id); + return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } @@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept { Function::Function(Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block_id{0}, + .block{nullptr}, .stack{}, }} {} -void Function::BuildBlocksMap() { - const size_t num_blocks{NumBlocks()}; - blocks_map.resize(num_blocks); - for (size_t block_index = 0; block_index < num_blocks; ++block_index) { - Block& block{blocks_data[block_index]}; - blocks_map[block.id] = █ - } -} - -void Function::BuildImmediatePredecessors() { - for (const Block& block : blocks_data) { - if (block.branch_true != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); - } - if (block.branch_false != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); - } - } -} - -void Function::BuildPostOrder() { - boost::container::small_vector<BlockId, 0x110> block_stack; - post_order_map.resize(NumBlocks()); - - Block& first_block{blocks_data[blocks.front()]}; - first_block.post_order_visited = true; - block_stack.push_back(first_block.id); - - const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { - if (branch_id == UNREACHABLE_BLOCK_ID) { - return false; - } - if (blocks_map[branch_id]->post_order_visited) { - return false; - } - blocks_map[branch_id]->post_order_visited = true; - - // Calling push_back twice is faster than insert on msvc - block_stack.push_back(block_id); - block_stack.push_back(branch_id); - return true; - }; - while (!block_stack.empty()) { - const Block* const block{blocks_map[block_stack.back()]}; - block_stack.pop_back(); - - if (!visit_branch(block->id, block->branch_true) && - !visit_branch(block->id, block->branch_false)) { - post_order_map[block->id] = static_cast<u32>(post_order_blocks.size()); - post_order_blocks.push_back(block->id); - } - } -} - -void Function::BuildImmediateDominators() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; - auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; - auto intersect{[this](Block* finger1, Block* finger2) { - while (finger1 != finger2) { - while (post_order_map[finger1->id] < post_order_map[finger2->id]) { - finger1 = finger1->imm_dominator; - } - while (post_order_map[finger2->id] < post_order_map[finger1->id]) { - finger2 = finger2->imm_dominator; - } - } - return finger1; - }}; - for (Block& block : blocks_data) { - block.imm_dominator = nullptr; - } - Block* const start_block{&blocks_data[blocks.front()]}; - start_block->imm_dominator = start_block; - - bool changed{true}; - while (changed) { - changed = false; - for (Block* const block : post_order_blocks | reverse_order_but_first) { - Block* new_idom{}; - for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { - new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; - } - changed |= block->imm_dominator != new_idom; - block->imm_dominator = new_idom; - } - } -} - -void Function::BuildDominanceFrontier() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; - for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { - for (Block* current : block.imm_predecessors | transform_block_id) { - while (current != block.imm_dominator) { - current->dominance_frontiers.push_back(current->id); - current = current->imm_dominator; - } - } - } -} - -CFG::CFG(Environment& env_, Location start_address) : env{env_} { - VisitFunctions(start_address); - - for (Function& function : functions) { - function.BuildBlocksMap(); - function.BuildImmediatePredecessors(); - function.BuildPostOrder(); - function.BuildImmediateDominators(); - function.BuildDominanceFrontier(); - } -} - -void CFG::VisitFunctions(Location start_address) { +CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) + : env{env_}, block_pool{block_pool_} { functions.emplace_back(start_address); + functions.back().labels.back().block = block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + }); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { return; } // Try to find the next block - Function* function{&functions[function_id]}; + Function* const function{&functions[function_id]}; Location pc{label.address}; - const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, - [function](Location pc, u32 block_index) { - return pc < function->blocks_data[block_index].begin; - })}; - const auto next_index{std::distance(function->blocks.begin(), next)}; - const bool is_last{next == function->blocks.end()}; - Location next_pc; - BlockId next_id{UNREACHABLE_BLOCK_ID}; - if (!is_last) { - next_pc = function->blocks_data[*next].begin; - next_id = function->blocks_data[*next].id; - } + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; // Insert before the next block - Block block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{label.block_id}, - .stack{std::move(label.stack)}, - .cond{true}, - .branch_true{UNREACHABLE_BLOCK_ID}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }; + Block* const block{label.block}; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; - while (is_last || pc < next_pc) { + while (!next || pc < next->begin) { is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; if (is_branch) { break; @@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { if (!is_branch) { // If the block finished without a branch, // it means that the next instruction is already visited, jump to it - block.end = pc; - block.cond = true; - block.branch_true = next_id; - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; } // Function's pointer might be invalid, resolve it again - function = &functions[function_id]; - const u32 new_block_index = static_cast<u32>(function->blocks_data.size()); - function->blocks.insert(function->blocks.begin() + next_index, new_block_index); - function->blocks_data.push_back(std::move(block)); + // Insert the new block + functions[function_id].blocks.insert(*block); } bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { const Location pc{label.address}; Function& function{functions[function_id]}; - const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { - return function.blocks_data[block_index].Contains(pc); - })}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; if (it == function.blocks.end()) { // Address has not been visited return false; } - Block& block{function.blocks_data[*it]}; - if (block.begin == pc) { - throw LogicError("Dangling branch"); - } - const u32 first_index{*it}; - const u32 second_index{static_cast<u32>(function.blocks_data.size())}; - const std::array new_indices{first_index, second_index}; - std::array split_blocks{Split(std::move(block), pc, label.block_id)}; - function.blocks_data[*it] = std::move(split_blocks[0]); - function.blocks_data.push_back(std::move(split_blocks[1])); - function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); return true; } -CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { const Instruction inst{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(inst.raw)}; switch (opcode) { @@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); break; case Opcode::RET: - block.end_class = EndClass::Return; + block->end_class = EndClass::Return; break; default: break; } - block.end = pc; + block->end = pc; return AnalysisState::Branch; case Opcode::BRK: case Opcode::CONT: @@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; } - const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; - block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); - block.end = pc; + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; return AnalysisState::Branch; } case Opcode::PBK: @@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati case Opcode::PEXIT: case Opcode::PLONGJMP: case Opcode::SSY: - block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); @@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati return AnalysisState::Branch; } -void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond) { - if (block.begin != pc) { + if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later - block.end = pc; - block.cond = true; - block.branch_true = AddLabel(block, block.stack, pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; return; } - // Impersonate the visited block with a virtual block - // Jump from this virtual to the real conditional instruction and the next instruction - Function& function{functions[function_id]}; - const BlockId conditional_block_id{++function.current_block_id}; - function.blocks.push_back(static_cast<u32>(function.blocks_data.size())); - Block& virtual_block{function.blocks_data.emplace_back(Block{ - .begin{}, // Virtual block - .end{}, + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{ + .begin{block->begin.Virtual()}, + .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .id{block.id}, // Impersonating - .stack{block.stack}, + .stack{block->stack}, .cond{cond}, - .branch_true{conditional_block_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - })}; - // Set the end properties of the conditional instruction and give it a new identity - Block& conditional_block{block}; - conditional_block.end = pc; - conditional_block.end_class = insn_end_class; - conditional_block.id = conditional_block_id; + .branch_true{conditional_block}, + .branch_false{nullptr}, + .ir{nullptr}, + }; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc; + conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction - const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block - virtual_block.branch_false = endif_block_id; + block->branch_false = endif_block; // And branch to it from the conditional instruction if it is a branch if (insn_end_class == EndClass::Branch) { - conditional_block.cond = true; - conditional_block.branch_true = endif_block_id; - conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); } -bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode) { if (inst.branch.is_cbuf) { throw NotImplementedException("Branch with constant buffer offset"); @@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr const bool has_flow_test{HasFlowTest(opcode)}; const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - block.cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); - block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); } else { - block.cond = true; + block->cond = IR::Condition{true}; } return true; } -void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute) { const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { +void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } @@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { } } -CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; const Predicate pred{inst.Pred()}; @@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati return AnalysisState::Continue; } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - if (block.stack.Peek(Token::PEXIT).has_value()) { + if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } - if (const std::optional<Location> exit_pc{block.stack.Peek(Token::PEXIT)}) { - const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; - block.cond = true; - block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; return AnalysisState::Branch; } - block.end = pc; - block.end_class = EndClass::Exit; + block->end = pc; + block->end_class = EndClass::Exit; return AnalysisState::Branch; } -BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { Function& function{functions[function_id]}; - if (block.begin == pc) { - return block.id; + if (block->begin == pc) { + // Jumps to itself + return block; } - const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; - if (target != function.blocks_data.end()) { - return target->id; + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + return &*it; } - const BlockId block_id{++function.current_block_id}; + // TODO: FIX DANGLING BLOCKS + Block* const new_block{block_pool.Create(Block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{stack}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + })}; function.labels.push_back(Label{ .address{pc}, - .block_id{block_id}, + .block{new_block}, .stack{std::move(stack)}, }); - return block_id; + return new_block; } std::string CFG::Dot() const { @@ -581,18 +474,12 @@ std::string CFG::Dot() const { for (const Function& function : functions) { dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); dot += fmt::format("\t\tnode [style=filled];\n"); - for (const u32 block_index : function.blocks) { - const Block& block{function.blocks_data[block_index]}; + for (const Block& block : function.blocks) { const std::string name{NameOf(block)}; - const auto add_branch = [&](BlockId branch_id, bool add_label) { - const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; - dot += fmt::format("\t\t{}->", name); - if (it == function.blocks_data.end()) { - dot += fmt::format("\"Unknown label {}\"", branch_id); - } else { - dot += NameOf(*it); - }; - if (add_label && block.cond != true && block.cond != false) { + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { dot += fmt::format(" [label=\"{}\"]", block.cond); } dot += '\n'; @@ -600,10 +487,10 @@ std::string CFG::Dot() const { dot += fmt::format("\t\t{};\n", name); switch (block.end_class) { case EndClass::Branch: - if (block.cond != false) { + if (block.cond != IR::Condition{false}) { add_branch(block.branch_true, true); } - if (block.cond != true) { + if (block.cond != IR::Condition{true}) { add_branch(block.branch_false, false); } break; @@ -619,12 +506,6 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; - case EndClass::Unreachable: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += fmt::format( - "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } if (function.entrypoint == 8) { @@ -635,10 +516,11 @@ std::string CFG::Dot() const { dot += "\t}\n"; } if (!functions.empty()) { - if (functions.front().blocks.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 49b369282a..8179787b87 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -11,25 +11,27 @@ #include <vector> #include <boost/container/small_vector.hpp> +#include <boost/intrusive/set.hpp> #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +class Block; +} namespace Shader::Maxwell::Flow { -using BlockId = u32; using FunctionId = size_t; -constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast<u32>(-1)}; - enum class EndClass { Branch, Exit, Return, - Unreachable, }; enum class Token { @@ -59,58 +61,37 @@ private: boost::container::small_vector<StackEntry, 3> entries; }; -struct Block { +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode<boost::intrusive::normal_link>> { [[nodiscard]] bool Contains(Location pc) const noexcept; + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + Location begin; Location end; EndClass end_class; - BlockId id; Stack stack; IR::Condition cond; - BlockId branch_true; - BlockId branch_false; - boost::container::small_vector<BlockId, 4> imm_predecessors; - boost::container::small_vector<BlockId, 8> dominance_frontiers; - union { - bool post_order_visited{false}; - Block* imm_dominator; - }; + Block* branch_true; + Block* branch_false; + IR::Block* ir; }; struct Label { Location address; - BlockId block_id; + Block* block; Stack stack; }; struct Function { Function(Location start_address); - void BuildBlocksMap(); - - void BuildImmediatePredecessors(); - - void BuildPostOrder(); - - void BuildImmediateDominators(); - - void BuildDominanceFrontier(); - - [[nodiscard]] size_t NumBlocks() const noexcept { - return static_cast<size_t>(current_block_id) + 1; - } - Location entrypoint; - BlockId current_block_id{0}; boost::container::small_vector<Label, 16> labels; - boost::container::small_vector<u32, 0x130> blocks; - boost::container::small_vector<Block, 0x130> blocks_data; - // Translates from BlockId to block index - boost::container::small_vector<Block*, 0x130> blocks_map; - - boost::container::small_vector<u32, 0x130> post_order_blocks; - boost::container::small_vector<BlockId, 0x130> post_order_map; + boost::intrusive::set<Block> blocks; }; class CFG { @@ -120,7 +101,7 @@ class CFG { }; public: - explicit CFG(Environment& env, Location start_address); + explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -133,35 +114,37 @@ public: [[nodiscard]] std::span<const Function> Functions() const noexcept { return std::span(functions.data(), functions.size()); } + [[nodiscard]] std::span<Function> Functions() noexcept { + return std::span(functions.data(), functions.size()); + } private: - void VisitFunctions(Location start_address); - void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); - void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode); - void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); - AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); Environment& env; + ObjectPool<Block>& block_pool; boost::container::small_vector<Function, 1> functions; FunctionId current_function_id{0}; }; diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h index 66b51a19e6..26d29eae2e 100644 --- a/src/shader_recompiler/frontend/maxwell/location.h +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -15,7 +15,7 @@ namespace Shader::Maxwell { class Location { - static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits<u32>::max()}; + static constexpr u32 VIRTUAL_BIAS{4}; public: constexpr Location() = default; @@ -27,12 +27,18 @@ public: Align(); } + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + [[nodiscard]] constexpr u32 Offset() const noexcept { return offset; } [[nodiscard]] constexpr bool IsVirtual() const { - return offset == VIRTUAL_OFFSET; + return offset % 8 == VIRTUAL_BIAS; } constexpr auto operator<=>(const Location&) const noexcept = default; @@ -89,7 +95,7 @@ private: offset -= 8 + (offset % 32 == 8 ? 8 : 0); } - u32 offset{VIRTUAL_OFFSET}; + u32 offset{0xcccccccc}; }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8cdd20804e..9fa912ed8e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,57 +4,58 @@ #include <algorithm> #include <memory> +#include <vector> #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { namespace { -void TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span<IR::Block*> block_map) { +IR::BlockList TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::Function& cfg_function) { const size_t num_blocks{cfg_function.blocks.size()}; - function.blocks.reserve(num_blocks); - - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; - block_map[flow_block.id] = ir_block; - function.blocks.emplace_back(ir_block); - } -} - -void EmitTerminationInsts(const Flow::Function& cfg_function, - std::span<IR::Block* const> block_map) { - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } -} - -void TranslateFunction(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::Function& cfg_function, - IR::Function& function) { - std::vector<IR::Block*> block_map; - block_map.resize(cfg_function.blocks_data.size()); - - TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); - EmitTerminationInsts(cfg_function, block_map); + std::vector<IR::Block*> blocks(cfg_function.blocks.size()); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + const u32 begin{cfg_block.begin.Offset()}; + const u32 end{cfg_block.end.Offset()}; + blocks[i] = block_pool.Create(inst_pool, begin, end); + cfg_block.ir = blocks[i]; + ++i; + }); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + IR::Block* const block{blocks[i]}; + ++i; + if (cfg_block.end_class != Flow::EndClass::Branch) { + block->SetReturn(); + } else if (cfg_block.cond == IR::Condition{true}) { + block->SetBranch(cfg_block.branch_true->ir); + } else if (cfg_block.cond == IR::Condition{false}) { + block->SetBranch(cfg_block.branch_false->ir); + } else { + block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, + cfg_block.branch_false->ir); + } + }); + return IR::VisitAST(inst_pool, block_pool, blocks, + [&](IR::Block* block) { Translate(env, block); }); } } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg) { IR::Program program; auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); + for (Flow::Function& cfg_function : cfg.Functions()) { + functions.push_back(IR::Function{ + .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, + }); } + + fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 3355ab1299..542621a1de 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -19,6 +19,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - const Flow::CFG& cfg); + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp deleted file mode 100644 index ed5137f20c..0000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <span> - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" - -namespace Shader::Maxwell { - -static void EmitExit(IR::IREmitter& ir) { - ir.Exit(); -} - -static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { - switch (flow_test) { - case IR::FlowTest::T: - return ir.Imm1(true); - case IR::FlowTest::F: - return ir.Imm1(false); - case IR::FlowTest::NE: - // FIXME: Verify this - return ir.LogicalNot(ir.GetZFlag()); - case IR::FlowTest::NaN: - // FIXME: Verify this - return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); - default: - throw NotImplementedException("Flow test {}", flow_test); - } -} - -static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { - const IR::FlowTest flow_test{cond.FlowTest()}; - const auto [pred, pred_negated]{cond.Pred()}; - if (pred == IR::Pred::PT && !pred_negated) { - return GetFlowTest(flow_test, ir); - } - if (flow_test == IR::FlowTest::T) { - return ir.GetPred(pred, pred_negated); - } - return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); -} - -static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map, - IR::IREmitter& ir) { - const auto add_immediate_predecessor = [&](Flow::BlockId label) { - block_map[label]->AddImmediatePredecessor(&ir.block); - }; - if (flow_block.cond == true) { - add_immediate_predecessor(flow_block.branch_true); - return ir.Branch(block_map[flow_block.branch_true]); - } - if (flow_block.cond == false) { - add_immediate_predecessor(flow_block.branch_false); - return ir.Branch(block_map[flow_block.branch_false]); - } - add_immediate_predecessor(flow_block.branch_true); - add_immediate_predecessor(flow_block.branch_false); - return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], - block_map[flow_block.branch_false]); -} - -void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map) { - IR::Block* const block{block_map[flow_block.id]}; - IR::IREmitter ir(*block); - switch (flow_block.end_class) { - case Flow::EndClass::Branch: - EmitBranch(flow_block, block_map, ir); - break; - case Flow::EndClass::Exit: - EmitExit(ir); - break; - case Flow::EndClass::Return: - ir.Return(); - break; - case Flow::EndClass::Unreachable: - ir.Unreachable(); - break; - } -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h deleted file mode 100644 index 04e0445340..0000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <span> - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" - -namespace Shader::Maxwell { - -/// Emit termination instructions and collect immediate predecessors -void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d4b417d14a..b752785d44 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { IR::U32 result; if (shl.w != 0) { // When .W is set, the shift value is wrapped - // To emulate this we just have to clamp it ourselves. + // To emulate this we just have to wrap it ourselves. const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; result = v.ir.ShiftLeftLogical(base, shift); } else { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 7e6bb07a22..f1230f58fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, - const Flow::Block& flow_block) { - IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; - TranslatorVisitor visitor{env, block}; - - const Location pc_end{flow_block.end}; - Location pc{flow_block.begin}; - while (pc != pc_end) { +void Translate(Environment& env, IR::Block* block) { + if (block->IsVirtual()) { + return; + } + TranslatorVisitor visitor{env, *block}; + const Location pc_end{block->LocationEnd()}; + for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { @@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, default: throw LogicError("Invalid opcode {}", opcode); } - ++pc; } - return block; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index c1c21b2782..e1aa2e0f4b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,14 +6,9 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, - const Flow::Block& flow_block); +void Translate(Environment& env, IR::Block* block); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1170c61ee..9fba6ac239 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -132,6 +132,32 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->Opcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + template <typename Dest, typename Source> void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; @@ -160,6 +186,24 @@ void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); } +void FoldBranchConditional(IR::Inst& inst) { + const IR::U1 cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + // TODO: Convert to Branch + return; + } + const IR::Inst* cond_inst{cond.InstRecursive()}; + if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + const IR::Value true_label{inst.Arg(1)}; + const IR::Value false_label{inst.Arg(2)}; + // Remove negation on the conditional (take the parameter out of LogicalNot) and swap + // the branches + inst.SetArg(0, cond_inst->Arg(0)); + inst.SetArg(1, false_label); + inst.SetArg(2, true_label); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -178,6 +222,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldSelect<u32>(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: @@ -188,6 +236,8 @@ void ConstantPropagation(IR::Inst& inst) { } return (base >> shift) & ((1U << count) - 1); }); + case IR::Opcode::BranchConditional: + return FoldBranchConditional(inst); default: break; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 15a9db90a9..8ca996e935 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -34,6 +34,13 @@ struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + u32 index; +}; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -43,6 +50,10 @@ struct DefTable { return preds[IR::PredIndex(variable)]; } + [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { + return goto_vars[goto_variable.index]; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -61,6 +72,7 @@ struct DefTable { std::array<ValueMap, IR::NUM_USER_REGS> regs; std::array<ValueMap, IR::NUM_USER_PREDS> preds; + boost::container::flat_map<u32, ValueMap> goto_vars; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -68,15 +80,15 @@ struct DefTable { }; IR::Opcode UndefOpcode(IR::Reg) noexcept { - return IR::Opcode::Undef32; + return IR::Opcode::UndefU32; } IR::Opcode UndefOpcode(IR::Pred) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } IR::Opcode UndefOpcode(const FlagTag&) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { @@ -165,6 +177,9 @@ void SsaRewritePass(IR::Function& function) { pass.WriteVariable(pred, block, inst.Arg(1)); } break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -187,6 +202,9 @@ void SsaRewritePass(IR::Function& function) { inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 8a5adf5a23..32b56eb57c 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Function& function) { for (const auto& block : function.blocks) { for (const IR::Inst& inst : *block) { + if (inst.Opcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 9887e066d2..3ca1677c4a 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> #include <filesystem> #include <fmt/format.h> @@ -36,34 +37,46 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str())); }); - for (int i = 0; i < 300; ++i) { + auto block_pool{std::make_unique<ObjectPool<Flow::Block>>()}; + auto t0 = std::chrono::high_resolution_clock::now(); + int N = 1; + int n = 0; + for (int i = 0; i < N; ++i) { for (auto& env : map) { + ++n; // fmt::print(stdout, "Decoding {}\n", path.string()); + const Location start_address{0}; - auto cfg{std::make_unique<Flow::CFG>(*env, start_address)}; + block_pool->ReleaseContents(); + Flow::CFG cfg{*env, *block_pool, start_address}; // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; } } + auto t = std::chrono::high_resolution_clock::now(); + fmt::print(stdout, "{} ms", + std::chrono::duration_cast<std::chrono::milliseconds>(t - t0).count() / double(N)); } int main() { // RunDatabase(); + auto flow_block_pool{std::make_unique<ObjectPool<Flow::Block>>()}; auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()}; auto block_pool{std::make_unique<ObjectPool<IR::Block>>()}; - // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + // FileEnvironment env{"D:\\Shaders\\shader.bin"}; for (int i = 0; i < 1; ++i) { block_pool->ReleaseContents(); inst_pool->ReleaseContents(); - auto cfg{std::make_unique<Flow::CFG>(env, 0)}; - // fmt::print(stdout, "{}\n", cfg->Dot()); - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; - // fmt::print(stdout, "{}\n", IR::DumpProgram(program)); + flow_block_pool->ReleaseContents(); + Flow::CFG cfg{env, *flow_block_pool, 0}; + fmt::print(stdout, "{}\n", cfg.Dot()); + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); Backend::SPIRV::EmitSPIRV spirv{program}; } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 0000000000..1760bf4a96 --- /dev/null +++ b/src/shader_recompiler/shader_info.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include <boost/container/static_vector.hpp> + +namespace Shader { + +struct Info { + struct ConstantBuffer { + + }; + + struct { + bool workgroup_id{}; + bool local_invocation_id{}; + bool fp16{}; + bool fp64{}; + } uses; + + std::array<18 +}; + +} // namespace Shader -- cgit v1.2.3-70-g09d2 From e87a502da2d5a8356a639d53c0a16a77890de4c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 24 Feb 2021 05:21:30 -0300 Subject: shader: Fix control flow --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 27 ++++++++++++++-------- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- .../frontend/ir/structured_control_flow.cpp | 15 ++++++++---- src/shader_recompiler/frontend/ir/value.cpp | 6 ++++- src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/control_flow.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 4 ++-- 8 files changed, 39 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8ba86e614e..0209d5540b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -134,18 +134,27 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U1 IREmitter::Condition(IR::Condition cond) { - if (cond == IR::Condition{true}) { - return Imm1(true); - } else if (cond == IR::Condition{false}) { - return Imm1(false); +static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { + switch (flow_test) { + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::F: + return ir.Imm1(false); + case FlowTest::EQ: + // TODO: Test this + return ir.GetZFlag(); + case FlowTest::NE: + // TODO: Test this + return ir.LogicalNot(ir.GetZFlag()); + default: + throw NotImplementedException("Flow test {}", flow_test); } +} + +U1 IREmitter::Condition(IR::Condition cond) { const FlowTest flow_test{cond.FlowTest()}; const auto [pred, is_negated]{cond.Pred()}; - if (flow_test == FlowTest::T) { - return GetPred(pred, is_negated); - } - throw NotImplementedException("Condition {}", cond); + return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } F32 IREmitter::GetAttribute(IR::Attribute attribute) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f2d71144a1..289e76f32b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -4,8 +4,8 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... OPCODE(Phi, Opaque, ) -OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow OPCODE(Branch, Void, Label, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index 032ac8fda4..bfba55a7ed 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -329,7 +329,6 @@ private: if (!sibling) { throw LogicError("Not siblings"); } - // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it @@ -351,9 +350,14 @@ private: const std::unordered_map labels_map{BuildLabels(blocks)}; Tree& root{root_stmt.children}; auto insert_point{root.begin()}; + // Skip all goto variables zero-initialization + std::advance(insert_point, labels_map.size()); + for (Block* const block : blocks) { - ++insert_point; // Skip label - ++insert_point; // Skip set variable + // Skip label + ++insert_point; + // Skip set variable + ++insert_point; root.insert(insert_point, *pool.Create(block, &root_stmt)); if (block->IsTerminationBlock()) { @@ -391,6 +395,7 @@ private: labels_map.emplace(block, root.insert(root.end(), *label)); Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); ++label_id; } return labels_map; @@ -457,10 +462,10 @@ private: } body.erase(goto_stmt); - // Update nested if condition switch (label_nested_stmt->type) { case StatementType::If: - label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); break; case StatementType::Loop: break; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 718314213d..791ba26906 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -36,6 +36,10 @@ bool Value::IsIdentity() const noexcept { return type == Type::Opaque && inst->Opcode() == Opcode::Identity; } +bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->Opcode() == Opcode::Phi; +} + bool Value::IsEmpty() const noexcept { return type == Type::Void; } @@ -52,7 +56,7 @@ bool Value::IsLabel() const noexcept { } IR::Type Value::Type() const noexcept { - if (IsIdentity()) { + if (IsIdentity() || IsPhi()) { return inst->Arg(0).Type(); } if (type == Type::Opaque) { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8aba0bbf6e..9b7e1480be 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -33,6 +33,7 @@ public: explicit Value(f64 value) noexcept; [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e766b555bd..52be41b848 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -354,7 +354,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4e722e2059..672e140b24 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -209,7 +209,7 @@ public: void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); void PBK(); - void PCNT(u64 insn); + void PCNT(); void PEXIT(u64 insn); void PIXLD(u64 insn); void PLONGJMP(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4114e10bea..3f6dedfdd4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -637,8 +637,8 @@ void TranslatorVisitor::PBK() { // PBK is a no-op } -void TranslatorVisitor::PCNT(u64) { - ThrowNotImplemented(Opcode::PCNT); +void TranslatorVisitor::PCNT() { + // PCNT is a no-op } void TranslatorVisitor::PEXIT(u64) { -- cgit v1.2.3-70-g09d2 From 622d676202bad317a58529efc3c15d08fd04aad1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 24 Feb 2021 18:32:00 -0300 Subject: shader: Fix conditional execution of exit instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 +++++---- src/shader_recompiler/frontend/maxwell/control_flow.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 52be41b848..d0dc663307 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -322,12 +322,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond) { + EndClass insn_end_class, IR::Condition cond, + bool visit_conditional_inst) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -354,7 +355,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc + 1; + conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; @@ -423,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); return AnalysisState::Branch; } if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 8179787b87..209c9e5510 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -128,7 +128,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond); + IR::Condition cond, bool visit_conditional_inst); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, -- cgit v1.2.3-70-g09d2 From 71f96fa6366dc6dd306a953bca1b958fb32bc55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 14 Mar 2021 03:41:05 -0300 Subject: shader: Implement CAL inlining function calls --- src/shader_recompiler/CMakeLists.txt | 6 +- .../backend/spirv/emit_context.cpp | 6 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 17 +- src/shader_recompiler/frontend/ir/function.cpp | 5 - src/shader_recompiler/frontend/ir/function.h | 18 - src/shader_recompiler/frontend/ir/program.cpp | 18 +- src/shader_recompiler/frontend/ir/program.h | 5 +- .../frontend/ir/structured_control_flow.cpp | 744 -------------------- .../frontend/ir/structured_control_flow.h | 22 - .../frontend/maxwell/control_flow.cpp | 78 +-- .../frontend/maxwell/control_flow.h | 19 +- src/shader_recompiler/frontend/maxwell/program.cpp | 71 +- .../frontend/maxwell/structured_control_flow.cpp | 770 +++++++++++++++++++++ .../frontend/maxwell/structured_control_flow.h | 24 + .../frontend/maxwell/translate/impl/impl.h | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 4 +- .../ir_opt/collect_shader_info_pass.cpp | 8 +- .../ir_opt/constant_propagation_pass.cpp | 8 +- .../ir_opt/dead_code_elimination_pass.cpp | 10 +- .../global_memory_to_storage_buffer_pass.cpp | 12 +- .../ir_opt/identity_removal_pass.cpp | 4 +- .../ir_opt/lower_fp16_to_fp32.cpp | 8 +- src/shader_recompiler/ir_opt/passes.h | 18 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 5 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 12 +- src/shader_recompiler/ir_opt/verification_pass.cpp | 16 +- 26 files changed, 933 insertions(+), 977 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/function.cpp delete mode 100644 src/shader_recompiler/frontend/ir/function.h delete mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp delete mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.h (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 171fdd321d..20409e09a2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -27,8 +27,6 @@ add_library(shader_recompiler STATIC frontend/ir/condition.h frontend/ir/flow_test.cpp frontend/ir/flow_test.h - frontend/ir/function.cpp - frontend/ir/function.h frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp @@ -43,8 +41,6 @@ add_library(shader_recompiler STATIC frontend/ir/program.cpp frontend/ir/program.h frontend/ir/reg.h - frontend/ir/structured_control_flow.cpp - frontend/ir/structured_control_flow.h frontend/ir/type.cpp frontend/ir/type.h frontend/ir/value.cpp @@ -60,6 +56,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h + frontend/maxwell/structured_control_flow.cpp + frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp frontend/maxwell/translate/impl/common_encoding.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 278b26b503..f848c61757 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -262,10 +262,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } void EmitContext::DefineLabels(IR::Program& program) { - for (const IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - block->SetDefinition(OpLabel()); - } + for (IR::Block* const block : program.blocks) { + block->SetDefinition(OpLabel()); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c7cba62793..7e7db9161d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -10,7 +10,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" @@ -199,18 +198,14 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{profile, program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; - // FIXME: Forward declare functions (needs sirit support) - Id func{}; - for (IR::Function& function : program.functions) { - func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function); - for (IR::Block* const block : function.blocks) { - ctx.AddLabel(block->Definition<Id>()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } + const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + ctx.AddLabel(block->Definition<Id>()); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); } - ctx.OpFunctionEnd(); } + ctx.OpFunctionEnd(); boost::container::small_vector<Id, 32> interfaces; const Info& info{program.info}; if (info.uses_workgroup_id) { diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp deleted file mode 100644 index d1fc9461d0..0000000000 --- a/src/shader_recompiler/frontend/ir/function.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h deleted file mode 100644 index d1f0611467..0000000000 --- a/src/shader_recompiler/frontend/ir/function.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <boost/container/small_vector.hpp> - -#include "shader_recompiler/frontend/ir/basic_block.h" - -namespace Shader::IR { - -struct Function { - BlockList blocks; - BlockList post_order_blocks; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 8c301c3a10..5f51aeb5f3 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -9,7 +9,8 @@ #include <fmt/format.h> -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::IR { @@ -19,18 +20,13 @@ std::string DumpProgram(const Program& program) { std::map<const IR::Inst*, size_t> inst_to_index; std::map<const IR::Block*, size_t> block_to_index; - for (const IR::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); - ++index; - } + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; } std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 98aab2dc68..bce8b19b3a 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -8,13 +8,14 @@ #include <boost/container/small_vector.hpp> -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { - boost::container::small_vector<Function, 1> functions; + BlockList blocks; + BlockList post_order_blocks; Info info; }; diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp deleted file mode 100644 index bfba55a7ed..0000000000 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <memory> -#include <ranges> -#include <string> -#include <unordered_map> -#include <utility> -#include <vector> - -#include <fmt/format.h> - -#include <boost/intrusive/list.hpp> - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::IR { -namespace { -struct Statement; - -// Use normal_link because we are not guaranteed to destroy the tree in order -using ListBaseHook = - boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; - -using Tree = boost::intrusive::list<Statement, - // Allow using Statement without a definition - boost::intrusive::base_hook<ListBaseHook>, - // Avoid linear complexity on splice, size is never called - boost::intrusive::constant_time_size<false>>; -using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; - -enum class StatementType { - Code, - Goto, - Label, - If, - Loop, - Break, - Return, - Function, - Identity, - Not, - Or, - SetVariable, - Variable, -}; - -bool HasChildren(StatementType type) { - switch (type) { - case StatementType::If: - case StatementType::Loop: - case StatementType::Function: - return true; - default: - return false; - } -} - -struct Goto {}; -struct Label {}; -struct If {}; -struct Loop {}; -struct Break {}; -struct Return {}; -struct FunctionTag {}; -struct Identity {}; -struct Not {}; -struct Or {}; -struct SetVariable {}; -struct Variable {}; - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement -#endif -struct Statement : ListBaseHook { - Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} - Statement(Goto, Statement* cond_, Node label_, Statement* up_) - : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} - Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} - Statement(If, Statement* cond_, Tree&& children_, Statement* up_) - : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} - Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) - : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} - Statement(Break, Statement* cond_, Statement* up_) - : cond{cond_}, up{up_}, type{StatementType::Break} {} - Statement(Return) : type{StatementType::Return} {} - Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} - Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} - Statement(Or, Statement* op_a_, Statement* op_b_) - : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} - Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) - : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} - Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} - - ~Statement() { - if (HasChildren(type)) { - std::destroy_at(&children); - } - } - - union { - Block* code; - Node label; - Tree children; - Condition guest_cond; - Statement* op; - Statement* op_a; - }; - union { - Statement* cond; - Statement* op_b; - u32 id; - }; - Statement* up{}; - StatementType type; -}; -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -std::string DumpExpr(const Statement* stmt) { - switch (stmt->type) { - case StatementType::Identity: - return fmt::format("{}", stmt->guest_cond); - case StatementType::Not: - return fmt::format("!{}", DumpExpr(stmt->op)); - case StatementType::Or: - return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); - case StatementType::Variable: - return fmt::format("goto_L{}", stmt->id); - default: - return "<invalid type>"; - } -} - -std::string DumpTree(const Tree& tree, u32 indentation = 0) { - std::string ret; - std::string indent(indentation, ' '); - for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { - switch (stmt->type) { - case StatementType::Code: - ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); - break; - case StatementType::Goto: - ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), - stmt->label->id); - break; - case StatementType::Label: - ret += fmt::format("{}L{}:\n", indent, stmt->id); - break; - case StatementType::If: - ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); - ret += DumpTree(stmt->children, indentation + 4); - ret += fmt::format("{} }}\n", indent); - break; - case StatementType::Loop: - ret += fmt::format("{} do {{\n", indent); - ret += DumpTree(stmt->children, indentation + 4); - ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); - break; - case StatementType::Break: - ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); - break; - case StatementType::Return: - ret += fmt::format("{} return;\n", indent); - break; - case StatementType::SetVariable: - ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); - break; - case StatementType::Function: - case StatementType::Identity: - case StatementType::Not: - case StatementType::Or: - case StatementType::Variable: - throw LogicError("Statement can't be printed"); - } - } - return ret; -} - -bool HasNode(const Tree& tree, ConstNode stmt) { - const auto end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { - return true; - } - } - return false; -} - -Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { - const ConstNode label_stmt{goto_stmt->label}; - const ConstNode end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { - return it; - } - } - throw LogicError("Lift label not in tree"); -} - -void SanitizeNoBreaks(const Tree& tree) { - if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { - throw NotImplementedException("Capturing statement with break nodes"); - } -} - -size_t Level(Node stmt) { - size_t level{0}; - Statement* node{stmt->up}; - while (node) { - ++level; - node = node->up; - } - return level; -} - -bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { - const size_t goto_level{Level(goto_stmt)}; - const size_t label_level{Level(label_stmt)}; - size_t min_level; - size_t max_level; - Node min; - Node max; - if (label_level < goto_level) { - min_level = label_level; - max_level = goto_level; - min = label_stmt; - max = goto_stmt; - } else { // goto_level < label_level - min_level = goto_level; - max_level = label_level; - min = goto_stmt; - max = label_stmt; - } - while (max_level > min_level) { - --max_level; - max = max->up; - } - return min->up == max->up; -} - -bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { - return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); -} - -bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { - ++offset; - - const auto end = tree.end(); - for (ConstNode it = tree.begin(); it != end; ++it) { - ++offset; - if (stmt == it) { - return true; - } - if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { - return true; - } - } - return false; -} - -class GotoPass { -public: - explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement>& stmt_pool) - : pool{stmt_pool} { - std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); - } - } - - Statement& RootStatement() noexcept { - return root_stmt; - } - -private: - void RemoveGoto(Node goto_stmt) { - // Force goto_stmt and label_stmt to be directly related - const Node label_stmt{goto_stmt->label}; - if (IsIndirectlyRelated(goto_stmt, label_stmt)) { - // Move goto_stmt out using outward-movement transformation until it becomes - // directly related to label_stmt - while (!IsDirectlyRelated(goto_stmt, label_stmt)) { - goto_stmt = MoveOutward(goto_stmt); - } - } - // Force goto_stmt and label_stmt to be siblings - if (IsDirectlyRelated(goto_stmt, label_stmt)) { - const size_t label_level{Level(label_stmt)}; - size_t goto_level{Level(goto_stmt)}; - if (goto_level > label_level) { - // Move goto_stmt out of its level using outward-movement transformations - while (goto_level > label_level) { - goto_stmt = MoveOutward(goto_stmt); - --goto_level; - } - } else { // Level(goto_stmt) < Level(label_stmt) - if (Offset(goto_stmt) > Offset(label_stmt)) { - // Lift goto_stmt to above stmt containing label_stmt using goto-lifting - // transformations - goto_stmt = Lift(goto_stmt); - } - // Move goto_stmt into label_stmt's level using inward-movement transformation - while (goto_level < label_level) { - goto_stmt = MoveInward(goto_stmt); - ++goto_level; - } - } - } - // TODO: Remove this - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); - } - // goto_stmt and label_stmt are guaranteed to be siblings, eliminate - if (std::next(goto_stmt) == label_stmt) { - // Simply eliminate the goto if the label is next to it - goto_stmt->up->children.erase(goto_stmt); - } else if (Offset(goto_stmt) < Offset(label_stmt)) { - // Eliminate goto_stmt with a conditional - EliminateAsConditional(goto_stmt, label_stmt); - } else { - // Eliminate goto_stmt with a loop - EliminateAsLoop(goto_stmt, label_stmt); - } - } - - std::vector<Node> BuildUnorderedTreeGetGotos(std::span<Block* const> blocks) { - // Assume all blocks have two branches - std::vector<Node> gotos; - gotos.reserve(blocks.size() * 2); - - const std::unordered_map labels_map{BuildLabels(blocks)}; - Tree& root{root_stmt.children}; - auto insert_point{root.begin()}; - // Skip all goto variables zero-initialization - std::advance(insert_point, labels_map.size()); - - for (Block* const block : blocks) { - // Skip label - ++insert_point; - // Skip set variable - ++insert_point; - root.insert(insert_point, *pool.Create(block, &root_stmt)); - - if (block->IsTerminationBlock()) { - root.insert(insert_point, *pool.Create(Return{})); - continue; - } - const Condition cond{block->BranchCondition()}; - Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; - if (cond == Condition{true} || cond == Condition{false}) { - const bool is_true{cond == Condition{true}}; - const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()}; - const Node label{labels_map.at(branch)}; - Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_stmt)); - } else { - Statement* const ident_cond{pool.Create(Identity{}, cond)}; - const Node true_label{labels_map.at(block->TrueBranch())}; - const Node false_label{labels_map.at(block->FalseBranch())}; - Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; - Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_true)); - gotos.push_back(root.insert(insert_point, *goto_false)); - } - } - return gotos; - } - - std::unordered_map<const Block*, Node> BuildLabels(std::span<Block* const> blocks) { - // TODO: Consider storing labels intrusively inside the block - std::unordered_map<const Block*, Node> labels_map; - Tree& root{root_stmt.children}; - u32 label_id{0}; - for (const Block* const block : blocks) { - Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; - labels_map.emplace(block, root.insert(root.end(), *label)); - Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; - root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); - root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); - ++label_id; - } - return labels_map; - } - - void UpdateTreeUp(Statement* tree) { - for (Statement& stmt : tree->children) { - stmt.up = tree; - } - } - - void EliminateAsConditional(Node goto_stmt, Node label_stmt) { - Tree& body{goto_stmt->up->children}; - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); - Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; - Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - body.erase(goto_stmt); - } - - void EliminateAsLoop(Node goto_stmt, Node label_stmt) { - Tree& body{goto_stmt->up->children}; - Tree loop_body; - loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); - Statement* const cond{goto_stmt->cond}; - Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; - UpdateTreeUp(loop); - body.insert(goto_stmt, *loop); - body.erase(goto_stmt); - } - - [[nodiscard]] Node MoveOutward(Node goto_stmt) { - switch (goto_stmt->up->type) { - case StatementType::If: - return MoveOutwardIf(goto_stmt); - case StatementType::Loop: - return MoveOutwardLoop(goto_stmt); - default: - throw LogicError("Invalid outward movement"); - } - } - - [[nodiscard]] Node MoveInward(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const Node label{goto_stmt->label}; - const u32 label_id{label->id}; - - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - body.insert(goto_stmt, *set_var); - - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const neg_var{pool.Create(Not{}, variable)}; - if (!if_body.empty()) { - Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - } - body.erase(goto_stmt); - - switch (label_nested_stmt->type) { - case StatementType::If: - // Update nested if condition - label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); - break; - case StatementType::Loop: - break; - default: - throw LogicError("Invalid inward movement"); - } - Tree& nested_tree{label_nested_stmt->children}; - Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; - return nested_tree.insert(nested_tree.begin(), *new_goto); - } - - [[nodiscard]] Node Lift(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const Node label{goto_stmt->label}; - const u32 label_id{label->id}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; - - Tree loop_body; - loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); - SanitizeNoBreaks(loop_body); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; - UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; - - Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; - loop_stmt->children.push_front(*new_goto); - const Node new_goto_node{loop_stmt->children.begin()}; - - Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; - loop_stmt->children.push_back(*set_var); - - body.erase(goto_stmt); - return new_goto_node; - } - - Node MoveOutwardIf(Node goto_stmt) { - const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; - Tree& body{parent->children}; - const u32 label_id{goto_stmt->label->id}; - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; - body.insert(goto_stmt, *set_goto_var); - - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); - if_body.pop_front(); - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const neg_cond{pool.Create(Not{}, cond)}; - Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - - body.erase(goto_stmt); - - Statement* const new_cond{pool.Create(Variable{}, label_id)}; - Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; - Tree& parent_tree{parent->up->children}; - return parent_tree.insert(std::next(parent), *new_goto); - } - - Node MoveOutwardLoop(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const u32 label_id{goto_stmt->label->id}; - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; - body.insert(goto_stmt, *set_goto_var); - body.insert(goto_stmt, *break_stmt); - body.erase(goto_stmt); - - const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; - Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; - Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; - Tree& parent_tree{loop->up->children}; - return parent_tree.insert(std::next(loop), *new_goto); - } - - size_t Offset(ConstNode stmt) const { - size_t offset{0}; - if (!SearchNode(root_stmt.children, stmt, offset)) { - throw LogicError("Node not found in tree"); - } - return offset; - } - - ObjectPool<Statement>& pool; - Statement root_stmt{FunctionTag{}}; -}; - -Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; - while (forward_node != end && !HasChildren(forward_node->type)) { - if (forward_node->type == StatementType::Code) { - return forward_node->code; - } - ++forward_node; - } - return nullptr; -} - -[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { - switch (stmt.type) { - case StatementType::Identity: - return ir.Condition(stmt.guest_cond); - case StatementType::Not: - return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); - case StatementType::Or: - return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); - case StatementType::Variable: - return ir.GetGotoVariable(stmt.id); - default: - throw NotImplementedException("Statement type {}", stmt.type); - } -} - -class TranslatePass { -public: - TranslatePass(ObjectPool<Inst>& inst_pool_, ObjectPool<Block>& block_pool_, - ObjectPool<Statement>& stmt_pool_, Statement& root_stmt, - const std::function<void(IR::Block*)>& func_, BlockList& block_list_) - : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, - block_list{block_list_} { - Visit(root_stmt, nullptr, nullptr); - } - -private: - void Visit(Statement& parent, Block* continue_block, Block* break_block) { - Tree& tree{parent.children}; - Block* current_block{nullptr}; - - for (auto it = tree.begin(); it != tree.end(); ++it) { - Statement& stmt{*it}; - switch (stmt.type) { - case StatementType::Label: - // Labels can be ignored - break; - case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IREmitter ir{*current_block}; - ir.Branch(stmt.code); - } - current_block = stmt.code; - func(stmt.code); - block_list.push_back(stmt.code); - break; - } - case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } - IREmitter ir{*current_block}; - ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); - break; - } - case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - Block* const merge_block{MergeBlock(parent, stmt)}; - - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - - // Implement if header block - Block* const first_if_block{block_list.at(first_block_index)}; - IREmitter ir{*current_block}; - const U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - ir.BranchConditional(cond, first_if_block, merge_block); - - current_block = merge_block; - break; - } - case StatementType::Loop: { - Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - IREmitter{*current_block}.Branch(loop_header_block); - } - block_list.push_back(loop_header_block); - - Block* const new_continue_block{block_pool.Create(inst_pool)}; - Block* const merge_block{MergeBlock(parent, stmt)}; - - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); - - // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); - - // Implement loop header block - Block* const first_loop_block{block_list.at(first_block_index)}; - IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); - - // Implement continue block - IREmitter continue_ir{*new_continue_block}; - const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); - - current_block = merge_block; - break; - } - case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - Block* const skip_block{MergeBlock(parent, stmt)}; - - IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - - current_block = skip_block; - break; - } - case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IREmitter{*current_block}.Return(); - current_block = nullptr; - break; - } - default: - throw NotImplementedException("Statement type {}", stmt.type); - } - } - if (current_block && continue_block) { - IREmitter ir{*current_block}; - ir.Branch(continue_block); - } - } - - Block* MergeBlock(Statement& parent, Statement& stmt) { - if (Block* const block{TryFindForwardBlock(stmt)}) { - return block; - } - // Create a merge block we can visit later - Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; - } - - ObjectPool<Statement>& stmt_pool; - ObjectPool<Inst>& inst_pool; - ObjectPool<Block>& block_pool; - const std::function<void(IR::Block*)>& func; - BlockList& block_list; -}; -} // Anonymous namespace - -BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, - std::span<Block* const> unordered_blocks, - const std::function<void(Block*)>& func) { - ObjectPool<Statement> stmt_pool{64}; - GotoPass goto_pass{unordered_blocks, stmt_pool}; - BlockList block_list; - TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), - func, block_list}; - return block_list; -} - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h deleted file mode 100644 index a574c24f77..0000000000 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <functional> -#include <span> - -#include <boost/intrusive/list.hpp> - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::IR { - -[[nodiscard]] BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, - std::span<Block* const> unordered_blocks, - const std::function<void(Block*)>& func); - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index d0dc663307..715c0e92d8 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -31,13 +31,12 @@ struct Compare { return lhs.begin < rhs.begin; } }; -} // Anonymous namespace -static u32 BranchOffset(Location pc, Instruction inst) { +u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static void Split(Block* old_block, Block* new_block, Location pc) { +void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } @@ -49,21 +48,19 @@ static void Split(Block* old_block, Block* new_block, Location pc) { .cond{old_block->cond}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, - .ir{nullptr}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, .stack{std::move(old_block->stack)}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{new_block}, .branch_false{nullptr}, - .ir{nullptr}, }; } -static Token OpcodeToken(Opcode opcode) { +Token OpcodeToken(Opcode opcode) { switch (opcode) { case Opcode::PBK: case Opcode::BRK: @@ -89,7 +86,7 @@ static Token OpcodeToken(Opcode opcode) { } } -static bool IsAbsoluteJump(Opcode opcode) { +bool IsAbsoluteJump(Opcode opcode) { switch (opcode) { case Opcode::JCAL: case Opcode::JMP: @@ -100,7 +97,7 @@ static bool IsAbsoluteJump(Opcode opcode) { } } -static bool HasFlowTest(Opcode opcode) { +bool HasFlowTest(Opcode opcode) { switch (opcode) { case Opcode::BRA: case Opcode::BRX: @@ -121,13 +118,14 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string NameOf(const Block& block) { +std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } } +} // Anonymous namespace void Stack::Push(Token token, Location target) { entries.push_back({ @@ -166,26 +164,24 @@ bool Block::Contains(Location pc) const noexcept { return pc >= begin && pc < end; } -Function::Function(Location start_address) +Function::Function(ObjectPool<Block>& block_pool, Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block{nullptr}, + .block{block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{true}, + .branch_true{nullptr}, + .branch_false{nullptr}, + })}, .stack{}, }} {} CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_} { - functions.emplace_back(start_address); - functions.back().labels.back().block = block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .stack{}, - .cond{IR::Condition{true}}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .ir{nullptr}, - }); + functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -308,11 +304,17 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist - if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.emplace_back(cal_pc); + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); } - // Handle CAL like a regular instruction - break; + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; } default: break; @@ -348,7 +350,6 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block}, .branch_false{nullptr}, - .ir{nullptr}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -401,16 +402,6 @@ void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } -void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { - const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - // Technically CAL pushes into PRET, but that's implicit in the function call for us - // Insert the function to the function list if it doesn't exist - const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; - if (it == functions.end()) { - functions.emplace_back(cal_pc); - } -} - CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; @@ -455,10 +446,9 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .end{pc}, .end_class{EndClass::Branch}, .stack{stack}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{nullptr}, .branch_false{nullptr}, - .ir{nullptr}, })}; function.labels.push_back(Label{ .address{pc}, @@ -495,6 +485,14 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; case EndClass::Exit: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 209c9e5510..fe74f210fb 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -20,16 +20,13 @@ #include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { -class Block; -} - namespace Shader::Maxwell::Flow { using FunctionId = size_t; enum class EndClass { Branch, + Call, Exit, Return, }; @@ -75,9 +72,14 @@ struct Block : boost::intrusive::set_base_hook< EndClass end_class; Stack stack; IR::Condition cond; - Block* branch_true; - Block* branch_false; - IR::Block* ir; + union { + Block* branch_true; + FunctionId function_call; + }; + union { + Block* branch_false; + Block* return_block; + }; }; struct Label { @@ -87,7 +89,7 @@ struct Label { }; struct Function { - Function(Location start_address); + explicit Function(ObjectPool<Block>& block_pool, Location start_address); Location entrypoint; boost::container::small_vector<Label, 16> labels; @@ -137,7 +139,6 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); - void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b270bbccdb..8bfa643268 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,67 +8,44 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { -namespace { -IR::BlockList TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::Function& cfg_function) { - const size_t num_blocks{cfg_function.blocks.size()}; - std::vector<IR::Block*> blocks(cfg_function.blocks.size()); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - const u32 begin{cfg_block.begin.Offset()}; - const u32 end{cfg_block.end.Offset()}; - blocks[i] = block_pool.Create(inst_pool, begin, end); - cfg_block.ir = blocks[i]; - ++i; - }); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - IR::Block* const block{blocks[i]}; - ++i; - if (cfg_block.end_class != Flow::EndClass::Branch) { - block->SetReturn(); - } else if (cfg_block.cond == IR::Condition{true}) { - block->SetBranch(cfg_block.branch_true->ir); - } else if (cfg_block.cond == IR::Condition{false}) { - block->SetBranch(cfg_block.branch_false->ir); - } else { - block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, - cfg_block.branch_false->ir); - } + +static void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const IR::BlockList& post_order{program.post_order_blocks}; + std::erase_if(program.blocks, [&](IR::Block* block) { + return std::ranges::find(post_order, block) == post_order.end(); }); - return IR::VisitAST(inst_pool, block_pool, blocks, - [&](IR::Block* block) { Translate(env, block); }); } -} // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - auto& functions{program.functions}; - functions.reserve(cfg.Functions().size()); - for (Flow::Function& cfg_function : cfg.Functions()) { - functions.push_back(IR::Function{ - .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, - .post_order_blocks{}, - }); - } + program.blocks = VisitAST(inst_pool, block_pool, env, cfg); + program.post_order_blocks = PostOrder(program.blocks); + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite Optimization::LowerFp16ToFp32(program); - for (IR::Function& function : functions) { - function.post_order_blocks = PostOrder(function.blocks); - Optimization::SsaRewritePass(function.post_order_blocks); - } + + Optimization::SsaRewritePass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - for (IR::Function& function : functions) { - Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); - Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); - Optimization::IdentityRemovalPass(function); - Optimization::VerificationPass(function); - } + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::IdentityRemovalPass(program); + Optimization::VerificationPass(program); Optimization::CollectShaderInfoPass(program); return program; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 0000000000..5f5d9cf173 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -0,0 +1,770 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <ranges> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, + // Allow using Statement without a definition + boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + IR::Block* code; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return "<invalid type>"; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(Flow::CFG& cfg, ObjectPool<IR::Inst>& inst_pool_, + ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool) + : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + { + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; + std::vector<Node> gotos; + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); + return gotos; + } + + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector<Node>& gotos, Node function_insert_point, + std::optional<Node> return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Tree& root{root_stmt.children}; + std::unordered_map<Flow::Block*, Node> local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + + const u32 begin_offset{block.begin.Offset()}; + const u32 end_offset{block.end.Offset()}; + IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; + root.insert(ip, *pool.Create(ir_block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{})); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool<IR::Inst>& inst_pool; + ObjectPool<IR::Block>& block_pool; + ObjectPool<Statement>& pool; + Statement root_stmt{FunctionTag{}}; +}; + +IR::Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, + ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + Tree& tree{parent.children}; + IR::Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IR::IREmitter{*current_block}.Branch(stmt.code); + } + current_block = stmt.code; + Translate(env, stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + IR::Block* const first_if_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*current_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IR::IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + IR::Block* const first_loop_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IR::IREmitter continue_ir{*new_continue_block}; + const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IR::IREmitter{*current_block}.Branch(continue_block); + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + if (IR::Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + IR::Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool<Statement>& stmt_pool; + ObjectPool<IR::Inst>& inst_pool; + ObjectPool<IR::Block>& block_pool; + Environment& env; + IR::BlockList& block_list; +}; +} // Anonymous namespace + +IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg) { + ObjectPool<Statement> stmt_pool{64}; + GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::BlockList block_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; + return block_list; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 0000000000..e4797291e2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <span> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c6253c40c1..45d6f5e060 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -62,7 +62,7 @@ public: void BRA(u64 insn); void BRK(u64 insn); void BRX(u64 insn); - void CAL(u64 insn); + void CAL(); void CCTL(u64 insn); void CCTLL(u64 insn); void CONT(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 01ecbb4cc7..92da5c7e83 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,8 +65,8 @@ void TranslatorVisitor::BRX(u64) { ThrowNotImplemented(Opcode::BRX); } -void TranslatorVisitor::CAL(u64) { - ThrowNotImplemented(Opcode::CAL); +void TranslatorVisitor::CAL() { + // CAL is a no-op } void TranslatorVisitor::CCTL(u64) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 70d75ad6cc..708b6b2672 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -296,11 +296,9 @@ void Visit(Info& info, IR::Inst& inst) { void CollectShaderInfoPass(IR::Program& program) { Info& info{program.info}; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - Visit(info, inst); - } + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); } } } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7ba9ebe9bb..a39db2bf12 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -371,9 +371,11 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } } // Anonymous namespace -void ConstantPropagationPass(IR::Block& block) { - for (IR::Inst& inst : block) { - ConstantPropagation(block, inst); +void ConstantPropagationPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + ConstantPropagation(*block, inst); + } } } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 132b2012a8..8ad59f42e0 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -10,12 +10,14 @@ namespace Shader::Optimization { -void DeadCodeEliminationPass(IR::Block& block) { +void DeadCodeEliminationPass(IR::Program& program) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. - for (IR::Inst& inst : block | std::views::reverse) { - if (!inst.HasUses() && !inst.MayHaveSideEffects()) { - inst.Invalidate(); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions() | std::views::reverse) { + if (!inst.HasUses() && !inst.MayHaveSideEffects()) { + inst.Invalidate(); + } } } } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 5d98d278e2..1faa1ec88b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,14 +351,12 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsGlobalMemory(inst)) { - continue; - } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); } } Info& info{program.info}; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 593efde398..8790b48f21 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -10,10 +10,10 @@ namespace Shader::Optimization { -void IdentityRemovalPass(IR::Function& function) { +void IdentityRemovalPass(IR::Program& program) { std::vector<IR::Inst*> to_invalidate; - for (IR::Block* const block : function.blocks) { + for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 14a5cb50f2..74acb8bb6b 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -77,11 +77,9 @@ IR::Opcode Replace(IR::Opcode op) { } // Anonymous namespace void LowerFp16ToFp32(IR::Program& program) { - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); - } + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); } } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 3b7e7306bb..5c1fc166ce 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,26 +8,18 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { -template <typename Func> -void PostOrderInvoke(Func&& func, IR::Function& function) { - for (const auto& block : function.post_order_blocks) { - func(*block); - } -} - void CollectShaderInfoPass(IR::Program& program); -void ConstantPropagationPass(IR::Block& block); -void DeadCodeEliminationPass(IR::Block& block); +void ConstantPropagationPass(IR::Program& program); +void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); -void IdentityRemovalPass(IR::Function& function); +void IdentityRemovalPass(IR::Program& program); void LowerFp16ToFp32(IR::Program& program); -void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); +void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); -void VerificationPass(const IR::Function& function); +void VerificationPass(const IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 19d35b1f83..f89fd51c8a 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,7 +23,6 @@ #include <boost/container/flat_set.hpp> #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" @@ -262,9 +261,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { } } // Anonymous namespace -void SsaRewritePass(std::span<IR::Block* const> post_order_blocks) { +void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { VisitBlock(pass, block); } } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index ec802e02c5..de9d633e28 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -164,14 +164,12 @@ private: void TexturePass(Environment& env, IR::Program& program) { TextureInstVector to_replace; - for (IR::Function& function : program.functions) { - for (IR::Block* const block : function.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (!IsTextureInstruction(inst)) { - continue; - } - to_replace.push_back(MakeInst(env, block, inst)); + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; } + to_replace.push_back(MakeInst(env, block, inst)); } } // Sort instructions to visit textures by constant buffer index, then by offset diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 32b56eb57c..4080b37cca 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -11,8 +11,8 @@ namespace Shader::Optimization { -static void ValidateTypes(const IR::Function& function) { - for (const auto& block : function.blocks) { +static void ValidateTypes(const IR::Program& program) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { if (inst.Opcode() == IR::Opcode::Phi) { // Skip validation on phi nodes @@ -30,9 +30,9 @@ static void ValidateTypes(const IR::Function& function) { } } -static void ValidateUses(const IR::Function& function) { +static void ValidateUses(const IR::Program& program) { std::map<IR::Inst*, int> actual_uses; - for (const auto& block : function.blocks) { + for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { @@ -45,14 +45,14 @@ static void ValidateUses(const IR::Function& function) { } for (const auto [inst, uses] : actual_uses) { if (inst->UseCount() != uses) { - throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/); + throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); } } } -void VerificationPass(const IR::Function& function) { - ValidateTypes(function); - ValidateUses(function); +void VerificationPass(const IR::Program& program) { + ValidateTypes(program); + ValidateUses(program); } } // namespace Shader::Optimization -- cgit v1.2.3-70-g09d2 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/spirv/emit_context.cpp | 64 ++- src/shader_recompiler/backend/spirv/emit_context.h | 18 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 44 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 18 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 55 ++- .../backend/spirv/emit_spirv_control_flow.cpp | 23 +- src/shader_recompiler/environment.h | 14 + src/shader_recompiler/frontend/ir/attribute.cpp | 2 +- src/shader_recompiler/frontend/ir/attribute.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 + src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 11 +- src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/ir/reg.h | 4 +- .../frontend/maxwell/control_flow.cpp | 31 +- .../frontend/maxwell/control_flow.h | 3 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 18 + .../frontend/maxwell/translate/impl/exit.cpp | 15 - .../maxwell/translate/impl/exit_program.cpp | 43 ++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++- .../maxwell/translate/impl/not_implemented.cpp | 16 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 60 ++- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/program_header.h | 143 +++++++ src/shader_recompiler/recompiler.cpp | 28 -- src/shader_recompiler/recompiler.h | 20 - src/shader_recompiler/shader_info.h | 10 + src/shader_recompiler/stage.h | 19 + src/video_core/CMakeLists.txt | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 24 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 162 ++++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++-------- .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 +++++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 346 ++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 82 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 47 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 +++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +--- src/video_core/renderer_vulkan/vk_texture_cache.h | 29 +- src/video_core/vulkan_common/vulkan_device.cpp | 15 + 54 files changed, 1927 insertions(+), 566 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp create mode 100644 src/shader_recompiler/program_header.h delete mode 100644 src/shader_recompiler/recompiler.cpp delete mode 100644 src/shader_recompiler/recompiler.h create mode 100644 src/shader_recompiler/stage.h create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b870e99378..31c3941064 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h + program_header.h profile.h - recompiler.cpp - recompiler.h shader_info.h ) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 204389d749..6c79b611bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineSpecialVariables(program.info); - - u32 binding{}; + DefineInterfaces(program.info, program.stage); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); - DefineLabels(program); } @@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) { return Constant(F32[1], value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); + case IR::Type::Label: + return value.Label()->Definition<Id>(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + if (info.uses_int8) { AddCapability(spv::Capability::Int8); U8 = Name(TypeInt(8, false), "u8"); @@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineSpecialVariables(const Info& info) { - const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; - Decorate(id, spv::Decoration::BuiltIn, builtin); - return id; - }}; +void EmitContext::DefineInterfaces(const Info& info, Stage stage) { + const auto define{ + [this](Id type, std::optional<spv::BuiltIn> builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + interfaces.push_back(id); + return id; + }}; using namespace std::placeholders; const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; if (info.uses_workgroup_id) { workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); @@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = define_input(F32[4], built_in); + } + for (size_t i = 0; i < info.loads_generics.size(); ++i) { + if (info.loads_generics[i]) { + // FIXME: Declare size from input + input_generics[i] = define_input(F32[4], std::nullopt); + Decorate(input_generics[i], spv::Decoration::Location, static_cast<u32>(i)); + Name(input_generics[i], fmt::format("in_attr{}", i)); + } + } + if (info.stores_position) { + output_position = define_output(F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = define_output(F32[4], std::nullopt); + Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = define_output(F32[4], std::nullopt); + Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 35eca258a8..2d7961ac3b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -46,7 +46,7 @@ struct UniformDefinitions { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -71,6 +71,9 @@ public: UniformDefinitions uniform_types; + Id input_f32{}; + Id output_f32{}; + Id storage_u32{}; std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; @@ -80,10 +83,21 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id input_position{}; + std::array<Id, 32> input_generics{}; + + Id output_position{}; + std::array<Id, 32> output_generics{}; + + std::array<Id, 8> frag_color{}; + Id frag_depth {}; + + std::vector<Id> interfaces; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineSpecialVariables(const Info& info); + void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50c0f72437..b8978b94a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.U32(); } else if constexpr (std::is_same_v<ArgType, IR::Block*>) { return arg.Label(); + } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { + return arg.Attribute(); } } @@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{profile, program}; +std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, + u32& binding) { + EmitContext ctx{profile, program, binding}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { @@ -208,28 +211,41 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program } } ctx.OpFunctionEnd(); - boost::container::small_vector<Id, 32> interfaces; - const Info& info{program.info}; - if (info.uses_workgroup_id) { - interfaces.push_back(ctx.workgroup_id); + + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array<u32, 3> workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; } - if (info.uses_local_invocation_id) { - interfaces.push_back(ctx.local_invocation_id); + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); - - const std::array<u32, 3> workgroup_size{env.WorkgroupSize()}; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], - workgroup_size[2]); + ctx.AddEntryPoint(execution_model, func, "main", interfaces); SetupDenormControl(profile, program, ctx, func); + const Info& info{program.info}; if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { ctx.AddCapability(spv::Capability::SparseResidency); } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89566c83d0..ae121f5344 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,18 +16,18 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program); + IR::Program& program, u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, IR::Block* label); -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); @@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx); -void EmitSetAttribute(EmitContext& ctx); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitGetAttributeIndexed(EmitContext& ctx); void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 125b58cf74..02d1157403 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -5,6 +5,43 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast<u32>(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast<u32>(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} +} // Anonymous namespace void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); @@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } -void EmitGetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } -void EmitSetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { + ctx.OpStore(OutputAttrPointer(ctx, attr), value); } void EmitGetAttributeIndexed(EmitContext&) { @@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 48755b8278..6b81f01694 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,26 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, IR::Block* label) { - ctx.OpBranch(label->Definition<Id>()); +void EmitBranch(EmitContext& ctx, Id label) { + ctx.OpBranch(label); } -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { - ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>()); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { + ctx.OpBranchConditional(condition, true_label, false_label); } -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { - ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(), - spv::LoopControlMask::MaskNone); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { + ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); } -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { - ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { + ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); } void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(continue_label); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0fcb68050d..1fcaa56dda 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,6 +3,8 @@ #include <array> #include "common/common_types.h" +#include "shader_recompiler/stage.h" +#include "shader_recompiler/program_header.h" namespace Shader { @@ -15,6 +17,18 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + +protected: + ProgramHeader sph{}; + Stage stage{}; }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 2fb7d576ff..4811242ea0 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept { return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; } -int GenericAttributeIndex(Attribute attribute) { +u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index bb2cad6afd..34ec7e0cd0 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -224,7 +224,7 @@ enum class Attribute : u64 { [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; -[[nodiscard]] int GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 958282160d..672836c0b7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,12 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::DemoteToHelperInvocation(Block* continue_label) { + block->SetBranch(continue_label); + continue_label->AddImmediatePredecessor(block); + Inst(Opcode::DemoteToHelperInvocation, continue_label); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst<U32>(Opcode::GetRegister, reg); } @@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + U32 IREmitter::WorkgroupIdX() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 05263fe8b4..72af5db377 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -36,6 +36,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void DemoteToHelperInvocation(Block* continue_label); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -67,6 +68,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); + void SetFragDepth(const F32& value); + [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5946105d2f..21b7d8a9f4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::DemoteToHelperInvocation: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetFragColor: + case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: case Opcode::WriteGlobalU16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9052a4903d..593faca528 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(DemoteToHelperInvocation, Void, Label, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) @@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU64, U64, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, F32, ) +OPCODE(GetAttributeIndexed, F32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index bce8b19b3a..733513c8b1 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" namespace Shader::IR { @@ -17,6 +18,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + Stage stage{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 8fea05f7be..3845ec5fb2 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } -[[nodiscard]] constexpr Reg operator++(Reg& reg) { +constexpr Reg operator++(Reg& reg) { reg = reg + 1; return reg; } -[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { +constexpr Reg operator++(Reg& reg, int) { const Reg copy{reg}; reg = reg + 1; return copy; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d8..4f6707fae4 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast<IR::Pred>(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210fb..22f1341944 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa643268..0074eb89b6 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo IR::Program program; program.blocks = VisitAST(inst_pool, block_pool, env, cfg); program.post_order_blocks = PostOrder(program.blocks); + program.stage = env.ShaderStage(); RemoveUnreachableBlocks(program); // Replace instructions before the SSA rewrite diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 5f5d9cf173..cec03e73ee 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -45,6 +45,7 @@ enum class StatementType { Loop, Break, Return, + Kill, Function, Identity, Not, @@ -70,6 +71,7 @@ struct If {}; struct Loop {}; struct Break {}; struct Return {}; +struct Kill {}; struct FunctionTag {}; struct Identity {}; struct Not {}; @@ -93,6 +95,7 @@ struct Statement : ListBaseHook { Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} + Statement(Kill) : type{StatementType::Kill} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -174,6 +177,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Return: ret += fmt::format("{} return;\n", indent); break; + case StatementType::Kill: + ret += fmt::format("{} kill;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d18..0000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 0000000000..ea9b33da9b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b6..ed81d9c369 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4e..2922145eec 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef541..59252bcc50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c648..0fbb87ec43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b007..54f0df7547 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b2672..fbbe286322 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { return; } info.constant_buffer_mask |= 1U << index; - info.constant_buffer_descriptors.push_back({ - .index{index}, - .count{1}, - }); + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void GetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.loads_position = true; + break; + default: + throw NotImplementedException("Get attribute {}", attribute); + } +} + +void SetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.stores_position = true; + break; + default: + throw NotImplementedException("Set attribute {}", attribute); + } } void VisitUsages(Info& info, IR::Inst& inst) { @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + GetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetAttribute: + SetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; break; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec366..bab7ca1868 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -169,7 +169,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op == same || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { // Unique value or self-reference continue; } diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 0000000000..1544bfa427 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,143 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <optional> + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + } ps; + + std::array<u32, 0xf> raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp deleted file mode 100644 index 527e19c272..0000000000 --- a/src/shader_recompiler/recompiler.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <vector> - -#include "common/common_types.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/object_pool.h" -#include "shader_recompiler/recompiler.h" - -namespace Shader { - -std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, - u32 start_address) { - ObjectPool<Maxwell::Flow::Block> flow_block_pool; - ObjectPool<IR::Inst> inst_pool; - ObjectPool<IR::Block> block_pool; - - Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; - IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; -} - -} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h deleted file mode 100644 index 2529463aec..0000000000 --- a/src/shader_recompiler/recompiler.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <utility> -#include <vector> - -#include "common/common_types.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/shader_info.h" - -namespace Shader { - -[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, - Environment& env, u32 start_address); - -} // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index adc1d9a64a..6eff762e2c 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -56,6 +56,15 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + + std::array<bool, 32> loads_generics{}; + bool loads_position{}; + + std::array<bool, 8> stores_frag_color{}; + bool stores_frag_depth{}; + std::array<bool, 32> stores_generics{}; + bool stores_position{}; + bool uses_fp16{}; bool uses_fp64{}; bool uses_fp16_denorms_flush{}; @@ -68,6 +77,7 @@ struct Info { bool uses_image_1d{}; bool uses_sampled_1d{}; bool uses_sparse_residency{}; + bool uses_demote_to_helper_invocation{}; IR::Type used_constant_buffer_types{}; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 0000000000..fc6ce60435 --- /dev/null +++ b/src/shader_recompiler/stage.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +enum class Stage { + Compute, + VertexA, + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, +}; + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e69169..71b07c1940 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -116,15 +117,18 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f015..d8f6839072 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast<u32>(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast<u8>(rt.format); }); alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); point_size = Common::BitCast<u32>(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68d..348f1d6ce6 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array<bool, 4> Mask() const noexcept { + std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array<u8, Maxwell::NumRenderTargets> color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e94..dc4ff0da2b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38a..9f78e15b6a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 0000000000..0a59aa6593 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,162 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +#include <boost/container/small_vector.hpp> + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + [[likely]] if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +struct DescriptorLayoutTuple { + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; +}; + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutTuple Create(const vk::Device& device) { + DescriptorLayoutTuple result; + if (!bindings.empty()) { + result.descriptor_set_layout = device.CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); + } + result.pipeline_layout = device.CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + if (!entries.empty()) { + result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *result.descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *result.pipeline_layout, + .set = 0, + }); + } + return result; + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); + } + } + +private: + void Add(VkDescriptorType type, VkShaderStageFlags stage) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + + boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; + boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; + u32 binding{}; + size_t offset{}; +}; + +inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); + return {}; +} + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, + const ImageId* image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { + for (const auto& desc : info.texture_descriptors) { + const VkSampler sampler{samplers[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + ++index; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ffc..6684d37a6e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include <boost/container/small_vector.hpp> +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array<ImageId, max_elements> image_view_ids; boost::container::static_vector<u32, max_elements> image_view_indices; - boost::container::static_vector<VkSampler, max_elements> sampler_handles; + boost::container::static_vector<VkSampler, max_elements> samplers; const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4b..e82e5816b8 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 0000000000..a2ec418b12 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <span> + +#include <boost/container/small_vector.hpp> +#include <boost/container/static_vector.hpp> + +#include "common/bit_field.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +DescriptorLayoutTuple CreateLayout(const Device& device, std::span<const Shader::Info> infos) { + DescriptorLayoutBuilder builder; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder.Create(device.GetLogical()); +} + +template <class StencilFace> +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; +} + +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { + static constexpr std::array unsupported_topologies{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { + union Swizzle { + u32 raw; + BitField<0, 3, Maxwell::ViewportSwizzle> x; + BitField<4, 3, Maxwell::ViewportSwizzle> y; + BitField<8, 3, Maxwell::ViewportSwizzle> z; + BitField<12, 3, Maxwell::ViewportSwizzle> w; + }; + const Swizzle unpacked{swizzle}; + return VkViewportSwizzleNV{ + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; +} + +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; + } + return PixelFormatFromRenderTargetFormat(format); +} + +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array<ImageId, max_images_elements> image_view_ids; + static_vector<u32, max_images_elements> image_view_indices; + static_vector<VkSampler, max_images_elements> samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read<u32>(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = state.dynamic_state; + } + static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; + static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast<u32>(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast<u32>(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast<u32>(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast<VkCullModeFlags>( + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast<u32>(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector<VkDynamicState, 17> dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast<u32>(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 0000000000..ba1d34a837 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array<vk::ShaderModule, NUM_STAGES> stages, + const std::array<const Shader::Info*, NUM_STAGES>& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array<vk::ShaderModule, NUM_STAGES> spv_modules; + std::array<Shader::Info, NUM_STAGES> stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b062884035..0000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903b..c9da2080d4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional<u128> Analyze(u32 start_address) { + std::optional<u128> Analyze(u32 start_address) { const std::optional<u64> size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique<u64[]>(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read<u64>(program_base + address); - } - - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array<u32, 3> WorkgroupSize() override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + return gpu_memory->Read<u64>(program_base + address); } -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional<u64> TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional<u64> TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector<u64> code; - u32 read_lowest = 0; + u32 read_lowest = std::numeric_limits<u32>::max(); u32 read_highest = 0; - std::vector<u64> code; u32 cached_lowest = std::numeric_limits<u32>::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array<u32, 3> WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + std::array<u32, 3> WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); + return static_cast<size_t>(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - shader->compute_users.push_back(key); return &pipeline; } +bool PipelineCache::RefreshStages() { + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + graphics_key.unique_hashes[index] = u128{}; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast<Maxwell::ShaderProgram>(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 offset{shader_config.offset}; + shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + } + graphics_key.unique_hashes[index] = shader_info->unique_hash; + } + return true; +} + +const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr) { + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + auto info = std::make_unique<ShaderInfo>(); + if (const std::optional<u128> cached_hash{env.Analyze(start_address)}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + flow_block_pool.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> envs; + std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast<Maxwell::ShaderProgram>(index)}; + GraphicsEnvironment& env{envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + + const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); + Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); + programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + } + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; + std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; + + u32 binding{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + GraphicsEnvironment& env{envs[index]}; + Shader::IR::Program& program{programs[index]}; + + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, + graphics_key.state, std::move(modules), infos); +} + ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - Environment env{kepler_compute, gpu_memory, program_base}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto& float_control{device.FloatControlProperties()}; - const Shader::Profile profile{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel - }; - const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; + Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + u32 binding{0}; + std::vector<u32> code{EmitSPIRV(profile, env, program, binding)}; /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); */ - shader_info->unique_hash = env.ComputeHash(); - shader_info->size_bytes = env.ShaderSize(); - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + shader_info->unique_hash = env.CalculateHash(); + shader_info->size_bytes = env.ReadSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ ShaderInfo shader; ComputePipeline pipeline{CreateComputePipeline(&shader)}; const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - shader.compute_users.push_back(key); - pipeline.AddRef(); - const size_t size_bytes{shader.size_bytes}; Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); return &compute_cache.emplace(key, std::move(pipeline)).first->second; @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) }; } -void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { - for (const ComputePipelineCacheKey& key : shader->compute_users) { - const auto it = compute_cache.find(key); - ASSERT(it != compute_cache.end()); - - Pipeline& pipeline = it->second; - if (pipeline.RemoveRef()) { - // Wait for the pipeline to be free of GPU usage before destroying it - scheduler.Wait(pipeline.UsageTick()); - compute_cache.erase(it); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27f..60fb976dfa 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -12,11 +12,18 @@ #include <utility> #include <vector> -#include <boost/functional/hash.hpp> - #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,13 +33,6 @@ class System; namespace Vulkan { -class Device; -class RasterizerVulkan; -class ComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>) static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>); static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); +struct GraphicsPipelineCacheKey { + std::array<u128, 6> unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); + } // namespace Vulkan namespace std { @@ -63,14 +83,28 @@ struct hash<Vulkan::ComputePipelineCacheKey> { } }; +template <> +struct hash<Vulkan::GraphicsPipelineCacheKey> { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + } // namespace std namespace Vulkan { +class ComputePipeline; +class Device; +class RasterizerVulkan; +class RenderPassCache; +class VKDescriptorPool; +class VKScheduler; +class VKUpdateDescriptorQueue; + struct ShaderInfo { u128 unique_hash{}; size_t size_bytes{}; - std::vector<ComputePipelineCacheKey> compute_users; }; class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { @@ -80,15 +114,23 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache); ~PipelineCache() override; - [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); -protected: - void OnShaderRemoval(ShaderInfo* shader) override; + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + bool RefreshStages(); + + const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr); + + GraphicsPipeline CreateGraphicsPipeline(); + ComputePipeline CreateComputePipeline(ShaderInfo* shader); ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); @@ -104,8 +146,20 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + + GraphicsPipelineCacheKey graphics_key{}; std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; + std::unordered_map<GraphicsPipelineCacheKey, GraphicsPipeline> graphics_cache; + + Shader::ObjectPool<Shader::IR::Inst> inst_pool; + Shader::ObjectPool<Shader::IR::Block> block_pool; + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block_pool; + + Shader::Profile profile; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29c..036b531b92 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } + update_descriptor_queue.Acquire(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); + + BeginTransformFeedback(); + + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + UpdateDynamicStates(); + + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b9155..88dbd753b0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 0000000000..7e5ae43ea9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> + +#include <boost/container/static_vector.hpp> + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector<VkAttachmentDescription, 9> descriptions; + u32 num_images{0}; + + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + if (format == PixelFormat::Invalid) { + continue; + } + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + ++num_images; + } + const size_t num_colors{descriptions.size()}; + const VkAttachmentReference* depth_attachment{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(num_colors), + .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 0000000000..db8e83f1aa --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array<VideoCore::Surface::PixelFormat, 8> color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash<Vulkan::RenderPassKey> { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast<size_t>(key.depth_format) << 48; + value ^= static_cast<size_t>(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + + class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map<RenderPassKey, vk::RenderPass> cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f51..1bbc542a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { if (color == std::array<float, 4>{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector<VkAttachmentDescription> descriptions; std::vector<VkImageView> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } - descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM ++num_images; } const size_t num_colors = attachments.size(); - const VkAttachmentReference* depth_attachment = - depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; if (depth_buffer) { - descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM } renderpass_key.samples = samples; - const auto& device = runtime.device.GetLogical(); - const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); - if (is_new) { - const VkSubpassDescription subpass{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast<u32>(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast<u32>(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast<u32>(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98f..189ee5a68e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -26,35 +26,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array<PixelFormat, NUM_RT> color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash<Vulkan::RenderPassKey> { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast<size_t>(key.depth_format) << 48; - value ^= static_cast<size_t>(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,7 +37,7 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4887d6fd9a..f0e5b098c0 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8 = { @@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &demote; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3-70-g09d2 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 <fsahmkow27@gmail.com> Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- src/shader_recompiler/CMakeLists.txt | 4 + src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_context_get_set.cpp | 10 +- .../backend/spirv/emit_spirv_control_flow.cpp | 4 + src/shader_recompiler/environment.h | 2 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../frontend/maxwell/control_flow.cpp | 58 +++++++++-- .../frontend/maxwell/control_flow.h | 7 +- .../maxwell/indirect_branch_table_track.cpp | 108 +++++++++++++++++++++ .../frontend/maxwell/indirect_branch_table_track.h | 28 ++++++ .../frontend/maxwell/instruction.h | 1 + .../frontend/maxwell/structured_control_flow.cpp | 57 +++++++++++ .../maxwell/translate/impl/branch_indirect.cpp | 36 +++++++ .../maxwell/translate/impl/load_constant.cpp | 29 +----- .../maxwell/translate/impl/load_constant.h | 39 ++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 -- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 21 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++- 21 files changed, 437 insertions(+), 48 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 003cbefb14..44ab929b79 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/control_flow.h frontend/maxwell/decode.cpp frontend/maxwell/decode.h + frontend/maxwell/indirect_branch_table_track.cpp + frontend/maxwell/indirect_branch_table_track.h frontend/maxwell/instruction.h frontend/maxwell/location.h frontend/maxwell/maxwell.inc @@ -63,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp + frontend/maxwell/translate/impl/branch_indirect.cpp frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/common_funcs.cpp frontend/maxwell/translate/impl/common_funcs.h @@ -110,6 +113,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp frontend/maxwell/translate/impl/load_constant.cpp + frontend/maxwell/translate/impl/load_constant.h frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_local_shared.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 204c5f9e0e..02648d769c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -26,6 +26,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); @@ -35,6 +36,8 @@ void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 52dcef8a42..4a267b16c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,8 +6,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { struct AttrInfo { @@ -74,6 +72,14 @@ void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6b81f01694..335603f881 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitUnreachable(EmitContext& ctx) { + ctx.OpUnreachable(); +} + void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { ctx.OpDemoteToHelperInvocationEXT(); ctx.OpBranch(continue_label); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 9415d02f66..1c50ae51e2 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -15,6 +15,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9b898e4e1d..5524724878 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -87,6 +87,10 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::Unreachable() { + Inst(Opcode::Unreachable); +} + void IREmitter::DemoteToHelperInvocation(Block* continue_label) { block->SetBranch(continue_label); continue_label->AddImmediatePredecessor(block); @@ -126,6 +130,14 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) { Inst(Opcode::SetGotoVariable, id, value); } +U32 IREmitter::GetIndirectBranchVariable() { + return Inst<U32>(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 269f367a45..17bc32fc83 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -37,6 +37,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void Unreachable(); void DemoteToHelperInvocation(Block* continue_label); void Prologue(); @@ -51,6 +52,9 @@ public: [[nodiscard]] U1 GetGotoVariable(u32 id); void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 52a5e50349..c3ba6b5222 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,6 +55,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9b050995bd..fb79e3d8dc 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) // Special operations @@ -26,6 +27,8 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 4f6707fae4..1e9b8e4260 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -14,6 +14,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { @@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Opcode opcode{Decode(inst.raw)}; switch (opcode) { case Opcode::BRA: - case Opcode::BRX: case Opcode::JMP: - case Opcode::JMX: case Opcode::RET: if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; @@ -264,10 +263,6 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::JMP: AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); break; - case Opcode::BRX: - case Opcode::JMX: - AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); - break; case Opcode::RET: block->end_class = EndClass::Return; break; @@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::SSY: block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; + case Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); case Opcode::PRET: @@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { - throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, block->begin); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector<u32> targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += brx_table->branch_offset; + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back(branch); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; } CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, @@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function // Block already exists and it has been visited return &*it; } - // TODO: FIX DANGLING BLOCKS Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, @@ -494,6 +529,11 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::IndirectBranch: + for (Block* const branch : block.indirect_branches) { + add_branch(branch, false); + } + break; case EndClass::Call: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 22f1341944..1e05fcb97c 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -26,6 +26,7 @@ using FunctionId = size_t; enum class EndClass { Branch, + IndirectBranch, Call, Exit, Return, @@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook< union { Block* branch_true; FunctionId function_call; + IR::Reg branch_reg; }; union { Block* branch_false; Block* return_block; + s32 branch_offset; }; + std::vector<Block*> indirect_branches; }; struct Label { @@ -139,7 +143,8 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 0000000000..96453509d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template <typename Callable> +std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast<u32>(ldc.index)}; + const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index{cbuf_index}, + .cbuf_offset{cbuf_offset}, + .num_entries{imnmx_immediate + 1}, + .branch_offset{brx_offset}, + .branch_reg{brx_reg}, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 0000000000..eee5102fa6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h index 57fd531f2b..743d68d615 100644 --- a/src/shader_recompiler/frontend/maxwell/instruction.h +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 9d46883902..a6e55f61ed 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" @@ -46,12 +47,15 @@ enum class StatementType { Break, Return, Kill, + Unreachable, Function, Identity, Not, Or, SetVariable, + SetIndirectBranchVariable, Variable, + IndirectBranchCond, }; bool HasChildren(StatementType type) { @@ -72,12 +76,15 @@ struct Loop {}; struct Break {}; struct Return {}; struct Kill {}; +struct Unreachable {}; struct FunctionTag {}; struct Identity {}; struct Not {}; struct Or {}; struct SetVariable {}; +struct SetIndirectBranchVariable {}; struct Variable {}; +struct IndirectBranchCond {}; #ifdef _MSC_VER #pragma warning(push) @@ -96,6 +103,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(Kill) : type{StatementType::Kill} {} + Statement(Unreachable) : type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -103,7 +111,12 @@ struct Statement : ListBaseHook { : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_) + : location{location_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -118,11 +131,14 @@ struct Statement : ListBaseHook { IR::Condition guest_cond; Statement* op; Statement* op_a; + u32 location; + s32 branch_offset; }; union { Statement* cond; Statement* op_b; u32 id; + IR::Reg branch_reg; }; Statement* up{}; StatementType type; @@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) { return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); case StatementType::Variable: return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); default: return "<invalid type>"; } @@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Kill: ret += fmt::format("{} kill;\n", indent); break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; case StatementType::Function: case StatementType::Identity: case StatementType::Not: case StatementType::Or: case StatementType::Variable: + case StatementType::IndirectBranchCond: throw LogicError("Statement can't be printed"); } } @@ -417,6 +443,17 @@ private: } break; } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset)); + for (Flow::Block* const branch : block.indirect_branches) { + const Node indirect_label{local_labels.at(branch)}; + Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{})); + break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; const Node call_return_label{local_labels.at(block.return_block)}; @@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) { return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); default: throw NotImplementedException("Statement type {}", stmt.type); } @@ -670,6 +709,15 @@ private: ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } + case StatementType::SetIndirectBranchVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } case StatementType::If: { if (!current_block) { current_block = block_pool.Create(inst_pool); @@ -756,6 +804,15 @@ private: current_block = demote_block; break; } + case StatementType::Unreachable: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Unreachable(); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 0000000000..371c0e0f74 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 39becf93c2..49ccb7d62a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -5,25 +5,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" namespace Shader::Maxwell { +using namespace LDC; namespace { -enum class Mode : u64 { - Default, - IL, - IS, - ISL, -}; - -enum class Size : u64 { - U8, - S8, - U16, - S16, - B32, - B64, -}; - std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, const IR::U32& reg, const IR::U32& imm) { switch (mode) { @@ -37,16 +23,7 @@ std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im } // Anonymous namespace void TranslatorVisitor::LDC(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_reg; - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - BitField<44, 2, Mode> mode; - BitField<48, 3, Size> size; - } const ldc{insn}; - + const Encoding ldc{insn}; const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; const IR::U32 reg{X(ldc.src_reg)}; const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 0000000000..3074ea0e3d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b62d8ee2aa..a0057a4739 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) { ThrowNotImplemented(Opcode::BRK); } -void TranslatorVisitor::BRX(u64) { - ThrowNotImplemented(Opcode::BRX); -} - void TranslatorVisitor::CAL() { // CAL is a no-op } @@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) { ThrowNotImplemented(Opcode::JMP); } -void TranslatorVisitor::JMX(u64) { - ThrowNotImplemented(Opcode::JMX); -} - void TranslatorVisitor::KIL() { // KIL is a no-op } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bab7ca1868..2592337461 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,8 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, - OverflowFlagTag, GotoVariable>; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; struct DefTable { @@ -65,6 +69,10 @@ struct DefTable { return goto_vars[goto_variable.index]; } + [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { + return indirect_branch_var; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -84,6 +92,7 @@ struct DefTable { std::array<ValueMap, IR::NUM_USER_REGS> regs; std::array<ValueMap, IR::NUM_USER_PREDS> preds; boost::container::flat_map<u32, ValueMap> goto_vars; + ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -102,6 +111,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { return inst.Opcode() == IR::Opcode::Phi; } @@ -219,6 +232,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -244,6 +260,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8b2816c131..6cde014912 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { } u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast<u64>(index) << 32) | static_cast<u64>(offset); + return (static_cast<u64>(index) << 32) | offset; } class GenericEnvironment : public Shader::Environment { @@ -114,11 +114,13 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast<u64>(texture_types.size())}; + const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) @@ -130,6 +132,10 @@ public: file.write(reinterpret_cast<const char*>(&key), sizeof(key)) .write(reinterpret_cast<const char*>(&type), sizeof(type)); } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array<u32, 3> workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -212,6 +218,7 @@ protected: std::vector<u64> code; std::unordered_map<u64, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; u32 read_lowest = std::numeric_limits<u32>::max(); u32 read_highest = 0; @@ -267,6 +274,17 @@ public: ~GraphicsEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{maxwell3d->regs}; const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; @@ -312,6 +330,18 @@ public: ~ComputeEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; @@ -386,8 +416,10 @@ public: void Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_cbuf_values{}; file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) @@ -403,6 +435,13 @@ public: .read(reinterpret_cast<char*>(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); @@ -418,6 +457,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; if (it == texture_types.end()) { @@ -445,6 +492,7 @@ public: private: std::unique_ptr<u64[]> code; std::unordered_map<u64, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; std::array<u32, 3> workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{}; -- cgit v1.2.3-70-g09d2 From b0d5572abfe1f14e02d8219f0a4d7dd09ff36fd1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Mon, 29 Mar 2021 22:13:37 -0300 Subject: shader: Fix indirect branches to scheduler instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 ++++++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 9 ++++++++- .../frontend/maxwell/structured_control_flow.cpp | 6 +++--- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1e9b8e4260..784f9df8a5 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -434,7 +434,10 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, block->indirect_branches.reserve(targets.size()); for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; - block->indirect_branches.push_back(branch); + block->indirect_branches.push_back({ + .block{branch}, + .address{target}, + }); } block->cond = IR::Condition{true}; block->end = pc + 1; @@ -530,8 +533,8 @@ std::string CFG::Dot() const { } break; case EndClass::IndirectBranch: - for (Block* const branch : block.indirect_branches) { - add_branch(branch, false); + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); } break; case EndClass::Call: diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 1e05fcb97c..a8c90d27ad 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -22,6 +22,8 @@ namespace Shader::Maxwell::Flow { +struct Block; + using FunctionId = size_t; enum class EndClass { @@ -60,6 +62,11 @@ private: boost::container::small_vector<StackEntry, 3> entries; }; +struct IndirectBranch { + Block* block; + u32 address; +}; + struct Block : boost::intrusive::set_base_hook< // Normal link is ~2.5% faster compared to safe link boost::intrusive::link_mode<boost::intrusive::normal_link>> { @@ -84,7 +91,7 @@ struct Block : boost::intrusive::set_base_hook< Block* return_block; s32 branch_offset; }; - std::vector<Block*> indirect_branches; + std::vector<IndirectBranch> indirect_branches; }; struct Label { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index a6e55f61ed..c804c2a8e9 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -446,9 +446,9 @@ private: case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, block.branch_offset)); - for (Flow::Block* const branch : block.indirect_branches) { - const Node indirect_label{local_labels.at(branch)}; - Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } -- cgit v1.2.3-70-g09d2 From b4a5e767d0a60d44c77460bd3a4062c5f69fb6c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Fri, 2 Apr 2021 01:17:47 -0300 Subject: shader: Fix branches to visited virtual blocks --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 ++ src/shader_recompiler/frontend/maxwell/control_flow.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 9dc769307d..b8e3b8527f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -56,6 +56,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.Label(); } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { return arg.Attribute(); + } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { + return arg.Reg(); } } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 784f9df8a5..ac8707847f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -486,6 +486,16 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node impersonated with its virtual node. + const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } return &*it; } Block* const new_block{block_pool.Create(Block{ -- cgit v1.2.3-70-g09d2 From 9e6fe430bdc615ae5f7cc4fbc32d7e2baccd7ceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 4 Apr 2021 20:00:34 -0300 Subject: shader: Fix splits on blocks using indirect branches --- .../frontend/maxwell/control_flow.cpp | 35 ++++++++++++++++++---- .../frontend/maxwell/control_flow.h | 18 +++++------ src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- 3 files changed, 38 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index ac8707847f..eb0f7c8d19 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -45,19 +45,29 @@ void Split(Block* old_block, Block* new_block, Location pc) { .begin{pc}, .end{old_block->end}, .end_class{old_block->end_class}, - .stack{old_block->stack}, .cond{old_block->cond}, + .stack{old_block->stack}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, + .function_call{old_block->function_call}, + .return_block{old_block->return_block}, + .branch_reg{old_block->branch_reg}, + .branch_offset{old_block->branch_offset}, + .indirect_branches{std::move(old_block->indirect_branches)}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, - .stack{std::move(old_block->stack)}, .cond{true}, + .stack{std::move(old_block->stack)}, .branch_true{new_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; } @@ -173,10 +183,15 @@ Function::Function(ObjectPool<Block>& block_pool, Location start_address) .begin{start_address}, .end{start_address}, .end_class{EndClass::Branch}, - .stack{}, .cond{true}, + .stack{}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}, .stack{}, }} {} @@ -351,10 +366,15 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .begin{block->begin.Virtual()}, .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .stack{block->stack}, .cond{cond}, + .stack{block->stack}, .branch_true{conditional_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -502,10 +522,15 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .begin{pc}, .end{pc}, .end_class{EndClass::Branch}, - .stack{stack}, .cond{true}, + .stack{stack}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}; function.labels.push_back(Label{ .address{pc}, diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index a8c90d27ad..466b141983 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -79,18 +79,14 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; EndClass end_class; - Stack stack; IR::Condition cond; - union { - Block* branch_true; - FunctionId function_call; - IR::Reg branch_reg; - }; - union { - Block* branch_false; - Block* return_block; - s32 branch_offset; - }; + Stack stack; + Block* branch_true; + Block* branch_false; + FunctionId function_call; + Block* return_block; + IR::Reg branch_reg; + s32 branch_offset; std::vector<IndirectBranch> indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 05b7591bcb..58caa35a13 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -21,7 +21,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { if (program.blocks.size() == program.post_order_blocks.size()) { return; } - const auto begin{std::next(program.blocks.begin())}; + const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); -- cgit v1.2.3-70-g09d2 From 20ba0ea0a94fa915cad6392b3742d8e58e2fa0d9 Mon Sep 17 00:00:00 2001 From: FernandoS27 <fsahmkow27@gmail.com> Date: Tue, 6 Apr 2021 02:01:01 +0200 Subject: shader: Fix BRX tracking --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 6 +++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index eb0f7c8d19..1a4ee4f6cb 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -197,7 +197,7 @@ Function::Function(ObjectPool<Block>& block_pool, Location start_address) }} {} CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_} { + : env{env_}, block_pool{block_pool_}, program_start{start_address} { functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -427,9 +427,9 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, FunctionId function_id) { - const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; if (!brx_table) { - TrackIndirectBranchTable(env, pc, block->begin); + TrackIndirectBranchTable(env, pc, program_start); throw NotImplementedException("Failed to track indirect branch"); } const IR::FlowTest flow_test{inst.branch.flow_test}; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 466b141983..9f570fbb50 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -157,6 +157,7 @@ private: ObjectPool<Block>& block_pool; boost::container::small_vector<Function, 1> functions; FunctionId current_function_id{0}; + Location program_start; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3-70-g09d2 From 233e39bb7b9ca7660c7a63a386e285aa5524bd20 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 7 Apr 2021 16:48:39 -0300 Subject: shader: Fix dangling labels --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1a4ee4f6cb..847bb19864 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -518,6 +518,11 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } return &*it; } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, -- cgit v1.2.3-70-g09d2 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp <reinuseslisp@airmail.cc> --- src/common/thread_worker.h | 1 + src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +-- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_warp.cpp | 2 +- src/shader_recompiler/file_environment.h | 2 +- src/shader_recompiler/frontend/ir/attribute.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +-- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- .../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - .../ir_opt/collect_shader_info_pass.cpp | 20 +-- .../ir_opt/constant_propagation_pass.cpp | 49 ++++---- .../global_memory_to_storage_buffer_pass.cpp | 42 +++---- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 ++--- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 +- src/tests/common/unique_function.cpp | 2 + src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 - .../renderer_vulkan/vk_texture_cache.cpp | 2 +- 66 files changed, 308 insertions(+), 313 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 0a975a869d..cd0017726f 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <condition_variable> #include <functional> #include <mutex> #include <stop_token> diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 22639fe132..551bf1c582 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -196,6 +196,8 @@ else() $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable + + $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b738e00cc2..0c114402b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <array> +#include <climits> #include <string_view> #include <fmt/format.h> @@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie const std::string_view def_name_view( def_name.data(), fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + defs[static_cast<size_t>(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 32512a0e5f..355cf0ca8a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -16,7 +16,7 @@ namespace Shader::Backend::SPIRV { namespace { template <class Func> -struct FuncTraits : FuncTraits<Func> {}; +struct FuncTraits {}; template <class ReturnType_, class... Args> struct FuncTraits<ReturnType_ (*)(Args...)> { @@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { template <auto func, bool is_first_arg_inst, size_t... I> void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { using Traits = FuncTraits<decltype(func)>; - if constexpr (std::is_same_v<Traits::ReturnType, Id>) { + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { if constexpr (is_first_arg_inst) { - SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + SetDefinition<func>( + ctx, inst, inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); } } } @@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ return Invoke<&Emit##name>(ctx, inst); #include "shader_recompiler/frontend/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->Opcode()); + throw LogicError("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f0f8db8c37..815ca62992 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -43,11 +43,13 @@ public: // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); return; } - const IR::Opcode opcode{values[0]->Opcode()}; - if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + auto read{[&](unsigned int a, unsigned int b) { + return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); + }}; const Id offsets{ ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), @@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { - const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#ifdef _WIN32 #pragma optimize("", off) +#endif Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c57bd291db..12a03ed6ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; + [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 17640a6229..71601f8fd6 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -7,7 +7,7 @@ namespace Shader { -class FileEnvironment final : public Environment { +class FileEnvironment : public Environment { public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea0..7993e5c436 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4; + return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6e..e1f0191f40 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2bc..fc18ea2a2f 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf5..aa8597c608 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast<IR::FlowTest>(flow_test); } - [[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept { + [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { return {static_cast<IR::Pred>(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c4..a2104bdb31 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94b..ceb44e6042 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if (inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique<AssociatedInsts>(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique<AssociatedInsts>(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674e..97dc91d855 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9c..002dbf94e9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f3..89a17fb1b4 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <map> #include <string> diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487f..1e7ffb86d5 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e704..a0962863d8 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue<Type::U1>; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb19864..cb8ec7eaa3 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool<Block>& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast<unsigned int>(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e5..932d19c1d4 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d5..008625cb37 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e9..02cef26455 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea7..5a1b3a8fcb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff73218629..7238414962 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95c..4a49299a0b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c849..b8c89810cb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209e..80109ca0e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e1..b9f4ee0d9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde03..035f8782a7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1e..cf3cf1ba69 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c7..fa2a7807b7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032b..8ae4375287 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae8..06226b7ce2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db8438..5f93a15130 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a216..7550a8d4c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047d..f2738a93b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6ae..fd79867016 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef68..3f548ce761 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c8..cca5b831fd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92d..b3931dae32 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast<u32>(hsetp2.low << 6) | + static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hsetp2.high << 22) | + static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4d..88bbac0a50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; const u32 value{static_cast<u32>(imm.value) << 12}; return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e18151..8ffd84867d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2d..5a0fc36a03 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea325..2300088e38 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea6..e24b497210 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair<int, bool> GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d78..36c5cff2f1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9c..95d4165863 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f42..fe2c7db85d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad9..2ba9c1018a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23b..0863bdfcd4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bcab..0459e5473e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fccf..ec5e74f6d8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82af..edbfcd3082 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a092..61fbbe04cb 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template <typename Func, size_t... I> IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { using Traits = LambdaTraits<decltype(func)>; - return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)}; + return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if (inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bddd..90a65dd167 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { // This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. @@ -200,7 +200,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f21..38af72dfea 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed61..52576b07fc 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d14..346fcc3774 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce41791..c8aee3d3d5 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector<TextureInst, 24>; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags<IR::TextureInstInfo>()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cca..dbec96d84a 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if (!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738a..aa6e865934 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 71b07c1940..3166a69dc1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4aa..57e2d569c2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast<u32>(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92f..fcebb8f6e2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast<u64>(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab9..991afe521e 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <unordered_map> #include <boost/container/static_vector.hpp> diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5f..70328680dd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::vector<VkBufferCopy> result(copies.size()); std::ranges::transform( -- cgit v1.2.3-70-g09d2 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/shader_recompiler/CMakeLists.txt | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ---- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 1 - src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 4 ++-- src/shader_recompiler/frontend/maxwell/control_flow.h | 16 ++++++++-------- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.cpp | 5 +++-- .../frontend/maxwell/translate/impl/not_implemented.cpp | 1 - .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 ++- src/shader_recompiler/object_pool.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 4 ++-- 12 files changed, 22 insertions(+), 24 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 551bf1c582..6b5df23e29 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -197,6 +197,8 @@ else() $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable + # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. + # And this in turns limits the size of a std::array. $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 815ca62992..6a89c0f795 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -304,10 +304,6 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } -#ifdef _WIN32 -#pragma optimize("", off) -#endif - Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags<IR::TextureInstInfo>()}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 12a03ed6ed..f6196653a9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,6 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 002dbf94e9..7d3e0b2ab5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type = type_token, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index cb8ec7eaa3..9811183f12 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -44,7 +44,7 @@ void Split(Block* old_block, Block* new_block, Location pc) { *new_block = Block{}; new_block->begin = pc; new_block->end = old_block->end; - new_block->end_class = old_block->end_class, + new_block->end_class = old_block->end_class; new_block->cond = old_block->cond; new_block->stack = old_block->stack; new_block->branch_true = old_block->branch_true; @@ -428,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += static_cast<unsigned int>(brx_table->branch_offset); + target += static_cast<u32>(brx_table->branch_offset); target += 8; targets.push_back(target); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 9f570fbb50..89966b16aa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -78,15 +78,15 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; - EndClass end_class; - IR::Condition cond; + EndClass end_class{}; + IR::Condition cond{}; Stack stack; - Block* branch_true; - Block* branch_false; - FunctionId function_call; - Block* return_block; - IR::Reg branch_reg; - s32 branch_offset; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; std::vector<IndirectBranch> indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 932d19c1d4..972f677dc9 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode = Opcode::name, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index d30e82b10e..10bb01d99d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,8 +72,9 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, - FPCompareOp compare_op, IR::FpControl control) { +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { case FPCompareOp::F: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba0cfa673b..c239010527 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,7 +65,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 90a65dd167..afe871505e 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -164,7 +164,8 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst.GetOpcode()); } } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 4242816344..f8b255b66c 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template <typename... Args> - requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward<Args>(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 104e6cc850..8efdc29260 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,9 +4,9 @@ #pragma once -#include <mutex> -#include <condition_variable> #include <atomic> +#include <condition_variable> +#include <mutex> #include "common/common_types.h" #include "common/thread_worker.h" -- cgit v1.2.3-70-g09d2 From da936d6ad8cef5418b7644754ee4bcbf7f6125f8 Mon Sep 17 00:00:00 2001 From: FernandoS27 <fsahmkow27@gmail.com> Date: Sun, 18 Apr 2021 19:10:55 +0200 Subject: shader: Implement delegation of Exit to dispatcher on CFG --- .../frontend/maxwell/control_flow.cpp | 41 ++++++++++++++++++++-- .../frontend/maxwell/control_flow.h | 9 ++++- 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 9811183f12..298faa03e0 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -185,8 +185,20 @@ Function::Function(ObjectPool<Block>& block_pool, Location start_address) label.block->branch_false = nullptr; } -CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_}, program_start{start_address} { +CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -196,6 +208,12 @@ CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_addre AnalyzeLabel(function_id, label); } } + if (exits_to_dispatcher) { + const auto it = functions[0].blocks.rbegin(); + dispatch_block->begin = it->end + 1; + dispatch_block->end = it->end + 1; + functions[0].blocks.insert(*dispatch_block); + } } void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { @@ -462,11 +480,22 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati // EXIT will never be taken return AnalysisState::Continue; } + if (exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function."); + } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->branch_true = dispatch_block; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } @@ -477,6 +506,14 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 89966b16aa..0e515c3b6f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -111,7 +111,8 @@ class CFG { }; public: - explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address); + explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address, + bool exits_to_dispatcher = false); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -128,6 +129,10 @@ public: return std::span(functions.data(), functions.size()); } + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + private: void AnalyzeLabel(FunctionId function_id, Label& label); @@ -158,6 +163,8 @@ private: boost::container::small_vector<Function, 1> functions; FunctionId current_function_id{0}; Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3-70-g09d2 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 <fsahmkow27@gmail.com> Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- .../frontend/maxwell/control_flow.cpp | 10 +++--- src/shader_recompiler/frontend/maxwell/program.cpp | 37 +++++++++++----------- src/shader_recompiler/frontend/maxwell/program.h | 1 + src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 20 ++++++------ .../renderer_vulkan/vk_pipeline_cache.cpp | 18 +++++------ 5 files changed, 42 insertions(+), 44 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 298faa03e0..e7abea82f4 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -209,9 +209,9 @@ CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_addre } } if (exits_to_dispatcher) { - const auto it = functions[0].blocks.rbegin(); - dispatch_block->begin = it->end + 1; - dispatch_block->end = it->end + 1; + const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; functions[0].blocks.insert(*dispatch_block); } } @@ -481,7 +481,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } if (exits_to_dispatcher && function_id != 0) { - throw NotImplementedException("Dispatch EXIT on external function."); + throw NotImplementedException("Dispatch EXIT on external function"); } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { @@ -490,9 +490,9 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; if (exits_to_dispatcher) { block->end = pc; - block->branch_true = dispatch_block; block->end_class = EndClass::Branch; block->cond = cond; + block->branch_true = dispatch_block; block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); return AnalysisState::Branch; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 59897cb3e1..a4fa4319da 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -151,31 +151,30 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env2) { - IR::Program program{}; + Environment& env_vertex_b) { + IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - program.blocks.swap(vertex_a.blocks); + std::swap(result.blocks, vertex_a.blocks); for (IR::Block* block : vertex_b.blocks) { - program.blocks.push_back(block); + result.blocks.push_back(block); } - program.stage = Stage::VertexB; - program.info = vertex_a.info; - program.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; index++) { - program.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - program.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } - Optimization::JoinTextureInfo(program.info, vertex_b.info); - Optimization::JoinStorageInfo(program.info, vertex_b.info); - Optimization::DualVertexJoinPass(program); - program.post_order_blocks = PostOrder(program.blocks); - Optimization::DeadCodeEliminationPass(program); - Optimization::IdentityRemovalPass(program); - Optimization::VerificationPass(program); - Optimization::CollectShaderInfoPass(env2, program); - return program; + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DualVertexJoinPass(result); + result.post_order_blocks = PostOrder(result.blocks); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 6e5d5ddd0a..f7f5930e4d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -23,4 +23,5 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); + } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index f35c6478ac..f2d7db0e6b 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -4,8 +4,6 @@ #include <algorithm> #include <ranges> -#include <tuple> -#include <type_traits> #include "common/bit_cast.h" #include "common/bit_util.h" @@ -40,7 +38,7 @@ void VertexATransformPass(IR::Program& program) { } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Prologue) { return inst.Invalidate(); @@ -51,24 +49,24 @@ void VertexBTransformPass(IR::Program& program) { void DualVertexJoinPass(IR::Program& program) { const auto& blocks = program.blocks; - s64 s = static_cast<s64>(blocks.size()) - 1; - if (s < 1) { - throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + const s64 sub_size = static_cast<s64>(blocks.size()) - 1; + if (sub_size < 1) { + throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); } - for (s64 index = 0; index < s; index++) { - IR::Block* const current_block = blocks[index]; - IR::Block* const next_block = blocks[index + 1]; + for (s64 index = 0; index < sub_size; ++index) { + IR::Block* const current_block{blocks[index]}; + IR::Block* const next_block{blocks[index + 1]}; for (IR::Inst& inst : current_block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Join) { IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; ir.Branch(next_block); inst.Invalidate(); - // only 1 join should exist + // Only 1 join should exist return; } } } - throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); + throw LogicError("Dual Vertex Join pass failed, no join present"); } } // namespace Shader::Optimization diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 638475251e..634bbb4501 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -288,32 +288,32 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; - bool uses_vertex_a{}; - std::size_t start_value_processing{}; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } - uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { + // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - continue; + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - start_value_processing = 1; } std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; u32 binding{0}; - for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3-70-g09d2 From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: shader: Avoid usage of C++20 ranges to build in clang --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 7 +++++-- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 9 +++++++-- .../frontend/maxwell/control_flow.cpp | 13 ++++++------- .../frontend/maxwell/structured_control_flow.cpp | 8 ++++---- .../frontend/maxwell/translate_program.cpp | 20 +++++++++++++------- .../ir_opt/constant_propagation_pass.cpp | 5 +++-- .../ir_opt/dead_code_elimination_pass.cpp | 2 -- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 6 ------ .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 1 - .../ir_opt/lower_int64_to_int32.cpp | 5 +++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 10 ++++++---- 11 files changed, 47 insertions(+), 39 deletions(-) (limited to 'src/shader_recompiler/frontend/maxwell/control_flow.cpp') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 64787b3537..a5e8c9b6e0 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> +#include <algorithm> #include <string> #include <tuple> @@ -196,7 +196,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index c5e819a0a2..8a430d5739 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> +#include <algorithm> #include <string> +#include <tuple> +#include <type_traits> #include "common/div_ceil.h" #include "common/settings.h" @@ -120,7 +122,10 @@ void PrecolorInst(IR::Inst& phi) { void Precolor(const IR::Program& program) { for (IR::Block* const block : program.blocks) { - for (IR::Inst& phi : block->Instructions() | std::views::take_while(IR::IsPhi)) { + for (IR::Inst& phi : block->Instructions()) { + if (!IR::IsPhi(phi)) { + break; + } PrecolorInst(phi); } } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e7abea82f4..1a954a509f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <array> #include <optional> -#include <ranges> #include <string> #include <utility> @@ -151,18 +150,18 @@ std::pair<Location, Stack> Stack::Pop(Token token) const { } std::optional<Location> Stack::Peek(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - if (it == reverse_entries.end()) { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { return std::nullopt; } return it->target; } Stack Stack::Remove(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - const auto pos{std::distance(reverse_entries.begin(), it)}; + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; Stack result; result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); return result; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 221454b995..8b3e0a15c7 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <memory> -#include <ranges> #include <string> #include <unordered_map> #include <utility> @@ -167,7 +166,7 @@ std::string DumpExpr(const Statement* stmt) { } } -std::string DumpTree(const Tree& tree, u32 indentation = 0) { +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { std::string ret; std::string indent(indentation, ' '); for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { @@ -315,8 +314,9 @@ class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 83c77967d3..c067d459cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -4,7 +4,6 @@ #include <algorithm> #include <memory> -#include <ranges> #include <vector> #include "common/settings.h" @@ -20,12 +19,19 @@ namespace Shader::Maxwell { namespace { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } return blocks; } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 3c72203add..8dd6d6c2c8 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <algorithm> -#include <ranges> #include <tuple> #include <type_traits> @@ -599,7 +598,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void ConstantPropagationPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { ConstantPropagation(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 1e4a3fdaed..4008363016 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> - #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index 3d2c205c2b..055ba9c54d 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -2,12 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <algorithm> -#include <ranges> - -#include "common/bit_cast.h" -#include "common/bit_util.h" -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/ir_opt/passes.h" diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f9de17b250..4197b0095d 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <compare> #include <optional> -#include <ranges> #include <queue> #include <boost/container/flat_set.hpp> diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index abf7c87c79..e80d3d1d94 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <ranges> #include <utility> #include "shader_recompiler/exception.h" @@ -207,7 +206,9 @@ void Lower(IR::Block& block, IR::Inst& inst) { } // Anonymous namespace void LowerInt64ToInt32(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + const auto end{program.post_order_blocks.rend()}; + for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { + IR::Block* const block{*it}; for (IR::Inst& inst : block->Instructions()) { Lower(*block, inst); } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index dcaced83f3..53145fb5e3 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -14,7 +14,6 @@ // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 // -#include <ranges> #include <span> #include <variant> #include <vector> @@ -243,7 +242,9 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& [variant, phi] : it->second) { + for (auto& pair : it->second) { + auto& variant{pair.first}; + auto& phi{pair.second}; std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -373,8 +374,9 @@ void VisitBlock(Pass& pass, IR::Block* block) { void SsaRewritePass(IR::Program& program) { Pass pass; - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { - VisitBlock(pass, block); + const auto end{program.post_order_blocks.rend()}; + for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { + VisitBlock(pass, *block); } } -- cgit v1.2.3-70-g09d2