From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 9 Jan 2021 03:30:07 -0300 Subject: shader: Initial recompiler work --- src/shader_recompiler/frontend/ir/basic_block.cpp | 142 ++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 src/shader_recompiler/frontend/ir/basic_block.cpp (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 0000000000..0406726ad9 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -0,0 +1,142 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <initializer_list> +#include <map> +#include <memory> + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {} + +Block::~Block() = default; + +void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { + PrependNewInst(end(), op, args); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args) { + Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; + const auto result_it{instructions.insert(insertion_point, *inst)}; + + if (inst->NumArgs() != args.size()) { + throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + } + std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { + inst->SetArg(index, arg); + ++index; + }); + return result_it; +} + +u32 Block::LocationBegin() const noexcept { + return location_begin; +} + +u32 Block::LocationEnd() const noexcept { + return location_end; +} + +Block::InstructionList& Block::Instructions() noexcept { + return instructions; +} + +const Block::InstructionList& Block::Instructions() const noexcept { + return instructions; +} + +static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, + const std::map<const Inst*, size_t>& inst_to_index, + const Value& arg) { + if (arg.IsEmpty()) { + return "<null>"; + } + if (arg.IsLabel()) { + if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label())); + } + if (!arg.IsImmediate()) { + if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { + return fmt::format("%{}", it->second); + } + return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst())); + } + switch (arg.Type()) { + case Type::U1: + return fmt::format("#{}", arg.U1() ? '1' : '0'); + case Type::U8: + return fmt::format("#{}", arg.U8()); + case Type::U16: + return fmt::format("#{}", arg.U16()); + case Type::U32: + return fmt::format("#{}", arg.U32()); + case Type::U64: + return fmt::format("#{}", arg.U64()); + case Type::Reg: + return fmt::format("{}", arg.Reg()); + case Type::Pred: + return fmt::format("{}", arg.Pred()); + case Type::Attribute: + return fmt::format("{}", arg.Attribute()); + default: + return "<unknown immediate type>"; + } +} + +std::string DumpBlock(const Block& block) { + size_t inst_index{0}; + std::map<const Inst*, size_t> inst_to_index; + return DumpBlock(block, {}, inst_to_index, inst_index); +} + +std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) { + std::string ret{"Block"}; + if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { + ret += fmt::format(" ${}", it->second); + } + ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); + + for (const Inst& inst : block) { + const Opcode op{inst.Opcode()}; + ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); + if (TypeOf(op) != Type::Void) { + ret += fmt::format("%{:<5} = {}", inst_index, op); + } else { + ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces + } + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + ret += arg_index != 0 ? ", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); + + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } + } + if (TypeOf(op) != Type::Void) { + ret += fmt::format(" (uses: {})\n", inst.UseCount()); + } else { + ret += '\n'; + } + + inst_to_index.emplace(&inst, inst_index); + ++inst_index; + } + return ret; +} + +} // namespace Shader::IR -- cgit v1.2.3-70-g09d2 From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Tue, 2 Feb 2021 21:07:00 -0300 Subject: shader: SSA and dominance --- src/shader_recompiler/CMakeLists.txt | 3 + src/shader_recompiler/frontend/ir/basic_block.cpp | 51 +++++-- src/shader_recompiler/frontend/ir/basic_block.h | 20 ++- src/shader_recompiler/frontend/ir/function.cpp | 5 + src/shader_recompiler/frontend/ir/function.h | 25 ++++ .../frontend/ir/microinstruction.cpp | 22 +++ .../frontend/ir/microinstruction.h | 10 ++ src/shader_recompiler/frontend/ir/opcode.inc | 8 ++ src/shader_recompiler/frontend/ir/pred.h | 7 + src/shader_recompiler/frontend/ir/reg.h | 9 +- src/shader_recompiler/frontend/ir/value.cpp | 37 +++++ src/shader_recompiler/frontend/ir/value.h | 3 + .../frontend/maxwell/control_flow.cpp | 130 ++++++++++++++++- .../frontend/maxwell/control_flow.h | 44 +++++- src/shader_recompiler/frontend/maxwell/program.cpp | 75 +++++----- src/shader_recompiler/frontend/maxwell/program.h | 11 +- .../frontend/maxwell/termination_code.cpp | 7 + .../frontend/maxwell/termination_code.h | 1 + .../frontend/maxwell/translate/impl/impl.h | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 6 +- .../ir_opt/identity_removal_pass.cpp | 1 - src/shader_recompiler/ir_opt/passes.h | 9 ++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 155 +++++++++++++++++++++ src/shader_recompiler/main.cpp | 4 +- 24 files changed, 570 insertions(+), 77 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/function.cpp create mode 100644 src/shader_recompiler/frontend/ir/function.h create mode 100644 src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index c65846bc44..36a61f21ac 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -11,6 +11,8 @@ add_executable(shader_recompiler frontend/ir/condition.h frontend/ir/flow_test.cpp frontend/ir/flow_test.h + frontend/ir/function.cpp + frontend/ir/function.h frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp @@ -51,6 +53,7 @@ add_executable(shader_recompiler ir_opt/get_set_elimination_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/passes.h + ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp main.cpp ) diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 0406726ad9..e795618fcf 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -37,6 +37,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } +void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { + imm_predecessors.push_back(immediate_predecessor); +} + u32 Block::LocationBegin() const noexcept { return location_begin; } @@ -53,6 +57,18 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept { + return imm_predecessors; +} + +static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); +} + static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, const std::map<const Inst*, size_t>& inst_to_index, const Value& arg) { @@ -60,10 +76,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return "<null>"; } if (arg.IsLabel()) { - if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) { - return fmt::format("{{Block ${}}}", it->second); - } - return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(arg.Label())); + return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { @@ -115,16 +128,26 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; - for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { - const Value arg{inst.Arg(arg_index)}; - ret += arg_index != 0 ? ", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + if (op == Opcode::Phi) { + size_t val_index{0}; + for (const auto& [phi_block, phi_val] : inst.PhiOperands()) { + ret += val_index != 0 ? ", " : " "; + ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val), + BlockToIndex(block_to_index, phi_block)); + ++val_index; + } + } else { + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + ret += arg_index != 0 ? ", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); + + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3ed2eb9571..4b6b80c4b2 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -6,6 +6,8 @@ #include <initializer_list> #include <map> +#include <span> +#include <vector> #include <boost/intrusive/list.hpp> #include <boost/pool/pool_alloc.hpp> @@ -36,7 +38,11 @@ public: void AppendNewInst(Opcode op, std::initializer_list<Value> args); /// Prepends a new instruction to this basic block before the insertion point. - iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args); + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list<Value> args = {}); + + /// Adds a new immediate predecessor to the basic block. + void AddImmediatePredecessor(IR::Block* immediate_predecessor); /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; @@ -44,9 +50,12 @@ public: [[nodiscard]] u32 LocationEnd() const noexcept; /// Gets a mutable reference to the instruction list for this basic block. - InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept; /// Gets an immutable reference to the instruction list for this basic block. - const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept; + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept; [[nodiscard]] bool empty() const { return instructions.empty(); @@ -115,13 +124,16 @@ private: /// End location of this block u32 location_end; - /// List of instructions in this block. + /// List of instructions in this block InstructionList instructions; /// Memory pool for instruction list boost::fast_pool_allocator<Inst, boost::default_user_allocator_malloc_free, boost::details::pool::null_mutex> instruction_alloc_pool; + + /// Block immediate predecessors + std::vector<IR::Block*> imm_predecessors; }; [[nodiscard]] std::string DumpBlock(const Block& block); diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp new file mode 100644 index 0000000000..d1fc9461d0 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.cpp @@ -0,0 +1,5 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h new file mode 100644 index 0000000000..2d4dc5b981 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +struct Function { + struct InplaceDelete { + void operator()(IR::Block* block) const noexcept { + std::destroy_at(block); + } + }; + using UniqueBlock = std::unique_ptr<IR::Block, InplaceDelete>; + + std::vector<UniqueBlock> blocks; +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 553fec3b7f..ecf76e23d8 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -30,6 +30,11 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) bool Inst::MayHaveSideEffects() const noexcept { switch (op) { + case Opcode::Branch: + case Opcode::BranchConditional: + case Opcode::Exit: + case Opcode::Return: + case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: @@ -113,6 +118,17 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } +std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept { + return phi_operands; +} + +void Inst::AddPhiOperand(Block* predecessor, const Value& value) { + if (!value.IsImmediate()) { + Use(value); + } + phi_operands.emplace_back(predecessor, value); +} + void Inst::Invalidate() { ClearArgs(); op = Opcode::Void; @@ -125,6 +141,12 @@ void Inst::ClearArgs() { } value = {}; } + for (auto& [phi_block, phi_op] : phi_operands) { + if (!phi_op.IsImmediate()) { + UndoUse(phi_op); + } + } + phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 43460b9505..7f1ed6710c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,6 +5,8 @@ #pragma once #include <array> +#include <span> +#include <vector> #include <boost/intrusive/list.hpp> @@ -15,6 +17,8 @@ namespace Shader::IR { +class Block; + constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { @@ -59,6 +63,11 @@ public: /// Set the value of a given argument index. void SetArg(size_t index, Value value); + /// Get an immutable span to the phi operands. + [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + void Invalidate(); void ClearArgs(); @@ -76,6 +85,7 @@ private: Inst* carry_inst{}; Inst* overflow_inst{}; Inst* zsco_inst{}; + std::vector<std::pair<Block*, Value>> phi_operands; u64 flags{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 371064bf3d..40759e96ab 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -5,6 +5,7 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) @@ -35,6 +36,13 @@ OPCODE(SetSFlag, Void, U1, OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +// Undefined +OPCODE(Undef1, U1, ) +OPCODE(Undef8, U8, ) +OPCODE(Undef16, U16, ) +OPCODE(Undef32, U32, ) +OPCODE(Undef64, U64, ) + // Memory operations OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index 37cc530068..daf23193f2 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -10,6 +10,13 @@ namespace Shader::IR { enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +constexpr size_t NUM_USER_PREDS = 6; +constexpr size_t NUM_PREDS = 7; + +[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { + return static_cast<size_t>(pred); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 316fc4be8d..771094eb9a 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -271,6 +271,9 @@ enum class Reg : u64 { }; static_assert(static_cast<int>(Reg::RZ) == 255); +constexpr size_t NUM_USER_REGS = 255; +constexpr size_t NUM_REGS = 256; + [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { if (reg == Reg::RZ) { // Adding or subtracting registers from RZ yields RZ @@ -290,8 +293,12 @@ static_assert(static_cast<int>(Reg::RZ) == 255); return reg + (-num); } +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast<size_t>(reg); +} + [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { - return (static_cast<size_t>(reg) / align) * align == static_cast<size_t>(reg); + return (RegIndex(reg) / align) * align == RegIndex(reg); } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 7b5b35d6c5..1e974e88c7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -115,6 +115,43 @@ u64 Value::U64() const { return imm_u64; } +bool Value::operator==(const Value& other) const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::Label: + return label == other.label; + case Type::Reg: + return reg == other.reg; + case Type::Pred: + return pred == other.pred; + case Type::Attribute: + return attribute == other.attribute; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + return imm_u16 == other.imm_u16; + case Type::U32: + return imm_u32 == other.imm_u32; + case Type::U64: + return imm_u64 == other.imm_u64; + case Type::ZSCO: + throw NotImplementedException("ZSCO comparison"); + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + void Value::ValidateAccess(IR::Type expected) const { if (type != expected) { throw LogicError("Reading {} out of {}", expected, type); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 664dacf9d7..368119921b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -48,6 +48,9 @@ public: [[nodiscard]] u32 U32() const; [[nodiscard]] u64 U64() const; + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + private: void ValidateAccess(IR::Type expected) const; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index fc4dba8269..21ee981371 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -36,6 +36,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { .cond{true}, .branch_true{new_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }, Block{ .begin{pc}, @@ -46,6 +47,7 @@ static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { .cond{block.cond}, .branch_true{block.branch_true}, .branch_false{block.branch_false}, + .imm_predecessors{}, }, }; } @@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string Name(const Block& block) { +static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.id); } else { @@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(Location start_address) - : entrypoint{start_address}, labels{Label{ + : entrypoint{start_address}, labels{{ .address{start_address}, .block_id{0}, .stack{}, }} {} +void Function::BuildBlocksMap() { + const size_t num_blocks{NumBlocks()}; + blocks_map.resize(num_blocks); + for (size_t block_index = 0; block_index < num_blocks; ++block_index) { + Block& block{blocks_data[block_index]}; + blocks_map[block.id] = █ + } +} + +void Function::BuildImmediatePredecessors() { + for (const Block& block : blocks_data) { + if (block.branch_true != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); + } + if (block.branch_false != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); + } + } +} + +void Function::BuildPostOrder() { + boost::container::small_vector<BlockId, 0x110> block_stack; + post_order_map.resize(NumBlocks()); + + Block& first_block{blocks_data[blocks.front()]}; + first_block.post_order_visited = true; + block_stack.push_back(first_block.id); + + const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { + if (branch_id == UNREACHABLE_BLOCK_ID) { + return false; + } + if (blocks_map[branch_id]->post_order_visited) { + return false; + } + blocks_map[branch_id]->post_order_visited = true; + + // Calling push_back twice is faster than insert on msvc + block_stack.push_back(block_id); + block_stack.push_back(branch_id); + return true; + }; + while (!block_stack.empty()) { + const Block* const block{blocks_map[block_stack.back()]}; + block_stack.pop_back(); + + if (!visit_branch(block->id, block->branch_true) && + !visit_branch(block->id, block->branch_false)) { + post_order_map[block->id] = static_cast<u32>(post_order_blocks.size()); + post_order_blocks.push_back(block->id); + } + } +} + +void Function::BuildImmediateDominators() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; + auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; + auto intersect{[this](Block* finger1, Block* finger2) { + while (finger1 != finger2) { + while (post_order_map[finger1->id] < post_order_map[finger2->id]) { + finger1 = finger1->imm_dominator; + } + while (post_order_map[finger2->id] < post_order_map[finger1->id]) { + finger2 = finger2->imm_dominator; + } + } + return finger1; + }}; + for (Block& block : blocks_data) { + block.imm_dominator = nullptr; + } + Block* const start_block{&blocks_data[blocks.front()]}; + start_block->imm_dominator = start_block; + + bool changed{true}; + while (changed) { + changed = false; + for (Block* const block : post_order_blocks | reverse_order_but_first) { + Block* new_idom{}; + for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { + new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; + } + changed |= block->imm_dominator != new_idom; + block->imm_dominator = new_idom; + } + } +} + +void Function::BuildDominanceFrontier() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; + for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { + for (Block* current : block.imm_predecessors | transform_block_id) { + while (current != block.imm_dominator) { + current->dominance_frontiers.push_back(current->id); + current = current->imm_dominator; + } + } + } +} + CFG::CFG(Environment& env_, Location start_address) : env{env_} { + VisitFunctions(start_address); + + for (Function& function : functions) { + function.BuildBlocksMap(); + function.BuildImmediatePredecessors(); + function.BuildPostOrder(); + function.BuildImmediateDominators(); + function.BuildDominanceFrontier(); + } +} + +void CFG::VisitFunctions(Location start_address) { functions.emplace_back(start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { .cond{true}, .branch_true{UNREACHABLE_BLOCK_ID}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; @@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.push_back(cal_pc); + functions.emplace_back(cal_pc); } // Handle CAL like a regular instruction break; @@ -352,6 +469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, })}; // Set the end properties of the conditional instruction and give it a new identity Block& conditional_block{block}; @@ -465,14 +583,14 @@ std::string CFG::Dot() const { dot += fmt::format("\t\tnode [style=filled];\n"); for (const u32 block_index : function.blocks) { const Block& block{function.blocks_data[block_index]}; - const std::string name{Name(block)}; + const std::string name{NameOf(block)}; const auto add_branch = [&](BlockId branch_id, bool add_label) { const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; dot += fmt::format("\t\t{}->", name); if (it == function.blocks_data.end()) { dot += fmt::format("\"Unknown label {}\"", branch_id); } else { - dot += Name(*it); + dot += NameOf(*it); }; if (add_label && block.cond != true && block.cond != false) { dot += fmt::format(" [label=\"{}\"]", block.cond); @@ -520,7 +638,7 @@ std::string CFG::Dot() const { if (functions.front().blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index b2ab0cdc35..20ada8afd9 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -70,6 +70,12 @@ struct Block { IR::Condition cond; BlockId branch_true; BlockId branch_false; + boost::container::small_vector<BlockId, 4> imm_predecessors; + boost::container::small_vector<BlockId, 8> dominance_frontiers; + union { + bool post_order_visited{false}; + Block* imm_dominator; + }; }; struct Label { @@ -81,11 +87,30 @@ struct Label { struct Function { Function(Location start_address); + void BuildBlocksMap(); + + void BuildImmediatePredecessors(); + + void BuildPostOrder(); + + void BuildImmediateDominators(); + + void BuildDominanceFrontier(); + + [[nodiscard]] size_t NumBlocks() const noexcept { + return static_cast<size_t>(current_block_id) + 1; + } + Location entrypoint; BlockId current_block_id{0}; boost::container::small_vector<Label, 16> labels; boost::container::small_vector<u32, 0x130> blocks; boost::container::small_vector<Block, 0x130> blocks_data; + // Translates from BlockId to block index + boost::container::small_vector<Block*, 0x130> blocks_map; + + boost::container::small_vector<u32, 0x130> post_order_blocks; + boost::container::small_vector<BlockId, 0x130> post_order_map; }; class CFG { @@ -97,6 +122,12 @@ class CFG { public: explicit CFG(Environment& env, Location start_address); + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + [[nodiscard]] std::string Dot() const; [[nodiscard]] std::span<const Function> Functions() const noexcept { @@ -104,20 +135,22 @@ public: } private: + void VisitFunctions(Location start_address); + void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited - [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, - Instruction inst, Opcode opcode); + bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); @@ -126,8 +159,7 @@ private: AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, - FunctionId function_id); + BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); Environment& env; boost::container::small_vector<Function, 1> functions; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 67a98ba57a..49d1f4bfb7 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,40 +8,53 @@ #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { +namespace { +void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span<IR::Block*> block_map, IR::Block* block_memory) { + const size_t num_blocks{cfg_function.blocks.size()}; + function.blocks.reserve(num_blocks); -Program::Function::~Function() { - std::ranges::for_each(blocks, &std::destroy_at<IR::Block>); -} - -Program::Program(Environment& env, const Flow::CFG& cfg) { - std::vector<IR::Block*> block_map; - functions.reserve(cfg.Functions().size()); + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - for (const Flow::Function& cfg_function : cfg.Functions()) { - Function& function{functions.emplace_back()}; + function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); + block_map[flow_block.id] = function.blocks.back().get(); + ++block_memory; + } +} - const size_t num_blocks{cfg_function.blocks.size()}; - IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)}; - function.blocks.reserve(num_blocks); +void EmitTerminationInsts(const Flow::Function& cfg_function, + std::span<IR::Block* const> block_map) { + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } +} - block_map.resize(cfg_function.blocks_data.size()); +void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + IR::Block* block_memory) { + std::vector<IR::Block*> block_map; + block_map.resize(cfg_function.blocks_data.size()); - // Visit the instructions of all blocks - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + TranslateCode(env, cfg_function, function, block_map, block_memory); + EmitTerminationInsts(cfg_function, block_map); +} +} // Anonymous namespace - IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; - ++block_memory; - function.blocks.push_back(block); - block_map[flow_block.id] = block; - } - // Now that all blocks are defined, emit the termination instructions - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } +Program::Program(Environment& env, const Flow::CFG& cfg) { + functions.reserve(cfg.Functions().size()); + for (const Flow::Function& cfg_function : cfg.Functions()) { + TranslateFunction(env, cfg_function, functions.emplace_back(), + block_alloc_pool.allocate(cfg_function.blocks.size())); + } + std::ranges::for_each(functions, Optimization::SsaRewritePass); + for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); + Optimization::Invoke(Optimization::IdentityRemovalPass, function); + // Optimization::Invoke(Optimization::VerificationPass, function); } } @@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) { std::map<const IR::Inst*, size_t> inst_to_index; std::map<const IR::Block*, size_t> block_to_index; - for (const Program::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); + for (const IR::Function& function : program.functions) { + for (const auto& block : function.blocks) { + block_to_index.emplace(block.get(), index); ++index; } } std::string ret; - for (const Program::Function& function : program.functions) { + for (const IR::Function& function : program.functions) { ret += fmt::format("Function\n"); - for (const IR::Block* const block : function.blocks) { + for (const auto& block : function.blocks) { ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } } diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 7814b2c016..36e678a9ea 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -4,13 +4,16 @@ #pragma once +#include <memory> #include <string> #include <vector> +#include <boost/container/small_vector.hpp> #include <boost/pool/pool_alloc.hpp> #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" namespace Shader::Maxwell { @@ -22,16 +25,10 @@ public: explicit Program(Environment& env, const Flow::CFG& cfg); private: - struct Function { - ~Function(); - - std::vector<IR::Block*> blocks; - }; - boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete, boost::details::pool::null_mutex> block_alloc_pool; - std::vector<Function> functions; + boost::container::small_vector<IR::Function, 1> functions; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp index a4ea5c5e38..ed5137f20c 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map, IR::IREmitter& ir) { + const auto add_immediate_predecessor = [&](Flow::BlockId label) { + block_map[label]->AddImmediatePredecessor(&ir.block); + }; if (flow_block.cond == true) { + add_immediate_predecessor(flow_block.branch_true); return ir.Branch(block_map[flow_block.branch_true]); } if (flow_block.cond == false) { + add_immediate_predecessor(flow_block.branch_false); return ir.Branch(block_map[flow_block.branch_false]); } + add_immediate_predecessor(flow_block.branch_true); + add_immediate_predecessor(flow_block.branch_false); return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], block_map[flow_block.branch_false]); } diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h index b0d667942e..04e0445340 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -11,6 +11,7 @@ namespace Shader::Maxwell { +/// Emit termination instructions and collect immediate predecessors void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bc607b0025..8be7d6ff1b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -208,7 +208,7 @@ public: void P2R_reg(u64 insn); void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); - void PBK(u64 insn); + void PBK(); void PCNT(u64 insn); void PEXIT(u64 insn); void PIXLD(u64 insn); @@ -252,7 +252,7 @@ public: void SHR_reg(u64 insn); void SHR_cbuf(u64 insn); void SHR_imm(u64 insn); - void SSY(u64 insn); + void SSY(); void ST(u64 insn); void STG(u64 insn); void STL(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c907c1ffb5..0f52696d10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) { ThrowNotImplemented(Opcode::P2R_imm); } -void TranslatorVisitor::PBK(u64) { +void TranslatorVisitor::PBK() { // PBK is a no-op } @@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) { ThrowNotImplemented(Opcode::SHR_imm); } -void TranslatorVisitor::SSY(u64) { - ThrowNotImplemented(Opcode::SSY); +void TranslatorVisitor::SSY() { + // SSY is a no-op } void TranslatorVisitor::ST(u64) { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index f9bb063fb1..7f85000878 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -28,7 +28,6 @@ void IdentityRemovalPass(IR::Block& block) { ++inst; } } - for (IR::Inst* const inst : to_invalidate) { inst->Invalidate(); } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index fe5454e9a6..83f094d735 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -5,12 +5,21 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" namespace Shader::Optimization { +template <typename Func> +void Invoke(Func&& func, IR::Function& function) { + for (const auto& block : function.blocks) { + func(*block); + } +} + void DeadCodeEliminationPass(IR::Block& block); void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); +void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 0000000000..a4b256a40e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -0,0 +1,155 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// This file implements the SSA rewriting algorithm proposed in +// +// Simple and Efficient Construction of Static Single Assignment Form. +// Braun M., Buchwald S., Hack S., Lei�a R., Mallon C., Zwinkau A. (2013) +// In: Jhala R., De Bosschere K. (eds) +// Compiler Construction. CC 2013. +// Lecture Notes in Computer Science, vol 7791. +// Springer, Berlin, Heidelberg +// +// https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 +// + +#include <map> + +#include <boost/container/flat_map.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; + +struct DefTable { + [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { + return regs[IR::RegIndex(variable)]; + } + + [[nodiscard]] ValueMap& operator[](IR::Pred variable) noexcept { + return preds[IR::PredIndex(variable)]; + } + + std::array<ValueMap, IR::NUM_USER_REGS> regs; + std::array<ValueMap, IR::NUM_USER_PREDS> preds; +}; + +IR::Opcode UndefOpcode(IR::Reg) noexcept { + return IR::Opcode::Undef32; +} + +IR::Opcode UndefOpcode(IR::Pred) noexcept { + return IR::Opcode::Undef1; +} + +[[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { + return inst.Opcode() == IR::Opcode::Phi; +} + +class Pass { +public: + void WriteVariable(auto variable, IR::Block* block, const IR::Value& value) { + current_def[variable].insert_or_assign(block, value); + } + + IR::Value ReadVariable(auto variable, IR::Block* block) { + auto& def{current_def[variable]}; + if (const auto it{def.find(block)}; it != def.end()) { + return it->second; + } + return ReadVariableRecursive(variable, block); + } + +private: + IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { + IR::Value val; + if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + val = ReadVariable(variable, preds.front()); + } else { + // Break potential cycles with operandless phi + val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + WriteVariable(variable, block, val); + val = AddPhiOperands(variable, val, block); + } + WriteVariable(variable, block, val); + return val; + } + + IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + for (IR::Block* const pred : block->ImmediatePredecessors()) { + phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + } + return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); + } + + IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value same; + for (const auto& pair : phi.Inst()->PhiOperands()) { + const IR::Value& op{pair.second}; + if (op == same || op == phi) { + // Unique value or self-reference + continue; + } + if (!same.IsEmpty()) { + // The phi merges at least two values: not trivial + return phi; + } + same = op; + } + if (same.IsEmpty()) { + // The phi is unreachable or in the start block + const auto first_not_phi{std::ranges::find_if_not(block->Instructions(), IsPhi)}; + same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; + } + // Reroute all uses of phi to same and remove phi + phi.Inst()->ReplaceUsesWith(same); + // TODO: Try to recursively remove all phi users, which might have become trivial + return same; + } + + DefTable current_def; +}; +} // Anonymous namespace + +void SsaRewritePass(IR::Function& function) { + Pass pass; + for (const auto& block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block.get(), inst.Arg(1)); + } + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + } + break; + default: + break; + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 39f0bf3336..e3c9ad6e8f 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -35,12 +35,12 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str())); }); - for (int i = 0; i < 1; ++i) { + for (int i = 0; i < 300; ++i) { for (auto& env : map) { // fmt::print(stdout, "Decoding {}\n", path.string()); const Location start_address{0}; auto cfg{std::make_unique<Flow::CFG>(*env, start_address)}; - // fmt::print(stdout, "{}\n", cfg.Dot()); + // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; -- cgit v1.2.3-70-g09d2 From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 3 Feb 2021 16:43:04 -0300 Subject: shader: Initial instruction support --- src/shader_recompiler/CMakeLists.txt | 13 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200 +++++++++++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 67 ++++++- .../frontend/ir/microinstruction.h | 12 +- src/shader_recompiler/frontend/ir/modifiers.h | 28 +++ src/shader_recompiler/frontend/ir/opcode.inc | 139 +++++++++----- src/shader_recompiler/frontend/ir/pred.h | 11 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/common_encoding.h | 56 ++++++ .../maxwell/translate/impl/floating_point_add.cpp | 71 ++++++++ .../impl/floating_point_fused_multiply_add.cpp | 73 ++++++++ .../translate/impl/floating_point_multiply.cpp | 108 +++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 26 ++- .../frontend/maxwell/translate/impl/impl.h | 9 +- .../maxwell/translate/impl/integer_add.cpp | 106 +++++++++++ .../maxwell/translate/impl/integer_scaled_add.cpp | 73 ++++++++ .../translate/impl/integer_set_predicate.cpp | 99 ++++++++++ .../maxwell/translate/impl/integer_shift_left.cpp | 71 ++++++++ .../translate/impl/integer_short_multiply_add.cpp | 110 ++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 149 ++++++++++++--- .../maxwell/translate/impl/move_register.cpp | 45 +++++ .../translate/impl/move_special_register.cpp | 114 ++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 149 +-------------- .../maxwell/translate/impl/register_move.cpp | 45 ----- .../ir_opt/get_set_elimination_pass.cpp | 87 --------- src/shader_recompiler/ir_opt/passes.h | 1 - src/shader_recompiler/main.cpp | 3 +- 29 files changed, 1494 insertions(+), 378 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/modifiers.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp delete mode 100644 src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 36a61f21ac..f5dd4d29ea 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -39,18 +39,27 @@ add_executable(shader_recompiler frontend/maxwell/program.h frontend/maxwell/termination_code.cpp frontend/maxwell/termination_code.h + frontend/maxwell/translate/impl/common_encoding.h + frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp + frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp + frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h + frontend/maxwell/translate/impl/integer_add.cpp + frontend/maxwell/translate/impl/integer_scaled_add.cpp + frontend/maxwell/translate/impl/integer_set_predicate.cpp + frontend/maxwell/translate/impl/integer_shift_left.cpp + frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/not_implemented.cpp - frontend/maxwell/translate/impl/register_move.cpp + frontend/maxwell/translate/impl/move_register.cpp + frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/dead_code_elimination_pass.cpp - ir_opt/get_set_elimination_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e795618fcf..249251dd0b 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list<Value> args) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; + std::initializer_list<Value> args, u64 flags) { + Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 4b6b80c4b2..ec4a41cb1a 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -39,7 +39,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list<Value> args = {}); + std::initializer_list<Value> args = {}, u64 flags = 0); /// Adds a new immediate predecessor to the basic block. void AddImmediatePredecessor(IR::Block* immediate_predecessor); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6450e4b2c0..87b253c9ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { Inst(Opcode::SetAttribute, attribute, value); } +U32 IREmitter::WorkgroupIdX() { + return Inst<U32>(Opcode::WorkgroupIdX); +} + +U32 IREmitter::WorkgroupIdY() { + return Inst<U32>(Opcode::WorkgroupIdY); +} + +U32 IREmitter::WorkgroupIdZ() { + return Inst<U32>(Opcode::WorkgroupIdZ); +} + +U32 IREmitter::LocalInvocationIdX() { + return Inst<U32>(Opcode::LocalInvocationIdX); +} + +U32 IREmitter::LocalInvocationIdY() { + return Inst<U32>(Opcode::LocalInvocationIdY); +} + +U32 IREmitter::LocalInvocationIdZ() { + return Inst<U32>(Opcode::LocalInvocationIdZ); +} + +U32 IREmitter::LoadGlobalU8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU8, address); +} + +U32 IREmitter::LoadGlobalS8(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS8, address); +} + +U32 IREmitter::LoadGlobalU16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalU16, address); +} + +U32 IREmitter::LoadGlobalS16(const U64& address) { + return Inst<U32>(Opcode::LoadGlobalS16, address); +} + +U32 IREmitter::LoadGlobal32(const U64& address) { + return Inst<U32>(Opcode::LoadGlobal32, address); +} + +Value IREmitter::LoadGlobal64(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal64, address); +} + +Value IREmitter::LoadGlobal128(const U64& address) { + return Inst<Value>(Opcode::LoadGlobal128, address); +} + void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { Inst(Opcode::WriteGlobalU8, address, value); } @@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst<U1>(Opcode::GetOverflowFromOp, op); } -U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst<U16>(Opcode::FPAdd16, a, b); + return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b); case Type::U32: - return Inst<U32>(Opcode::FPAdd32, a, b); + return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b); case Type::U64: - return Inst<U64>(Opcode::FPAdd64, a, b); + return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } @@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { if (e1.Type() != e2.Type()) { - throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } return Inst(Opcode::CompositeConstruct2, e1, e2); } Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); } return Inst(Opcode::CompositeConstruct3, e1, e2, e3); } @@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, const UAny& e4) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), - e4.Type()); + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); } return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); } @@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); } +UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U8: + return Inst<UAny>(Opcode::Select8, condition, true_value, false_value); + case Type::U16: + return Inst<UAny>(Opcode::Select16, condition, true_value, false_value); + case Type::U32: + return Inst<UAny>(Opcode::Select32, condition, true_value, false_value); + case Type::U64: + return Inst<UAny>(Opcode::Select64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + U64 IREmitter::PackUint2x32(const Value& vector) { return Inst<U64>(Opcode::PackUint2x32, vector); } @@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) { return Inst<Value>(Opcode::UnpackDouble2x32, value); } -U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst<U16>(Opcode::FPMul16, a, b); + return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b); case Type::U32: - return Inst<U32>(Opcode::FPMul32, a, b); + return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b); case Type::U64: - return Inst<U64>(Opcode::FPMul64, a, b); + return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch (a.Type()) { + case Type::U16: + return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::U32: + return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::U64: + return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c); default: ThrowInvalidType(a.Type()); } @@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { } } +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst<U32>(Opcode::IAdd32, a, b); + case Type::U64: + return Inst<U64>(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst<U32>(Opcode::IMul32, a, b); +} + +U32 IREmitter::INeg(const U32& value) { + return Inst<U32>(Opcode::INeg32, value); +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst<U32>(Opcode::IAbs32, value); +} + +U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { + return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); +} + +U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { + return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); +} + +U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { + return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst<U32>(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { + return Inst<U1>(Opcode::IEqual, lhs, rhs); +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst<U1>(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst<U1>(Opcode::LogicalOr, a, b); } @@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { return Inst<U1>(Opcode::LogicalAnd, a, b); } +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst<U1>(Opcode::LogicalXor, a, b); +} + U1 IREmitter::LogicalNot(const U1& value) { return Inst<U1>(Opcode::LogicalNot, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1af79f41cb..7ff763ecf4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -4,8 +4,12 @@ #pragma once +#include <cstring> +#include <type_traits> + #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { @@ -52,6 +56,22 @@ public: [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const U32& value); + [[nodiscard]] U32 WorkgroupIdX(); + [[nodiscard]] U32 WorkgroupIdY(); + [[nodiscard]] U32 WorkgroupIdZ(); + + [[nodiscard]] U32 LocalInvocationIdX(); + [[nodiscard]] U32 LocalInvocationIdY(); + [[nodiscard]] U32 LocalInvocationIdZ(); + + [[nodiscard]] U32 LoadGlobalU8(const U64& address); + [[nodiscard]] U32 LoadGlobalS8(const U64& address); + [[nodiscard]] U32 LoadGlobalU16(const U64& address); + [[nodiscard]] U32 LoadGlobalS16(const U64& address); + [[nodiscard]] U32 LoadGlobal32(const U64& address); + [[nodiscard]] Value LoadGlobal64(const U64& address); + [[nodiscard]] Value LoadGlobal128(const U64& address); + void WriteGlobalU8(const U64& address, const U32& value); void WriteGlobalS8(const U64& address, const U32& value); void WriteGlobalU16(const U64& address, const U32& value); @@ -71,6 +91,8 @@ public: const UAny& e4); [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); @@ -80,8 +102,10 @@ public: [[nodiscard]] U64 PackDouble2x32(const Value& vector); [[nodiscard]] Value UnpackDouble2x32(const U64& value); - [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); - [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); + [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control = {}); [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); @@ -100,8 +124,31 @@ public: [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 INeg(const U32& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); + [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalNot(const U1& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); @@ -118,6 +165,22 @@ private: auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } + + template <typename T> + requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template <typename T = Value, typename FlagType, typename... Args> + T Inst(Opcode op, Flags<FlagType> flags, Args... args) { + u64 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 7f1ed6710c..61849695ac 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,7 +5,9 @@ #pragma once #include <array> +#include <cstring> #include <span> +#include <type_traits> #include <vector> #include <boost/intrusive/list.hpp> @@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_) noexcept : op(op_) {} + explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -73,6 +75,14 @@ public: void ReplaceUsesWith(Value replacement); + template <typename FlagsType> + requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(&ret, &flags, sizeof(ret)); + return ret; + } + private: void Use(const Value& value); void UndoUse(const Value& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 0000000000..28bb9e798c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader::IR { + +enum class FmzMode { + None, // Denorms are not flushed, NAN is propagated (nouveau) + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) +}; + +enum class FpRounding { + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero +}; + +struct FpControl { + bool no_contraction{false}; + FpRounding rounding : 8 = FpRounding::RN; + FmzMode fmz_mode : 8 = FmzMode::FTZ; +}; +static_assert(sizeof(FpControl) <= sizeof(u64)); +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 40759e96ab..4ecb5e936d 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1, OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupIdX, U32, ) +OPCODE(WorkgroupIdY, U32, ) +OPCODE(WorkgroupIdZ, U32, ) +OPCODE(LocalInvocationIdX, U32, ) +OPCODE(LocalInvocationIdY, U32, ) +OPCODE(LocalInvocationIdZ, U32, ) // Undefined OPCODE(Undef1, U1, ) @@ -44,6 +50,13 @@ OPCODE(Undef32, U32, OPCODE(Undef64, U64, ) // Memory operations +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, Opaque, U64, ) +OPCODE(LoadGlobal128, Opaque, U64, ) OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, ) @@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeExtract, Opaque, Opaque, U32, ) +// Select operations +OPCODE(Select8, U8, U1, U8, U8, ) +OPCODE(Select16, U16, U1, U16, U16, ) +OPCODE(Select32, U32, U1, U32, U32, ) +OPCODE(Select64, U64, U1, U64, U64, ) + // Bitwise conversions OPCODE(PackUint2x32, U64, Opaque, ) OPCODE(UnpackUint2x32, Opaque, U64, ) @@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, U16, U16 ) -OPCODE(FPAbs32, U32, U32 ) -OPCODE(FPAbs64, U64, U64 ) -OPCODE(FPAdd16, U16, U16, U16 ) -OPCODE(FPAdd32, U32, U32, U32 ) -OPCODE(FPAdd64, U64, U64, U64 ) -OPCODE(FPFma16, U16, U16, U16 ) -OPCODE(FPFma32, U32, U32, U32 ) -OPCODE(FPFma64, U64, U64, U64 ) -OPCODE(FPMax32, U32, U32, U32 ) -OPCODE(FPMax64, U64, U64, U64 ) -OPCODE(FPMin32, U32, U32, U32 ) -OPCODE(FPMin64, U64, U64, U64 ) -OPCODE(FPMul16, U16, U16, U16 ) -OPCODE(FPMul32, U32, U32, U32 ) -OPCODE(FPMul64, U64, U64, U64 ) -OPCODE(FPNeg16, U16, U16 ) -OPCODE(FPNeg32, U32, U32 ) -OPCODE(FPNeg64, U64, U64 ) -OPCODE(FPRecip32, U32, U32 ) -OPCODE(FPRecip64, U64, U64 ) -OPCODE(FPRecipSqrt32, U32, U32 ) -OPCODE(FPRecipSqrt64, U64, U64 ) -OPCODE(FPSqrt, U32, U32 ) -OPCODE(FPSin, U32, U32 ) -OPCODE(FPSinNotReduced, U32, U32 ) -OPCODE(FPExp2, U32, U32 ) -OPCODE(FPExp2NotReduced, U32, U32 ) -OPCODE(FPCos, U32, U32 ) -OPCODE(FPCosNotReduced, U32, U32 ) -OPCODE(FPLog2, U32, U32 ) -OPCODE(FPSaturate16, U16, U16 ) -OPCODE(FPSaturate32, U32, U32 ) -OPCODE(FPSaturate64, U64, U64 ) -OPCODE(FPRoundEven16, U16, U16 ) -OPCODE(FPRoundEven32, U32, U32 ) -OPCODE(FPRoundEven64, U64, U64 ) -OPCODE(FPFloor16, U16, U16 ) -OPCODE(FPFloor32, U32, U32 ) -OPCODE(FPFloor64, U64, U64 ) -OPCODE(FPCeil16, U16, U16 ) -OPCODE(FPCeil32, U32, U32 ) -OPCODE(FPCeil64, U64, U64 ) -OPCODE(FPTrunc16, U16, U16 ) -OPCODE(FPTrunc32, U32, U32 ) -OPCODE(FPTrunc64, U64, U64 ) +OPCODE(FPAbs16, U16, U16, ) +OPCODE(FPAbs32, U32, U32, ) +OPCODE(FPAbs64, U64, U64, ) +OPCODE(FPAdd16, U16, U16, U16, ) +OPCODE(FPAdd32, U32, U32, U32, ) +OPCODE(FPAdd64, U64, U64, U64, ) +OPCODE(FPFma16, U16, U16, U16, U16, ) +OPCODE(FPFma32, U32, U32, U32, U32, ) +OPCODE(FPFma64, U64, U64, U64, U64, ) +OPCODE(FPMax32, U32, U32, U32, ) +OPCODE(FPMax64, U64, U64, U64, ) +OPCODE(FPMin32, U32, U32, U32, ) +OPCODE(FPMin64, U64, U64, U64, ) +OPCODE(FPMul16, U16, U16, U16, ) +OPCODE(FPMul32, U32, U32, U32, ) +OPCODE(FPMul64, U64, U64, U64, ) +OPCODE(FPNeg16, U16, U16, ) +OPCODE(FPNeg32, U32, U32, ) +OPCODE(FPNeg64, U64, U64, ) +OPCODE(FPRecip32, U32, U32, ) +OPCODE(FPRecip64, U64, U64, ) +OPCODE(FPRecipSqrt32, U32, U32, ) +OPCODE(FPRecipSqrt64, U64, U64, ) +OPCODE(FPSqrt, U32, U32, ) +OPCODE(FPSin, U32, U32, ) +OPCODE(FPSinNotReduced, U32, U32, ) +OPCODE(FPExp2, U32, U32, ) +OPCODE(FPExp2NotReduced, U32, U32, ) +OPCODE(FPCos, U32, U32, ) +OPCODE(FPCosNotReduced, U32, U32, ) +OPCODE(FPLog2, U32, U32, ) +OPCODE(FPSaturate16, U16, U16, ) +OPCODE(FPSaturate32, U32, U32, ) +OPCODE(FPSaturate64, U64, U64, ) +OPCODE(FPRoundEven16, U16, U16, ) +OPCODE(FPRoundEven32, U32, U32, ) +OPCODE(FPRoundEven64, U64, U64, ) +OPCODE(FPFloor16, U16, U16, ) +OPCODE(FPFloor32, U32, U32, ) +OPCODE(FPFloor64, U64, U64, ) +OPCODE(FPCeil16, U16, U16, ) +OPCODE(FPCeil32, U32, U32, ) +OPCODE(FPCeil64, U64, U64, ) +OPCODE(FPTrunc16, U16, U16, ) +OPCODE(FPTrunc32, U32, U32, ) +OPCODE(FPTrunc64, U64, U64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) + +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) OPCODE(LogicalNot, U1, U1, ) // Conversion operations diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index daf23193f2..c6f2f82bfb 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -8,7 +8,16 @@ namespace Shader::IR { -enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +enum class Pred : u64 { + P0, + P1, + P2, + P3, + P4, + P5, + P6, + PT, +}; constexpr size_t NUM_USER_PREDS = 6; constexpr size_t NUM_PREDS = 7; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb7..bd1f96c079 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::Invoke(Optimization::IdentityRemovalPass, function); // Optimization::Invoke(Optimization::VerificationPass, function); } + //*/ } std::string DumpProgram(const Program& program) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 0000000000..3da37a2bb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + return IR::FmzMode::FMZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 0000000000..d2c44b9ccd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (sat) { + throw NotImplementedException("FADD SAT"); + } + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + throw NotImplementedException("FADD (cbuf)"); +} + +void TranslatorVisitor::FADD_imm(u64) { + throw NotImplementedException("FADD (imm)"); +} + +void TranslatorVisitor::FADD32I(u64) { + throw NotImplementedException("FADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 0000000000..30ca052ec5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (sat) { + throw NotImplementedException("FFMA SAT"); + } + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64) { + throw NotImplementedException("FFMA (rc)"); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64) { + throw NotImplementedException("FFMA (imm)"); +} + +void TranslatorVisitor::FFMA32I(u64) { + throw NotImplementedException("FFMA32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 0000000000..743a1e2f0f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + if (sat) { + throw NotImplementedException("FMUL SAT"); + } + IR::U32 op_a{v.X(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64) { + throw NotImplementedException("FMUL (cbuf)"); +} + +void TranslatorVisitor::FMUL_imm(u64) { + throw NotImplementedException("FMUL (imm)"); +} + +void TranslatorVisitor::FMUL32I(u64) { + throw NotImplementedException("FMUL32I"); +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f29..548c7f611d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::U32 TranslatorVisitor::GetImm(u64 insn) { +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; BitField<20, 19, u64> value; @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { return ir.Imm32(value); } +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast<u32>(imm.value)); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1b..ef6d977fef 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -46,7 +46,7 @@ public: void DADD_reg(u64 insn); void DADD_cbuf(u64 insn); void DADD_imm(u64 insn); - void DEPBAR(u64 insn); + void DEPBAR(); void DFMA_reg(u64 insn); void DFMA_rc(u64 insn); void DFMA_cr(u64 insn); @@ -298,9 +298,14 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::U32 GetImm(u64 insn); + [[nodiscard]] IR::U32 GetImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 0000000000..60f79b1606 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? + if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + const bool neg_a{!po && iadd.neg_a != 0}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64) { + throw NotImplementedException("IADD (reg)"); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64) { + throw NotImplementedException("IADD (imm)"); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 0000000000..f92c0bbd60 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + const bool po{iscadd.three_for_po == 3}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (!po) { + // When PO is not present, the bits are interpreted as negation + if (iscadd.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iscadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast<u32>(iscadd.scale))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + if (po) { + // .PO adds one to the final result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + v.X(iscadd.dest_reg, result); + + if (iscadd.cc != 0) { + throw NotImplementedException("ISCADD CC"); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> op_b; + } const iscadd{insn}; + + ISCADD(*this, insn, X(iscadd.op_b)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + throw NotImplementedException("ISCADD (cbuf)"); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + throw NotImplementedException("ISCADD (imm)"); +} + +void TranslatorVisitor::ISCADD32I(u64) { + throw NotImplementedException("ISCADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 0000000000..76c6b52910 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class CompareOp : u64 { + F, // Always false + LT, // Less than + EQ, // Equal + LE, // Less than or equal + GT, // Greater than + NE, // Not equal + GE, // Greater than or equal + T, // Always true +}; + +enum class Bop : u64 { + AND, + OR, + XOR, +}; + +IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, + bool is_signed) { + switch (op) { + case CompareOp::F: + return ir.Imm1(false); + case CompareOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case CompareOp::EQ: + return ir.IEqual(lhs, rhs); + case CompareOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + case CompareOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case CompareOp::NE: + return ir.INotEqual(lhs, rhs); + case CompareOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case CompareOp::T: + return ir.Imm1(true); + } + throw NotImplementedException("Invalid ISETP compare op {}", op); +} + +IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { + switch (bop) { + case Bop::AND: + return ir.LogicalAnd(comparison, bop_pred); + case Bop::OR: + return ir.LogicalOr(comparison, bop_pred); + case Bop::XOR: + return ir.LogicalXor(comparison, bop_pred); + } + throw NotImplementedException("Invalid ISETP bop {}", bop); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, Bop> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const Bop bop{isetp.bop}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; + const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64) { + throw NotImplementedException("ISETP_imm"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 0000000000..d4b417d14a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to clamp it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." + // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. + // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64) { + throw NotImplementedException("SHL_reg"); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + throw NotImplementedException("SHL_cbuf"); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 0000000000..70a7c76c55 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. + const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64) { + throw NotImplementedException("XMAD (reg)"); +} + +void TranslatorVisitor::XMAD_rc(u64) { + throw NotImplementedException("XMAD (rc)"); +} + +void TranslatorVisitor::XMAD_cr(u64) { + throw NotImplementedException("XMAD (cr)"); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<39, 8, IR::Reg> src_c; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + const IR::U32 src_b{ir.Imm32(static_cast<u32>(xmad.src_b))}; + const IR::U32 src_c{X(xmad.src_c)}; + XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, + xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cfb..c9669c6178 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -10,16 +10,35 @@ namespace Shader::Maxwell { namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? +}; + enum class StoreSize : u64 { - U8, - S8, - U16, - S16, + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend B32, B64, B128, }; +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html enum class StoreCache : u64 { WB, // Cache write-back all coherent levels @@ -27,61 +46,137 @@ enum class StoreCache : u64 { CS, // Cache streaming, likely to be accessed once WT, // Cache write-through (to system memory) }; -} // Anonymous namespace -void TranslatorVisitor::STG(u64 insn) { - // STG stores registers into global memory. +IR::U64 Address(TranslatorVisitor& v, u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> data_reg; BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; BitField<45, 1, u64> e; - BitField<46, 2, StoreCache> cache; - BitField<48, 3, StoreSize> size; - } const stg{insn}; + } const mem{insn}; const IR::U64 address{[&]() -> IR::U64 { - if (stg.e == 0) { - // STG without .E uses a 32-bit pointer, zero-extend it - return ir.ConvertU(64, X(stg.addr_reg)); + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.ConvertU(64, v.X(mem.addr_reg)); } - if (!IR::IsAligned(stg.addr_reg, 2)) { + if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); } - // Pack two registers to build the 32-bit address - return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast<u64>(mem.rz_addr_offset.Value()); + } else { + return static_cast<u64>(mem.addr_offset.Value()); + } }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::B128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::U128: + throw NotImplementedException("LDG U.128"); + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. + union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; switch (stg.size) { case StoreSize::U8: - ir.WriteGlobalU8(address, X(stg.data_reg)); + ir.WriteGlobalU8(address, X(data_reg)); break; case StoreSize::S8: - ir.WriteGlobalS8(address, X(stg.data_reg)); + ir.WriteGlobalS8(address, X(data_reg)); break; case StoreSize::U16: - ir.WriteGlobalU16(address, X(stg.data_reg)); + ir.WriteGlobalU16(address, X(data_reg)); break; case StoreSize::S16: - ir.WriteGlobalS16(address, X(stg.data_reg)); + ir.WriteGlobalS16(address, X(data_reg)); break; case StoreSize::B32: - ir.WriteGlobal32(address, X(stg.data_reg)); + ir.WriteGlobal32(address, X(data_reg)); break; case StoreSize::B64: { - if (!IR::IsAligned(stg.data_reg, 2)) { + if (!IR::IsAligned(data_reg, 2)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; ir.WriteGlobal64(address, vector); break; } case StoreSize::B128: - if (!IR::IsAligned(stg.data_reg, 4)) { + if (!IR::IsAligned(data_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), - X(stg.data_reg + 2), X(stg.data_reg + 3))}; + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; ir.WriteGlobal128(address, vector); break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 0000000000..1711d3f48a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +union MOV { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> src_reg; + BitField<39, 4, u64> mask; +}; + +void CheckMask(MOV mov) { + if (mov.mask != 0xf) { + throw NotImplementedException("Non-full move mask"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, X(mov.src_reg)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 0000000000..93cea302a2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 49, + SR_CirQueueIncrMinusOne = 50, + SR_NLATC = 51, + SR_SWINLO = 57, + SR_SWINSZ = 58, + SR_SMEMSZ = 59, + SR_SMEMBANKS = 60, + SR_LWINLO = 61, + SR_LWINSZ = 62, + SR_LMEMLOSZ = 63, + SR_LMEMHIOFF = 64, + SR_EQMASK = 65, + SR_LTMASK = 66, + SR_LEMASK = 67, + SR_GTMASK = 68, + SR_GEMASK = 69, + SR_REGALLOC = 70, + SR_GLOBALERRORSTATUS = 73, + SR_WARPERRORSTATUS = 75, + SR_PM_HI0 = 81, + SR_PM_HI1 = 82, + SR_PM_HI2 = 83, + SR_PM_HI3 = 84, + SR_PM_HI4 = 85, + SR_PM_HI5 = 86, + SR_PM_HI6 = 87, + SR_PM_HI7 = 88, + SR_CLOCKLO = 89, + SR_CLOCKHI = 90, + SR_GLOBALTIMERLO = 91, + SR_GLOBALTIMERHI = 92, + SR_HWTASKID = 105, + SR_CIRCULARQUEUEENTRYINDEX = 106, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d10..d70399f6bf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -7,21 +7,8 @@ #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#include "shader_recompiler/ir_opt/passes.h" - namespace Shader::Maxwell { -[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { - auto raw{IR::DumpBlock(block)}; - - Optimization::GetSetElimination(block); - Optimization::DeadCodeEliminationPass(block); - Optimization::IdentityRemovalPass(block); - auto dumped{IR::DumpBlock(block)}; - - fmt::print(stderr, "{}", dumped); -} - [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { throw NotImplementedException("Instruction {} is not implemented", opcode); } @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { ThrowNotImplemented(Opcode::DADD_imm); } -void TranslatorVisitor::DEPBAR(u64) { - ThrowNotImplemented(Opcode::DEPBAR); +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op } void TranslatorVisitor::DFMA_reg(u64) { @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { ThrowNotImplemented(Opcode::F2F_imm); } -void TranslatorVisitor::FADD_reg(u64) { - ThrowNotImplemented(Opcode::FADD_reg); -} - -void TranslatorVisitor::FADD_cbuf(u64) { - ThrowNotImplemented(Opcode::FADD_cbuf); -} - -void TranslatorVisitor::FADD_imm(u64) { - ThrowNotImplemented(Opcode::FADD_imm); -} - -void TranslatorVisitor::FADD32I(u64) { - ThrowNotImplemented(Opcode::FADD32I); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FFMA_reg(u64) { - ThrowNotImplemented(Opcode::FFMA_reg); -} - -void TranslatorVisitor::FFMA_rc(u64) { - ThrowNotImplemented(Opcode::FFMA_rc); -} - -void TranslatorVisitor::FFMA_cr(u64) { - ThrowNotImplemented(Opcode::FFMA_cr); -} - -void TranslatorVisitor::FFMA_imm(u64) { - ThrowNotImplemented(Opcode::FFMA_imm); -} - -void TranslatorVisitor::FFMA32I(u64) { - ThrowNotImplemented(Opcode::FFMA32I); -} - void TranslatorVisitor::FLO_reg(u64) { ThrowNotImplemented(Opcode::FLO_reg); } @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { ThrowNotImplemented(Opcode::FMNMX_imm); } -void TranslatorVisitor::FMUL_reg(u64) { - ThrowNotImplemented(Opcode::FMUL_reg); -} - -void TranslatorVisitor::FMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::FMUL_cbuf); -} - -void TranslatorVisitor::FMUL_imm(u64) { - ThrowNotImplemented(Opcode::FMUL_imm); -} - -void TranslatorVisitor::FMUL32I(u64) { - ThrowNotImplemented(Opcode::FMUL32I); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD_reg(u64) { - ThrowNotImplemented(Opcode::IADD_reg); -} - -void TranslatorVisitor::IADD_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD_cbuf); -} - -void TranslatorVisitor::IADD_imm(u64) { - ThrowNotImplemented(Opcode::IADD_imm); -} - void TranslatorVisitor::IADD3_reg(u64) { ThrowNotImplemented(Opcode::IADD3_reg); } @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::IADD32I(u64) { - ThrowNotImplemented(Opcode::IADD32I); -} - void TranslatorVisitor::ICMP_reg(u64) { ThrowNotImplemented(Opcode::ICMP_reg); } @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISCADD_reg(u64) { - ThrowNotImplemented(Opcode::ISCADD_reg); -} - -void TranslatorVisitor::ISCADD_cbuf(u64) { - ThrowNotImplemented(Opcode::ISCADD_cbuf); -} - -void TranslatorVisitor::ISCADD_imm(u64) { - ThrowNotImplemented(Opcode::ISCADD_imm); -} - -void TranslatorVisitor::ISCADD32I(u64) { - ThrowNotImplemented(Opcode::ISCADD32I); -} - void TranslatorVisitor::ISET_reg(u64) { ThrowNotImplemented(Opcode::ISET_reg); } @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { ThrowNotImplemented(Opcode::ISET_imm); } -void TranslatorVisitor::ISETP_reg(u64) { - ThrowNotImplemented(Opcode::ISETP_reg); -} - -void TranslatorVisitor::ISETP_cbuf(u64) { - ThrowNotImplemented(Opcode::ISETP_cbuf); -} - -void TranslatorVisitor::ISETP_imm(u64) { - ThrowNotImplemented(Opcode::ISETP_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { ThrowNotImplemented(Opcode::LDC); } -void TranslatorVisitor::LDG(u64) { - ThrowNotImplemented(Opcode::LDG); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -void TranslatorVisitor::S2R(u64) { - ThrowNotImplemented(Opcode::S2R); -} - void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHL_reg(u64) { - ThrowNotImplemented(Opcode::SHL_reg); -} - -void TranslatorVisitor::SHL_cbuf(u64) { - ThrowNotImplemented(Opcode::SHL_cbuf); -} - -void TranslatorVisitor::SHL_imm(u64) { - ThrowNotImplemented(Opcode::SHL_imm); -} - void TranslatorVisitor::SHR_reg(u64) { ThrowNotImplemented(Opcode::SHR_reg); } @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { ThrowNotImplemented(Opcode::VSHR); } -void TranslatorVisitor::XMAD_reg(u64) { - ThrowNotImplemented(Opcode::XMAD_reg); -} - -void TranslatorVisitor::XMAD_rc(u64) { - ThrowNotImplemented(Opcode::XMAD_rc); -} - -void TranslatorVisitor::XMAD_cr(u64) { - ThrowNotImplemented(Opcode::XMAD_cr); -} - -void TranslatorVisitor::XMAD_imm(u64) { - ThrowNotImplemented(Opcode::XMAD_imm); -} - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp deleted file mode 100644 index 7fa35ba3a2..0000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { - throw NotImplementedException("Non-full move mask"); - } -} -} // Anonymous namespace - -void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); -} - -void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); -} - -void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm(insn)); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp b/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp deleted file mode 100644 index 21b8526cd9..0000000000 --- a/src/shader_recompiler/ir_opt/get_set_elimination_pass.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <array> - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Optimization { -namespace { -using Iterator = IR::Block::iterator; - -enum class TrackingType { - Reg, -}; - -struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - Iterator last_set_instruction; - bool set_instruction_present = false; -}; - -void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst, - TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; -} - -RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) { - RegisterInfo info{}; - info.register_value = IR::Value{&*get_inst}; - info.tracking_type = tracking_type; - return info; -} - -void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - if (info.register_value.IsEmpty()) { - info = Nothing(get_inst, tracking_type); - return; - } - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - info = Nothing(get_inst, tracking_type); -} -} // Anonymous namespace - -void GetSetElimination(IR::Block& block) { - std::array<RegisterInfo, 255> reg_info; - - for (Iterator inst = block.begin(); inst != block.end(); ++inst) { - switch (inst->Opcode()) { - case IR::Opcode::GetRegister: { - const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast<size_t>(reg)}; - DoGet(reg_info.at(index), inst, TrackingType::Reg); - break; - } - case IR::Opcode::SetRegister: { - const IR::Reg reg{inst->Arg(0).Reg()}; - if (reg == IR::Reg::RZ) { - break; - } - const size_t index{static_cast<size_t>(reg)}; - DoSet(block, reg_info.at(index), inst->Arg(1), inst, TrackingType::Reg); - break; - } - default: - break; - } - } -} - -} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 83f094d735..7ed4005ed1 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) { } void DeadCodeEliminationPass(IR::Block& block); -void GetSetElimination(IR::Block& block); void IdentityRemovalPass(IR::Block& block); void SsaRewritePass(IR::Function& function); void VerificationPass(const IR::Block& block); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index e3c9ad6e8f..4022c6fe2a 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -51,7 +51,8 @@ void RunDatabase() { int main() { // RunDatabase(); - FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; auto cfg{std::make_unique<Flow::CFG>(env, 0)}; // fmt::print(stdout, "{}\n", cfg->Dot()); -- cgit v1.2.3-70-g09d2 From 16cb00c521cae6e93ec49d10e15b575b7bc4857e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Fri, 5 Feb 2021 23:11:23 -0300 Subject: shader: Add pools and rename files --- src/shader_recompiler/CMakeLists.txt | 14 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 21 ++ src/shader_recompiler/frontend/ir/basic_block.cpp | 5 +- src/shader_recompiler/frontend/ir/basic_block.h | 11 +- src/shader_recompiler/frontend/ir/function.h | 12 +- .../frontend/ir/microinstruction.h | 2 +- src/shader_recompiler/frontend/ir/opcode.cpp | 67 ------ src/shader_recompiler/frontend/ir/opcode.h | 44 ---- src/shader_recompiler/frontend/ir/opcode.inc | 237 --------------------- src/shader_recompiler/frontend/ir/opcodes.cpp | 67 ++++++ src/shader_recompiler/frontend/ir/opcodes.h | 44 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 237 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/program.cpp | 38 ++++ src/shader_recompiler/frontend/ir/program.h | 21 ++ src/shader_recompiler/frontend/ir/value.cpp | 2 +- .../frontend/maxwell/control_flow.h | 2 +- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- src/shader_recompiler/frontend/maxwell/decode.h | 2 +- src/shader_recompiler/frontend/maxwell/opcode.cpp | 26 --- src/shader_recompiler/frontend/maxwell/opcode.h | 30 --- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 26 +++ src/shader_recompiler/frontend/maxwell/opcodes.h | 30 +++ src/shader_recompiler/frontend/maxwell/program.cpp | 49 ++--- src/shader_recompiler/frontend/maxwell/program.h | 22 +- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_multi_function.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 2 +- .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/move_register.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 5 +- .../frontend/maxwell/translate/translate.h | 7 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 28 +-- src/shader_recompiler/main.cpp | 11 +- src/shader_recompiler/object_pool.h | 89 ++++++++ 35 files changed, 655 insertions(+), 508 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.h delete mode 100644 src/shader_recompiler/frontend/ir/opcode.cpp delete mode 100644 src/shader_recompiler/frontend/ir/opcode.h delete mode 100644 src/shader_recompiler/frontend/ir/opcode.inc create mode 100644 src/shader_recompiler/frontend/ir/opcodes.cpp create mode 100644 src/shader_recompiler/frontend/ir/opcodes.h create mode 100644 src/shader_recompiler/frontend/ir/opcodes.inc create mode 100644 src/shader_recompiler/frontend/ir/program.cpp create mode 100644 src/shader_recompiler/frontend/ir/program.h delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.h create mode 100644 src/shader_recompiler/object_pool.h (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 72d5f41d21..248e90d4b6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_executable(shader_recompiler + backend/spirv/emit_spirv.h environment.h exception.h file_environment.cpp @@ -17,10 +18,12 @@ add_executable(shader_recompiler frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h - frontend/ir/opcode.cpp - frontend/ir/opcode.h - frontend/ir/opcode.inc + frontend/ir/opcodes.cpp + frontend/ir/opcodes.h + frontend/ir/opcodes.inc frontend/ir/pred.h + frontend/ir/program.cpp + frontend/ir/program.h frontend/ir/reg.h frontend/ir/type.cpp frontend/ir/type.h @@ -33,8 +36,8 @@ add_executable(shader_recompiler frontend/maxwell/instruction.h frontend/maxwell/location.h frontend/maxwell/maxwell.inc - frontend/maxwell/opcode.cpp - frontend/maxwell/opcode.h + frontend/maxwell/opcodes.cpp + frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h frontend/maxwell/termination_code.cpp @@ -67,6 +70,7 @@ add_executable(shader_recompiler ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp main.cpp + object_pool.h ) target_link_libraries(shader_recompiler PRIVATE fmt::fmt) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 0000000000..99cc8e08ad --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { + +class EmitSPIRV { +public: +private: + // Microinstruction emitters +#define OPCODE(name, result_type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 249251dd0b..1a5d821357 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -14,7 +14,8 @@ namespace Shader::IR { -Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end) + : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} Block::~Block() = default; @@ -24,7 +25,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args, u64 flags) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; + Inst* const inst{inst_pool->Create(op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec4a41cb1a..ec3ad62634 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -10,9 +10,9 @@ #include <vector> #include <boost/intrusive/list.hpp> -#include <boost/pool/pool_alloc.hpp> #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -25,7 +25,7 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(u32 begin, u32 end); + explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end); ~Block(); Block(const Block&) = delete; @@ -119,6 +119,8 @@ public: } private: + /// Memory pool for instruction list + ObjectPool<Inst>* inst_pool; /// Starting location of this block u32 location_begin; /// End location of this block @@ -127,11 +129,6 @@ private: /// List of instructions in this block InstructionList instructions; - /// Memory pool for instruction list - boost::fast_pool_allocator<Inst, boost::default_user_allocator_malloc_free, - boost::details::pool::null_mutex> - instruction_alloc_pool; - /// Block immediate predecessors std::vector<IR::Block*> imm_predecessors; }; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index 2d4dc5b981..bba7d1d395 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -4,22 +4,14 @@ #pragma once -#include <memory> -#include <vector> +#include <boost/container/small_vector.hpp> #include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { struct Function { - struct InplaceDelete { - void operator()(IR::Block* block) const noexcept { - std::destroy_at(block); - } - }; - using UniqueBlock = std::unique_ptr<IR::Block, InplaceDelete>; - - std::vector<UniqueBlock> blocks; + boost::container::small_vector<Block*, 16> blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 22101c9e2d..80baffb2e8 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -13,7 +13,7 @@ #include <boost/intrusive/list.hpp> #include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/opcode.cpp b/src/shader_recompiler/frontend/ir/opcode.cpp deleted file mode 100644 index 65d0740296..0000000000 --- a/src/shader_recompiler/frontend/ir/opcode.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include <string_view> - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/opcode.h" - -namespace Shader::IR { -namespace { -struct OpcodeMeta { - std::string_view name; - Type type; - std::array<Type, 4> arg_types; -}; - -using enum Type; - -constexpr std::array META_TABLE{ -#define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ - .name{#name_token}, \ - .type{type_token}, \ - .arg_types{__VA_ARGS__}, \ - }, -#include "opcode.inc" -#undef OPCODE -}; - -void ValidateOpcode(Opcode op) { - const size_t raw{static_cast<size_t>(op)}; - if (raw >= META_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", raw); - } -} -} // Anonymous namespace - -Type TypeOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast<size_t>(op)].type; -} - -size_t NumArgsOf(Opcode op) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; - const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; - return static_cast<size_t>(distance); -} - -Type ArgTypeOf(Opcode op, size_t arg_index) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; - if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { - throw InvalidArgument("Out of bounds argument"); - } - return arg_types[arg_index]; -} - -std::string_view NameOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast<size_t>(op)].name; -} - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.h b/src/shader_recompiler/frontend/ir/opcode.h deleted file mode 100644 index 1f4440379d..0000000000 --- a/src/shader_recompiler/frontend/ir/opcode.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <string_view> - -#include <fmt/format.h> - -#include "shader_recompiler/frontend/ir/type.h" - -namespace Shader::IR { - -enum class Opcode { -#define OPCODE(name, ...) name, -#include "opcode.inc" -#undef OPCODE -}; - -/// Get return type of an opcode -[[nodiscard]] Type TypeOf(Opcode op); - -/// Get the number of arguments an opcode accepts -[[nodiscard]] size_t NumArgsOf(Opcode op); - -/// Get the required type of an argument of an opcode -[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); - -/// Get the name of an opcode -[[nodiscard]] std::string_view NameOf(Opcode op); - -} // namespace Shader::IR - -template <> -struct fmt::formatter<Shader::IR::Opcode> { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - template <typename FormatContext> - auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { - return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); - } -}; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc deleted file mode 100644 index 6eb105d929..0000000000 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... -OPCODE(Void, Void, ) -OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Phi, Opaque, /*todo*/ ) - -// Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) -OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) - -// Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, U32, Attribute, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupIdX, U32, ) -OPCODE(WorkgroupIdY, U32, ) -OPCODE(WorkgroupIdZ, U32, ) -OPCODE(LocalInvocationIdX, U32, ) -OPCODE(LocalInvocationIdY, U32, ) -OPCODE(LocalInvocationIdZ, U32, ) - -// Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) - -// Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) - -// Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) -OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) - -// Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) - -// Select operations -OPCODE(Select8, U8, U1, U8, U8, ) -OPCODE(Select16, U16, U1, U16, U16, ) -OPCODE(Select32, U32, U1, U32, U32, ) -OPCODE(Select64, U64, U1, U64, U64, ) - -// Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) - -// Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) - -// Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) -OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPSinNotReduced, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPExp2NotReduced, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPCosNotReduced, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) -OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) - -// Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) - -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) - -// Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) - -// Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) - -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 0000000000..1f188411a9 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <string_view> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/opcodes.h" + +namespace Shader::IR { +namespace { +struct OpcodeMeta { + std::string_view name; + Type type; + std::array<Type, 4> arg_types; +}; + +using enum Type; + +constexpr std::array META_TABLE{ +#define OPCODE(name_token, type_token, ...) \ + OpcodeMeta{ \ + .name{#name_token}, \ + .type{type_token}, \ + .arg_types{__VA_ARGS__}, \ + }, +#include "opcodes.inc" +#undef OPCODE +}; + +void ValidateOpcode(Opcode op) { + const size_t raw{static_cast<size_t>(op)}; + if (raw >= META_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", raw); + } +} +} // Anonymous namespace + +Type TypeOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast<size_t>(op)].type; +} + +size_t NumArgsOf(Opcode op) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; + const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; + return static_cast<size_t>(distance); +} + +Type ArgTypeOf(Opcode op, size_t arg_index) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; + if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { + throw InvalidArgument("Out of bounds argument"); + } + return arg_types[arg_index]; +} + +std::string_view NameOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast<size_t>(op)].name; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 0000000000..999fb2e775 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string_view> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +enum class Opcode { +#define OPCODE(name, ...) name, +#include "opcodes.inc" +#undef OPCODE +}; + +/// Get return type of an opcode +[[nodiscard]] Type TypeOf(Opcode op); + +/// Get the number of arguments an opcode accepts +[[nodiscard]] size_t NumArgsOf(Opcode op); + +/// Get the required type of an argument of an opcode +[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); + +/// Get the name of an opcode +[[nodiscard]] std::string_view NameOf(Opcode op); + +} // namespace Shader::IR + +template <> +struct fmt::formatter<Shader::IR::Opcode> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { + return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc new file mode 100644 index 0000000000..6eb105d929 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -0,0 +1,237 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Void, Void, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Phi, Opaque, /*todo*/ ) + +// Control flow +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(Exit, Void, ) +OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) + +// Context getters/setters +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, U32, Attribute, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupIdX, U32, ) +OPCODE(WorkgroupIdY, U32, ) +OPCODE(WorkgroupIdZ, U32, ) +OPCODE(LocalInvocationIdX, U32, ) +OPCODE(LocalInvocationIdY, U32, ) +OPCODE(LocalInvocationIdZ, U32, ) + +// Undefined +OPCODE(Undef1, U1, ) +OPCODE(Undef8, U8, ) +OPCODE(Undef16, U16, ) +OPCODE(Undef32, U32, ) +OPCODE(Undef64, U64, ) + +// Memory operations +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) + +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) + +// Vector utility +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) + +// Select operations +OPCODE(Select8, U8, U1, U8, U8, ) +OPCODE(Select16, U16, U1, U16, U16, ) +OPCODE(Select32, U32, U1, U32, U32, ) +OPCODE(Select64, U64, U1, U64, U64, ) + +// Bitwise conversions +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackDouble2x32, U64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, U64, ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) + +// Floating-point operations +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPSinNotReduced, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPExp2NotReduced, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPCosNotReduced, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) + +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) + +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 0000000000..0ce99ef2a9 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <string> + +#include <fmt/format.h> + +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::IR { + +std::string DumpProgram(const Program& program) { + size_t index{0}; + std::map<const IR::Inst*, size_t> inst_to_index; + std::map<const IR::Block*, size_t> block_to_index; + + for (const IR::Function& function : program.functions) { + for (const IR::Block* const block : function.blocks) { + block_to_index.emplace(block, index); + ++index; + } + } + std::string ret; + for (const IR::Function& function : program.functions) { + ret += fmt::format("Function\n"); + for (const auto& block : function.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + } + return ret; +} + +} // namespace Shader::IR \ No newline at end of file diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 0000000000..efaf1aa1eb --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +#include <boost/container/small_vector.hpp> + +#include "shader_recompiler/frontend/ir/function.h" + +namespace Shader::IR { + +struct Program { + boost::container::small_vector<Function, 1> functions; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 93ff8ccf16..9ea61813ba 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 20ada8afd9..49b369282a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -16,7 +16,7 @@ #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell::Flow { diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index ab1cc6c8d3..bd85afa1e5 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/decode.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { namespace { diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h index 2a3dd28e84..b4f080fd7e 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.h +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -5,7 +5,7 @@ #pragma once #include "common/common_types.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp deleted file mode 100644 index 8a7bdb6115..0000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <array> - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" - -namespace Shader::Maxwell { -namespace { -constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, -#include "maxwell.inc" -#undef INST -}; -} // Anonymous namespace - -const char* NameOf(Opcode opcode) { - if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); - } - return NAME_TABLE[static_cast<size_t>(opcode)]; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h deleted file mode 100644 index cd574f29d0..0000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <fmt/format.h> - -namespace Shader::Maxwell { - -enum class Opcode { -#define INST(name, cute, encode) name, -#include "maxwell.inc" -#undef INST -}; - -const char* NameOf(Opcode opcode); - -} // namespace Shader::Maxwell - -template <> -struct fmt::formatter<Shader::Maxwell::Opcode> { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - template <typename FormatContext> - auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { - return format_to(ctx.out(), "{}", NameOf(opcode)); - } -}; diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 0000000000..12ddf2ac95 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) #cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); + } + return NAME_TABLE[static_cast<size_t>(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 0000000000..cd574f29d0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <fmt/format.h> + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter<Shader::Maxwell::Opcode> { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template <typename FormatContext> + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b3f2de852f..8cdd20804e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -5,6 +5,7 @@ #include <algorithm> #include <memory> +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -12,17 +13,18 @@ namespace Shader::Maxwell { namespace { -void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span<IR::Block*> block_map, IR::Block* block_memory) { +void TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span<IR::Block*> block_map) { const size_t num_blocks{cfg_function.blocks.size()}; function.blocks.reserve(num_blocks); for (const Flow::BlockId block_id : cfg_function.blocks) { const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); - block_map[flow_block.id] = function.blocks.back().get(); - ++block_memory; + IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; + block_map[flow_block.id] = ir_block; + function.blocks.emplace_back(ir_block); } } @@ -34,21 +36,24 @@ void EmitTerminationInsts(const Flow::Function& cfg_function, } } -void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - IR::Block* block_memory) { +void TranslateFunction(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, const Flow::Function& cfg_function, + IR::Function& function) { std::vector<IR::Block*> block_map; block_map.resize(cfg_function.blocks_data.size()); - TranslateCode(env, cfg_function, function, block_map, block_memory); + TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); EmitTerminationInsts(cfg_function, block_map); } } // Anonymous namespace -Program::Program(Environment& env, const Flow::CFG& cfg) { +IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, const Flow::CFG& cfg) { + IR::Program program; + auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(env, cfg_function, functions.emplace_back(), - block_alloc_pool.allocate(cfg_function.blocks.size())); + TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { @@ -59,27 +64,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::VerificationPass(function); } //*/ -} - -std::string DumpProgram(const Program& program) { - size_t index{0}; - std::map<const IR::Inst*, size_t> inst_to_index; - std::map<const IR::Block*, size_t> block_to_index; - - for (const IR::Function& function : program.functions) { - for (const auto& block : function.blocks) { - block_to_index.emplace(block.get(), index); - ++index; - } - } - std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } - } - return ret; + return program; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 36e678a9ea..3355ab1299 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -9,28 +9,16 @@ #include <vector> #include <boost/container/small_vector.hpp> -#include <boost/pool/pool_alloc.hpp> #include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -class Program { - friend std::string DumpProgram(const Program& program); - -public: - explicit Program(Environment& env, const Flow::CFG& cfg); - -private: - boost::pool_allocator<IR::Block, boost::default_user_allocator_new_delete, - boost::details::pool::null_mutex> - block_alloc_pool; - boost::container::small_vector<IR::Function, 1> functions; -}; - -[[nodiscard]] std::string DumpProgram(const Program& program); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + const Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index acd8445ad1..3d0c48457c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index 90cddb18b4..ba005fbf45 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index de65173e8d..ad97786d4e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -6,7 +6,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 9f1570479d..727524284d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1711d3f48a..1f83d10683 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d70399f6bf..1bb160acbc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 66a306745e..dcc3f6c0ed 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,8 +23,9 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(Environment& env, const Flow::Block& flow_block) { - IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()}; +IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, + const Flow::Block& flow_block) { + IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; TranslatorVisitor visitor{env, block}; const Location pc_end{flow_block.end}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index 788742dea1..c1c21b2782 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,11 +6,14 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block); +[[nodiscard]] IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, + const Flow::Block& flow_block); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a62d3f56b1..7713e3ba9f 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -19,7 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" #include "shader_recompiler/ir_opt/passes.h" @@ -150,52 +150,52 @@ private: void SsaRewritePass(IR::Function& function) { Pass pass; - for (const auto& block : function.blocks) { + for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { switch (inst.Opcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block.get(), inst.Arg(1)); + pass.WriteVariable(reg, block, inst.Arg(1)); } break; case IR::Opcode::SetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block.get(), inst.Arg(1)); + pass.WriteVariable(pred, block, inst.Arg(1)); } break; case IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0)); + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); break; case IR::Opcode::GetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); } break; case IR::Opcode::GetPred: if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; case IR::Opcode::GetZFlag: - inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; case IR::Opcode::GetSFlag: - inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); break; case IR::Opcode::GetCFlag: - inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); break; case IR::Opcode::GetOFlag: - inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get())); + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); break; default: break; diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index e6596d8287..19e36590c5 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -56,6 +56,13 @@ int main() { auto cfg{std::make_unique<Flow::CFG>(env, 0)}; // fmt::print(stdout, "{}\n", cfg->Dot()); - Program program{env, *cfg}; - fmt::print(stdout, "{}\n", DumpProgram(program)); + auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()}; + auto block_pool{std::make_unique<ObjectPool<IR::Block>>()}; + + for (int i = 0; i < 8192 * 4; ++i) { + void(inst_pool->Create(IR::Opcode::Void, 0)); + } + + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 0000000000..7c65bbd921 --- /dev/null +++ b/src/shader_recompiler/object_pool.h @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <type_traits> + +namespace Shader { + +template <typename T, size_t chunk_size = 8192> +requires std::is_destructible_v<T> class ObjectPool { +public: + ~ObjectPool() { + std::unique_ptr<Chunk> tree_owner; + Chunk* chunk{&root}; + while (chunk) { + for (size_t obj_id = chunk->free_objects; obj_id < chunk_size; ++obj_id) { + chunk->storage[obj_id].object.~T(); + } + tree_owner = std::move(chunk->next); + chunk = tree_owner.get(); + } + } + + template <typename... Args> + requires std::is_constructible_v<T, Args...> [[nodiscard]] T* Create(Args&&... args) { + return std::construct_at(Memory(), std::forward<Args>(args)...); + } + + void ReleaseContents() { + Chunk* chunk{&root}; + if (chunk) { + const size_t free_objects{chunk->free_objects}; + if (free_objects == chunk_size) { + break; + } + chunk->free_objects = chunk_size; + for (size_t obj_id = free_objects; obj_id < chunk_size; ++obj_id) { + chunk->storage[obj_id].object.~T(); + } + chunk = chunk->next.get(); + } + node = &root; + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Storage { + Storage() noexcept {} + ~Storage() noexcept {} + + NonTrivialDummy dummy{}; + T object; + }; + + struct Chunk { + size_t free_objects = chunk_size; + std::array<Storage, chunk_size> storage; + std::unique_ptr<Chunk> next; + }; + + [[nodiscard]] T* Memory() { + Chunk* const chunk{FreeChunk()}; + return &chunk->storage[--chunk->free_objects].object; + } + + [[nodiscard]] Chunk* FreeChunk() { + if (node->free_objects > 0) { + return node; + } + if (node->next) { + node = node->next.get(); + return node; + } + node->next = std::make_unique<Chunk>(); + node = node->next.get(); + return node; + } + + Chunk* node{&root}; + Chunk root; +}; + +} // namespace Shader -- cgit v1.2.3-70-g09d2 From da8096e6e35af250dcc56a1af76b8a211df63a90 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 6 Feb 2021 02:38:22 -0300 Subject: shader: Properly store phi on Inst --- src/shader_recompiler/frontend/ir/basic_block.cpp | 33 +++---- .../frontend/ir/microinstruction.cpp | 102 +++++++++++++++------ .../frontend/ir/microinstruction.h | 37 +++++--- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 22 +++-- src/shader_recompiler/object_pool.h | 11 +-- 6 files changed, 132 insertions(+), 75 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 1a5d821357..50c6a83cd0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -129,26 +129,21 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - if (op == Opcode::Phi) { - size_t val_index{0}; - for (const auto& [phi_block, phi_val] : inst.PhiOperands()) { - ret += val_index != 0 ? ", " : " "; - ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val), - BlockToIndex(block_to_index, phi_block)); - ++val_index; + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + ret += arg_index != 0 ? ", " : " "; + if (op == Opcode::Phi) { + ret += fmt::format("[ {}, {} ]", arg_index, + BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); + } else { + ret += arg_str; } - } else { - const size_t arg_count{NumArgsOf(op)}; - for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { - const Value arg{inst.Arg(arg_index)}; - ret += arg_index != 0 ? ", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); - } + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index de953838cc..e7ca92039e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <memory> #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" @@ -30,6 +31,22 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } +Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: @@ -71,7 +88,10 @@ bool Inst::IsPseudoInstruction() const noexcept { } } -bool Inst::AreAllArgsImmediates() const noexcept { +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } return std::all_of(args.begin(), args.begin() + NumArgs(), [](const IR::Value& value) { return value.IsImmediate(); }); } @@ -101,7 +121,7 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } size_t Inst::NumArgs() const { - return NumArgsOf(op); + return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); } IR::Type Inst::Type() const { @@ -109,13 +129,23 @@ IR::Type Inst::Type() const { } Value Inst::Arg(size_t index) const { - if (index >= NumArgsOf(op)) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + if (op == Opcode::Phi) { + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); + } + return phi_args[index].second; + } else { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + return args[index]; } - return args[index]; } void Inst::SetArg(size_t index, Value value) { + if (op == Opcode::Phi) { + throw LogicError("Setting argument on a phi instruction"); + } if (index >= NumArgsOf(op)) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } @@ -128,15 +158,21 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } -std::span<const std::pair<Block*, Value>> Inst::PhiOperands() const noexcept { - return phi_operands; +Block* Inst::PhiBlock(size_t index) const { + if (op != Opcode::Phi) { + throw LogicError("{} is not a Phi instruction", op); + } + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction"); + } + return phi_args[index].first; } void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } - phi_operands.emplace_back(predecessor, value); + phi_args.emplace_back(predecessor, value); } void Inst::Invalidate() { @@ -145,18 +181,22 @@ void Inst::Invalidate() { } void Inst::ClearArgs() { - for (auto& value : args) { - if (!value.IsImmediate()) { - UndoUse(value); + if (op == Opcode::Phi) { + for (auto& pair : phi_args) { + IR::Value& value{pair.second}; + if (!value.IsImmediate()) { + UndoUse(value); + } } - value = {}; - } - for (auto& [phi_block, phi_op] : phi_operands) { - if (!phi_op.IsImmediate()) { - UndoUse(phi_op); + phi_args.clear(); + } else { + for (auto& value : args) { + if (!value.IsImmediate()) { + UndoUse(value); + } + value = {}; } } - phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { @@ -167,24 +207,29 @@ void Inst::ReplaceUsesWith(Value replacement) { if (!replacement.IsImmediate()) { Use(replacement); } - args[0] = replacement; + if (op == Opcode::Phi) { + phi_args[0].second = replacement; + } else { + args[0] = replacement; + } } void Inst::Use(const Value& value) { - ++value.Inst()->use_count; + Inst* const inst{value.Inst()}; + ++inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(value.Inst()->zero_inst, this); + SetPseudoInstruction(inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(value.Inst()->sign_inst, this); + SetPseudoInstruction(inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(value.Inst()->carry_inst, this); + SetPseudoInstruction(inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(value.Inst()->overflow_inst, this); + SetPseudoInstruction(inst->overflow_inst, this); break; default: break; @@ -192,20 +237,21 @@ void Inst::Use(const Value& value) { } void Inst::UndoUse(const Value& value) { - --value.Inst()->use_count; + Inst* const inst{value.Inst()}; + --inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp); + RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp); + RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp); + RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); + RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 80baffb2e8..ddf0f90a9e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -6,8 +6,8 @@ #include <array> #include <cstring> -#include <span> #include <type_traits> +#include <utility> #include <vector> #include <boost/intrusive/list.hpp> @@ -25,7 +25,14 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} + explicit Inst(Opcode op_, u64 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -50,26 +57,26 @@ public: [[nodiscard]] bool IsPseudoInstruction() const noexcept; /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const noexcept; + [[nodiscard]] bool AreAllArgsImmediates() const; /// Determines if there is a pseudo-operation associated with this instruction. [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const; - /// Get the type this instruction returns. [[nodiscard]] IR::Type Type() const; + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const; + /// Get the value of a given argument index. [[nodiscard]] Value Arg(size_t index) const; /// Set the value of a given argument index. void SetArg(size_t index, Value value); - /// Get an immutable span to the phi operands. - [[nodiscard]] std::span<const std::pair<Block*, Value>> PhiOperands() const noexcept; + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; /// Add phi operand to a phi instruction. void AddPhiOperand(Block* predecessor, const Value& value); @@ -87,18 +94,26 @@ public: } private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + void Use(const Value& value); void UndoUse(const Value& value); IR::Opcode op{}; int use_count{}; - std::array<Value, MAX_ARG_COUNT> args{}; + u64 flags{}; + union { + NonTriviallyDummy dummy{}; + std::array<Value, MAX_ARG_COUNT> args; + std::vector<std::pair<Block*, Value>> phi_args; + }; Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; - std::vector<std::pair<Block*, Value>> phi_operands; - u64 flags{}; }; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6eb105d929..82b04f37c6 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -3,9 +3,9 @@ // Refer to the license.txt file included. // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Phi, Opaque, ) OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 7713e3ba9f..15a9db90a9 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -104,32 +104,34 @@ private: val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi - val = IR::Value{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; + val = IR::Value{&phi_inst}; WriteVariable(variable, block, val); - val = AddPhiOperands(variable, val, block); + val = AddPhiOperands(variable, phi_inst, block); } WriteVariable(variable, block, val); return val; } - IR::Value AddPhiOperands(auto variable, const IR::Value& phi, IR::Block* block) { + IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const pred : block->ImmediatePredecessors()) { - phi.Inst()->AddPhiOperand(pred, ReadVariable(variable, pred)); + phi.AddPhiOperand(pred, ReadVariable(variable, pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } - IR::Value TryRemoveTrivialPhi(const IR::Value& phi, IR::Block* block, IR::Opcode undef_opcode) { + IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { IR::Value same; - for (const auto& pair : phi.Inst()->PhiOperands()) { - const IR::Value& op{pair.second}; - if (op == same || op == phi) { + const size_t num_args{phi.NumArgs()}; + for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { + const IR::Value& op{phi.Arg(arg_index)}; + if (op == same || op == IR::Value{&phi}) { // Unique value or self-reference continue; } if (!same.IsEmpty()) { // The phi merges at least two values: not trivial - return phi; + return IR::Value{&phi}; } same = op; } @@ -139,7 +141,7 @@ private: same = IR::Value{&*block->PrependNewInst(first_not_phi, undef_opcode)}; } // Reroute all uses of phi to same and remove phi - phi.Inst()->ReplaceUsesWith(same); + phi.ReplaceUsesWith(same); // TODO: Try to recursively remove all phi users, which might have become trivial return same; } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 7c65bbd921..a573add327 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -6,6 +6,7 @@ #include <memory> #include <type_traits> +#include <utility> namespace Shader { @@ -31,14 +32,12 @@ public: void ReleaseContents() { Chunk* chunk{&root}; - if (chunk) { - const size_t free_objects{chunk->free_objects}; - if (free_objects == chunk_size) { + while (chunk) { + if (chunk->free_objects == chunk_size) { break; } - chunk->free_objects = chunk_size; - for (size_t obj_id = free_objects; obj_id < chunk_size; ++obj_id) { - chunk->storage[obj_id].object.~T(); + for (; chunk->free_objects < chunk_size; ++chunk->free_objects) { + chunk->storage[chunk->free_objects].object.~T(); } chunk = chunk->next.get(); } -- cgit v1.2.3-70-g09d2 From 6dafb08f52ac78119669a698c4b9a39bffd48f8f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sat, 6 Feb 2021 04:47:53 -0300 Subject: shader: Better constant folding --- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 59 +++++++++++++++++----- 2 files changed, 48 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 50c6a83cd0..da33ff6f10 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -87,7 +87,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind } switch (arg.Type()) { case Type::U1: - return fmt::format("#{}", arg.U1() ? '1' : '0'); + return fmt::format("#{}", arg.U1() ? "true" : "false"); case Type::U8: return fmt::format("#{}", arg.U8()); case Type::U16: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 7fb3192d8e..f1170c61ee 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <tuple> #include <type_traits> #include "common/bit_cast.h" @@ -13,12 +14,17 @@ namespace Shader::Optimization { namespace { -[[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { - if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { - throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); - } - return (base >> shift) & ((1U << count) - 1); -} +// Metaprogramming stuff to get arguments information out of a lambda +template <typename Func> +struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {}; + +template <typename ReturnType, typename LambdaType, typename... Args> +struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> { + template <size_t I> + using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; + + static constexpr size_t NUM_ARGS{sizeof...(Args)}; +}; template <typename T> [[nodiscard]] T Arg(const IR::Value& value) { @@ -104,6 +110,14 @@ void FoldAdd(IR::Inst& inst) { } } +template <typename T> +void FoldSelect(IR::Inst& inst) { + const IR::Value cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + } +} + void FoldLogicalAnd(IR::Inst& inst) { if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { return; @@ -131,6 +145,21 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { } } +template <typename Func, size_t... I> +IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { + using Traits = LambdaTraits<decltype(func)>; + return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)}; +} + +template <typename Func> +void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -145,14 +174,20 @@ void ConstantPropagation(IR::Inst& inst) { return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd<u64>(inst); - case IR::Opcode::BitFieldUExtract: - if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) { - inst.ReplaceUsesWith(IR::Value{ - BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())}); - } - break; + case IR::Opcode::Select32: + return FoldSelect<u32>(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::ULessThan: + return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + case IR::Opcode::BitFieldUExtract: + return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { + throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, + base, shift, count); + } + return (base >> shift) & ((1U << count) - 1); + }); default: break; } -- cgit v1.2.3-70-g09d2 From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: shader: Initial implementation of an AST --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 45 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 18 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 8 + .../backend/spirv/emit_spirv_control_flow.cpp | 25 - .../backend/spirv/emit_spirv_undefined.cpp | 12 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 64 +- src/shader_recompiler/frontend/ir/basic_block.h | 40 +- src/shader_recompiler/frontend/ir/condition.cpp | 14 +- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/function.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 43 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 23 +- .../frontend/ir/microinstruction.cpp | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 16 +- .../frontend/ir/structured_control_flow.cpp | 742 +++++++++++++++++++++ .../frontend/ir/structured_control_flow.h | 22 + .../frontend/maxwell/control_flow.cpp | 426 +++++------- .../frontend/maxwell/control_flow.h | 77 +-- src/shader_recompiler/frontend/maxwell/location.h | 12 +- src/shader_recompiler/frontend/maxwell/program.cpp | 69 +- src/shader_recompiler/frontend/maxwell/program.h | 2 +- .../frontend/maxwell/termination_code.cpp | 86 --- .../frontend/maxwell/termination_code.h | 17 - .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 17 +- .../frontend/maxwell/translate/translate.h | 7 +- .../ir_opt/constant_propagation_pass.cpp | 50 ++ src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 24 +- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 + src/shader_recompiler/main.cpp | 29 +- src/shader_recompiler/shader_info.h | 28 + 33 files changed, 1346 insertions(+), 590 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/shader_info.h (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/externals/sirit b/externals/sirit index 1f7b70730d..c374bfd9fd 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35 +Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 12fbcb37c3..27fc79e21b 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -36,6 +36,8 @@ add_executable(shader_recompiler frontend/ir/program.cpp frontend/ir/program.h frontend/ir/reg.h + frontend/ir/structured_control_flow.cpp + frontend/ir/structured_control_flow.h frontend/ir/type.cpp frontend/ir/type.h frontend/ir/value.cpp @@ -51,8 +53,6 @@ add_executable(shader_recompiler frontend/maxwell/opcodes.h frontend/maxwell/program.cpp frontend/maxwell/program.h - frontend/maxwell/termination_code.cpp - frontend/maxwell/termination_code.h frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7c4269fad4..5022b51597 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -105,8 +105,26 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->Opcode()); } -void EmitSPIRV::EmitPhi(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +static Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.u1; + default: + throw NotImplementedException("Phi node type {}", type); + } +} + +Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { + const size_t num_args{inst->NumArgs()}; + boost::container::small_vector<Id, 64> operands; + operands.reserve(num_args * 2); + for (size_t index = 0; index < num_args; ++index) { + IR::Block* const phi_block{inst->PhiBlock(index)}; + operands.push_back(ctx.Def(inst->Arg(index))); + operands.push_back(ctx.BlockLabel(phi_block)); + } + const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; + return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } void EmitSPIRV::EmitVoid(EmitContext&) {} @@ -115,6 +133,29 @@ void EmitSPIRV::EmitIdentity(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +// FIXME: Move to its own file +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), + ctx.BlockLabel(inst->Arg(2).Label())); +} + +void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()), + spv::LoopControlMask::MaskNone); +} + +void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) { + ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone); +} + +void EmitSPIRV::EmitReturn(EmitContext& ctx) { + ctx.OpReturn(); +} + void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 3f4b68a7d1..9aa83b5de4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -124,18 +124,20 @@ private: void EmitInst(EmitContext& ctx, IR::Inst* inst); // Microinstruction emitters - void EmitPhi(EmitContext& ctx); + Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); void EmitBranch(EmitContext& ctx, IR::Inst* inst); void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); - void EmitExit(EmitContext& ctx); + void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst); + void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst); void EmitReturn(EmitContext& ctx); - void EmitUnreachable(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); + void EmitSetGotoVariable(EmitContext& ctx); + void EmitGetGotoVariable(EmitContext& ctx); Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitGetAttribute(EmitContext& ctx); void EmitSetAttribute(EmitContext& ctx); @@ -151,11 +153,11 @@ private: void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); - void EmitUndef1(EmitContext& ctx); - void EmitUndef8(EmitContext& ctx); - void EmitUndef16(EmitContext& ctx); - void EmitUndef32(EmitContext& ctx); - void EmitUndef64(EmitContext& ctx); + Id EmitUndefU1(EmitContext& ctx); + void EmitUndefU8(EmitContext& ctx); + void EmitUndefU16(EmitContext& ctx); + void EmitUndefU32(EmitContext& ctx); + void EmitUndefU64(EmitContext& ctx); void EmitLoadGlobalU8(EmitContext& ctx); void EmitLoadGlobalS8(EmitContext& ctx); void EmitLoadGlobalU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index b121305ea9..1eab739edb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -22,6 +22,14 @@ void EmitSPIRV::EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 770fe113c8..66ce6c8c54 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,28 +3,3 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" - -namespace Shader::Backend::SPIRV { - -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); -} - -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), - ctx.BlockLabel(inst->Arg(2).Label())); -} - -void EmitSPIRV::EmitExit(EmitContext& ctx) { - ctx.OpReturn(); -} - -void EmitSPIRV::EmitReturn(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitSPIRV::EmitUnreachable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 3850b072ce..859b60a95d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitUndef1(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { + return ctx.OpUndef(ctx.u1); } -void EmitSPIRV::EmitUndef8(EmitContext&) { +void EmitSPIRV::EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef16(EmitContext&) { +void EmitSPIRV::EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef32(EmitContext&) { +void EmitSPIRV::EmitUndefU32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUndef64(EmitContext&) { +void EmitSPIRV::EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index da33ff6f10..b5616f3941 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -17,6 +17,8 @@ namespace Shader::IR { Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end) : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {} + Block::~Block() = default; void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { @@ -38,8 +40,25 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { - imm_predecessors.push_back(immediate_predecessor); +void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { + branch_cond = cond; + branch_true = branch_true_; + branch_false = branch_false_; +} + +void Block::SetBranch(Block* branch) { + branch_cond = Condition{true}; + branch_true = branch; +} + +void Block::SetReturn() { + branch_cond = Condition{true}; + branch_true = nullptr; + branch_false = nullptr; +} + +bool Block::IsVirtual() const noexcept { + return location_begin == location_end; } u32 Block::LocationBegin() const noexcept { @@ -58,6 +77,12 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +void Block::AddImmediatePredecessor(Block* block) { + if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { + imm_predecessors.push_back(block); + } +} + std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept { return imm_predecessors; } @@ -70,8 +95,17 @@ static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_i return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); } +static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, - const std::map<const Inst*, size_t>& inst_to_index, + std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return "<null>"; @@ -80,10 +114,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { - if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { - return fmt::format("%{}", it->second); - } - return fmt::format("%<unknown inst {:016x}>", reinterpret_cast<u64>(arg.Inst())); + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { case Type::U1: @@ -125,14 +156,14 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& const Opcode op{inst.Opcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); if (TypeOf(op) != Type::Void) { - ret += fmt::format("%{:<5} = {}", inst_index, op); + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; + const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_index, @@ -140,10 +171,12 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += arg_str; } - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { @@ -151,9 +184,6 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& } else { ret += '\n'; } - - inst_to_index.emplace(&inst, inst_index); - ++inst_index; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec3ad62634..3205705e79 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,7 +11,9 @@ #include <boost/intrusive/list.hpp> +#include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -26,6 +28,7 @@ public: using const_reverse_iterator = InstructionList::const_reverse_iterator; explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end); + explicit Block(ObjectPool<Inst>& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -41,9 +44,15 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args = {}, u64 flags = 0); - /// Adds a new immediate predecessor to the basic block. - void AddImmediatePredecessor(IR::Block* immediate_predecessor); + /// Set the branches to jump to when all instructions have executed. + void SetBranches(Condition cond, Block* branch_true, Block* branch_false); + /// Set the branch to unconditionally jump to when all instructions have executed. + void SetBranch(Block* branch); + /// Mark the block as a return block. + void SetReturn(); + /// Returns true when the block does not implement any guest instructions directly. + [[nodiscard]] bool IsVirtual() const noexcept; /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; /// Gets the end location for this basic block. @@ -54,8 +63,23 @@ public: /// Gets an immutable reference to the instruction list for this basic block. [[nodiscard]] const InstructionList& Instructions() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span<IR::Block* const> ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept; + + [[nodiscard]] Condition BranchCondition() const noexcept { + return branch_cond; + } + [[nodiscard]] bool IsTerminationBlock() const noexcept { + return !branch_true && !branch_false; + } + [[nodiscard]] Block* TrueBranch() const noexcept { + return branch_true; + } + [[nodiscard]] Block* FalseBranch() const noexcept { + return branch_false; + } [[nodiscard]] bool empty() const { return instructions.empty(); @@ -129,10 +153,18 @@ private: /// List of instructions in this block InstructionList instructions; + /// Condition to choose the branch to take + Condition branch_cond{true}; + /// Block to jump into when the branch condition evaluates as true + Block* branch_true{nullptr}; + /// Block to jump into when the branch condition evaluates as false + Block* branch_false{nullptr}; /// Block immediate predecessors - std::vector<IR::Block*> imm_predecessors; + std::vector<Block*> imm_predecessors; }; +using BlockList = std::vector<Block*>; + [[nodiscard]] std::string DumpBlock(const Block& block); [[nodiscard]] std::string DumpBlock(const Block& block, diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index edff35dc77..ec1659e2bc 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -16,15 +16,13 @@ std::string NameOf(Condition condition) { ret = fmt::to_string(condition.FlowTest()); } const auto [pred, negated]{condition.Pred()}; - if (pred != Pred::PT || negated) { - if (!ret.empty()) { - ret += '&'; - } - if (negated) { - ret += '!'; - } - ret += fmt::to_string(pred); + if (!ret.empty()) { + ret += '&'; } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); return ret; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 52737025c9..16b4ae888e 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -26,7 +26,7 @@ public: explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept : Condition(FlowTest::T, pred_, pred_negated_) {} - Condition(bool value) : Condition(Pred::PT, !value) {} + explicit Condition(bool value) : Condition(Pred::PT, !value) {} auto operator<=>(const Condition&) const noexcept = default; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index bba7d1d395..fd7d564191 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -11,7 +11,7 @@ namespace Shader::IR { struct Function { - boost::container::small_vector<Block*, 16> blocks; + BlockList blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ada0be8343..30932043f9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -44,24 +44,27 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(IR::Block* label) { +void IREmitter::Branch(Block* label) { + label->AddImmediatePredecessor(block); Inst(Opcode::Branch, label); } -void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) { - Inst(Opcode::BranchConditional, cond, true_label, false_label); +void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + true_label->AddImmediatePredecessor(block); + false_label->AddImmediatePredecessor(block); + Inst(Opcode::BranchConditional, condition, true_label, false_label); } -void IREmitter::Exit() { - Inst(Opcode::Exit); +void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { + Inst(Opcode::LoopMerge, merge_block, continue_target); } -void IREmitter::Return() { - Inst(Opcode::Return); +void IREmitter::SelectionMerge(Block* merge_block) { + Inst(Opcode::SelectionMerge, merge_block); } -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); +void IREmitter::Return() { + Inst(Opcode::Return); } U32 IREmitter::GetReg(IR::Reg reg) { @@ -81,6 +84,14 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst<U1>(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } @@ -121,6 +132,20 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::Condition(IR::Condition cond) { + if (cond == IR::Condition{true}) { + return Imm1(true); + } else if (cond == IR::Condition{false}) { + return Imm1(false); + } + const FlowTest flow_test{cond.FlowTest()}; + const auto [pred, is_negated]{cond.Pred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + throw NotImplementedException("Condition {}", cond); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst<F32>(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bfd9916cca..4decb46bc1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -16,11 +16,11 @@ namespace Shader::IR { class IREmitter { public: - explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) - : block{block_}, insertion_point{insertion_point_} {} + : block{&block_}, insertion_point{insertion_point_} {} - Block& block; + Block* block; [[nodiscard]] U1 Imm1(bool value) const; [[nodiscard]] U8 Imm8(u8 value) const; @@ -31,11 +31,11 @@ public: [[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(IR::Block* label); - void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); - void Exit(); + void Branch(Block* label); + void BranchConditional(const U1& condition, Block* true_label, Block* false_label); + void LoopMerge(Block* merge_block, Block* continue_target); + void SelectionMerge(Block* merge_block); void Return(); - void Unreachable(); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -43,6 +43,9 @@ public: [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); void SetPred(IR::Pred pred, const U1& value); + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); @@ -55,6 +58,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); @@ -168,7 +173,7 @@ private: template <typename T = Value, typename... Args> T Inst(Opcode op, Args... args) { - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } @@ -184,7 +189,7 @@ private: T Inst(Opcode op, Flags<FlagType> flags, Args... args) { u64 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e7ca92039e..b4ae371bd1 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -51,9 +51,9 @@ bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: case Opcode::BranchConditional: - case Opcode::Exit: + case Opcode::LoopMerge: + case Opcode::SelectionMerge: case Opcode::Return: - case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5dc65f2dfa..ede5e20c2d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -10,15 +10,17 @@ OPCODE(Identity, Opaque, Opaq // Control flow OPCODE(Branch, Void, Label, ) OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) OPCODE(GetPred, U1, Pred, ) OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) @@ -36,11 +38,11 @@ OPCODE(WorkgroupId, U32x3, OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations OPCODE(LoadGlobalU8, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp new file mode 100644 index 0000000000..2e9ce25256 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -0,0 +1,742 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <ranges> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include <fmt/format.h> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; + +using Tree = boost::intrusive::list<Statement, + // Allow using Statement without a definition + boost::intrusive::base_hook<ListBaseHook>, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size<false>>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + Block* code; + Node label; + Tree children; + Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return "<invalid type>"; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(std::span<Block* const> blocks, ObjectPool<Statement, 64>& stmt_pool) + : pool{stmt_pool} { + std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; + fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector<Node> BuildUnorderedTreeGetGotos(std::span<Block* const> blocks) { + // Assume all blocks have two branches + std::vector<Node> gotos; + gotos.reserve(blocks.size() * 2); + + const std::unordered_map labels_map{BuildLabels(blocks)}; + Tree& root{root_stmt.children}; + auto insert_point{root.begin()}; + for (Block* const block : blocks) { + ++insert_point; // Skip label + ++insert_point; // Skip set variable + root.insert(insert_point, *pool.Create(block, &root_stmt)); + + if (block->IsTerminationBlock()) { + root.insert(insert_point, *pool.Create(Return{})); + continue; + } + const Condition cond{block->BranchCondition()}; + Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; + if (cond == Condition{true} || cond == Condition{false}) { + const bool is_true{cond == Condition{true}}; + const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()}; + const Node label{labels_map.at(branch)}; + Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_stmt)); + } else { + Statement* const ident_cond{pool.Create(Identity{}, cond)}; + const Node true_label{labels_map.at(block->TrueBranch())}; + const Node false_label{labels_map.at(block->FalseBranch())}; + Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; + Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_true)); + gotos.push_back(root.insert(insert_point, *goto_false)); + } + } + return gotos; + } + + std::unordered_map<const Block*, Node> BuildLabels(std::span<Block* const> blocks) { + // TODO: Consider storing labels intrusively inside the block + std::unordered_map<const Block*, Node> labels_map; + Tree& root{root_stmt.children}; + u32 label_id{0}; + for (const Block* const block : blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + labels_map.emplace(block, root.insert(root.end(), *label)); + Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; + root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + ++label_id; + } + return labels_map; + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + // Update nested if condition + switch (label_nested_stmt->type) { + case StatementType::If: + label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool<Statement, 64>& pool; + Statement root_stmt{FunctionTag{}}; +}; + +Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool<Inst>& inst_pool_, ObjectPool<Block>& block_pool_, + ObjectPool<Statement, 64>& stmt_pool_, Statement& root_stmt, + const std::function<void(IR::Block*)>& func_, BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, Block* continue_block, Block* break_block) { + Tree& tree{parent.children}; + Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IREmitter ir{*current_block}; + ir.Branch(stmt.code); + } + current_block = stmt.code; + func(stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + Block* const first_if_block{block_list.at(first_block_index)}; + IREmitter ir{*current_block}; + const U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + Block* const new_continue_block{block_pool.Create(inst_pool)}; + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + Block* const first_loop_block{block_list.at(first_block_index)}; + IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IREmitter continue_ir{*new_continue_block}; + const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const skip_block{MergeBlock(parent, stmt)}; + + IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IREmitter ir{*current_block}; + ir.Branch(continue_block); + } + } + + Block* MergeBlock(Statement& parent, Statement& stmt) { + if (Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool<Statement, 64>& stmt_pool; + ObjectPool<Inst>& inst_pool; + ObjectPool<Block>& block_pool; + const std::function<void(IR::Block*)>& func; + BlockList& block_list; +}; +} // Anonymous namespace + +BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(Block*)>& func) { + ObjectPool<Statement, 64> stmt_pool; + GotoPass goto_pass{unordered_blocks, stmt_pool}; + BlockList block_list; + TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), + func, block_list}; + return block_list; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h new file mode 100644 index 0000000000..a574c24f77 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <span> + +#include <boost/intrusive/list.hpp> + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +[[nodiscard]] BlockList VisitAST(ObjectPool<Inst>& inst_pool, ObjectPool<Block>& block_pool, + std::span<Block* const> unordered_blocks, + const std::function<void(Block*)>& func); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 21ee981371..e766b555bd 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -17,38 +17,49 @@ #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; +} // Anonymous namespace static u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static std::array<Block, 2> Split(Block&& block, Location pc, BlockId new_id) { - if (pc <= block.begin || pc >= block.end) { +static void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - return { - Block{ - .begin{block.begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{block.id}, - .stack{block.stack}, - .cond{true}, - .branch_true{new_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }, - Block{ - .begin{pc}, - .end{block.end}, - .end_class{block.end_class}, - .id{new_id}, - .stack{std::move(block.stack)}, - .cond{block.cond}, - .branch_true{block.branch_true}, - .branch_false{block.branch_false}, - .imm_predecessors{}, - }, + *new_block = Block{ + .begin{pc}, + .end{old_block->end}, + .end_class{old_block->end_class}, + .stack{old_block->stack}, + .cond{old_block->cond}, + .branch_true{old_block->branch_true}, + .branch_false{old_block->branch_false}, + .ir{nullptr}, + }; + *old_block = Block{ + .begin{old_block->begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{std::move(old_block->stack)}, + .cond{IR::Condition{true}}, + .branch_true{new_block}, + .branch_false{nullptr}, + .ir{nullptr}, }; } @@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) { static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { - return fmt::format("\"Virtual {}\"", block.id); + return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } @@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept { Function::Function(Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block_id{0}, + .block{nullptr}, .stack{}, }} {} -void Function::BuildBlocksMap() { - const size_t num_blocks{NumBlocks()}; - blocks_map.resize(num_blocks); - for (size_t block_index = 0; block_index < num_blocks; ++block_index) { - Block& block{blocks_data[block_index]}; - blocks_map[block.id] = █ - } -} - -void Function::BuildImmediatePredecessors() { - for (const Block& block : blocks_data) { - if (block.branch_true != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); - } - if (block.branch_false != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); - } - } -} - -void Function::BuildPostOrder() { - boost::container::small_vector<BlockId, 0x110> block_stack; - post_order_map.resize(NumBlocks()); - - Block& first_block{blocks_data[blocks.front()]}; - first_block.post_order_visited = true; - block_stack.push_back(first_block.id); - - const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { - if (branch_id == UNREACHABLE_BLOCK_ID) { - return false; - } - if (blocks_map[branch_id]->post_order_visited) { - return false; - } - blocks_map[branch_id]->post_order_visited = true; - - // Calling push_back twice is faster than insert on msvc - block_stack.push_back(block_id); - block_stack.push_back(branch_id); - return true; - }; - while (!block_stack.empty()) { - const Block* const block{blocks_map[block_stack.back()]}; - block_stack.pop_back(); - - if (!visit_branch(block->id, block->branch_true) && - !visit_branch(block->id, block->branch_false)) { - post_order_map[block->id] = static_cast<u32>(post_order_blocks.size()); - post_order_blocks.push_back(block->id); - } - } -} - -void Function::BuildImmediateDominators() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; - auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; - auto intersect{[this](Block* finger1, Block* finger2) { - while (finger1 != finger2) { - while (post_order_map[finger1->id] < post_order_map[finger2->id]) { - finger1 = finger1->imm_dominator; - } - while (post_order_map[finger2->id] < post_order_map[finger1->id]) { - finger2 = finger2->imm_dominator; - } - } - return finger1; - }}; - for (Block& block : blocks_data) { - block.imm_dominator = nullptr; - } - Block* const start_block{&blocks_data[blocks.front()]}; - start_block->imm_dominator = start_block; - - bool changed{true}; - while (changed) { - changed = false; - for (Block* const block : post_order_blocks | reverse_order_but_first) { - Block* new_idom{}; - for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { - new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; - } - changed |= block->imm_dominator != new_idom; - block->imm_dominator = new_idom; - } - } -} - -void Function::BuildDominanceFrontier() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; - for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { - for (Block* current : block.imm_predecessors | transform_block_id) { - while (current != block.imm_dominator) { - current->dominance_frontiers.push_back(current->id); - current = current->imm_dominator; - } - } - } -} - -CFG::CFG(Environment& env_, Location start_address) : env{env_} { - VisitFunctions(start_address); - - for (Function& function : functions) { - function.BuildBlocksMap(); - function.BuildImmediatePredecessors(); - function.BuildPostOrder(); - function.BuildImmediateDominators(); - function.BuildDominanceFrontier(); - } -} - -void CFG::VisitFunctions(Location start_address) { +CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) + : env{env_}, block_pool{block_pool_} { functions.emplace_back(start_address); + functions.back().labels.back().block = block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + }); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { return; } // Try to find the next block - Function* function{&functions[function_id]}; + Function* const function{&functions[function_id]}; Location pc{label.address}; - const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, - [function](Location pc, u32 block_index) { - return pc < function->blocks_data[block_index].begin; - })}; - const auto next_index{std::distance(function->blocks.begin(), next)}; - const bool is_last{next == function->blocks.end()}; - Location next_pc; - BlockId next_id{UNREACHABLE_BLOCK_ID}; - if (!is_last) { - next_pc = function->blocks_data[*next].begin; - next_id = function->blocks_data[*next].id; - } + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; // Insert before the next block - Block block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{label.block_id}, - .stack{std::move(label.stack)}, - .cond{true}, - .branch_true{UNREACHABLE_BLOCK_ID}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }; + Block* const block{label.block}; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; - while (is_last || pc < next_pc) { + while (!next || pc < next->begin) { is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; if (is_branch) { break; @@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { if (!is_branch) { // If the block finished without a branch, // it means that the next instruction is already visited, jump to it - block.end = pc; - block.cond = true; - block.branch_true = next_id; - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; } // Function's pointer might be invalid, resolve it again - function = &functions[function_id]; - const u32 new_block_index = static_cast<u32>(function->blocks_data.size()); - function->blocks.insert(function->blocks.begin() + next_index, new_block_index); - function->blocks_data.push_back(std::move(block)); + // Insert the new block + functions[function_id].blocks.insert(*block); } bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { const Location pc{label.address}; Function& function{functions[function_id]}; - const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { - return function.blocks_data[block_index].Contains(pc); - })}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; if (it == function.blocks.end()) { // Address has not been visited return false; } - Block& block{function.blocks_data[*it]}; - if (block.begin == pc) { - throw LogicError("Dangling branch"); - } - const u32 first_index{*it}; - const u32 second_index{static_cast<u32>(function.blocks_data.size())}; - const std::array new_indices{first_index, second_index}; - std::array split_blocks{Split(std::move(block), pc, label.block_id)}; - function.blocks_data[*it] = std::move(split_blocks[0]); - function.blocks_data.push_back(std::move(split_blocks[1])); - function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); return true; } -CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { const Instruction inst{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(inst.raw)}; switch (opcode) { @@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); break; case Opcode::RET: - block.end_class = EndClass::Return; + block->end_class = EndClass::Return; break; default: break; } - block.end = pc; + block->end = pc; return AnalysisState::Branch; case Opcode::BRK: case Opcode::CONT: @@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; } - const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; - block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); - block.end = pc; + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; return AnalysisState::Branch; } case Opcode::PBK: @@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati case Opcode::PEXIT: case Opcode::PLONGJMP: case Opcode::SSY: - block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); @@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati return AnalysisState::Branch; } -void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond) { - if (block.begin != pc) { + if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later - block.end = pc; - block.cond = true; - block.branch_true = AddLabel(block, block.stack, pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; return; } - // Impersonate the visited block with a virtual block - // Jump from this virtual to the real conditional instruction and the next instruction - Function& function{functions[function_id]}; - const BlockId conditional_block_id{++function.current_block_id}; - function.blocks.push_back(static_cast<u32>(function.blocks_data.size())); - Block& virtual_block{function.blocks_data.emplace_back(Block{ - .begin{}, // Virtual block - .end{}, + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{ + .begin{block->begin.Virtual()}, + .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .id{block.id}, // Impersonating - .stack{block.stack}, + .stack{block->stack}, .cond{cond}, - .branch_true{conditional_block_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - })}; - // Set the end properties of the conditional instruction and give it a new identity - Block& conditional_block{block}; - conditional_block.end = pc; - conditional_block.end_class = insn_end_class; - conditional_block.id = conditional_block_id; + .branch_true{conditional_block}, + .branch_false{nullptr}, + .ir{nullptr}, + }; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc; + conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction - const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block - virtual_block.branch_false = endif_block_id; + block->branch_false = endif_block; // And branch to it from the conditional instruction if it is a branch if (insn_end_class == EndClass::Branch) { - conditional_block.cond = true; - conditional_block.branch_true = endif_block_id; - conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); } -bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode) { if (inst.branch.is_cbuf) { throw NotImplementedException("Branch with constant buffer offset"); @@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr const bool has_flow_test{HasFlowTest(opcode)}; const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - block.cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); - block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); } else { - block.cond = true; + block->cond = IR::Condition{true}; } return true; } -void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute) { const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { +void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } @@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { } } -CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; const Predicate pred{inst.Pred()}; @@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati return AnalysisState::Continue; } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - if (block.stack.Peek(Token::PEXIT).has_value()) { + if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } - if (const std::optional<Location> exit_pc{block.stack.Peek(Token::PEXIT)}) { - const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; - block.cond = true; - block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; return AnalysisState::Branch; } - block.end = pc; - block.end_class = EndClass::Exit; + block->end = pc; + block->end_class = EndClass::Exit; return AnalysisState::Branch; } -BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { Function& function{functions[function_id]}; - if (block.begin == pc) { - return block.id; + if (block->begin == pc) { + // Jumps to itself + return block; } - const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; - if (target != function.blocks_data.end()) { - return target->id; + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + return &*it; } - const BlockId block_id{++function.current_block_id}; + // TODO: FIX DANGLING BLOCKS + Block* const new_block{block_pool.Create(Block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{stack}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + })}; function.labels.push_back(Label{ .address{pc}, - .block_id{block_id}, + .block{new_block}, .stack{std::move(stack)}, }); - return block_id; + return new_block; } std::string CFG::Dot() const { @@ -581,18 +474,12 @@ std::string CFG::Dot() const { for (const Function& function : functions) { dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); dot += fmt::format("\t\tnode [style=filled];\n"); - for (const u32 block_index : function.blocks) { - const Block& block{function.blocks_data[block_index]}; + for (const Block& block : function.blocks) { const std::string name{NameOf(block)}; - const auto add_branch = [&](BlockId branch_id, bool add_label) { - const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; - dot += fmt::format("\t\t{}->", name); - if (it == function.blocks_data.end()) { - dot += fmt::format("\"Unknown label {}\"", branch_id); - } else { - dot += NameOf(*it); - }; - if (add_label && block.cond != true && block.cond != false) { + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { dot += fmt::format(" [label=\"{}\"]", block.cond); } dot += '\n'; @@ -600,10 +487,10 @@ std::string CFG::Dot() const { dot += fmt::format("\t\t{};\n", name); switch (block.end_class) { case EndClass::Branch: - if (block.cond != false) { + if (block.cond != IR::Condition{false}) { add_branch(block.branch_true, true); } - if (block.cond != true) { + if (block.cond != IR::Condition{true}) { add_branch(block.branch_false, false); } break; @@ -619,12 +506,6 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; - case EndClass::Unreachable: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += fmt::format( - "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } if (function.entrypoint == 8) { @@ -635,10 +516,11 @@ std::string CFG::Dot() const { dot += "\t}\n"; } if (!functions.empty()) { - if (functions.front().blocks.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 49b369282a..8179787b87 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -11,25 +11,27 @@ #include <vector> #include <boost/container/small_vector.hpp> +#include <boost/intrusive/set.hpp> #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +class Block; +} namespace Shader::Maxwell::Flow { -using BlockId = u32; using FunctionId = size_t; -constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast<u32>(-1)}; - enum class EndClass { Branch, Exit, Return, - Unreachable, }; enum class Token { @@ -59,58 +61,37 @@ private: boost::container::small_vector<StackEntry, 3> entries; }; -struct Block { +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode<boost::intrusive::normal_link>> { [[nodiscard]] bool Contains(Location pc) const noexcept; + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + Location begin; Location end; EndClass end_class; - BlockId id; Stack stack; IR::Condition cond; - BlockId branch_true; - BlockId branch_false; - boost::container::small_vector<BlockId, 4> imm_predecessors; - boost::container::small_vector<BlockId, 8> dominance_frontiers; - union { - bool post_order_visited{false}; - Block* imm_dominator; - }; + Block* branch_true; + Block* branch_false; + IR::Block* ir; }; struct Label { Location address; - BlockId block_id; + Block* block; Stack stack; }; struct Function { Function(Location start_address); - void BuildBlocksMap(); - - void BuildImmediatePredecessors(); - - void BuildPostOrder(); - - void BuildImmediateDominators(); - - void BuildDominanceFrontier(); - - [[nodiscard]] size_t NumBlocks() const noexcept { - return static_cast<size_t>(current_block_id) + 1; - } - Location entrypoint; - BlockId current_block_id{0}; boost::container::small_vector<Label, 16> labels; - boost::container::small_vector<u32, 0x130> blocks; - boost::container::small_vector<Block, 0x130> blocks_data; - // Translates from BlockId to block index - boost::container::small_vector<Block*, 0x130> blocks_map; - - boost::container::small_vector<u32, 0x130> post_order_blocks; - boost::container::small_vector<BlockId, 0x130> post_order_map; + boost::intrusive::set<Block> blocks; }; class CFG { @@ -120,7 +101,7 @@ class CFG { }; public: - explicit CFG(Environment& env, Location start_address); + explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -133,35 +114,37 @@ public: [[nodiscard]] std::span<const Function> Functions() const noexcept { return std::span(functions.data(), functions.size()); } + [[nodiscard]] std::span<Function> Functions() noexcept { + return std::span(functions.data(), functions.size()); + } private: - void VisitFunctions(Location start_address); - void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); - void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode); - void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); - AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); Environment& env; + ObjectPool<Block>& block_pool; boost::container::small_vector<Function, 1> functions; FunctionId current_function_id{0}; }; diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h index 66b51a19e6..26d29eae2e 100644 --- a/src/shader_recompiler/frontend/maxwell/location.h +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -15,7 +15,7 @@ namespace Shader::Maxwell { class Location { - static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits<u32>::max()}; + static constexpr u32 VIRTUAL_BIAS{4}; public: constexpr Location() = default; @@ -27,12 +27,18 @@ public: Align(); } + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + [[nodiscard]] constexpr u32 Offset() const noexcept { return offset; } [[nodiscard]] constexpr bool IsVirtual() const { - return offset == VIRTUAL_OFFSET; + return offset % 8 == VIRTUAL_BIAS; } constexpr auto operator<=>(const Location&) const noexcept = default; @@ -89,7 +95,7 @@ private: offset -= 8 + (offset % 32 == 8 ? 8 : 0); } - u32 offset{VIRTUAL_OFFSET}; + u32 offset{0xcccccccc}; }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8cdd20804e..9fa912ed8e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,57 +4,58 @@ #include <algorithm> #include <memory> +#include <vector> #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { namespace { -void TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span<IR::Block*> block_map) { +IR::BlockList TranslateCode(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::Function& cfg_function) { const size_t num_blocks{cfg_function.blocks.size()}; - function.blocks.reserve(num_blocks); - - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; - block_map[flow_block.id] = ir_block; - function.blocks.emplace_back(ir_block); - } -} - -void EmitTerminationInsts(const Flow::Function& cfg_function, - std::span<IR::Block* const> block_map) { - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } -} - -void TranslateFunction(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::Function& cfg_function, - IR::Function& function) { - std::vector<IR::Block*> block_map; - block_map.resize(cfg_function.blocks_data.size()); - - TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); - EmitTerminationInsts(cfg_function, block_map); + std::vector<IR::Block*> blocks(cfg_function.blocks.size()); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + const u32 begin{cfg_block.begin.Offset()}; + const u32 end{cfg_block.end.Offset()}; + blocks[i] = block_pool.Create(inst_pool, begin, end); + cfg_block.ir = blocks[i]; + ++i; + }); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + IR::Block* const block{blocks[i]}; + ++i; + if (cfg_block.end_class != Flow::EndClass::Branch) { + block->SetReturn(); + } else if (cfg_block.cond == IR::Condition{true}) { + block->SetBranch(cfg_block.branch_true->ir); + } else if (cfg_block.cond == IR::Condition{false}) { + block->SetBranch(cfg_block.branch_false->ir); + } else { + block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, + cfg_block.branch_false->ir); + } + }); + return IR::VisitAST(inst_pool, block_pool, blocks, + [&](IR::Block* block) { Translate(env, block); }); } } // Anonymous namespace IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, const Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg) { IR::Program program; auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); + for (Flow::Function& cfg_function : cfg.Functions()) { + functions.push_back(IR::Function{ + .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, + }); } + + fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 3355ab1299..542621a1de 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -19,6 +19,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, - const Flow::CFG& cfg); + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp deleted file mode 100644 index ed5137f20c..0000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <span> - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" - -namespace Shader::Maxwell { - -static void EmitExit(IR::IREmitter& ir) { - ir.Exit(); -} - -static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { - switch (flow_test) { - case IR::FlowTest::T: - return ir.Imm1(true); - case IR::FlowTest::F: - return ir.Imm1(false); - case IR::FlowTest::NE: - // FIXME: Verify this - return ir.LogicalNot(ir.GetZFlag()); - case IR::FlowTest::NaN: - // FIXME: Verify this - return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); - default: - throw NotImplementedException("Flow test {}", flow_test); - } -} - -static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { - const IR::FlowTest flow_test{cond.FlowTest()}; - const auto [pred, pred_negated]{cond.Pred()}; - if (pred == IR::Pred::PT && !pred_negated) { - return GetFlowTest(flow_test, ir); - } - if (flow_test == IR::FlowTest::T) { - return ir.GetPred(pred, pred_negated); - } - return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); -} - -static void EmitBranch(const Flow::Block& flow_block, std::span<IR::Block* const> block_map, - IR::IREmitter& ir) { - const auto add_immediate_predecessor = [&](Flow::BlockId label) { - block_map[label]->AddImmediatePredecessor(&ir.block); - }; - if (flow_block.cond == true) { - add_immediate_predecessor(flow_block.branch_true); - return ir.Branch(block_map[flow_block.branch_true]); - } - if (flow_block.cond == false) { - add_immediate_predecessor(flow_block.branch_false); - return ir.Branch(block_map[flow_block.branch_false]); - } - add_immediate_predecessor(flow_block.branch_true); - add_immediate_predecessor(flow_block.branch_false); - return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], - block_map[flow_block.branch_false]); -} - -void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map) { - IR::Block* const block{block_map[flow_block.id]}; - IR::IREmitter ir(*block); - switch (flow_block.end_class) { - case Flow::EndClass::Branch: - EmitBranch(flow_block, block_map, ir); - break; - case Flow::EndClass::Exit: - EmitExit(ir); - break; - case Flow::EndClass::Return: - ir.Return(); - break; - case Flow::EndClass::Unreachable: - ir.Unreachable(); - break; - } -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h deleted file mode 100644 index 04e0445340..0000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <span> - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" - -namespace Shader::Maxwell { - -/// Emit termination instructions and collect immediate predecessors -void EmitTerminationCode(const Flow::Block& flow_block, std::span<IR::Block* const> block_map); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d4b417d14a..b752785d44 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { IR::U32 result; if (shl.w != 0) { // When .W is set, the shift value is wrapped - // To emulate this we just have to clamp it ourselves. + // To emulate this we just have to wrap it ourselves. const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; result = v.ir.ShiftLeftLogical(base, shift); } else { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 7e6bb07a22..f1230f58fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, - const Flow::Block& flow_block) { - IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; - TranslatorVisitor visitor{env, block}; - - const Location pc_end{flow_block.end}; - Location pc{flow_block.begin}; - while (pc != pc_end) { +void Translate(Environment& env, IR::Block* block) { + if (block->IsVirtual()) { + return; + } + TranslatorVisitor visitor{env, *block}; + const Location pc_end{block->LocationEnd()}; + for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { @@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, default: throw LogicError("Invalid opcode {}", opcode); } - ++pc; } - return block; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index c1c21b2782..e1aa2e0f4b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,14 +6,9 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(ObjectPool<IR::Inst>& inst_pool, Environment& env, - const Flow::Block& flow_block); +void Translate(Environment& env, IR::Block* block); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f1170c61ee..9fba6ac239 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -132,6 +132,32 @@ void FoldLogicalAnd(IR::Inst& inst) { } } +void FoldLogicalOr(IR::Inst& inst) { + if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + return; + } + const IR::Value rhs{inst.Arg(1)}; + if (rhs.IsImmediate()) { + if (rhs.U1()) { + inst.ReplaceUsesWith(IR::Value{true}); + } else { + inst.ReplaceUsesWith(inst.Arg(0)); + } + } +} + +void FoldLogicalNot(IR::Inst& inst) { + const IR::U1 value{inst.Arg(0)}; + if (value.IsImmediate()) { + inst.ReplaceUsesWith(IR::Value{!value.U1()}); + return; + } + IR::Inst* const arg{value.InstRecursive()}; + if (arg->Opcode() == IR::Opcode::LogicalNot) { + inst.ReplaceUsesWith(arg->Arg(0)); + } +} + template <typename Dest, typename Source> void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; @@ -160,6 +186,24 @@ void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); } +void FoldBranchConditional(IR::Inst& inst) { + const IR::U1 cond{inst.Arg(0)}; + if (cond.IsImmediate()) { + // TODO: Convert to Branch + return; + } + const IR::Inst* cond_inst{cond.InstRecursive()}; + if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + const IR::Value true_label{inst.Arg(1)}; + const IR::Value false_label{inst.Arg(2)}; + // Remove negation on the conditional (take the parameter out of LogicalNot) and swap + // the branches + inst.SetArg(0, cond_inst->Arg(0)); + inst.SetArg(1, false_label); + inst.SetArg(2, true_label); + } +} + void ConstantPropagation(IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::GetRegister: @@ -178,6 +222,10 @@ void ConstantPropagation(IR::Inst& inst) { return FoldSelect<u32>(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); + case IR::Opcode::LogicalOr: + return FoldLogicalOr(inst); + case IR::Opcode::LogicalNot: + return FoldLogicalNot(inst); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: @@ -188,6 +236,8 @@ void ConstantPropagation(IR::Inst& inst) { } return (base >> shift) & ((1U << count) - 1); }); + case IR::Opcode::BranchConditional: + return FoldBranchConditional(inst); default: break; } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 15a9db90a9..8ca996e935 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -34,6 +34,13 @@ struct SignFlagTag : FlagTag {}; struct CarryFlagTag : FlagTag {}; struct OverflowFlagTag : FlagTag {}; +struct GotoVariable : FlagTag { + GotoVariable() = default; + explicit GotoVariable(u32 index_) : index{index_} {} + + u32 index; +}; + struct DefTable { [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { return regs[IR::RegIndex(variable)]; @@ -43,6 +50,10 @@ struct DefTable { return preds[IR::PredIndex(variable)]; } + [[nodiscard]] ValueMap& operator[](GotoVariable goto_variable) { + return goto_vars[goto_variable.index]; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -61,6 +72,7 @@ struct DefTable { std::array<ValueMap, IR::NUM_USER_REGS> regs; std::array<ValueMap, IR::NUM_USER_PREDS> preds; + boost::container::flat_map<u32, ValueMap> goto_vars; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -68,15 +80,15 @@ struct DefTable { }; IR::Opcode UndefOpcode(IR::Reg) noexcept { - return IR::Opcode::Undef32; + return IR::Opcode::UndefU32; } IR::Opcode UndefOpcode(IR::Pred) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } IR::Opcode UndefOpcode(const FlagTag&) noexcept { - return IR::Opcode::Undef1; + return IR::Opcode::UndefU1; } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { @@ -165,6 +177,9 @@ void SsaRewritePass(IR::Function& function) { pass.WriteVariable(pred, block, inst.Arg(1)); } break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -187,6 +202,9 @@ void SsaRewritePass(IR::Function& function) { inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); } break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 8a5adf5a23..32b56eb57c 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,6 +14,10 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Function& function) { for (const auto& block : function.blocks) { for (const IR::Inst& inst : *block) { + if (inst.Opcode() == IR::Opcode::Phi) { + // Skip validation on phi nodes + continue; + } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 9887e066d2..3ca1677c4a 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> #include <filesystem> #include <fmt/format.h> @@ -36,34 +37,46 @@ void RunDatabase() { ForEachFile("D:\\Shaders\\Database", [&](const std::filesystem::path& path) { map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str())); }); - for (int i = 0; i < 300; ++i) { + auto block_pool{std::make_unique<ObjectPool<Flow::Block>>()}; + auto t0 = std::chrono::high_resolution_clock::now(); + int N = 1; + int n = 0; + for (int i = 0; i < N; ++i) { for (auto& env : map) { + ++n; // fmt::print(stdout, "Decoding {}\n", path.string()); + const Location start_address{0}; - auto cfg{std::make_unique<Flow::CFG>(*env, start_address)}; + block_pool->ReleaseContents(); + Flow::CFG cfg{*env, *block_pool, start_address}; // fmt::print(stdout, "{}\n", cfg->Dot()); // IR::Program program{env, cfg}; // Optimize(program); // const std::string code{EmitGLASM(program)}; } } + auto t = std::chrono::high_resolution_clock::now(); + fmt::print(stdout, "{} ms", + std::chrono::duration_cast<std::chrono::milliseconds>(t - t0).count() / double(N)); } int main() { // RunDatabase(); + auto flow_block_pool{std::make_unique<ObjectPool<Flow::Block>>()}; auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()}; auto block_pool{std::make_unique<ObjectPool<IR::Block>>()}; - // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + // FileEnvironment env{"D:\\Shaders\\shader.bin"}; for (int i = 0; i < 1; ++i) { block_pool->ReleaseContents(); inst_pool->ReleaseContents(); - auto cfg{std::make_unique<Flow::CFG>(env, 0)}; - // fmt::print(stdout, "{}\n", cfg->Dot()); - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; - // fmt::print(stdout, "{}\n", IR::DumpProgram(program)); + flow_block_pool->ReleaseContents(); + Flow::CFG cfg{env, *flow_block_pool, 0}; + fmt::print(stdout, "{}\n", cfg.Dot()); + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); Backend::SPIRV::EmitSPIRV spirv{program}; } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 0000000000..1760bf4a96 --- /dev/null +++ b/src/shader_recompiler/shader_info.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> + +#include <boost/container/static_vector.hpp> + +namespace Shader { + +struct Info { + struct ConstantBuffer { + + }; + + struct { + bool workgroup_id{}; + bool local_invocation_id{}; + bool fp16{}; + bool fp64{}; + } uses; + + std::array<18 +}; + +} // namespace Shader -- cgit v1.2.3-70-g09d2 From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 14 Feb 2021 01:24:32 -0300 Subject: shader: Misc fixes --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 ++ src/shader_recompiler/backend/spirv/emit_spirv.h | 5 + src/shader_recompiler/frontend/ir/basic_block.cpp | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 + .../frontend/ir/microinstruction.cpp | 16 +-- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/integer_set_predicate.cpp | 4 +- .../ir_opt/constant_propagation_pass.cpp | 27 ++--- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 113 +++++++++++---------- src/shader_recompiler/main.cpp | 12 +-- 10 files changed, 104 insertions(+), 89 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5022b51597..e29e448c7c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -25,6 +25,9 @@ EmitContext::EmitContext(IR::Program& program) { f16.Define(*this, TypeFloat(16), "f16"); f64.Define(*this, TypeFloat(64), "f64"); + true_value = ConstantTrue(u1); + false_value = ConstantFalse(u1); + for (const IR::Function& function : program.functions) { for (IR::Block* const block : function.blocks) { block_label_map.emplace_back(block, OpLabel()); @@ -58,6 +61,7 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { std::fclose(file); std::system("spirv-dis shader.spv"); std::system("spirv-val shader.spv"); + std::system("spirv-cross shader.spv"); } template <auto method> @@ -109,6 +113,8 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) { switch (type) { case IR::Type::U1: return ctx.u1; + case IR::Type::U32: + return ctx.u32[1]; default: throw NotImplementedException("Phi node type {}", type); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 9aa83b5de4..46ec7a1bb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -79,6 +79,8 @@ public: return def_map.Consume(value.Inst()); } switch (value.Type()) { + case IR::Type::U1: + return value.U1() ? true_value : false_value; case IR::Type::U32: return Constant(u32[1], value.U32()); case IR::Type::F32: @@ -108,6 +110,9 @@ public: VectorTypes f16; VectorTypes f64; + Id true_value{}; + Id false_value{}; + Id workgroup_id{}; Id local_invocation_id{}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index b5616f3941..c976267128 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -113,7 +113,7 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind if (arg.IsLabel()) { return BlockToIndex(block_to_index, arg.Label()); } - if (!arg.IsImmediate()) { + if (!arg.IsImmediate() || arg.IsIdentity()) { return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { @@ -166,7 +166,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { - ret += fmt::format("[ {}, {} ]", arg_index, + ret += fmt::format("[ {}, {} ]", arg_str, BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); } else { ret += arg_str; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 30932043f9..f42489d41d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -46,10 +46,12 @@ F64 IREmitter::Imm64(f64 value) const { void IREmitter::Branch(Block* label) { label->AddImmediatePredecessor(block); + block->SetBranch(label); Inst(Opcode::Branch, label); } void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + block->SetBranches(IR::Condition{true}, true_label, false_label); true_label->AddImmediatePredecessor(block); false_label->AddImmediatePredecessor(block); Inst(Opcode::BranchConditional, condition, true_label, false_label); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b4ae371bd1..9279b96928 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -143,19 +143,21 @@ Value Inst::Arg(size_t index) const { } void Inst::SetArg(size_t index, Value value) { - if (op == Opcode::Phi) { - throw LogicError("Setting argument on a phi instruction"); - } - if (index >= NumArgsOf(op)) { + if (index >= NumArgs()) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } - if (!args[index].IsImmediate()) { - UndoUse(args[index]); + const IR::Value arg{Arg(index)}; + if (!arg.IsImmediate()) { + UndoUse(arg); } if (!value.IsImmediate()) { Use(value); } - args[index] = value; + if (op == Opcode::Phi) { + phi_args[index].second = value; + } else { + args[index] = value; + } } Block* Inst::PhiBlock(size_t index) const { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 60f79b1606..623e78ff85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -76,8 +76,8 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } // Anonymous namespace -void TranslatorVisitor::IADD_reg(u64) { - throw NotImplementedException("IADD (reg)"); +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::IADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 76c6b52910..1bc9ef3635 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -92,8 +92,8 @@ void TranslatorVisitor::ISETP_cbuf(u64 insn) { ISETP(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::ISETP_imm(u64) { - throw NotImplementedException("ISETP_imm"); +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9fba6ac239..cbde65b9b4 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -32,6 +32,8 @@ template <typename T> return value.U1(); } else if constexpr (std::is_same_v<T, u32>) { return value.U32(); + } else if constexpr (std::is_same_v<T, s32>) { + return static_cast<s32>(value.U32()); } else if constexpr (std::is_same_v<T, f32>) { return value.F32(); } else if constexpr (std::is_same_v<T, u64>) { @@ -39,17 +41,8 @@ template <typename T> } } -template <typename ImmFn> +template <typename T, typename ImmFn> bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { - const auto arg = [](const IR::Value& value) { - if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) { - return value.U1(); - } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) { - return value.U32(); - } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) { - return value.U64(); - } - }; const IR::Value lhs{inst.Arg(0)}; const IR::Value rhs{inst.Arg(1)}; @@ -57,14 +50,14 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { const bool is_rhs_immediate{rhs.IsImmediate()}; if (is_lhs_immediate && is_rhs_immediate) { - const auto result{imm_fn(arg(lhs), arg(rhs))}; + const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))}; inst.ReplaceUsesWith(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); } else { @@ -76,7 +69,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { - const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; + const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); } @@ -101,7 +94,7 @@ void FoldAdd(IR::Inst& inst) { if (inst.HasAssociatedPseudoOperation()) { return; } - if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { + if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -119,7 +112,7 @@ void FoldSelect(IR::Inst& inst) { } void FoldLogicalAnd(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -133,7 +126,7 @@ void FoldLogicalAnd(IR::Inst& inst) { } void FoldLogicalOr(IR::Inst& inst) { - if (!FoldCommutative(inst, [](bool a, bool b) { return a || b; })) { + if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) { return; } const IR::Value rhs{inst.Arg(1)}; @@ -226,6 +219,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); + case IR::Opcode::SLessThan: + return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); case IR::Opcode::ULessThan: return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); case IR::Opcode::BitFieldUExtract: diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index 8ca996e935..7eaf719c4e 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -113,6 +113,7 @@ private: IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { IR::Value val; if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { + // Optimize the common case of one predecessor: no phi needed val = ReadVariable(variable, preds.front()); } else { // Break potential cycles with operandless phi @@ -160,66 +161,70 @@ private: DefTable current_def; }; + +void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::SetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + pass.WriteVariable(reg, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + pass.WriteVariable(pred, block, inst.Arg(1)); + } + break; + case IR::Opcode::SetGotoVariable: + pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); + break; + case IR::Opcode::SetZFlag: + pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetSFlag: + pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetCFlag: + pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::SetOFlag: + pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); + break; + case IR::Opcode::GetRegister: + if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { + inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + } + break; + case IR::Opcode::GetPred: + if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { + inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); + } + break; + case IR::Opcode::GetGotoVariable: + inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); + break; + case IR::Opcode::GetZFlag: + inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); + break; + case IR::Opcode::GetSFlag: + inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); + break; + case IR::Opcode::GetCFlag: + inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); + break; + case IR::Opcode::GetOFlag: + inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); + break; + default: + break; + } +} } // Anonymous namespace void SsaRewritePass(IR::Function& function) { Pass pass; for (IR::Block* const block : function.blocks) { for (IR::Inst& inst : block->Instructions()) { - switch (inst.Opcode()) { - case IR::Opcode::SetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - pass.WriteVariable(reg, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - pass.WriteVariable(pred, block, inst.Arg(1)); - } - break; - case IR::Opcode::SetGotoVariable: - pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); - break; - case IR::Opcode::SetZFlag: - pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetSFlag: - pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetCFlag: - pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::SetOFlag: - pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); - break; - case IR::Opcode::GetRegister: - if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); - } - break; - case IR::Opcode::GetPred: - if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { - inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); - } - break; - case IR::Opcode::GetGotoVariable: - inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); - break; - case IR::Opcode::GetZFlag: - inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); - break; - case IR::Opcode::GetSFlag: - inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); - break; - case IR::Opcode::GetCFlag: - inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); - break; - case IR::Opcode::GetOFlag: - inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); - break; - default: - break; - } + VisitInst(pass, block, inst); } } } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 3ca1677c4a..92358232c3 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -38,7 +38,8 @@ void RunDatabase() { map.emplace_back(std::make_unique<FileEnvironment>(path.string().c_str())); }); auto block_pool{std::make_unique<ObjectPool<Flow::Block>>()}; - auto t0 = std::chrono::high_resolution_clock::now(); + using namespace std::chrono; + auto t0 = high_resolution_clock::now(); int N = 1; int n = 0; for (int i = 0; i < N; ++i) { @@ -55,9 +56,8 @@ void RunDatabase() { // const std::string code{EmitGLASM(program)}; } } - auto t = std::chrono::high_resolution_clock::now(); - fmt::print(stdout, "{} ms", - std::chrono::duration_cast<std::chrono::milliseconds>(t - t0).count() / double(N)); + auto t = high_resolution_clock::now(); + fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N)); } int main() { @@ -67,8 +67,8 @@ int main() { auto inst_pool{std::make_unique<ObjectPool<IR::Inst>>()}; auto block_pool{std::make_unique<ObjectPool<IR::Block>>()}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - // FileEnvironment env{"D:\\Shaders\\shader.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\shader.bin"}; for (int i = 0; i < 1; ++i) { block_pool->ReleaseContents(); inst_pool->ReleaseContents(); -- cgit v1.2.3-70-g09d2 From 1b0cf2309c760c1cb97a230a1572f8e87f84444a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Sun, 14 Feb 2021 22:46:40 -0300 Subject: shader: Add support for forward declarations --- externals/sirit | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 61 +++++++++++++++++----- src/shader_recompiler/backend/spirv/emit_spirv.h | 40 +------------- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/basic_block.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- .../frontend/ir/microinstruction.cpp | 2 +- .../frontend/ir/microinstruction.h | 20 +++++-- src/shader_recompiler/frontend/ir/modifiers.h | 10 ++-- .../global_memory_to_storage_buffer_pass.cpp | 4 +- src/shader_recompiler/main.cpp | 2 +- 11 files changed, 80 insertions(+), 69 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/externals/sirit b/externals/sirit index c374bfd9fd..f819ade0ef 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit c374bfd9fdff02a0cff85d005488967b1b0f675e +Subproject commit f819ade0efe925a782090dea9e1bf300fedffb39 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index e29e448c7c..0895414b4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -64,31 +64,49 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { std::system("spirv-cross shader.spv"); } +template <auto method, typename... Args> +static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { + const Id forward_id{inst->Definition<Id>()}; + const bool has_forward_id{Sirit::ValidId(forward_id)}; + Id current_id{}; + if (has_forward_id) { + current_id = ctx.ExchangeCurrentId(forward_id); + } + const Id new_id{(emit.*method)(ctx, std::forward<Args>(args)...)}; + if (has_forward_id) { + ctx.ExchangeCurrentId(current_id); + } else { + inst->SetDefinition<Id>(new_id); + } +} + template <auto method> static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { using M = decltype(method); using std::is_invocable_r_v; if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&>) { - ctx.Define(inst, (emit.*method)(ctx)); + SetDefinition<method>(emit, ctx, inst); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id>) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)))); + SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0))); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id>) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, Id, Id>) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2)))); + SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2))); + } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*>) { + SetDefinition<method>(emit, ctx, inst, inst); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id>) { - ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + SetDefinition<method>(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, IR::Inst*, Id, Id, Id>) { - ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2)))); + SetDefinition<method>(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2))); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, Id, u32>) { - ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), inst->Arg(1).U32())); + SetDefinition<method>(emit, ctx, inst, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&>) { - ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0))); + SetDefinition<method>(emit, ctx, inst, inst->Arg(0)); } else if constexpr (is_invocable_r_v<Id, M, EmitSPIRV&, EmitContext&, const IR::Value&, const IR::Value&>) { - ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0), inst->Arg(1))); + SetDefinition<method>(emit, ctx, inst, inst->Arg(0), inst->Arg(1)); } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&, IR::Inst*>) { (emit.*method)(ctx, inst); } else if constexpr (is_invocable_r_v<void, M, EmitSPIRV&, EmitContext&>) { @@ -122,11 +140,28 @@ static Id TypeId(const EmitContext& ctx, IR::Type type) { Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; - boost::container::small_vector<Id, 64> operands; + boost::container::small_vector<Id, 32> operands; operands.reserve(num_args * 2); for (size_t index = 0; index < num_args; ++index) { + // Phi nodes can have forward declarations, if an argument is not defined provide a forward + // declaration of it. Invoke will take care of giving it the right definition when it's + // actually defined. + const IR::Value arg{inst->Arg(index)}; + Id def{}; + if (arg.IsImmediate()) { + // Let the context handle immediate definitions, as it already knows how + def = ctx.Def(arg); + } else { + IR::Inst* const arg_inst{arg.Inst()}; + def = arg_inst->Definition<Id>(); + if (!Sirit::ValidId(def)) { + // If it hasn't been defined, get a forward declaration + def = ctx.ForwardDeclarationId(); + arg_inst->SetDefinition<Id>(def); + } + } IR::Block* const phi_block{inst->PhiBlock(index)}; - operands.push_back(ctx.Def(inst->Arg(index))); + operands.push_back(def); operands.push_back(ctx.BlockLabel(phi_block)); } const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6b09757d12..7d76377b52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -6,8 +6,6 @@ #include <sirit/sirit.h> -#include <boost/container/flat_map.hpp> - #include "common/common_types.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" @@ -16,37 +14,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -class DefMap { -public: - void Define(IR::Inst* inst, Id def_id) { - const InstInfo info{.use_count{inst->UseCount()}, .def_id{def_id}}; - const auto it{map.insert(map.end(), std::make_pair(inst, info))}; - if (it == map.end()) { - throw LogicError("Defining already defined instruction"); - } - } - - [[nodiscard]] Id Consume(IR::Inst* inst) { - const auto it{map.find(inst)}; - if (it == map.end()) { - throw LogicError("Consuming undefined instruction"); - } - const Id def_id{it->second.def_id}; - if (--it->second.use_count == 0) { - map.erase(it); - } - return def_id; - } - -private: - struct InstInfo { - int use_count; - Id def_id; - }; - - boost::container::flat_map<IR::Inst*, InstInfo> map; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -76,7 +43,7 @@ public: [[nodiscard]] Id Def(const IR::Value& value) { if (!value.IsImmediate()) { - return def_map.Consume(value.Inst()); + return value.Inst()->Definition<Id>(); } switch (value.Type()) { case IR::Type::U1: @@ -90,10 +57,6 @@ public: } } - void Define(IR::Inst* inst, Id def_id) { - def_map.Define(inst, def_id); - } - [[nodiscard]] Id BlockLabel(IR::Block* block) const { const auto it{std::ranges::lower_bound(block_label_map, block, {}, &std::pair<IR::Block*, Id>::first)}; @@ -117,7 +80,6 @@ public: Id local_invocation_id{}; private: - DefMap def_map; std::vector<std::pair<IR::Block*, Id>> block_label_map; }; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index c976267128..5ae91dd7dc 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -26,7 +26,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list<Value> args, u64 flags) { + std::initializer_list<Value> args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3205705e79..778b32e432 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -42,7 +42,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list<Value> args = {}, u64 flags = 0); + std::initializer_list<Value> args = {}, u32 flags = 0); /// Set the branches to jump to when all instructions have executed. void SetBranches(Condition cond, Block* branch_true, Block* branch_false); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4decb46bc1..24b012a393 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -178,7 +178,7 @@ private: } template <typename T> - requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags { + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { Flags() = default; Flags(T proxy_) : proxy{proxy_} {} @@ -187,7 +187,7 @@ private: template <typename T = Value, typename FlagType, typename... Args> T Inst(Opcode op, Flags<FlagType> flags, Args... args) { - u64 raw_flags{}; + u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 9279b96928..ee76db9adf 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -31,7 +31,7 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } -Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { std::construct_at(&phi_args); } else { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index ddf0f90a9e..5b244fa0bf 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -12,6 +12,7 @@ #include <boost/intrusive/list.hpp> +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" @@ -25,7 +26,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept; + explicit Inst(Opcode op_, u32 flags_) noexcept; ~Inst(); Inst& operator=(const Inst&) = delete; @@ -86,13 +87,25 @@ public: void ReplaceUsesWith(Value replacement); template <typename FlagsType> - requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>) + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; std::memcpy(&ret, &flags, sizeof(ret)); return ret; } + /// Intrusively store the host definition of this instruction. + template <typename DefinitionType> + void SetDefinition(DefinitionType def) { + definition = Common::BitCast<u32>(def); + } + + /// Return the intrusively stored host definition of this instruction. + template <typename DefinitionType> + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast<DefinitionType>(definition); + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} @@ -103,7 +116,8 @@ private: IR::Opcode op{}; int use_count{}; - u64 flags{}; + u32 flags{}; + u32 definition{}; union { NonTriviallyDummy dummy{}; std::array<Value, MAX_ARG_COUNT> args; diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 28bb9e798c..c288eede03 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -6,13 +6,13 @@ namespace Shader::IR { -enum class FmzMode { +enum class FmzMode : u8 { None, // Denorms are not flushed, NAN is propagated (nouveau) FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) }; -enum class FpRounding { +enum class FpRounding : u8 { RN, // Round to nearest even, RM, // Round towards negative infinity RP, // Round towards positive infinity @@ -21,8 +21,8 @@ enum class FpRounding { struct FpControl { bool no_contraction{false}; - FpRounding rounding : 8 = FpRounding::RN; - FmzMode fmz_mode : 8 = FmzMode::FTZ; + FpRounding rounding{FpRounding::RN}; + FmzMode fmz_mode{FmzMode::FTZ}; }; -static_assert(sizeof(FpControl) <= sizeof(u64)); +static_assert(sizeof(FpControl) <= sizeof(u32)); } // namespace Shader::IR diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 34393e1d57..08fd364bb4 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -161,8 +161,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index = index.U32(), - .offset = offset.U32(), + .index{index.U32()}, + .offset{offset.U32()}, }; if (bias && !MeetsBias(storage_buffer, *bias)) { // We have to blacklist some addresses in case we wrongly point to them diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 29f65966c3..3b110af61d 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - // Backend::SPIRV::EmitSPIRV spirv{program}; + Backend::SPIRV::EmitSPIRV spirv{program}; } -- cgit v1.2.3-70-g09d2 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/CMakeLists.txt | 13 +- .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 117 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 419 ++-- .../spirv/emit_spirv_bitwise_conversion.cpp | 24 +- .../backend/spirv/emit_spirv_composite.cpp | 48 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 42 +- .../backend/spirv/emit_spirv_control_flow.cpp | 10 +- .../backend/spirv/emit_spirv_floating_point.cpp | 92 +- .../backend/spirv/emit_spirv_integer.cpp | 60 +- .../backend/spirv/emit_spirv_logical.cpp | 40 +- .../backend/spirv/emit_spirv_memory.cpp | 56 +- .../backend/spirv/emit_spirv_select.cpp | 8 +- .../backend/spirv/emit_spirv_undefined.cpp | 10 +- src/shader_recompiler/environment.h | 6 +- src/shader_recompiler/file_environment.cpp | 6 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 + src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 + .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - src/shader_recompiler/main.cpp | 2 +- src/shader_recompiler/profile.h | 13 + src/shader_recompiler/recompiler.cpp | 27 + src/shader_recompiler/recompiler.h | 18 + src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 -------------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 + .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 43 files changed, 1003 insertions(+), 3036 deletions(-) create mode 100644 src/shader_recompiler/profile.h create mode 100644 src/shader_recompiler/recompiler.cpp create mode 100644 src/shader_recompiler/recompiler.h delete mode 100644 src/video_core/engines/shader_bytecode.h delete mode 100644 src/video_core/engines/shader_header.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 84be94a8d5..b56bdd3d9c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(shader_recompiler +add_library(shader_recompiler STATIC backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -85,13 +85,19 @@ add_executable(shader_recompiler ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp - main.cpp object_pool.h + profile.h + recompiler.cpp + recompiler.h shader_info.h ) -target_include_directories(video_core PRIVATE sirit) +target_include_directories(shader_recompiler PRIVATE sirit) target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) +target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) + +add_executable(shader_util main.cpp) +target_link_libraries(shader_util PRIVATE shader_recompiler) if (MSVC) target_compile_options(shader_recompiler PRIVATE @@ -121,3 +127,4 @@ else() endif() create_target_directory_groups(shader_recompiler) +create_target_directory_groups(shader_util) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 1c985aff8b..770067d988 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) { for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); binding += desc.count; @@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) { for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", binding)); std::fill_n(ssbos.data() + binding, desc.count, id); binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 55018332e5..d597184359 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -2,8 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <numeric> +#include <span> +#include <tuple> #include <type_traits> +#include <utility> +#include <vector> #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -14,10 +17,10 @@ namespace Shader::Backend::SPIRV { namespace { template <class Func> -struct FuncTraits : FuncTraits<decltype(&Func::operator())> {}; +struct FuncTraits : FuncTraits<Func> {}; -template <class ClassType, class ReturnType_, class... Args> -struct FuncTraits<ReturnType_ (ClassType::*)(Args...)> { +template <class ReturnType_, class... Args> +struct FuncTraits<ReturnType_ (*)(Args...)> { using ReturnType = ReturnType_; static constexpr size_t NUM_ARGS = sizeof...(Args); @@ -26,15 +29,15 @@ struct FuncTraits<ReturnType_ (ClassType::*)(Args...)> { using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; }; -template <auto method, typename... Args> -void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +template <auto func, typename... Args> +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition<Id>()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; if (has_forward_id) { current_id = ctx.ExchangeCurrentId(forward_id); } - const Id new_id{(emit.*method)(ctx, std::forward<Args>(args)...)}; + const Id new_id{func(ctx, std::forward<Args>(args)...)}; if (has_forward_id) { ctx.ExchangeCurrentId(current_id); } else { @@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } } -template <auto method, bool is_first_arg_inst, size_t... I> -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { - using Traits = FuncTraits<decltype(method)>; +template <auto func, bool is_first_arg_inst, size_t... I> +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { + using Traits = FuncTraits<decltype(func)>; if constexpr (std::is_same_v<Traits::ReturnType, Id>) { if constexpr (is_first_arg_inst) { - SetDefinition<method>(emit, ctx, inst, inst, - Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - SetDefinition<method>(emit, ctx, inst, - Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - (emit.*method)(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - (emit.*method)(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); } } } -template <auto method> -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using Traits = FuncTraits<decltype(method)>; +template <auto func> +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits<decltype(func)>; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { - Invoke<method, false>(emit, ctx, inst, std::make_index_sequence<0>{}); + Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; - Invoke<method, is_first_arg_inst>(emit, ctx, inst, Indices{}); + Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); } } } // Anonymous namespace -EmitSPIRV::EmitSPIRV(IR::Program& program) { +std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { if (program.info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - - std::vector<u32> result{ctx.Assemble()}; - std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis D:\\shader.spv") == 0 && - std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && - std::system("spirv-cross -V D:\\shader.spv") == 0; -} + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); -void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { -#define OPCODE(name, result_type, ...) \ - case IR::Opcode::name: \ - return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE - } - throw LogicError("Invalid opcode {}", inst->Opcode()); -} + const std::array<u32, 3> workgroup_size{env.WorkgroupSize()}; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], + workgroup_size[2]); -static Id TypeId(const EmitContext& ctx, IR::Type type) { - switch (type) { - case IR::Type::U1: - return ctx.U1; - case IR::Type::U32: - return ctx.U32[1]; - default: - throw NotImplementedException("Phi node type {}", type); - } + return ctx.Assemble(); } -Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; boost::container::small_vector<Id, 32> operands; operands.reserve(num_args * 2); @@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } -void EmitSPIRV::EmitVoid(EmitContext&) {} +void EmitVoid(EmitContext&) {} -Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return ctx.Def(value); } -void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { +void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { +void EmitGetSignFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { +void EmitGetCarryFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { +void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8bde826137..5813f51ff1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,223 +8,218 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -class EmitSPIRV { -public: - explicit EmitSPIRV(IR::Program& program); +[[nodiscard]] std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program); -private: - void EmitInst(EmitContext& ctx, IR::Inst* inst); - - // Microinstruction emitters - Id EmitPhi(EmitContext& ctx, IR::Inst* inst); - void EmitVoid(EmitContext& ctx); - Id EmitIdentity(EmitContext& ctx, const IR::Value& value); - void EmitBranch(EmitContext& ctx, IR::Block* label); - void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); - void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); - void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); - void EmitReturn(EmitContext& ctx); - void EmitGetRegister(EmitContext& ctx); - void EmitSetRegister(EmitContext& ctx); - void EmitGetPred(EmitContext& ctx); - void EmitSetPred(EmitContext& ctx); - void EmitSetGotoVariable(EmitContext& ctx); - void EmitGetGotoVariable(EmitContext& ctx); - Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitGetAttribute(EmitContext& ctx); - void EmitSetAttribute(EmitContext& ctx); - void EmitGetAttributeIndexed(EmitContext& ctx); - void EmitSetAttributeIndexed(EmitContext& ctx); - void EmitGetZFlag(EmitContext& ctx); - void EmitGetSFlag(EmitContext& ctx); - void EmitGetCFlag(EmitContext& ctx); - void EmitGetOFlag(EmitContext& ctx); - void EmitSetZFlag(EmitContext& ctx); - void EmitSetSFlag(EmitContext& ctx); - void EmitSetCFlag(EmitContext& ctx); - void EmitSetOFlag(EmitContext& ctx); - Id EmitWorkgroupId(EmitContext& ctx); - Id EmitLocalInvocationId(EmitContext& ctx); - Id EmitUndefU1(EmitContext& ctx); - Id EmitUndefU8(EmitContext& ctx); - Id EmitUndefU16(EmitContext& ctx); - Id EmitUndefU32(EmitContext& ctx); - Id EmitUndefU64(EmitContext& ctx); - void EmitLoadGlobalU8(EmitContext& ctx); - void EmitLoadGlobalS8(EmitContext& ctx); - void EmitLoadGlobalU16(EmitContext& ctx); - void EmitLoadGlobalS16(EmitContext& ctx); - void EmitLoadGlobal32(EmitContext& ctx); - void EmitLoadGlobal64(EmitContext& ctx); - void EmitLoadGlobal128(EmitContext& ctx); - void EmitWriteGlobalU8(EmitContext& ctx); - void EmitWriteGlobalS8(EmitContext& ctx); - void EmitWriteGlobalU16(EmitContext& ctx); - void EmitWriteGlobalS16(EmitContext& ctx); - void EmitWriteGlobal32(EmitContext& ctx); - void EmitWriteGlobal64(EmitContext& ctx); - void EmitWriteGlobal128(EmitContext& ctx); - void EmitLoadStorageU8(EmitContext& ctx); - void EmitLoadStorageS8(EmitContext& ctx); - void EmitLoadStorageU16(EmitContext& ctx); - void EmitLoadStorageS16(EmitContext& ctx); - Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitLoadStorage64(EmitContext& ctx); - void EmitLoadStorage128(EmitContext& ctx); - void EmitWriteStorageU8(EmitContext& ctx); - void EmitWriteStorageS8(EmitContext& ctx); - void EmitWriteStorageU16(EmitContext& ctx); - void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); - void EmitWriteStorage64(EmitContext& ctx); - void EmitWriteStorage128(EmitContext& ctx); - void EmitCompositeConstructU32x2(EmitContext& ctx); - void EmitCompositeConstructU32x3(EmitContext& ctx); - void EmitCompositeConstructU32x4(EmitContext& ctx); - void EmitCompositeExtractU32x2(EmitContext& ctx); - Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); - void EmitCompositeExtractU32x4(EmitContext& ctx); - void EmitCompositeConstructF16x2(EmitContext& ctx); - void EmitCompositeConstructF16x3(EmitContext& ctx); - void EmitCompositeConstructF16x4(EmitContext& ctx); - void EmitCompositeExtractF16x2(EmitContext& ctx); - void EmitCompositeExtractF16x3(EmitContext& ctx); - void EmitCompositeExtractF16x4(EmitContext& ctx); - void EmitCompositeConstructF32x2(EmitContext& ctx); - void EmitCompositeConstructF32x3(EmitContext& ctx); - void EmitCompositeConstructF32x4(EmitContext& ctx); - void EmitCompositeExtractF32x2(EmitContext& ctx); - void EmitCompositeExtractF32x3(EmitContext& ctx); - void EmitCompositeExtractF32x4(EmitContext& ctx); - void EmitCompositeConstructF64x2(EmitContext& ctx); - void EmitCompositeConstructF64x3(EmitContext& ctx); - void EmitCompositeConstructF64x4(EmitContext& ctx); - void EmitCompositeExtractF64x2(EmitContext& ctx); - void EmitCompositeExtractF64x3(EmitContext& ctx); - void EmitCompositeExtractF64x4(EmitContext& ctx); - void EmitSelect8(EmitContext& ctx); - void EmitSelect16(EmitContext& ctx); - void EmitSelect32(EmitContext& ctx); - void EmitSelect64(EmitContext& ctx); - void EmitBitCastU16F16(EmitContext& ctx); - Id EmitBitCastU32F32(EmitContext& ctx, Id value); - void EmitBitCastU64F64(EmitContext& ctx); - void EmitBitCastF16U16(EmitContext& ctx); - Id EmitBitCastF32U32(EmitContext& ctx, Id value); - void EmitBitCastF64U64(EmitContext& ctx); - void EmitPackUint2x32(EmitContext& ctx); - void EmitUnpackUint2x32(EmitContext& ctx); - void EmitPackFloat2x16(EmitContext& ctx); - void EmitUnpackFloat2x16(EmitContext& ctx); - void EmitPackDouble2x32(EmitContext& ctx); - void EmitUnpackDouble2x32(EmitContext& ctx); - void EmitGetZeroFromOp(EmitContext& ctx); - void EmitGetSignFromOp(EmitContext& ctx); - void EmitGetCarryFromOp(EmitContext& ctx); - void EmitGetOverflowFromOp(EmitContext& ctx); - void EmitFPAbs16(EmitContext& ctx); - void EmitFPAbs32(EmitContext& ctx); - void EmitFPAbs64(EmitContext& ctx); - Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - void EmitFPMax32(EmitContext& ctx); - void EmitFPMax64(EmitContext& ctx); - void EmitFPMin32(EmitContext& ctx); - void EmitFPMin64(EmitContext& ctx); - Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitFPNeg16(EmitContext& ctx); - void EmitFPNeg32(EmitContext& ctx); - void EmitFPNeg64(EmitContext& ctx); - void EmitFPRecip32(EmitContext& ctx); - void EmitFPRecip64(EmitContext& ctx); - void EmitFPRecipSqrt32(EmitContext& ctx); - void EmitFPRecipSqrt64(EmitContext& ctx); - void EmitFPSqrt(EmitContext& ctx); - void EmitFPSin(EmitContext& ctx); - void EmitFPSinNotReduced(EmitContext& ctx); - void EmitFPExp2(EmitContext& ctx); - void EmitFPExp2NotReduced(EmitContext& ctx); - void EmitFPCos(EmitContext& ctx); - void EmitFPCosNotReduced(EmitContext& ctx); - void EmitFPLog2(EmitContext& ctx); - void EmitFPSaturate16(EmitContext& ctx); - void EmitFPSaturate32(EmitContext& ctx); - void EmitFPSaturate64(EmitContext& ctx); - void EmitFPRoundEven16(EmitContext& ctx); - void EmitFPRoundEven32(EmitContext& ctx); - void EmitFPRoundEven64(EmitContext& ctx); - void EmitFPFloor16(EmitContext& ctx); - void EmitFPFloor32(EmitContext& ctx); - void EmitFPFloor64(EmitContext& ctx); - void EmitFPCeil16(EmitContext& ctx); - void EmitFPCeil32(EmitContext& ctx); - void EmitFPCeil64(EmitContext& ctx); - void EmitFPTrunc16(EmitContext& ctx); - void EmitFPTrunc32(EmitContext& ctx); - void EmitFPTrunc64(EmitContext& ctx); - Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitIAdd64(EmitContext& ctx); - Id EmitISub32(EmitContext& ctx, Id a, Id b); - void EmitISub64(EmitContext& ctx); - Id EmitIMul32(EmitContext& ctx, Id a, Id b); - void EmitINeg32(EmitContext& ctx); - void EmitIAbs32(EmitContext& ctx); - Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); - void EmitShiftRightLogical32(EmitContext& ctx); - void EmitShiftRightArithmetic32(EmitContext& ctx); - void EmitBitwiseAnd32(EmitContext& ctx); - void EmitBitwiseOr32(EmitContext& ctx); - void EmitBitwiseXor32(EmitContext& ctx); - void EmitBitFieldInsert(EmitContext& ctx); - void EmitBitFieldSExtract(EmitContext& ctx); - Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitULessThan(EmitContext& ctx); - void EmitIEqual(EmitContext& ctx); - void EmitSLessThanEqual(EmitContext& ctx); - void EmitULessThanEqual(EmitContext& ctx); - Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitUGreaterThan(EmitContext& ctx); - void EmitINotEqual(EmitContext& ctx); - void EmitSGreaterThanEqual(EmitContext& ctx); - Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); - void EmitLogicalOr(EmitContext& ctx); - void EmitLogicalAnd(EmitContext& ctx); - void EmitLogicalXor(EmitContext& ctx); - void EmitLogicalNot(EmitContext& ctx); - void EmitConvertS16F16(EmitContext& ctx); - void EmitConvertS16F32(EmitContext& ctx); - void EmitConvertS16F64(EmitContext& ctx); - void EmitConvertS32F16(EmitContext& ctx); - void EmitConvertS32F32(EmitContext& ctx); - void EmitConvertS32F64(EmitContext& ctx); - void EmitConvertS64F16(EmitContext& ctx); - void EmitConvertS64F32(EmitContext& ctx); - void EmitConvertS64F64(EmitContext& ctx); - void EmitConvertU16F16(EmitContext& ctx); - void EmitConvertU16F32(EmitContext& ctx); - void EmitConvertU16F64(EmitContext& ctx); - void EmitConvertU32F16(EmitContext& ctx); - void EmitConvertU32F32(EmitContext& ctx); - void EmitConvertU32F64(EmitContext& ctx); - void EmitConvertU64F16(EmitContext& ctx); - void EmitConvertU64F32(EmitContext& ctx); - void EmitConvertU64F64(EmitContext& ctx); - void EmitConvertU64U32(EmitContext& ctx); - void EmitConvertU32U64(EmitContext& ctx); -}; +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, IR::Block* label); +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitReturn(EmitContext& ctx); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetAttributeIndexed(EmitContext& ctx); +void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx); +void EmitLoadGlobal64(EmitContext& ctx); +void EmitLoadGlobal128(EmitContext& ctx); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx); +void EmitWriteGlobal64(EmitContext& ctx); +void EmitWriteGlobal128(EmitContext& ctx); +void EmitLoadStorageU8(EmitContext& ctx); +void EmitLoadStorageS8(EmitContext& ctx); +void EmitLoadStorageU16(EmitContext& ctx); +void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx); +void EmitLoadStorage128(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx); +void EmitWriteStorageS8(EmitContext& ctx); +void EmitWriteStorageU16(EmitContext& ctx); +void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx); +void EmitCompositeConstructU32x2(EmitContext& ctx); +void EmitCompositeConstructU32x3(EmitContext& ctx); +void EmitCompositeConstructU32x4(EmitContext& ctx); +void EmitCompositeExtractU32x2(EmitContext& ctx); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx); +void EmitCompositeConstructF16x2(EmitContext& ctx); +void EmitCompositeConstructF16x3(EmitContext& ctx); +void EmitCompositeConstructF16x4(EmitContext& ctx); +void EmitCompositeExtractF16x2(EmitContext& ctx); +void EmitCompositeExtractF16x3(EmitContext& ctx); +void EmitCompositeExtractF16x4(EmitContext& ctx); +void EmitCompositeConstructF32x2(EmitContext& ctx); +void EmitCompositeConstructF32x3(EmitContext& ctx); +void EmitCompositeConstructF32x4(EmitContext& ctx); +void EmitCompositeExtractF32x2(EmitContext& ctx); +void EmitCompositeExtractF32x3(EmitContext& ctx); +void EmitCompositeExtractF32x4(EmitContext& ctx); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitSelect8(EmitContext& ctx); +void EmitSelect16(EmitContext& ctx); +void EmitSelect32(EmitContext& ctx); +void EmitSelect64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx); +void EmitUnpackUint2x32(EmitContext& ctx); +void EmitPackFloat2x16(EmitContext& ctx); +void EmitUnpackFloat2x16(EmitContext& ctx); +void EmitPackDouble2x32(EmitContext& ctx); +void EmitUnpackDouble2x32(EmitContext& ctx); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx); +void EmitFPAbs32(EmitContext& ctx); +void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +void EmitFPMax32(EmitContext& ctx); +void EmitFPMax64(EmitContext& ctx); +void EmitFPMin32(EmitContext& ctx); +void EmitFPMin64(EmitContext& ctx); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitFPNeg16(EmitContext& ctx); +void EmitFPNeg32(EmitContext& ctx); +void EmitFPNeg64(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx); +void EmitFPRecip64(EmitContext& ctx); +void EmitFPRecipSqrt32(EmitContext& ctx); +void EmitFPRecipSqrt64(EmitContext& ctx); +void EmitFPSqrt(EmitContext& ctx); +void EmitFPSin(EmitContext& ctx); +void EmitFPSinNotReduced(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPExp2NotReduced(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPCosNotReduced(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPRoundEven16(EmitContext& ctx); +void EmitFPRoundEven32(EmitContext& ctx); +void EmitFPRoundEven64(EmitContext& ctx); +void EmitFPFloor16(EmitContext& ctx); +void EmitFPFloor32(EmitContext& ctx); +void EmitFPFloor64(EmitContext& ctx); +void EmitFPCeil16(EmitContext& ctx); +void EmitFPCeil32(EmitContext& ctx); +void EmitFPCeil64(EmitContext& ctx); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitIAdd64(EmitContext& ctx); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +void EmitISub64(EmitContext& ctx); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +void EmitINeg32(EmitContext& ctx); +void EmitIAbs32(EmitContext& ctx); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +void EmitShiftRightLogical32(EmitContext& ctx); +void EmitShiftRightArithmetic32(EmitContext& ctx); +void EmitBitwiseAnd32(EmitContext& ctx); +void EmitBitwiseOr32(EmitContext& ctx); +void EmitBitwiseXor32(EmitContext& ctx); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitULessThan(EmitContext& ctx); +void EmitIEqual(EmitContext& ctx); +void EmitSLessThanEqual(EmitContext& ctx); +void EmitULessThanEqual(EmitContext& ctx); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitUGreaterThan(EmitContext& ctx); +void EmitINotEqual(EmitContext& ctx); +void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +void EmitLogicalOr(EmitContext& ctx); +void EmitLogicalAnd(EmitContext& ctx); +void EmitLogicalXor(EmitContext& ctx); +void EmitLogicalNot(EmitContext& ctx); +void EmitConvertS16F16(EmitContext& ctx); +void EmitConvertS16F32(EmitContext& ctx); +void EmitConvertS16F64(EmitContext& ctx); +void EmitConvertS32F16(EmitContext& ctx); +void EmitConvertS32F32(EmitContext& ctx); +void EmitConvertS32F64(EmitContext& ctx); +void EmitConvertS64F16(EmitContext& ctx); +void EmitConvertS64F32(EmitContext& ctx); +void EmitConvertS64F64(EmitContext& ctx); +void EmitConvertU16F16(EmitContext& ctx); +void EmitConvertU16F32(EmitContext& ctx); +void EmitConvertU16F64(EmitContext& ctx); +void EmitConvertU32F16(EmitContext& ctx); +void EmitConvertU32F32(EmitContext& ctx); +void EmitConvertU32F64(EmitContext& ctx); +void EmitConvertU64F16(EmitContext& ctx); +void EmitConvertU64F32(EmitContext& ctx); +void EmitConvertU64F64(EmitContext& ctx); +void EmitConvertU64U32(EmitContext& ctx); +void EmitConvertU32U64(EmitContext& ctx); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index af82df99ce..49c2004987 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,51 +6,51 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { +void EmitBitCastU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { +void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { +void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { +void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackUint2x32(EmitContext&) { +void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { +void EmitUnpackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { +void EmitPackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { +void EmitUnpackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { +void EmitPackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { +void EmitUnpackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index a7374c89d2..348e4796d5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,99 +6,99 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { +void EmitCompositeConstructU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { +void EmitCompositeConstructU32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { +void EmitCompositeConstructU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { +void EmitCompositeExtractU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } -void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { +void EmitCompositeExtractU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { +void EmitCompositeConstructF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { +void EmitCompositeConstructF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { +void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { +void EmitCompositeExtractF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { +void EmitCompositeExtractF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { +void EmitCompositeExtractF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { +void EmitCompositeConstructF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { +void EmitCompositeConstructF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { +void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { +void EmitCompositeExtractF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { +void EmitCompositeExtractF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { +void EmitCompositeExtractF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { +void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { +void EmitCompositeConstructF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { +void EmitCompositeConstructF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { +void EmitCompositeExtractF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { +void EmitCompositeExtractF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { +void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f4c9970ebd..eb9c01c5a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,31 +6,31 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitGetRegister(EmitContext&) { +void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetRegister(EmitContext&) { +void EmitSetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetPred(EmitContext&) { +void EmitGetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetPred(EmitContext&) { +void EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { +void EmitSetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { +void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: return ctx.OpLoad(ctx.U32[1], access_chain); } -void EmitSPIRV::EmitGetAttribute(EmitContext&) { +void EmitGetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttribute(EmitContext&) { +void EmitSetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { +void EmitGetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { +void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetZFlag(EmitContext&) { +void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetSFlag(EmitContext&) { +void EmitGetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetCFlag(EmitContext&) { +void EmitGetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetOFlag(EmitContext&) { +void EmitGetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetZFlag(EmitContext&) { +void EmitSetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetSFlag(EmitContext&) { +void EmitSetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetCFlag(EmitContext&) { +void EmitSetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetOFlag(EmitContext&) { +void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { +Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } -Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { +Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 549c1907a2..6c4199664f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,25 +6,25 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { +void EmitBranch(EmitContext& ctx, IR::Block* label) { ctx.OpBranch(label->Definition<Id>()); } -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition<Id>(), false_label->Definition<Id>()); } -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { ctx.OpLoopMerge(merge_label->Definition<Id>(), continue_label->Definition<Id>(), spv::LoopControlMask::MaskNone); } -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { ctx.OpSelectionMerge(merge_label->Definition<Id>(), spv::SelectionControlMask::MaskNone); } -void EmitSPIRV::EmitReturn(EmitContext& ctx) { +void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9bc121f81..d24fbb353e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { } // Anonymous namespace -void EmitSPIRV::EmitFPAbs16(EmitContext&) { +void EmitFPAbs16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs32(EmitContext&) { +void EmitFPAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs64(EmitContext&) { +void EmitFPAbs64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } -Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitSPIRV::EmitFPMax32(EmitContext&) { +void EmitFPMax32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMax64(EmitContext&) { +void EmitFPMax64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin32(EmitContext&) { +void EmitFPMin32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin64(EmitContext&) { +void EmitFPMin64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitSPIRV::EmitFPNeg16(EmitContext&) { +void EmitFPNeg16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg32(EmitContext&) { +void EmitFPNeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg64(EmitContext&) { +void EmitFPNeg64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip32(EmitContext&) { +void EmitFPRecip32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip64(EmitContext&) { +void EmitFPRecip64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { +void EmitFPRecipSqrt32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { +void EmitFPRecipSqrt64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSqrt(EmitContext&) { +void EmitFPSqrt(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSin(EmitContext&) { +void EmitFPSin(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { +void EmitFPSinNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2(EmitContext&) { +void EmitFPExp2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { +void EmitFPExp2NotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCos(EmitContext&) { +void EmitFPCos(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { +void EmitFPCosNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPLog2(EmitContext&) { +void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate16(EmitContext&) { +void EmitFPSaturate16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate32(EmitContext&) { +void EmitFPSaturate32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate64(EmitContext&) { +void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { +void EmitFPRoundEven16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { +void EmitFPRoundEven32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { +void EmitFPRoundEven64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor16(EmitContext&) { +void EmitFPFloor16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor32(EmitContext&) { +void EmitFPFloor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor64(EmitContext&) { +void EmitFPFloor64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil16(EmitContext&) { +void EmitFPCeil16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil32(EmitContext&) { +void EmitFPCeil32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil64(EmitContext&) { +void EmitFPCeil64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc16(EmitContext&) { +void EmitFPTrunc16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc32(EmitContext&) { +void EmitFPTrunc32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc64(EmitContext&) { +void EmitFPTrunc64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 32af94a736..a1d16b81e4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -6,126 +6,126 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } return ctx.OpIAdd(ctx.U32[1], a, b); } -void EmitSPIRV::EmitIAdd64(EmitContext&) { +void EmitIAdd64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { +Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitSPIRV::EmitISub64(EmitContext&) { +void EmitISub64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitSPIRV::EmitINeg32(EmitContext&) { +void EmitINeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIAbs32(EmitContext&) { +void EmitIAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { +void EmitShiftRightLogical32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { +void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { +void EmitBitwiseAnd32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { +void EmitBitwiseOr32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { +void EmitBitwiseXor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { +void EmitBitFieldInsert(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { +void EmitBitFieldSExtract(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } -Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitULessThan(EmitContext&) { +void EmitULessThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIEqual(EmitContext&) { +void EmitIEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { +void EmitSLessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitULessThanEqual(EmitContext&) { +void EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitUGreaterThan(EmitContext&) { +void EmitUGreaterThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitINotEqual(EmitContext&) { +void EmitINotEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { +void EmitSGreaterThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitLogicalOr(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalAnd(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalXor(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalNot(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index 7b43c4ed80..ff2f4fb744 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,83 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitConvertS16F16(EmitContext&) { +void EmitConvertS16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F32(EmitContext&) { +void EmitConvertS16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F64(EmitContext&) { +void EmitConvertS16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F16(EmitContext&) { +void EmitConvertS32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F32(EmitContext&) { +void EmitConvertS32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F64(EmitContext&) { +void EmitConvertS32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F16(EmitContext&) { +void EmitConvertS64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F32(EmitContext&) { +void EmitConvertS64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F64(EmitContext&) { +void EmitConvertS64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F16(EmitContext&) { +void EmitConvertU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F32(EmitContext&) { +void EmitConvertU16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F64(EmitContext&) { +void EmitConvertU16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F16(EmitContext&) { +void EmitConvertU32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F32(EmitContext&) { +void EmitConvertU32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F64(EmitContext&) { +void EmitConvertU32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F16(EmitContext&) { +void EmitConvertU64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F32(EmitContext&) { +void EmitConvertU64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F64(EmitContext&) { +void EmitConvertU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64U32(EmitContext&) { +void EmitConvertU64U32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32U64(EmitContext&) { +void EmitConvertU32U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 5769a3c95b..77d698ffd4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { +void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { +void EmitLoadGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { +void EmitLoadGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { +void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { +void EmitLoadGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { +void EmitLoadGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { +void EmitLoadGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { +void EmitWriteGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { +void EmitWriteGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { +void EmitWriteGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { +void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { +void EmitWriteGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { +void EmitWriteGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { +void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { +void EmitLoadStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { +void EmitLoadStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { +void EmitLoadStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { +void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, return ctx.OpLoad(ctx.U32[1], pointer); } -void EmitSPIRV::EmitLoadStorage64(EmitContext&) { +void EmitLoadStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorage128(EmitContext&) { +void EmitLoadStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { +void EmitWriteStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { +void EmitWriteStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { +void EmitWriteStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { +void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitSPIRV::EmitWriteStorage64(EmitContext&) { +void EmitWriteStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage128(EmitContext&) { +void EmitWriteStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 40a856f72a..8d50627247 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,19 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitSelect8(EmitContext&) { +void EmitSelect8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect16(EmitContext&) { +void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect32(EmitContext&) { +void EmitSelect32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect64(EmitContext&) { +void EmitSelect64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index c1ed8f281f..19b06dbe49 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { +Id EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.U1); } -Id EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { +Id EmitUndefU32(EmitContext& ctx) { return ctx.OpUndef(ctx.U32[1]); } -Id EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index f6230e8171..0ba681fb96 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -1,5 +1,7 @@ #pragma once +#include <array> + #include "common/common_types.h" namespace Shader { @@ -8,7 +10,9 @@ class Environment { public: virtual ~Environment() = default; - [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0; + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index b34bf462b6..5127523f97 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) { FileEnvironment::~FileEnvironment() = default; -u64 FileEnvironment::ReadInstruction(u32 offset) const { +u64 FileEnvironment::ReadInstruction(u32 offset) { if (offset % 8 != 0) { throw InvalidArgument("offset={} is not aligned to 8", offset); } @@ -39,4 +39,8 @@ u64 FileEnvironment::ReadInstruction(u32 offset) const { return data[offset / 8]; } +std::array<u32, 3> FileEnvironment::WorkgroupSize() { + return {1, 1, 1}; +} + } // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index c294bc6faf..b8c4bbadd9 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -12,7 +12,9 @@ public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; - u64 ReadInstruction(u32 offset) const override; + u64 ReadInstruction(u32 offset) override; + + std::array<u32, 3> WorkgroupSize() override; private: std::vector<u64> data; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7dc..ec029dfd6e 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5a..8709a2ea1e 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c1..8c44ebb29d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo Optimization::VerificationPass(function); } Optimization::CollectShaderInfoPass(program); - //*/ + fmt::print(stdout, "{}\n", IR::DumpProgram(program)); return program; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 3c9eaddd94..079e3497f2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -24,6 +24,14 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast<IR::U32>(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d73..8bd468244e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d10683..c3c4b9abd2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acbc..6b2a1356bc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 1610bb34e1..050a37f180 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; + void(Backend::SPIRV::EmitSPIRV(env, program)); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 0000000000..c96d783b77 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +struct Profile { + bool unified_descriptor_binding; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 0000000000..b25081e39b --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) { + ObjectPool<Maxwell::Flow::Block> flow_block_pool; + ObjectPool<IR::Inst> inst_pool; + ObjectPool<IR::Block> block_pool; + + Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; +} + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h new file mode 100644 index 0000000000..4cb9738782 --- /dev/null +++ b/src/shader_recompiler/recompiler.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader { + +[[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address); + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706a..3323e69169 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d5..f8b8d06ac7 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b623..0000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <bitset> -#include <optional> -#include <tuple> -#include <vector> - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template <typename T> - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned<Tegra::Shader::Attribute> { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned<Tegra::Shader::Register> { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast<u32>(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast<u32>(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast<u16>(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast<u32>(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast<u32>(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast<s32>((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. - constexpr u32 imm_shift = 6; - return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - [[nodiscard]] s32 GetImmediateOffset() const { - return static_cast<s32>(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. - if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array<u8, 16> mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast<u32>(target); - constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. - BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast<u16>(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast<u16>(1U << bit_position); - break; - case '1': - expect |= static_cast<u16>(1U << bit_position); - mask |= static_cast<u16>(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. - [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector<Matcher> GetDecodeTable() { - std::vector<Matcher> table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c59..0000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <optional> - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast<PixelImap>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional<PixelImap> result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array<u32, 0xF> raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496c..588ce61398 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include <vector> +#include <boost/container/small_vector.hpp> + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + .pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3de..dc045d5245 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb59103..3bea1ff449 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7bed..2501f9967e 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector<vk::DescriptorSets> descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 0000000000..b062884035 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180d..4bf3e4819c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional<u128> Analyze(u32 start_address) { + const std::optional<u64> size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(*size); + return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique<u64[]>(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return gpu_memory.Read<u64>(program_base + address); + } + + std::array<u32, 3> WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional<u64> TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector<u64> code; + u32 cached_lowest = std::numeric_limits<u32>::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340dd..eb35abc27f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u128 unique_hash; u32 shared_memory_size; std::array<u32, 3> workgroup_size; @@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector<ComputePipelineCacheKey> compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache<Shader> { +class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr<Shader> null_shader; - std::unique_ptr<Shader> null_kernel; - - std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; + std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d94..b757454c49 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b0..3fd03b9155 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8c..2dd5149681 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d0..f0b80ad59e 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector<u64> ticks; ///< Ticks for each resource -- cgit v1.2.3-70-g09d2 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp <reinuseslisp@airmail.cc> --- src/common/thread_worker.h | 1 + src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +-- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_warp.cpp | 2 +- src/shader_recompiler/file_environment.h | 2 +- src/shader_recompiler/frontend/ir/attribute.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +-- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- .../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - .../ir_opt/collect_shader_info_pass.cpp | 20 +-- .../ir_opt/constant_propagation_pass.cpp | 49 ++++---- .../global_memory_to_storage_buffer_pass.cpp | 42 +++---- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 ++--- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 +- src/tests/common/unique_function.cpp | 2 + src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 - .../renderer_vulkan/vk_texture_cache.cpp | 2 +- 66 files changed, 308 insertions(+), 313 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 0a975a869d..cd0017726f 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <condition_variable> #include <functional> #include <mutex> #include <stop_token> diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 22639fe132..551bf1c582 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -196,6 +196,8 @@ else() $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable + + $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b738e00cc2..0c114402b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <array> +#include <climits> #include <string_view> #include <fmt/format.h> @@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie const std::string_view def_name_view( def_name.data(), fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + defs[static_cast<size_t>(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 32512a0e5f..355cf0ca8a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -16,7 +16,7 @@ namespace Shader::Backend::SPIRV { namespace { template <class Func> -struct FuncTraits : FuncTraits<Func> {}; +struct FuncTraits {}; template <class ReturnType_, class... Args> struct FuncTraits<ReturnType_ (*)(Args...)> { @@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { template <auto func, bool is_first_arg_inst, size_t... I> void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { using Traits = FuncTraits<decltype(func)>; - if constexpr (std::is_same_v<Traits::ReturnType, Id>) { + if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { if constexpr (is_first_arg_inst) { - SetDefinition<func>(ctx, inst, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + SetDefinition<func>( + ctx, inst, inst, + Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - SetDefinition<func>(ctx, inst, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + SetDefinition<func>( + ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg<Traits::ArgType<I + 2>>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); } else { - func(ctx, Arg<Traits::ArgType<I + 1>>(ctx, inst->Arg(I))...); + func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); } } } @@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ return Invoke<&Emit##name>(ctx, inst); #include "shader_recompiler/frontend/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->Opcode()); + throw LogicError("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f0f8db8c37..815ca62992 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -43,11 +43,13 @@ public: // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); return; } - const IR::Opcode opcode{values[0]->Opcode()}; - if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + auto read{[&](unsigned int a, unsigned int b) { + return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); + }}; const Id offsets{ ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), @@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { - const auto info{inst->Flags<IR::TextureInstInfo>()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#ifdef _WIN32 #pragma optimize("", off) +#endif Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c57bd291db..12a03ed6ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; + [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 17640a6229..71601f8fd6 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -7,7 +7,7 @@ namespace Shader { -class FileEnvironment final : public Environment { +class FileEnvironment : public Environment { public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea0..7993e5c436 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast<int>(attribute) - static_cast<int>(Attribute::Generic0X)) / 4; + return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6e..e1f0191f40 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2bc..fc18ea2a2f 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf5..aa8597c608 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast<IR::FlowTest>(flow_test); } - [[nodiscard]] std::pair<IR::Pred, bool> Pred() const noexcept { + [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { return {static_cast<IR::Pred>(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c4..a2104bdb31 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94b..ceb44e6042 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if (inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique<AssociatedInsts>(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique<AssociatedInsts>(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674e..97dc91d855 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9c..002dbf94e9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f3..89a17fb1b4 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <map> #include <string> diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487f..1e7ffb86d5 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e704..a0962863d8 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue<Type::U1>; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb19864..cb8ec7eaa3 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool<Block>& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast<unsigned int>(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e5..932d19c1d4 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d5..008625cb37 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e9..02cef26455 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea7..5a1b3a8fcb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff73218629..7238414962 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95c..4a49299a0b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c849..b8c89810cb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209e..80109ca0e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e1..b9f4ee0d9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde03..035f8782a7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1e..cf3cf1ba69 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c7..fa2a7807b7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032b..8ae4375287 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae8..06226b7ce2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db8438..5f93a15130 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a216..7550a8d4c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047d..f2738a93b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6ae..fd79867016 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef68..3f548ce761 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c8..cca5b831fd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92d..b3931dae32 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast<u32>(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast<u32>(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast<u32>(hsetp2.low << 6) | + static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast<u32>(hsetp2.high << 22) | + static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4d..88bbac0a50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; const u32 value{static_cast<u32>(imm.value) << 12}; return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e18151..8ffd84867d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2d..5a0fc36a03 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea325..2300088e38 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea6..e24b497210 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair<int, bool> GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d78..36c5cff2f1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9c..95d4165863 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f42..fe2c7db85d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad9..2ba9c1018a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23b..0863bdfcd4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bcab..0459e5473e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fccf..ec5e74f6d8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82af..edbfcd3082 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a092..61fbbe04cb 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template <typename Func, size_t... I> IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { using Traits = LambdaTraits<decltype(func)>; - return IR::Value{func(Arg<Traits::ArgType<I>>(inst.Arg(I))...)}; + return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if (inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bddd..90a65dd167 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { // This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. @@ -200,7 +200,7 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f21..38af72dfea 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed61..52576b07fc 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d14..346fcc3774 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce41791..c8aee3d3d5 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector<TextureInst, 24>; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags<IR::TextureInstInfo>()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cca..dbec96d84a 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if (!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738a..aa6e865934 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 71b07c1940..3166a69dc1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4aa..57e2d569c2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast<u32>(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92f..fcebb8f6e2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast<u64>(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab9..991afe521e 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <unordered_map> #include <boost/container/static_vector.hpp> diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5f..70328680dd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::vector<VkBufferCopy> result(copies.size()); std::ranges::transform( -- cgit v1.2.3-70-g09d2 From 4bbe5303376e693d15d7de80b25f5fda783281ce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Tue, 20 Apr 2021 22:28:06 -0300 Subject: shader: Inline common IR::Block methods --- src/shader_recompiler/frontend/ir/basic_block.cpp | 12 ------------ src/shader_recompiler/frontend/ir/basic_block.h | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e1f0191f40..f92fc2571c 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -69,24 +69,12 @@ u32 Block::LocationEnd() const noexcept { return location_end; } -Block::InstructionList& Block::Instructions() noexcept { - return instructions; -} - -const Block::InstructionList& Block::Instructions() const noexcept { - return instructions; -} - void Block::AddImmediatePredecessor(Block* block) { if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { imm_predecessors.push_back(block); } } -std::span<IR::Block* const> Block::ImmediatePredecessors() const noexcept { - return imm_predecessors; -} - static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, Block* block) { if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index b14a35ec53..6a1d615d92 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -59,15 +59,22 @@ public: /// Gets the end location for this basic block. [[nodiscard]] u32 LocationEnd() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); + /// Gets a mutable reference to the instruction list for this basic block. - [[nodiscard]] InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } /// Gets an immutable reference to the instruction list for this basic block. - [[nodiscard]] const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } - /// Adds a new immediate predecessor to this basic block. - void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept { + return imm_predecessors; + } /// Intrusively store the host definition of this instruction. template <typename DefinitionType> -- cgit v1.2.3-70-g09d2 From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp <reinuseslisp@airmail.cc> Date: Fri, 14 May 2021 00:40:54 -0300 Subject: glasm: Rework control flow introducing a syntax list This commit regresses VertexA shaders, their transformation pass has to be adapted to the new control flow. --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/glasm/emit_glasm.cpp | 41 +++- .../backend/glasm/emit_glasm_instructions.h | 7 +- .../backend/glasm/emit_glasm_integer.cpp | 9 +- .../backend/glasm/emit_glasm_not_implemented.cpp | 24 +-- .../backend/spirv/emit_context.cpp | 9 - src/shader_recompiler/backend/spirv/emit_context.h | 1 - src/shader_recompiler/backend/spirv/emit_spirv.cpp | 71 ++++++- .../backend/spirv/emit_spirv_control_flow.cpp | 33 +-- .../backend/spirv/emit_spirv_instructions.h | 9 +- .../frontend/ir/abstract_syntax_list.h | 56 +++++ src/shader_recompiler/frontend/ir/basic_block.cpp | 56 ++--- src/shader_recompiler/frontend/ir/basic_block.h | 51 +---- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 60 ++---- src/shader_recompiler/frontend/ir/ir_emitter.h | 11 +- .../frontend/ir/microinstruction.cpp | 11 +- src/shader_recompiler/frontend/ir/opcodes.h | 1 - src/shader_recompiler/frontend/ir/opcodes.inc | 21 +- src/shader_recompiler/frontend/ir/post_order.cpp | 36 ++-- src/shader_recompiler/frontend/ir/post_order.h | 3 +- src/shader_recompiler/frontend/ir/program.h | 4 +- src/shader_recompiler/frontend/ir/type.h | 49 +++-- src/shader_recompiler/frontend/ir/value.cpp | 8 - src/shader_recompiler/frontend/ir/value.h | 9 - src/shader_recompiler/frontend/maxwell/program.cpp | 24 ++- .../frontend/maxwell/structured_control_flow.cpp | 235 ++++++++++++--------- .../frontend/maxwell/structured_control_flow.h | 12 +- .../frontend/maxwell/translate/translate.cpp | 7 +- .../frontend/maxwell/translate/translate.h | 2 +- .../ir_opt/constant_propagation_pass.cpp | 20 -- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 56 +---- .../ir_opt/identity_removal_pass.cpp | 1 - src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- 33 files changed, 437 insertions(+), 505 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/abstract_syntax_list.h (limited to 'src/shader_recompiler/frontend/ir/basic_block.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f829b8d32c..0d55924a7c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -51,6 +51,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_warp.cpp environment.h exception.h + frontend/ir/abstract_syntax_list.h frontend/ir/attribute.cpp frontend/ir/attribute.h frontend/ir/basic_block.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 056d8cbf8d..51ca83d189 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -117,8 +117,6 @@ auto Arg(EmitContext& ctx, const IR::Value& arg) { return Identity<const IR::Value&>{arg}; } else if constexpr (std::is_same_v<ArgType, u32>) { return Identity{arg.U32()}; - } else if constexpr (std::is_same_v<ArgType, IR::Block*>) { - return Identity{arg.Label()}; } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { return Identity{arg.Attribute()}; } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { @@ -177,6 +175,39 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { throw LogicError("Invalid opcode {}", inst->GetOpcode()); } +void EmitCode(EmitContext& ctx, const IR::Program& program) { + const auto eval{ + [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("MOV.S.CC RC,{};IF NE.x;", eval(node.if_node.cond)); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("ENDIF;"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("REP;"); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + ctx.Add("MOV.S.CC RC,{};BRK NE.x;ENDREP;", eval(node.repeat.cond)); + break; + case IR::AbstractSyntaxNode::Type::Break: + ctx.Add("MOV.S.CC RC,{};BRK NE.x;", eval(node.repeat.cond)); + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("RET;"); + break; + } + } +} + void SetupOptions(std::string& header, Info info) { if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; @@ -201,11 +232,7 @@ void SetupOptions(std::string& header, Info info) { std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { EmitContext ctx{program}; - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } - } + EmitCode(ctx, program); std::string header = "!!NVcp5.0\n" "OPTION NV_internal;"; SetupOptions(header, program.info); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index 8202354fea..0f7f16e6e2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -22,13 +22,8 @@ class EmitContext; void EmitPhi(EmitContext& ctx, IR::Inst& inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBranch(EmitContext& ctx); -void EmitBranchConditional(EmitContext& ctx); -void EmitLoopMerge(EmitContext& ctx); -void EmitSelectionMerge(EmitContext& ctx); -void EmitReturn(EmitContext& ctx); +void EmitBranchConditionRef(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index 15fd233560..adcc0404b1 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -91,7 +91,8 @@ void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scalar if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); } else { - ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" "BFI.S {},RC,{},{};", count, offset, ret, insert, base); } @@ -103,7 +104,8 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); } else { - ctx.Add("MOV.S RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.S RC.x,{};" + "MOV.S RC.y,{};" "BFE.S {},RC,{};", count, offset, ret, base); } @@ -115,7 +117,8 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); } else { - ctx.Add("MOV.U RC.x,{};MOV.U RC.y,{};" + ctx.Add("MOV.U RC.x,{};" + "MOV.U RC.y,{};" "BFE.U {},RC,{};", count, offset, ret, base); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index b40d09f8c1..f37ad55879 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -23,34 +23,12 @@ void EmitPhi(EmitContext& ctx, IR::Inst& inst) { void EmitVoid(EmitContext&) {} -void EmitBranch(EmitContext& ctx) { - NotImplemented(); -} - -void EmitBranchConditional(EmitContext& ctx) { - NotImplemented(); -} - -void EmitLoopMerge(EmitContext& ctx) { - NotImplemented(); -} - -void EmitSelectionMerge(EmitContext& ctx) { - NotImplemented(); -} - -void EmitReturn(EmitContext& ctx) { - ctx.Add("RET;"); -} +void EmitBranchConditionRef(EmitContext&) {} void EmitJoin(EmitContext& ctx) { NotImplemented(); } -void EmitUnreachable(EmitContext& ctx) { - NotImplemented(); -} - void EmitDemoteToHelperInvocation(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 9759591bdd..a98e08392c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -463,7 +463,6 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings DefineImages(program.info, image_binding); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); - DefineLabels(program); } EmitContext::~EmitContext() = default; @@ -487,8 +486,6 @@ Id EmitContext::Def(const IR::Value& value) { return Const(value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); - case IR::Type::Label: - return value.Label()->Definition<Id>(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -1139,12 +1136,6 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineLabels(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - block->SetDefinition(OpLabel()); - } -} - void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8b000f1ec8..d2b79f6c17 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -296,7 +296,6 @@ private: void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); void DefineOutputs(const IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2dad87e872..c22edfec2a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -41,8 +41,6 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg; } else if constexpr (std::is_same_v<ArgType, u32>) { return arg.U32(); - } else if constexpr (std::is_same_v<ArgType, IR::Block*>) { - return arg.Label(); } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { return arg.Attribute(); } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { @@ -109,15 +107,74 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { } } +void Traverse(EmitContext& ctx, IR::Program& program) { + IR::Block* current_block{}; + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + const Id label{node.block->Definition<Id>()}; + if (current_block) { + ctx.OpBranch(label); + } + current_block = node.block; + ctx.AddLabel(label); + for (IR::Inst& inst : node.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: { + const Id if_label{node.if_node.body->Definition<Id>()}; + const Id endif_label{node.if_node.merge->Definition<Id>()}; + ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.Def(node.if_node.cond), if_label, endif_label); + break; + } + case IR::AbstractSyntaxNode::Type::Loop: { + const Id body_label{node.loop.body->Definition<Id>()}; + const Id continue_label{node.loop.continue_block->Definition<Id>()}; + const Id endloop_label{node.loop.merge->Definition<Id>()}; + + ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + ctx.OpBranch(node.loop.body->Definition<Id>()); + break; + } + case IR::AbstractSyntaxNode::Type::Break: { + const Id break_label{node.break_node.merge->Definition<Id>()}; + const Id skip_label{node.break_node.skip->Definition<Id>()}; + ctx.OpBranchConditional(ctx.Def(node.break_node.cond), break_label, skip_label); + break; + } + case IR::AbstractSyntaxNode::Type::EndIf: + if (current_block) { + ctx.OpBranch(node.end_if.merge->Definition<Id>()); + } + break; + case IR::AbstractSyntaxNode::Type::Repeat: { + const Id loop_header_label{node.repeat.loop_header->Definition<Id>()}; + const Id merge_label{node.repeat.merge->Definition<Id>()}; + ctx.OpBranchConditional(ctx.Def(node.repeat.cond), loop_header_label, merge_label); + break; + } + case IR::AbstractSyntaxNode::Type::Return: + ctx.OpReturn(); + break; + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.OpUnreachable(); + break; + } + if (node.type != IR::AbstractSyntaxNode::Type::Block) { + current_block = nullptr; + } + } +} + Id DefineMain(EmitContext& ctx, IR::Program& program) { const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { - ctx.AddLabel(block->Definition<Id>()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } + block->SetDefinition(ctx.OpLabel()); } + Traverse(ctx, program); ctx.OpFunctionEnd(); return main; } @@ -411,6 +468,8 @@ Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return id; } +void EmitBranchConditionRef(EmitContext&) {} + void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6154c46be4..d33486f282 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -7,40 +7,21 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, Id label) { - ctx.OpBranch(label); -} - -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { - ctx.OpBranchConditional(condition, true_label, false_label); -} - -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { - ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); -} - -void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { - ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); -} - -void EmitReturn(EmitContext& ctx) { - ctx.OpReturn(); -} - void EmitJoin(EmitContext&) { throw NotImplementedException("Join shouldn't be emitted"); } -void EmitUnreachable(EmitContext& ctx) { - ctx.OpUnreachable(); -} - -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { +void EmitDemoteToHelperInvocation(EmitContext& ctx) { if (ctx.profile.support_demote_to_helper_invocation) { ctx.OpDemoteToHelperInvocationEXT(); - ctx.OpBranch(continue_label); } else { + const Id kill_label{ctx.OpLabel()}; + const Id impossible_label{ctx.OpLabel()}; + ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); + ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); + ctx.AddLabel(kill_label); ctx.OpKill(); + ctx.AddLabel(impossible_label); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index a1ca3f43de..2f4f6e59ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -23,14 +23,9 @@ class EmitContext; Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); +void EmitBranchConditionRef(EmitContext&); void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitDemoteToHelperInvocation(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 0000000000..1366414c25 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; + +struct AbstractSyntaxNode { + enum class Type { + Block, + If, + EndIf, + Loop, + Repeat, + Break, + Return, + Unreachable, + }; + Type type{}; + union { + Block* block{}; + struct { + U1 cond; + Block* body; + Block* merge; + } if_node; + struct { + Block* merge; + } end_if; + struct { + Block* body; + Block* continue_block; + Block* merge; + } loop; + struct { + U1 cond; + Block* loop_header; + Block* merge; + } repeat; + struct { + U1 cond; + Block* merge; + Block* skip; + } break_node; + }; +}; +using AbstractSyntaxList = std::vector<AbstractSyntaxNode>; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index f92fc2571c..7c08b25ce2 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -14,10 +14,7 @@ namespace Shader::IR { -Block::Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end) - : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} - -Block::Block(ObjectPool<Inst>& inst_pool_) : Block{inst_pool_, 0, 0} {} +Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} Block::~Block() = default; @@ -40,39 +37,15 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { - branch_cond = cond; - branch_true = branch_true_; - branch_false = branch_false_; -} - -void Block::SetBranch(Block* branch) { - branch_cond = Condition{true}; - branch_true = branch; -} - -void Block::SetReturn() { - branch_cond = Condition{true}; - branch_true = nullptr; - branch_false = nullptr; -} - -bool Block::IsVirtual() const noexcept { - return location_begin == location_end; -} - -u32 Block::LocationBegin() const noexcept { - return location_begin; -} - -u32 Block::LocationEnd() const noexcept { - return location_end; -} - -void Block::AddImmediatePredecessor(Block* block) { - if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { - imm_predecessors.push_back(block); +void Block::AddBranch(Block* block) { + if (std::ranges::find(imm_successors, block) != imm_successors.end()) { + throw LogicError("Successor already inserted"); + } + if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { + throw LogicError("Predecessor already inserted"); } + imm_successors.push_back(block); + block->imm_predecessors.push_back(this); } static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, @@ -92,15 +65,11 @@ static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& in return it->second; } -static std::string ArgToIndex(const std::map<const Block*, size_t>& block_to_index, - std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, +static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return "<null>"; } - if (arg.IsLabel()) { - return BlockToIndex(block_to_index, arg.Label()); - } if (!arg.IsImmediate() || arg.IsIdentity()) { return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } @@ -140,8 +109,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { ret += fmt::format(" ${}", it->second); } - ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); - + ret += '\n'; for (const Inst& inst : block) { const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); @@ -153,7 +121,7 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_str, diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 0b0c97af6b..7e134b4c78 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -12,6 +12,7 @@ #include <boost/intrusive/list.hpp> #include "common/bit_cast.h" +#include "common/common_types.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" @@ -27,7 +28,6 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(ObjectPool<Inst>& inst_pool_, u32 begin, u32 end); explicit Block(ObjectPool<Inst>& inst_pool_); ~Block(); @@ -44,22 +44,8 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args = {}, u32 flags = 0); - /// Set the branches to jump to when all instructions have executed. - void SetBranches(Condition cond, Block* branch_true, Block* branch_false); - /// Set the branch to unconditionally jump to when all instructions have executed. - void SetBranch(Block* branch); - /// Mark the block as a return block. - void SetReturn(); - - /// Returns true when the block does not implement any guest instructions directly. - [[nodiscard]] bool IsVirtual() const noexcept; - /// Gets the starting location of this basic block. - [[nodiscard]] u32 LocationBegin() const noexcept; - /// Gets the end location for this basic block. - [[nodiscard]] u32 LocationEnd() const noexcept; - - /// Adds a new immediate predecessor to this basic block. - void AddImmediatePredecessor(Block* block); + /// Adds a new branch to this basic block. + void AddBranch(Block* block); /// Gets a mutable reference to the instruction list for this basic block. [[nodiscard]] InstructionList& Instructions() noexcept { @@ -71,9 +57,13 @@ public: } /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span<Block* const> ImmediatePredecessors() const noexcept { + [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { return imm_predecessors; } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { + return imm_successors; + } /// Intrusively store the host definition of this instruction. template <typename DefinitionType> @@ -87,19 +77,6 @@ public: return Common::BitCast<DefinitionType>(definition); } - [[nodiscard]] Condition BranchCondition() const noexcept { - return branch_cond; - } - [[nodiscard]] bool IsTerminationBlock() const noexcept { - return !branch_true && !branch_false; - } - [[nodiscard]] Block* TrueBranch() const noexcept { - return branch_true; - } - [[nodiscard]] Block* FalseBranch() const noexcept { - return branch_false; - } - void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { ssa_reg_values[RegIndex(reg)] = value; } @@ -178,22 +155,14 @@ public: private: /// Memory pool for instruction list ObjectPool<Inst>* inst_pool; - /// Starting location of this block - u32 location_begin; - /// End location of this block - u32 location_end; /// List of instructions in this block InstructionList instructions; - /// Condition to choose the branch to take - Condition branch_cond{true}; - /// Block to jump into when the branch condition evaluates as true - Block* branch_true{nullptr}; - /// Block to jump into when the branch condition evaluates as false - Block* branch_false{nullptr}; /// Block immediate predecessors std::vector<Block*> imm_predecessors; + /// Block immediate successors + std::vector<Block*> imm_successors; /// Intrusively store the value of a register in the block. std::array<Value, NUM_REGS> ssa_reg_values; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce6c9af073..eb45aa4772 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,25 +61,28 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(Block* label) { - label->AddImmediatePredecessor(block); - block->SetBranch(label); - Inst(Opcode::Branch, label); +void IREmitter::Prologue() { + Inst(Opcode::Prologue); } -void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { - block->SetBranches(IR::Condition{true}, true_label, false_label); - true_label->AddImmediatePredecessor(block); - false_label->AddImmediatePredecessor(block); - Inst(Opcode::BranchConditional, condition, true_label, false_label); +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); } -void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { - Inst(Opcode::LoopMerge, merge_block, continue_target); +void IREmitter::BranchConditionRef(const U1& cond) { + Inst(Opcode::BranchConditionRef, cond); } -void IREmitter::SelectionMerge(Block* merge_block) { - Inst(Opcode::SelectionMerge, merge_block); +void IREmitter::DemoteToHelperInvocation() { + Inst(Opcode::DemoteToHelperInvocation); +} + +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); } void IREmitter::Barrier() { @@ -94,37 +97,6 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } -void IREmitter::Return() { - block->SetReturn(); - Inst(Opcode::Return); -} - -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); -} - -void IREmitter::DemoteToHelperInvocation(Block* continue_label) { - block->SetBranch(continue_label); - continue_label->AddImmediatePredecessor(block); - Inst(Opcode::DemoteToHelperInvocation, continue_label); -} - -void IREmitter::Prologue() { - Inst(Opcode::Prologue); -} - -void IREmitter::Epilogue() { - Inst(Opcode::Epilogue); -} - -void IREmitter::EmitVertex(const U32& stream) { - Inst(Opcode::EmitVertex, stream); -} - -void IREmitter::EndPrimitive(const U32& stream) { - Inst(Opcode::EndPrimitive, stream); -} - U32 IREmitter::GetReg(IR::Reg reg) { return Inst<U32>(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index fd41b7e893..7a83c33d33 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,17 +32,10 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(Block* label); - void BranchConditional(const U1& condition, Block* true_label, Block* false_label); - void LoopMerge(Block* merge_block, Block* continue_target); - void SelectionMerge(Block* merge_block); - void Return(); - void Unreachable(); - void DemoteToHelperInvocation(Block* continue_label); - void Prologue(); void Epilogue(); - + void BranchConditionRef(const U1& cond); + void DemoteToHelperInvocation(); void EmitVertex(const U32& stream); void EndPrimitive(const U32& stream); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 616ef17d4e..3645742401 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,19 +56,14 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { - case Opcode::Branch: - case Opcode::BranchConditional: - case Opcode::LoopMerge: - case Opcode::SelectionMerge: - case Opcode::Return: + case Opcode::Prologue: + case Opcode::Epilogue: + case Opcode::BranchConditionRef: case Opcode::Join: - case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: - case Opcode::Prologue: - case Opcode::Epilogue: case Opcode::EmitVertex: case Opcode::EndPrimitive: case Opcode::SetAttribute: diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 2b9c0ed8cc..56b001902c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -30,7 +30,6 @@ struct OpcodeMeta { // using enum Type; constexpr Type Void{Type::Void}; constexpr Type Opaque{Type::Opaque}; -constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9165421f89..75ddb6b6f8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -7,27 +7,20 @@ OPCODE(Phi, Opaque, OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) -// Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) +OPCODE(BranchConditionRef, Void, U1, ) OPCODE(Join, Void, ) -OPCODE(Unreachable, Void, ) -OPCODE(DemoteToHelperInvocation, Void, Label, ) +OPCODE(DemoteToHelperInvocation, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) // Barriers OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) -// Special operations -OPCODE(Prologue, Void, ) -OPCODE(Epilogue, Void, ) -OPCODE(EmitVertex, Void, U32, ) -OPCODE(EndPrimitive, Void, U32, ) - // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index 8709a2ea1e..1a28df7fbe 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> + #include <boost/container/flat_set.hpp> #include <boost/container/small_vector.hpp> @@ -10,35 +12,31 @@ namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks) { +BlockList PostOrder(const AbstractSyntaxNode& root) { boost::container::small_vector<Block*, 16> block_stack; boost::container::flat_set<Block*> visited; - BlockList post_order_blocks; - post_order_blocks.reserve(blocks.size()); - Block* const first_block{blocks.front()}; + if (root.type != AbstractSyntaxNode::Type::Block) { + throw LogicError("First node in abstract syntax list root is not a block"); + } + Block* const first_block{root.block}; visited.insert(first_block); block_stack.push_back(first_block); - const auto visit_branch = [&](Block* block, Block* branch) { - if (!branch) { - return false; - } - if (!visited.insert(branch).second) { - return false; - } - // Calling push_back twice is faster than insert on MSVC - block_stack.push_back(block); - block_stack.push_back(branch); - return true; - }; while (!block_stack.empty()) { Block* const block{block_stack.back()}; + const auto visit{[&](Block* branch) { + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on MSVC + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }}; block_stack.pop_back(); - - if (!visit_branch(block, block->TrueBranch()) && - !visit_branch(block, block->FalseBranch())) { + if (std::ranges::none_of(block->ImmSuccessors(), visit)) { post_order_blocks.push_back(block); } } diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 30137ff57a..58a0467a03 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -5,9 +5,10 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks); +BlockList PostOrder(const AbstractSyntaxNode& root); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 51e1a8c779..9ede5b48d9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -7,8 +7,7 @@ #include <array> #include <string> -#include <boost/container/small_vector.hpp> - +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" @@ -17,6 +16,7 @@ namespace Shader::IR { struct Program { + AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; Info info; diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 8b3b338528..294b230c49 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -16,31 +16,30 @@ namespace Shader::IR { enum class Type { Void = 0, Opaque = 1 << 0, - Label = 1 << 1, - Reg = 1 << 2, - Pred = 1 << 3, - Attribute = 1 << 4, - Patch = 1 << 5, - U1 = 1 << 6, - U8 = 1 << 7, - U16 = 1 << 8, - U32 = 1 << 9, - U64 = 1 << 10, - F16 = 1 << 11, - F32 = 1 << 12, - F64 = 1 << 13, - U32x2 = 1 << 14, - U32x3 = 1 << 15, - U32x4 = 1 << 16, - F16x2 = 1 << 17, - F16x3 = 1 << 18, - F16x4 = 1 << 19, - F32x2 = 1 << 20, - F32x3 = 1 << 21, - F32x4 = 1 << 22, - F64x2 = 1 << 23, - F64x3 = 1 << 24, - F64x4 = 1 << 25, + Reg = 1 << 1, + Pred = 1 << 2, + Attribute = 1 << 3, + Patch = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index b962f170d7..d365ea1bcb 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -9,8 +9,6 @@ namespace Shader::IR { Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} -Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {} - Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} @@ -33,10 +31,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} -bool Value::IsLabel() const noexcept { - return type == Type::Label; -} - IR::Type Value::Type() const noexcept { if (IsPhi()) { // The type of a phi node is stored in its flags @@ -60,8 +54,6 @@ bool Value::operator==(const Value& other) const { return true; case Type::Opaque: return inst == other.inst; - case Type::Label: - return label == other.label; case Type::Reg: return reg == other.reg; case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index beaf149f3f..2ce49f953d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -37,7 +37,6 @@ class Value { public: Value() noexcept = default; explicit Value(IR::Inst* value) noexcept; - explicit Value(IR::Block* value) noexcept; explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; @@ -54,11 +53,9 @@ public: [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; - [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; [[nodiscard]] IR::Inst* Inst() const; - [[nodiscard]] IR::Block* Label() const; [[nodiscard]] IR::Inst* InstRecursive() const; [[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Reg Reg() const; @@ -80,7 +77,6 @@ private: IR::Type type{}; union { IR::Inst* inst{}; - IR::Block* label; IR::Reg reg; IR::Pred pred; IR::Attribute attribute; @@ -304,11 +300,6 @@ inline IR::Inst* Value::Inst() const { return inst; } -inline IR::Block* Value::Label() const { - DEBUG_ASSERT(type == Type::Label); - return label; -} - inline IR::Inst* Value::InstRecursive() const { DEBUG_ASSERT(type == Type::Opaque); if (IsIdentity()) { diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0d3f006991..017c4b8fdc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <memory> +#include <ranges> #include <vector> #include "shader_recompiler/frontend/ir/basic_block.h" @@ -15,6 +16,16 @@ namespace Shader::Maxwell { namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.block; }); + return blocks; +} + void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch @@ -23,7 +34,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { } const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); } @@ -110,8 +121,9 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - program.blocks = VisitAST(inst_pool, block_pool, env, cfg); - program.post_order_blocks = PostOrder(program.blocks); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); program.local_memory_size = env.LocalMemorySize(); switch (program.stage) { @@ -159,9 +171,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); std::swap(result.blocks, vertex_a.blocks); - for (IR::Block* block : vertex_b.blocks) { - result.blocks.push_back(block); - } + result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); @@ -173,7 +183,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.blocks); + result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cc5410c6df..e7e2e9c826 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -36,7 +36,6 @@ using Tree = boost::intrusive::list<Statement, // Avoid linear complexity on splice, size is never called boost::intrusive::constant_time_size<false>>; using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; enum class StatementType { Code, @@ -91,7 +90,8 @@ struct IndirectBranchCond {}; #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement #endif struct Statement : ListBaseHook { - Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} Statement(Goto, Statement* cond_, Node label_, Statement* up_) : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} @@ -125,7 +125,7 @@ struct Statement : ListBaseHook { } union { - IR::Block* code; + const Flow::Block* block; Node label; Tree children; IR::Condition guest_cond; @@ -171,8 +171,8 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->code->LocationBegin(), stmt->code->LocationEnd(), - reinterpret_cast<uintptr_t>(stmt->code)); + stmt->block->begin, stmt->block->end, + reinterpret_cast<uintptr_t>(stmt->block)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -407,11 +407,7 @@ private: }}; root.push_front(make_reset_variable()); root.insert(ip, make_reset_variable()); - - const u32 begin_offset{block.begin.Offset()}; - const u32 end_offset{block.end.Offset()}; - IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; - root.insert(ip, *pool.Create(ir_block, &root_stmt)); + root.insert(ip, *pool.Create(&block, &root_stmt)); switch (block.end_class) { case Flow::EndClass::Branch: { @@ -620,13 +616,13 @@ private: Statement root_stmt{FunctionTag{}}; }; -IR::Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; while (forward_node != end && !HasChildren(forward_node->type)) { if (forward_node->type == StatementType::Code) { - return forward_node->code; + return &*forward_node; } ++forward_node; } @@ -654,21 +650,29 @@ class TranslatePass { public: TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::BlockList& block_list_) + IR::AbstractSyntaxList& syntax_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, - block_list{block_list_} { + syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*block_list.front()}; + IR::Block& first_block{*syntax_list.front().block}; IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } private: - void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.block = current_block; + }}; Tree& tree{parent.children}; - IR::Block* current_block{nullptr}; - for (auto it = tree.begin(); it != tree.end(); ++it) { Statement& stmt{*it}; switch (stmt.type) { @@ -676,124 +680,157 @@ private: // Labels can be ignored break; case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IR::IREmitter{*current_block}.Branch(stmt.code); - } - current_block = stmt.code; - Translate(env, stmt.code); - block_list.push_back(stmt.code); + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); break; } case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } case StatementType::SetIndirectBranchVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; ir.SetIndirectBranchVariable(address); break; } case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const merge_block{MergeBlock(parent, stmt)}; - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - // Implement if header block - IR::Block* const first_if_block{block_list.at(first_block_index)}; IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - ir.BranchConditional(cond, first_if_block, merge_block); + ir.BranchConditionRef(cond); + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.if_node.cond = cond; + if_node.if_node.body = then_block; + if_node.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; if (current_block) { - IR::IREmitter{*current_block}.Branch(loop_header_block); + current_block->AddBranch(loop_header_block); } - block_list.push_back(loop_header_block); + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.block = loop_header_block; - IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); - // Implement loop header block - IR::Block* const first_loop_block{block_list.at(first_block_index)}; - IR::IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); + IR::Block* const body_block{syntax_list.at(body_block_index).block}; + loop_header_block->AddBranch(body_block); - // Implement continue block - IR::IREmitter continue_ir{*new_continue_block}; - const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.loop.body = body_block; + loop.loop.continue_block = continue_block; + loop.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.repeat.cond = cond; + repeat.repeat.loop_header = loop_header_block; + repeat.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.break_node.cond = cond; + break_node.break_node.merge = break_block; + break_node.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = skip_block; break; } case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter ir{*current_block}; - ir.Epilogue(); - ir.Return(); + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } case StatementType::Kill: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = demote_block; break; } case StatementType::Unreachable: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter{*current_block}.Unreachable(); + ensure_block(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; break; } default: @@ -801,42 +838,42 @@ private: } } if (current_block) { - IR::IREmitter ir{*current_block}; - if (continue_block) { - ir.Branch(continue_block); + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); } else { - ir.Unreachable(); + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; } } } IR::Block* MergeBlock(Statement& parent, Statement& stmt) { - if (IR::Block* const block{TryFindForwardBlock(stmt)}) { - return block; + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); } - // Create a merge block we can visit later - IR::Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; + return block_pool.Create(inst_pool); } ObjectPool<Statement>& stmt_pool; ObjectPool<IR::Inst>& inst_pool; ObjectPool<IR::Block>& block_pool; Environment& env; - IR::BlockList& block_list; + IR::AbstractSyntaxList& syntax_list; + // TODO: Make this constexpr when std::vector is constexpr + const Flow::Block dummy_flow_block; }; } // Anonymous namespace -IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, - Environment& env, Flow::CFG& cfg) { +IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, + Environment& env, Flow::CFG& cfg) { ObjectPool<Statement> stmt_pool{64}; GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; Statement& root{goto_pass.RootStatement()}; - IR::BlockList block_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; - return block_list; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index a6be12ba2e..88b0836491 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -4,12 +4,8 @@ #pragma once -#include <functional> -#include <span> - -#include <boost/intrusive/list.hpp> - #include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -17,8 +13,8 @@ namespace Shader::Maxwell { -[[nodiscard]] IR::BlockList VisitAST(ObjectPool<IR::Inst>& inst_pool, - ObjectPool<IR::Block>& block_pool, Environment& env, - Flow::CFG& cfg); +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, + ObjectPool<IR::Block>& block_pool, Environment& env, + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index f1230f58fe..0f4e7a251a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,13 +23,12 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -void Translate(Environment& env, IR::Block* block) { - if (block->IsVirtual()) { +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { return; } TranslatorVisitor visitor{env, *block}; - const Location pc_end{block->LocationEnd()}; - for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { + for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index e1aa2e0f4b..a3edd2e466 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -9,6 +9,6 @@ namespace Shader::Maxwell { -void Translate(Environment& env, IR::Block* block); +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index b1c45d13a8..66f1391dbf 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -353,24 +353,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; } -void FoldBranchConditional(IR::Inst& inst) { - const IR::U1 cond{inst.Arg(0)}; - if (cond.IsImmediate()) { - // TODO: Convert to Branch - return; - } - const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { - const IR::Value true_label{inst.Arg(1)}; - const IR::Value false_label{inst.Arg(2)}; - // Remove negation on the conditional (take the parameter out of LogicalNot) and swap - // the branches - inst.SetArg(0, cond_inst->Arg(0)); - inst.SetArg(1, false_label); - inst.SetArg(2, true_label); - } -} - std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; @@ -581,8 +563,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return (base & ~(~(~0u << bits) << offset)) | (insert << offset); }); return; - case IR::Opcode::BranchConditional: - return FoldBranchConditional(inst); case IR::Opcode::CompositeExtractF32x2: return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, IR::Opcode::CompositeInsertF32x2); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index f2d7db0e6b..b0a9f5258e 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,60 +13,16 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program& program) { - bool replaced_join{}; - bool eliminated_epilogue{}; - for (IR::Block* const block : program.post_order_blocks) { - for (IR::Inst& inst : block->Instructions()) { - switch (inst.GetOpcode()) { - case IR::Opcode::Return: - inst.ReplaceOpcode(IR::Opcode::Join); - replaced_join = true; - break; - case IR::Opcode::Epilogue: - inst.Invalidate(); - eliminated_epilogue = true; - break; - default: - break; - } - if (replaced_join && eliminated_epilogue) { - return; - } - } - } +void VertexATransformPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } -void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.blocks) { - for (IR::Inst& inst : block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Prologue) { - return inst.Invalidate(); - } - } - } +void VertexBTransformPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } -void DualVertexJoinPass(IR::Program& program) { - const auto& blocks = program.blocks; - const s64 sub_size = static_cast<s64>(blocks.size()) - 1; - if (sub_size < 1) { - throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); - } - for (s64 index = 0; index < sub_size; ++index) { - IR::Block* const current_block{blocks[index]}; - IR::Block* const next_block{blocks[index + 1]}; - for (IR::Inst& inst : current_block->Instructions()) { - if (inst.GetOpcode() == IR::Opcode::Join) { - IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; - ir.Branch(next_block); - inst.Invalidate(); - // Only 1 join should exist - return; - } - } - } - throw LogicError("Dual Vertex Join pass failed, no join present"); +void DualVertexJoinPass(IR::Program&) { + throw NotImplementedException("VertexA pass"); } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 6afbe24f7f..e9b55f8358 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -12,7 +12,6 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Program& program) { std::vector<IR::Inst*> to_invalidate; - for (IR::Block* const block : program.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index a8064a5d00..26eb3a3abc 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -202,7 +202,7 @@ public: incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; - } else if (const std::span imm_preds{block->ImmediatePredecessors()}; + } else if (const std::span imm_preds = block->ImmPredecessors(); imm_preds.size() == 1) { // Optimize the common case of one predecessor: no phi needed stack.back().pc = Status::SetValue; @@ -257,7 +257,7 @@ public: private: template <typename Type> IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { - for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { + for (IR::Block* const imm_pred : block->ImmPredecessors()) { phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); -- cgit v1.2.3-70-g09d2