ShaderIR: Refactor shader compilation to use functions.
This commit is contained in:
@@ -491,6 +491,8 @@ private:
|
||||
const Registry& registry;
|
||||
const ShaderType stage;
|
||||
|
||||
std::shared_ptr<ShaderFunctionIR> context_func;
|
||||
|
||||
std::size_t num_temporaries = 0;
|
||||
std::size_t max_temporaries = 0;
|
||||
|
||||
@@ -807,10 +809,12 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
|
||||
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
|
||||
DefineGlobalMemory();
|
||||
|
||||
context_func = ir.GetMainFunction();
|
||||
|
||||
AddLine("TEMP RC;");
|
||||
AddLine("TEMP FSWZA[4];");
|
||||
AddLine("TEMP FSWZB[4];");
|
||||
if (ir.IsDecompiled()) {
|
||||
if (context_func->IsDecompiled()) {
|
||||
DecompileAST();
|
||||
} else {
|
||||
DecompileBranchMode();
|
||||
@@ -1060,7 +1064,7 @@ void ARBDecompiler::InitializeVariables() {
|
||||
}
|
||||
|
||||
void ARBDecompiler::DecompileAST() {
|
||||
const u32 num_flow_variables = ir.GetASTNumVariables();
|
||||
const u32 num_flow_variables = context_func->GetASTNumVariables();
|
||||
for (u32 i = 0; i < num_flow_variables; ++i) {
|
||||
AddLine("TEMP F{};", i);
|
||||
}
|
||||
@@ -1070,12 +1074,12 @@ void ARBDecompiler::DecompileAST() {
|
||||
|
||||
InitializeVariables();
|
||||
|
||||
VisitAST(ir.GetASTProgram());
|
||||
VisitAST(context_func->GetASTProgram());
|
||||
}
|
||||
|
||||
void ARBDecompiler::DecompileBranchMode() {
|
||||
static constexpr u32 FLOW_STACK_SIZE = 20;
|
||||
if (!ir.IsFlowStackDisabled()) {
|
||||
if (!context_func->IsFlowStackDisabled()) {
|
||||
AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
|
||||
AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
|
||||
AddLine("TEMP SSY_TOP;");
|
||||
@@ -1084,15 +1088,15 @@ void ARBDecompiler::DecompileBranchMode() {
|
||||
|
||||
AddLine("TEMP PC;");
|
||||
|
||||
if (!ir.IsFlowStackDisabled()) {
|
||||
if (!context_func->IsFlowStackDisabled()) {
|
||||
AddLine("MOV.U SSY_TOP.x, 0;");
|
||||
AddLine("MOV.U PBK_TOP.x, 0;");
|
||||
}
|
||||
|
||||
InitializeVariables();
|
||||
|
||||
const auto basic_block_end = ir.GetBasicBlocks().end();
|
||||
auto basic_block_it = ir.GetBasicBlocks().begin();
|
||||
const auto basic_block_end = context_func->GetBasicBlocks().end();
|
||||
auto basic_block_it = context_func->GetBasicBlocks().begin();
|
||||
const u32 first_address = basic_block_it->first;
|
||||
AddLine("MOV.U PC.x, {};", first_address);
|
||||
|
||||
@@ -1174,7 +1178,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
|
||||
if (ast_return->kills) {
|
||||
AddLine("KIL TR;");
|
||||
} else {
|
||||
Exit();
|
||||
if (context_func->IsMain()) {
|
||||
Exit();
|
||||
} else {
|
||||
AddLine("RET;");
|
||||
}
|
||||
}
|
||||
if (!is_true) {
|
||||
AddLine("ENDIF;");
|
||||
|
||||
@@ -435,6 +435,8 @@ public:
|
||||
DeclareCustomVariables();
|
||||
DeclarePhysicalAttributeReader();
|
||||
|
||||
context_func = ir.GetMainFunction();
|
||||
|
||||
code.AddLine("void main() {{");
|
||||
++code.scope;
|
||||
|
||||
@@ -442,7 +444,7 @@ public:
|
||||
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
|
||||
}
|
||||
|
||||
if (ir.IsDecompiled()) {
|
||||
if (context_func->IsDecompiled()) {
|
||||
DecompileAST();
|
||||
} else {
|
||||
DecompileBranchMode();
|
||||
@@ -462,13 +464,13 @@ private:
|
||||
|
||||
void DecompileBranchMode() {
|
||||
// VM's program counter
|
||||
const auto first_address = ir.GetBasicBlocks().begin()->first;
|
||||
const auto first_address = context_func->GetBasicBlocks().begin()->first;
|
||||
code.AddLine("uint jmp_to = {}U;", first_address);
|
||||
|
||||
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
|
||||
// unlikely that shaders will use 20 nested SSYs and PBKs.
|
||||
constexpr u32 FLOW_STACK_SIZE = 20;
|
||||
if (!ir.IsFlowStackDisabled()) {
|
||||
if (!context_func->IsFlowStackDisabled()) {
|
||||
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
|
||||
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
|
||||
code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
|
||||
@@ -480,7 +482,7 @@ private:
|
||||
|
||||
code.AddLine("switch (jmp_to) {{");
|
||||
|
||||
for (const auto& pair : ir.GetBasicBlocks()) {
|
||||
for (const auto& pair : context_func->GetBasicBlocks()) {
|
||||
const auto& [address, bb] = pair;
|
||||
code.AddLine("case 0x{:X}U: {{", address);
|
||||
++code.scope;
|
||||
@@ -2388,7 +2390,7 @@ private:
|
||||
}
|
||||
|
||||
Expression Barrier(Operation) {
|
||||
if (!ir.IsDecompiled()) {
|
||||
if (!context_func->IsDecompiled()) {
|
||||
LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
|
||||
return {};
|
||||
}
|
||||
@@ -2755,6 +2757,8 @@ private:
|
||||
const Header header;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
std::shared_ptr<ShaderFunctionIR> context_func;
|
||||
|
||||
ShaderWriter code;
|
||||
|
||||
std::optional<u32> max_input_vertices;
|
||||
@@ -2904,7 +2908,9 @@ public:
|
||||
if (ast.kills) {
|
||||
decomp.code.AddLine("discard;");
|
||||
} else {
|
||||
decomp.PreExit();
|
||||
if (decomp.context_func->IsMain()) {
|
||||
decomp.PreExit();
|
||||
}
|
||||
decomp.code.AddLine("return;");
|
||||
}
|
||||
if (!is_true) {
|
||||
@@ -2937,13 +2943,13 @@ private:
|
||||
};
|
||||
|
||||
void GLSLDecompiler::DecompileAST() {
|
||||
const u32 num_flow_variables = ir.GetASTNumVariables();
|
||||
const u32 num_flow_variables = context_func->GetASTNumVariables();
|
||||
for (u32 i = 0; i < num_flow_variables; i++) {
|
||||
code.AddLine("bool {} = false;", GetFlowVariable(i));
|
||||
}
|
||||
|
||||
ASTDecompiler decompiler{*this};
|
||||
decompiler.Visit(ir.GetASTProgram());
|
||||
decompiler.Visit(context_func->GetASTProgram());
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
@@ -406,10 +406,12 @@ private:
|
||||
binding = DeclareStorageTexels(binding);
|
||||
binding = DeclareImages(binding);
|
||||
|
||||
context_func = ir.GetMainFunction();
|
||||
|
||||
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
|
||||
AddLabel();
|
||||
|
||||
if (ir.IsDecompiled()) {
|
||||
if (context_func->IsDecompiled()) {
|
||||
DeclareFlowVariables();
|
||||
DecompileAST();
|
||||
} else {
|
||||
@@ -441,7 +443,7 @@ private:
|
||||
void DecompileAST();
|
||||
|
||||
void DecompileBranchMode() {
|
||||
const u32 first_address = ir.GetBasicBlocks().begin()->first;
|
||||
const u32 first_address = context_func->GetBasicBlocks().begin()->first;
|
||||
const Id loop_label = OpLabel("loop");
|
||||
const Id merge_label = OpLabel("merge");
|
||||
const Id dummy_label = OpLabel();
|
||||
@@ -484,7 +486,7 @@ private:
|
||||
AddLabel(default_branch);
|
||||
OpReturn();
|
||||
|
||||
for (const auto& [address, bb] : ir.GetBasicBlocks()) {
|
||||
for (const auto& [address, bb] : context_func->GetBasicBlocks()) {
|
||||
AddLabel(labels.at(address));
|
||||
|
||||
VisitBasicBlock(bb);
|
||||
@@ -508,7 +510,7 @@ private:
|
||||
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
|
||||
|
||||
void AllocateLabels() {
|
||||
for (const auto& pair : ir.GetBasicBlocks()) {
|
||||
for (const auto& pair : context_func->GetBasicBlocks()) {
|
||||
const u32 address = pair.first;
|
||||
labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
|
||||
}
|
||||
@@ -656,7 +658,7 @@ private:
|
||||
}
|
||||
|
||||
void DeclareFlowVariables() {
|
||||
for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
|
||||
for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) {
|
||||
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
|
||||
Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
|
||||
flow_variables.emplace(i, AddGlobalVariable(id));
|
||||
@@ -2276,7 +2278,7 @@ private:
|
||||
}
|
||||
|
||||
Expression Barrier(Operation) {
|
||||
if (!ir.IsDecompiled()) {
|
||||
if (!context_func->IsDecompiled()) {
|
||||
LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
|
||||
return {};
|
||||
}
|
||||
@@ -2770,6 +2772,8 @@ private:
|
||||
const Specialization& specialization;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
std::shared_ptr<ShaderFunctionIR> context_func;
|
||||
|
||||
const Id t_void = Name(TypeVoid(), "void");
|
||||
|
||||
const Id t_bool = Name(TypeBool(), "bool");
|
||||
@@ -3049,7 +3053,9 @@ public:
|
||||
if (ast.kills) {
|
||||
decomp.OpKill();
|
||||
} else {
|
||||
decomp.PreExit();
|
||||
if (decomp.context_func->IsMain()) {
|
||||
decomp.PreExit();
|
||||
}
|
||||
decomp.OpReturn();
|
||||
}
|
||||
decomp.AddLabel(endif_label);
|
||||
@@ -3097,7 +3103,7 @@ private:
|
||||
};
|
||||
|
||||
void SPIRVDecompiler::DecompileAST() {
|
||||
const u32 num_flow_variables = ir.GetASTNumVariables();
|
||||
const u32 num_flow_variables = context_func->GetASTNumVariables();
|
||||
for (u32 i = 0; i < num_flow_variables; i++) {
|
||||
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
|
||||
Name(id, fmt::format("flow_var_{}", i));
|
||||
@@ -3106,7 +3112,7 @@ void SPIRVDecompiler::DecompileAST() {
|
||||
|
||||
DefinePrologue();
|
||||
|
||||
const ASTNode program = ir.GetASTProgram();
|
||||
const ASTNode program = context_func->GetASTProgram();
|
||||
ASTDecompiler decompiler{*this};
|
||||
decompiler.Visit(program);
|
||||
|
||||
|
||||
@@ -128,75 +128,98 @@ private:
|
||||
};
|
||||
|
||||
void ShaderIR::Decode() {
|
||||
const auto decode_function = ([this](ShaderFunction& shader_info) {
|
||||
coverage_end = std::max<u32>(0, shader_info.end);
|
||||
switch (shader_info.settings.depth) {
|
||||
case CompileDepth::FlowStack: {
|
||||
for (const auto& block : shader_info.blocks) {
|
||||
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CompileDepth::NoFlowStack: {
|
||||
disable_flow_stack = true;
|
||||
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
|
||||
if (label == static_cast<u32>(exit_branch)) {
|
||||
return;
|
||||
}
|
||||
basic_blocks.insert({label, nodes});
|
||||
};
|
||||
const auto& blocks = shader_info.blocks;
|
||||
NodeBlock current_block;
|
||||
u32 current_label = static_cast<u32>(exit_branch);
|
||||
for (const auto& block : blocks) {
|
||||
if (shader_info.labels.contains(block.start)) {
|
||||
insert_block(current_block, current_label);
|
||||
current_block.clear();
|
||||
current_label = block.start;
|
||||
}
|
||||
if (!block.ignore_branch) {
|
||||
DecodeRangeInner(current_block, block.start, block.end);
|
||||
InsertControlFlow(current_block, block);
|
||||
} else {
|
||||
DecodeRangeInner(current_block, block.start, block.end + 1);
|
||||
}
|
||||
}
|
||||
insert_block(current_block, current_label);
|
||||
break;
|
||||
}
|
||||
case CompileDepth::DecompileBackwards:
|
||||
case CompileDepth::FullDecompile: {
|
||||
program_manager = std::move(shader_info.manager);
|
||||
disable_flow_stack = true;
|
||||
decompiled = true;
|
||||
ASTDecoder decoder{*this};
|
||||
ASTNode program = program_manager.GetProgram();
|
||||
decoder.Visit(program);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
|
||||
[[fallthrough]];
|
||||
case CompileDepth::BruteForce: {
|
||||
const auto shader_end = static_cast<u32>(program_code.size());
|
||||
coverage_begin = main_offset;
|
||||
coverage_end = shader_end;
|
||||
for (u32 label = main_offset; label < shader_end; ++label) {
|
||||
basic_blocks.insert({label, DecodeRange(label, label + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (settings.depth != shader_info.settings.depth) {
|
||||
LOG_WARNING(
|
||||
HW_GPU,
|
||||
"Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
|
||||
CompileDepthAsString(settings.depth),
|
||||
CompileDepthAsString(shader_info.settings.depth));
|
||||
}
|
||||
});
|
||||
const auto gen_function =
|
||||
([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr<ShaderFunctionIR> {
|
||||
std::shared_ptr<ShaderFunctionIR> result;
|
||||
if (decompiled) {
|
||||
result = std::make_shared<ShaderFunctionIR>(std::move(program_manager), id,
|
||||
shader_info.start, shader_info.end);
|
||||
} else {
|
||||
result =
|
||||
std::make_shared<ShaderFunctionIR>(std::move(basic_blocks), disable_flow_stack,
|
||||
id, shader_info.start, shader_info.end);
|
||||
}
|
||||
decompiled = false;
|
||||
disable_flow_stack = false;
|
||||
basic_blocks.clear();
|
||||
program_manager.Clear();
|
||||
return result;
|
||||
});
|
||||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||
|
||||
decompiled = false;
|
||||
auto info = ScanFlow(program_code, main_offset, settings, registry);
|
||||
auto& shader_info = info->main;
|
||||
coverage_begin = shader_info.start;
|
||||
coverage_end = shader_info.end;
|
||||
switch (shader_info.settings.depth) {
|
||||
case CompileDepth::FlowStack: {
|
||||
for (const auto& block : shader_info.blocks) {
|
||||
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CompileDepth::NoFlowStack: {
|
||||
disable_flow_stack = true;
|
||||
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
|
||||
if (label == static_cast<u32>(exit_branch)) {
|
||||
return;
|
||||
}
|
||||
basic_blocks.insert({label, nodes});
|
||||
};
|
||||
const auto& blocks = shader_info.blocks;
|
||||
NodeBlock current_block;
|
||||
u32 current_label = static_cast<u32>(exit_branch);
|
||||
for (const auto& block : blocks) {
|
||||
if (shader_info.labels.contains(block.start)) {
|
||||
insert_block(current_block, current_label);
|
||||
current_block.clear();
|
||||
current_label = block.start;
|
||||
}
|
||||
if (!block.ignore_branch) {
|
||||
DecodeRangeInner(current_block, block.start, block.end);
|
||||
InsertControlFlow(current_block, block);
|
||||
} else {
|
||||
DecodeRangeInner(current_block, block.start, block.end + 1);
|
||||
}
|
||||
}
|
||||
insert_block(current_block, current_label);
|
||||
break;
|
||||
}
|
||||
case CompileDepth::DecompileBackwards:
|
||||
case CompileDepth::FullDecompile: {
|
||||
program_manager = std::move(shader_info.manager);
|
||||
disable_flow_stack = true;
|
||||
decompiled = true;
|
||||
ASTDecoder decoder{*this};
|
||||
ASTNode program = GetASTProgram();
|
||||
decoder.Visit(program);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
|
||||
[[fallthrough]];
|
||||
case CompileDepth::BruteForce: {
|
||||
const auto shader_end = static_cast<u32>(program_code.size());
|
||||
coverage_begin = main_offset;
|
||||
coverage_end = shader_end;
|
||||
for (u32 label = main_offset; label < shader_end; ++label) {
|
||||
basic_blocks.insert({label, DecodeRange(label, label + 1)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (settings.depth != shader_info.settings.depth) {
|
||||
LOG_WARNING(
|
||||
HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
|
||||
CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
|
||||
}
|
||||
coverage_begin = info->main.start;
|
||||
coverage_end = 0;
|
||||
decode_function(info->main);
|
||||
main_function = gen_function(info->main, 0);
|
||||
}
|
||||
|
||||
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
||||
|
||||
@@ -64,16 +64,68 @@ struct GlobalMemoryUsage {
|
||||
bool is_written{};
|
||||
};
|
||||
|
||||
class ShaderFunctionIR final {
|
||||
public:
|
||||
explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_,
|
||||
u32 id_, u32 coverage_begin_, u32 coverage_end_)
|
||||
: basic_blocks{std::move(basic_blocks_)}, decompiled{false},
|
||||
disable_flow_stack{disable_flow_stack}, id{id_}, coverage_begin{coverage_begin_},
|
||||
coverage_end{coverage_end_} {}
|
||||
explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_,
|
||||
u32 coverage_end_)
|
||||
: program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true},
|
||||
id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {}
|
||||
|
||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||
return basic_blocks;
|
||||
}
|
||||
|
||||
bool IsFlowStackDisabled() const {
|
||||
return disable_flow_stack;
|
||||
}
|
||||
|
||||
bool IsDecompiled() const {
|
||||
return decompiled;
|
||||
}
|
||||
|
||||
const ASTManager& GetASTManager() const {
|
||||
return program_manager;
|
||||
}
|
||||
|
||||
[[nodiscard]] ASTNode GetASTProgram() const {
|
||||
return program_manager.GetProgram();
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 GetASTNumVariables() const {
|
||||
return program_manager.GetVariables();
|
||||
}
|
||||
|
||||
bool IsMain() const {
|
||||
return id == 0;
|
||||
}
|
||||
|
||||
u32 GetId() const {
|
||||
return id;
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<u32, NodeBlock> basic_blocks;
|
||||
ASTManager program_manager{true, true};
|
||||
|
||||
bool decompiled{};
|
||||
bool disable_flow_stack{};
|
||||
u32 id{};
|
||||
|
||||
u32 coverage_begin{};
|
||||
u32 coverage_end{};
|
||||
};
|
||||
|
||||
class ShaderIR final {
|
||||
public:
|
||||
explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
|
||||
CompilerSettings settings_, Registry& registry_);
|
||||
~ShaderIR();
|
||||
|
||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||
return basic_blocks;
|
||||
}
|
||||
|
||||
const std::set<u32>& GetRegisters() const {
|
||||
return used_registers;
|
||||
}
|
||||
@@ -155,26 +207,6 @@ public:
|
||||
return header;
|
||||
}
|
||||
|
||||
bool IsFlowStackDisabled() const {
|
||||
return disable_flow_stack;
|
||||
}
|
||||
|
||||
bool IsDecompiled() const {
|
||||
return decompiled;
|
||||
}
|
||||
|
||||
const ASTManager& GetASTManager() const {
|
||||
return program_manager;
|
||||
}
|
||||
|
||||
ASTNode GetASTProgram() const {
|
||||
return program_manager.GetProgram();
|
||||
}
|
||||
|
||||
u32 GetASTNumVariables() const {
|
||||
return program_manager.GetVariables();
|
||||
}
|
||||
|
||||
u32 ConvertAddressToNvidiaSpace(u32 address) const {
|
||||
return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
|
||||
}
|
||||
@@ -190,6 +222,10 @@ public:
|
||||
return num_custom_variables;
|
||||
}
|
||||
|
||||
std::shared_ptr<ShaderFunctionIR> GetMainFunction() const {
|
||||
return main_function;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class ASTDecoder;
|
||||
|
||||
@@ -453,6 +489,9 @@ private:
|
||||
std::vector<Node> amend_code;
|
||||
u32 num_custom_variables{};
|
||||
|
||||
std::shared_ptr<ShaderFunctionIR> main_function;
|
||||
std::vector<std::shared_ptr<ShaderFunctionIR>> subfunctions;
|
||||
|
||||
std::set<u32> used_registers;
|
||||
std::set<Tegra::Shader::Pred> used_predicates;
|
||||
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
|
||||
|
||||
Reference in New Issue
Block a user