From b75a9b20e467bbd3929fbf4f355fdc18a75f33ba Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 3 Jan 2021 11:07:34 +0100 Subject: [PATCH] ShaderIR: Refactor shader compilation to use functions. --- .../renderer_opengl/gl_arb_decompiler.cpp | 24 ++- .../renderer_opengl/gl_shader_decompiler.cpp | 22 ++- .../renderer_vulkan/vk_shader_decompiler.cpp | 24 +-- src/video_core/shader/decode.cpp | 153 ++++++++++-------- src/video_core/shader/shader_ir.h | 87 +++++++--- 5 files changed, 196 insertions(+), 114 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index e8d8d2aa55..a1f5a1f8c3 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -491,6 +491,8 @@ private: const Registry& registry; const ShaderType stage; + std::shared_ptr context_func; + std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; @@ -807,10 +809,12 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { DefineGlobalMemory(); + context_func = ir.GetMainFunction(); + AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { + if (context_func->IsDecompiled()) { DecompileAST(); } else { DecompileBranchMode(); @@ -1060,7 +1064,7 @@ void ARBDecompiler::InitializeVariables() { } void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); + const u32 num_flow_variables = context_func->GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; ++i) { AddLine("TEMP F{};", i); } @@ -1070,12 +1074,12 @@ void ARBDecompiler::DecompileAST() { InitializeVariables(); - VisitAST(ir.GetASTProgram()); + VisitAST(context_func->GetASTProgram()); } void ARBDecompiler::DecompileBranchMode() { static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { + if 
(!context_func->IsFlowStackDisabled()) { AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); AddLine("TEMP SSY_TOP;"); @@ -1084,15 +1088,15 @@ void ARBDecompiler::DecompileBranchMode() { AddLine("TEMP PC;"); - if (!ir.IsFlowStackDisabled()) { + if (!context_func->IsFlowStackDisabled()) { AddLine("MOV.U SSY_TOP.x, 0;"); AddLine("MOV.U PBK_TOP.x, 0;"); } InitializeVariables(); - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); + const auto basic_block_end = context_func->GetBasicBlocks().end(); + auto basic_block_it = context_func->GetBasicBlocks().begin(); const u32 first_address = basic_block_it->first; AddLine("MOV.U PC.x, {};", first_address); @@ -1174,7 +1178,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { if (ast_return->kills) { AddLine("KIL TR;"); } else { - Exit(); + if (context_func->IsMain()) { + Exit(); + } else { + AddLine("RET;"); + } } if (!is_true) { AddLine("ENDIF;"); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 9c28498e85..7d9d627a42 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -435,6 +435,8 @@ public: DeclareCustomVariables(); DeclarePhysicalAttributeReader(); + context_func = ir.GetMainFunction(); + code.AddLine("void main() {{"); ++code.scope; @@ -442,7 +444,7 @@ public: code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); } - if (ir.IsDecompiled()) { + if (context_func->IsDecompiled()) { DecompileAST(); } else { DecompileBranchMode(); @@ -462,13 +464,13 @@ private: void DecompileBranchMode() { // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; + const auto first_address = context_func->GetBasicBlocks().begin()->first; code.AddLine("uint jmp_to = {}U;", first_address); // TODO(Subv): Figure out the actual depth of the flow 
stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { + if (!context_func->IsFlowStackDisabled()) { for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); @@ -480,7 +482,7 @@ private: code.AddLine("switch (jmp_to) {{"); - for (const auto& pair : ir.GetBasicBlocks()) { + for (const auto& pair : context_func->GetBasicBlocks()) { const auto& [address, bb] = pair; code.AddLine("case 0x{:X}U: {{", address); ++code.scope; @@ -2388,7 +2390,7 @@ private: } Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { + if (!context_func->IsDecompiled()) { LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); return {}; } @@ -2755,6 +2757,8 @@ private: const Header header; std::unordered_map transform_feedback; + std::shared_ptr context_func; + ShaderWriter code; std::optional max_input_vertices; @@ -2904,7 +2908,9 @@ public: if (ast.kills) { decomp.code.AddLine("discard;"); } else { - decomp.PreExit(); + if (decomp.context_func->IsMain()) { + decomp.PreExit(); + } decomp.code.AddLine("return;"); } if (!is_true) { @@ -2937,13 +2943,13 @@ private: }; void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); + const u32 num_flow_variables = context_func->GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; i++) { code.AddLine("bool {} = false;", GetFlowVariable(i)); } ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); + decompiler.Visit(context_func->GetASTProgram()); } } // Anonymous namespace diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index c6846d8861..4ead6c5f8b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ 
b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -406,10 +406,12 @@ private: binding = DeclareStorageTexels(binding); binding = DeclareImages(binding); + context_func = ir.GetMainFunction(); + const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); AddLabel(); - if (ir.IsDecompiled()) { + if (context_func->IsDecompiled()) { DeclareFlowVariables(); DecompileAST(); } else { @@ -441,7 +443,7 @@ private: void DecompileAST(); void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; + const u32 first_address = context_func->GetBasicBlocks().begin()->first; const Id loop_label = OpLabel("loop"); const Id merge_label = OpLabel("merge"); const Id dummy_label = OpLabel(); @@ -484,7 +486,7 @@ private: AddLabel(default_branch); OpReturn(); - for (const auto& [address, bb] : ir.GetBasicBlocks()) { + for (const auto& [address, bb] : context_func->GetBasicBlocks()) { AddLabel(labels.at(address)); VisitBasicBlock(bb); @@ -508,7 +510,7 @@ private: static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { + for (const auto& pair : context_func->GetBasicBlocks()) { const u32 address = pair.first; labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); } @@ -656,7 +658,7 @@ private: } void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { + for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); Name(id, fmt::format("flow_var_{}", static_cast(i))); flow_variables.emplace(i, AddGlobalVariable(id)); @@ -2276,7 +2278,7 @@ private: } Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { + if (!context_func->IsDecompiled()) { LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); return {}; } @@ -2770,6 +2772,8 @@ private: const Specialization& specialization; std::unordered_map 
transform_feedback; + std::shared_ptr context_func; + const Id t_void = Name(TypeVoid(), "void"); const Id t_bool = Name(TypeBool(), "bool"); @@ -3049,7 +3053,9 @@ public: if (ast.kills) { decomp.OpKill(); } else { - decomp.PreExit(); + if (decomp.context_func->IsMain()) { + decomp.PreExit(); + } decomp.OpReturn(); } decomp.AddLabel(endif_label); @@ -3097,7 +3103,7 @@ private: }; void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); + const u32 num_flow_variables = context_func->GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; i++) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); Name(id, fmt::format("flow_var_{}", i)); @@ -3106,7 +3112,7 @@ void SPIRVDecompiler::DecompileAST() { DefinePrologue(); - const ASTNode program = ir.GetASTProgram(); + const ASTNode program = context_func->GetASTProgram(); ASTDecompiler decompiler{*this}; decompiler.Visit(program); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index bbc7a6f5df..eaa8b46bb0 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -128,75 +128,98 @@ private: }; void ShaderIR::Decode() { + const auto decode_function = ([this](ShaderFunction& shader_info) { + coverage_end = std::max(0, shader_info.end); + switch (shader_info.settings.depth) { + case CompileDepth::FlowStack: { + for (const auto& block : shader_info.blocks) { + basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); + } + break; + } + case CompileDepth::NoFlowStack: { + disable_flow_stack = true; + const auto insert_block = [this](NodeBlock& nodes, u32 label) { + if (label == static_cast(exit_branch)) { + return; + } + basic_blocks.insert({label, nodes}); + }; + const auto& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = static_cast(exit_branch); + for (const auto& block : blocks) { + if (shader_info.labels.contains(block.start)) { + 
insert_block(current_block, current_label); + current_block.clear(); + current_label = block.start; + } + if (!block.ignore_branch) { + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); + } else { + DecodeRangeInner(current_block, block.start, block.end + 1); + } + } + insert_block(current_block, current_label); + break; + } + case CompileDepth::DecompileBackwards: + case CompileDepth::FullDecompile: { + program_manager = std::move(shader_info.manager); + disable_flow_stack = true; + decompiled = true; + ASTDecoder decoder{*this}; + ASTNode program = program_manager.GetProgram(); + decoder.Visit(program); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); + [[fallthrough]]; + case CompileDepth::BruteForce: { + const auto shader_end = static_cast(program_code.size()); + coverage_begin = main_offset; + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; ++label) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); + } + break; + } + } + if (settings.depth != shader_info.settings.depth) { + LOG_WARNING( + HW_GPU, + "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", + CompileDepthAsString(settings.depth), + CompileDepthAsString(shader_info.settings.depth)); + } + }); + const auto gen_function = + ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr { + std::shared_ptr result; + if (decompiled) { + result = std::make_shared(std::move(program_manager), id, + shader_info.start, shader_info.end); + } else { + result = + std::make_shared(std::move(basic_blocks), disable_flow_stack, + id, shader_info.start, shader_info.end); + } + decompiled = false; + disable_flow_stack = false; + basic_blocks.clear(); + program_manager.Clear(); + return result; + }); std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& 
shader_info = info->main; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - 
CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } + coverage_begin = info->main.start; + coverage_end = 0; + decode_function(info->main); + main_function = gen_function(info->main, 0); } NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 9f1bc2eb41..b7a33d0ae8 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -64,16 +64,68 @@ struct GlobalMemoryUsage { bool is_written{}; }; +class ShaderFunctionIR final { /* IR of one shader function: built either from basic blocks (decompiled == false) or from an ASTManager (decompiled == true); id 0 is the main function. */ +public: + explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_, + u32 id_, u32 coverage_begin_, u32 coverage_end_) + : basic_blocks{std::move(basic_blocks_)}, decompiled{false}, + disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_}, + coverage_end{coverage_end_} {} + explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_, + u32 coverage_end_) + : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true}, + id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {} + + const std::map<u32, NodeBlock>& GetBasicBlocks() const { + return basic_blocks; + } + + bool IsFlowStackDisabled() const { + return disable_flow_stack; + } + + bool IsDecompiled() const { + return decompiled; + } + + const ASTManager& GetASTManager() const { + return program_manager; + } + + [[nodiscard]] ASTNode GetASTProgram() const { + return program_manager.GetProgram(); + } + + [[nodiscard]] u32 GetASTNumVariables() const { + return program_manager.GetVariables(); + } + + bool IsMain() const { + return id == 0; + } + + u32 GetId() const { + return id; + } + +private: + std::map<u32, NodeBlock> basic_blocks; + ASTManager program_manager{true, true}; + + bool decompiled{}; + bool disable_flow_stack{}; + u32 id{}; + + u32 coverage_begin{}; + u32 coverage_end{}; +}; + class ShaderIR final { public: explicit ShaderIR(const 
ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, Registry& registry_); ~ShaderIR(); - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - const std::set& GetRegisters() const { return used_registers; } @@ -155,26 +207,6 @@ public: return header; } - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - u32 ConvertAddressToNvidiaSpace(u32 address) const { return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); } @@ -190,6 +222,10 @@ public: return num_custom_variables; } + std::shared_ptr GetMainFunction() const { + return main_function; + } + private: friend class ASTDecoder; @@ -453,6 +489,9 @@ private: std::vector amend_code; u32 num_custom_variables{}; + std::shared_ptr main_function; + std::vector> subfunctions; + std::set used_registers; std::set used_predicates; std::set used_input_attributes;