ShaderIR: Refactor shader compilation to use functions.

This commit is contained in:
Fernando Sahmkow
2021-01-03 11:07:34 +01:00
committed by Morph
parent 7b779b26d2
commit b75a9b20e4
5 changed files with 196 additions and 114 deletions

View File

@@ -491,6 +491,8 @@ private:
const Registry& registry;
const ShaderType stage;
std::shared_ptr<ShaderFunctionIR> context_func;
std::size_t num_temporaries = 0;
std::size_t max_temporaries = 0;
@@ -807,10 +809,12 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R
: device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
DefineGlobalMemory();
context_func = ir.GetMainFunction();
AddLine("TEMP RC;");
AddLine("TEMP FSWZA[4];");
AddLine("TEMP FSWZB[4];");
if (ir.IsDecompiled()) {
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
@@ -1060,7 +1064,7 @@ void ARBDecompiler::InitializeVariables() {
}
void ARBDecompiler::DecompileAST() {
const u32 num_flow_variables = ir.GetASTNumVariables();
const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; ++i) {
AddLine("TEMP F{};", i);
}
@@ -1070,12 +1074,12 @@ void ARBDecompiler::DecompileAST() {
InitializeVariables();
VisitAST(ir.GetASTProgram());
VisitAST(context_func->GetASTProgram());
}
void ARBDecompiler::DecompileBranchMode() {
static constexpr u32 FLOW_STACK_SIZE = 20;
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
AddLine("TEMP SSY_TOP;");
@@ -1084,15 +1088,15 @@ void ARBDecompiler::DecompileBranchMode() {
AddLine("TEMP PC;");
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
AddLine("MOV.U SSY_TOP.x, 0;");
AddLine("MOV.U PBK_TOP.x, 0;");
}
InitializeVariables();
const auto basic_block_end = ir.GetBasicBlocks().end();
auto basic_block_it = ir.GetBasicBlocks().begin();
const auto basic_block_end = context_func->GetBasicBlocks().end();
auto basic_block_it = context_func->GetBasicBlocks().begin();
const u32 first_address = basic_block_it->first;
AddLine("MOV.U PC.x, {};", first_address);
@@ -1174,7 +1178,11 @@ void ARBDecompiler::VisitAST(const ASTNode& node) {
if (ast_return->kills) {
AddLine("KIL TR;");
} else {
Exit();
if (context_func->IsMain()) {
Exit();
} else {
AddLine("RET;");
}
}
if (!is_true) {
AddLine("ENDIF;");

View File

@@ -435,6 +435,8 @@ public:
DeclareCustomVariables();
DeclarePhysicalAttributeReader();
context_func = ir.GetMainFunction();
code.AddLine("void main() {{");
++code.scope;
@@ -442,7 +444,7 @@ public:
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
}
if (ir.IsDecompiled()) {
if (context_func->IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
@@ -462,13 +464,13 @@ private:
void DecompileBranchMode() {
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
const auto first_address = context_func->GetBasicBlocks().begin()->first;
code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
if (!ir.IsFlowStackDisabled()) {
if (!context_func->IsFlowStackDisabled()) {
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -480,7 +482,7 @@ private:
code.AddLine("switch (jmp_to) {{");
for (const auto& pair : ir.GetBasicBlocks()) {
for (const auto& pair : context_func->GetBasicBlocks()) {
const auto& [address, bb] = pair;
code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
@@ -2388,7 +2390,7 @@ private:
}
Expression Barrier(Operation) {
if (!ir.IsDecompiled()) {
if (!context_func->IsDecompiled()) {
LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
return {};
}
@@ -2755,6 +2757,8 @@ private:
const Header header;
std::unordered_map<u8, VaryingTFB> transform_feedback;
std::shared_ptr<ShaderFunctionIR> context_func;
ShaderWriter code;
std::optional<u32> max_input_vertices;
@@ -2904,7 +2908,9 @@ public:
if (ast.kills) {
decomp.code.AddLine("discard;");
} else {
decomp.PreExit();
if (decomp.context_func->IsMain()) {
decomp.PreExit();
}
decomp.code.AddLine("return;");
}
if (!is_true) {
@@ -2937,13 +2943,13 @@ private:
};
void GLSLDecompiler::DecompileAST() {
const u32 num_flow_variables = ir.GetASTNumVariables();
const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; i++) {
code.AddLine("bool {} = false;", GetFlowVariable(i));
}
ASTDecompiler decompiler{*this};
decompiler.Visit(ir.GetASTProgram());
decompiler.Visit(context_func->GetASTProgram());
}
} // Anonymous namespace

View File

@@ -406,10 +406,12 @@ private:
binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
context_func = ir.GetMainFunction();
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
AddLabel();
if (ir.IsDecompiled()) {
if (context_func->IsDecompiled()) {
DeclareFlowVariables();
DecompileAST();
} else {
@@ -441,7 +443,7 @@ private:
void DecompileAST();
void DecompileBranchMode() {
const u32 first_address = ir.GetBasicBlocks().begin()->first;
const u32 first_address = context_func->GetBasicBlocks().begin()->first;
const Id loop_label = OpLabel("loop");
const Id merge_label = OpLabel("merge");
const Id dummy_label = OpLabel();
@@ -484,7 +486,7 @@ private:
AddLabel(default_branch);
OpReturn();
for (const auto& [address, bb] : ir.GetBasicBlocks()) {
for (const auto& [address, bb] : context_func->GetBasicBlocks()) {
AddLabel(labels.at(address));
VisitBasicBlock(bb);
@@ -508,7 +510,7 @@ private:
static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
void AllocateLabels() {
for (const auto& pair : ir.GetBasicBlocks()) {
for (const auto& pair : context_func->GetBasicBlocks()) {
const u32 address = pair.first;
labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
}
@@ -656,7 +658,7 @@ private:
}
void DeclareFlowVariables() {
for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
flow_variables.emplace(i, AddGlobalVariable(id));
@@ -2276,7 +2278,7 @@ private:
}
Expression Barrier(Operation) {
if (!ir.IsDecompiled()) {
if (!context_func->IsDecompiled()) {
LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
return {};
}
@@ -2770,6 +2772,8 @@ private:
const Specialization& specialization;
std::unordered_map<u8, VaryingTFB> transform_feedback;
std::shared_ptr<ShaderFunctionIR> context_func;
const Id t_void = Name(TypeVoid(), "void");
const Id t_bool = Name(TypeBool(), "bool");
@@ -3049,7 +3053,9 @@ public:
if (ast.kills) {
decomp.OpKill();
} else {
decomp.PreExit();
if (decomp.context_func->IsMain()) {
decomp.PreExit();
}
decomp.OpReturn();
}
decomp.AddLabel(endif_label);
@@ -3097,7 +3103,7 @@ private:
};
void SPIRVDecompiler::DecompileAST() {
const u32 num_flow_variables = ir.GetASTNumVariables();
const u32 num_flow_variables = context_func->GetASTNumVariables();
for (u32 i = 0; i < num_flow_variables; i++) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
Name(id, fmt::format("flow_var_{}", i));
@@ -3106,7 +3112,7 @@ void SPIRVDecompiler::DecompileAST() {
DefinePrologue();
const ASTNode program = ir.GetASTProgram();
const ASTNode program = context_func->GetASTProgram();
ASTDecompiler decompiler{*this};
decompiler.Visit(program);

View File

@@ -128,75 +128,98 @@ private:
};
void ShaderIR::Decode() {
// Decodes one scanned function (shader_info) into this ShaderIR's scratch state
// (basic_blocks / program_manager / decompiled / disable_flow_stack), choosing the
// strategy from shader_info.settings.depth.
const auto decode_function = ([this](ShaderFunction& shader_info) {
// NOTE(review): std::max<u32>(0, x) is just x for an unsigned value, so this is a plain
// assignment. Possibly meant std::max<u32>(coverage_end, shader_info.end) - confirm.
coverage_end = std::max<u32>(0, shader_info.end);
switch (shader_info.settings.depth) {
case CompileDepth::FlowStack: {
// One basic block per scanned block, keyed by its start address.
for (const auto& block : shader_info.blocks) {
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
}
break;
}
case CompileDepth::NoFlowStack: {
disable_flow_stack = true;
// Stores the accumulated nodes under `label`; nodes still tagged with the exit_branch
// sentinel (i.e. gathered before any label was seen) are not inserted.
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
if (label == static_cast<u32>(exit_branch)) {
return;
}
basic_blocks.insert({label, nodes});
};
const auto& blocks = shader_info.blocks;
NodeBlock current_block;
u32 current_label = static_cast<u32>(exit_branch);
for (const auto& block : blocks) {
// A block whose start is a known label begins a new basic block: flush what has been
// accumulated so far and start collecting under the new label.
if (shader_info.labels.contains(block.start)) {
insert_block(current_block, current_label);
current_block.clear();
current_label = block.start;
}
if (!block.ignore_branch) {
// Decode with block.end as the range end (one less than the ignore_branch case
// below) and let InsertControlFlow append to current_block for this block.
DecodeRangeInner(current_block, block.start, block.end);
InsertControlFlow(current_block, block);
} else {
DecodeRangeInner(current_block, block.start, block.end + 1);
}
}
// Flush the last accumulated block.
insert_block(current_block, current_label);
break;
}
case CompileDepth::DecompileBackwards:
case CompileDepth::FullDecompile: {
// Take over the AST manager from the scan result and run ASTDecoder over its program.
program_manager = std::move(shader_info.manager);
disable_flow_stack = true;
decompiled = true;
ASTDecoder decoder{*this};
ASTNode program = program_manager.GetProgram();
decoder.Visit(program);
break;
}
default:
// Unknown depth: log and deliberately fall through to the brute-force path.
LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
[[fallthrough]];
case CompileDepth::BruteForce: {
// Overrides the coverage range with [main_offset, program_code.size()) and emits one
// basic block per offset in that range.
const auto shader_end = static_cast<u32>(program_code.size());
coverage_begin = main_offset;
coverage_end = shader_end;
for (u32 label = main_offset; label < shader_end; ++label) {
basic_blocks.insert({label, DecodeRange(label, label + 1)});
}
break;
}
}
// Warn when the depth reported in shader_info differs from the requested settings.depth.
if (settings.depth != shader_info.settings.depth) {
LOG_WARNING(
HW_GPU,
"Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
CompileDepthAsString(settings.depth),
CompileDepthAsString(shader_info.settings.depth));
}
});
// Packages the scratch decode state into a ShaderFunctionIR with the given id and the
// [start, end] range of shader_info, then resets that state for the next function.
const auto gen_function =
    ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr<ShaderFunctionIR> {
        // Decompiled functions hand over the AST manager; everything else hands over the
        // basic block map together with the flow-stack flag.
        std::shared_ptr<ShaderFunctionIR> function_ir =
            decompiled ? std::make_shared<ShaderFunctionIR>(std::move(program_manager), id,
                                                            shader_info.start, shader_info.end)
                       : std::make_shared<ShaderFunctionIR>(std::move(basic_blocks),
                                                            disable_flow_stack, id,
                                                            shader_info.start, shader_info.end);

        // Return the moved-from containers and the mode flags to their initial state.
        basic_blocks.clear();
        program_manager.Clear();
        disable_flow_stack = false;
        decompiled = false;
        return function_ir;
    });
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = info->main;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
switch (shader_info.settings.depth) {
case CompileDepth::FlowStack: {
for (const auto& block : shader_info.blocks) {
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
}
break;
}
case CompileDepth::NoFlowStack: {
disable_flow_stack = true;
const auto insert_block = [this](NodeBlock& nodes, u32 label) {
if (label == static_cast<u32>(exit_branch)) {
return;
}
basic_blocks.insert({label, nodes});
};
const auto& blocks = shader_info.blocks;
NodeBlock current_block;
u32 current_label = static_cast<u32>(exit_branch);
for (const auto& block : blocks) {
if (shader_info.labels.contains(block.start)) {
insert_block(current_block, current_label);
current_block.clear();
current_label = block.start;
}
if (!block.ignore_branch) {
DecodeRangeInner(current_block, block.start, block.end);
InsertControlFlow(current_block, block);
} else {
DecodeRangeInner(current_block, block.start, block.end + 1);
}
}
insert_block(current_block, current_label);
break;
}
case CompileDepth::DecompileBackwards:
case CompileDepth::FullDecompile: {
program_manager = std::move(shader_info.manager);
disable_flow_stack = true;
decompiled = true;
ASTDecoder decoder{*this};
ASTNode program = GetASTProgram();
decoder.Visit(program);
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
[[fallthrough]];
case CompileDepth::BruteForce: {
const auto shader_end = static_cast<u32>(program_code.size());
coverage_begin = main_offset;
coverage_end = shader_end;
for (u32 label = main_offset; label < shader_end; ++label) {
basic_blocks.insert({label, DecodeRange(label, label + 1)});
}
break;
}
}
if (settings.depth != shader_info.settings.depth) {
LOG_WARNING(
HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
}
coverage_begin = info->main.start;
coverage_end = 0;
decode_function(info->main);
main_function = gen_function(info->main, 0);
}
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {

View File

@@ -64,16 +64,68 @@ struct GlobalMemoryUsage {
bool is_written{};
};
/// Decoded representation of a single shader function.
///
/// A function is stored in one of two forms, selected by the constructor used:
///  - a map of basic blocks keyed by address (IsDecompiled() == false), or
///  - an AST held by an ASTManager (IsDecompiled() == true).
/// The function with id 0 is the main function (see IsMain()).
class ShaderFunctionIR final {
public:
    /// Builds a function from basic blocks (non-decompiled form).
    /// @param basic_blocks_       Basic blocks keyed by address; moved into the object.
    /// @param disable_flow_stack_ Value later reported by IsFlowStackDisabled().
    /// @param id_                 Function identifier; 0 denotes the main function.
    /// @param coverage_begin_     Start of the range covered by this function.
    /// @param coverage_end_       End of the range covered by this function.
    explicit ShaderFunctionIR(std::map<u32, NodeBlock>&& basic_blocks_, bool disable_flow_stack_,
                              u32 id_, u32 coverage_begin_, u32 coverage_end_)
        : basic_blocks{std::move(basic_blocks_)}, decompiled{false},
          // Initialise from the constructor argument (trailing underscore). Using the member's
          // own name here would read the member before it has a value.
          disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_},
          coverage_end{coverage_end_} {}

    /// Builds a function from an AST (decompiled form). The flow stack is always reported as
    /// disabled for this form.
    /// @param program_manager_ AST manager; moved into the object.
    /// @param id_              Function identifier; 0 denotes the main function.
    /// @param coverage_begin_  Start of the range covered by this function.
    /// @param coverage_end_    End of the range covered by this function.
    explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_,
                              u32 coverage_end_)
        : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true},
          id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {}

    /// Basic blocks keyed by address; empty when constructed from an AST.
    [[nodiscard]] const std::map<u32, NodeBlock>& GetBasicBlocks() const {
        return basic_blocks;
    }

    /// True when this function was built with the flow stack disabled.
    [[nodiscard]] bool IsFlowStackDisabled() const {
        return disable_flow_stack;
    }

    /// True when this function is stored as an AST rather than as basic blocks.
    [[nodiscard]] bool IsDecompiled() const {
        return decompiled;
    }

    /// The AST manager owned by this function.
    [[nodiscard]] const ASTManager& GetASTManager() const {
        return program_manager;
    }

    /// The program node reported by the AST manager.
    [[nodiscard]] ASTNode GetASTProgram() const {
        return program_manager.GetProgram();
    }

    /// The variable count reported by the AST manager.
    [[nodiscard]] u32 GetASTNumVariables() const {
        return program_manager.GetVariables();
    }

    /// True for the main function, i.e. the one with id 0.
    [[nodiscard]] bool IsMain() const {
        return id == 0;
    }

    /// Identifier given at construction.
    [[nodiscard]] u32 GetId() const {
        return id;
    }

private:
    std::map<u32, NodeBlock> basic_blocks;
    ASTManager program_manager{true, true};
    bool decompiled{};
    bool disable_flow_stack{};
    u32 id{};

    // Coverage range given at construction; stored but not exposed by any accessor here.
    u32 coverage_begin{};
    u32 coverage_end{};
};
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
CompilerSettings settings_, Registry& registry_);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
return basic_blocks;
}
const std::set<u32>& GetRegisters() const {
return used_registers;
}
@@ -155,26 +207,6 @@ public:
return header;
}
bool IsFlowStackDisabled() const {
return disable_flow_stack;
}
bool IsDecompiled() const {
return decompiled;
}
const ASTManager& GetASTManager() const {
return program_manager;
}
ASTNode GetASTProgram() const {
return program_manager.GetProgram();
}
u32 GetASTNumVariables() const {
return program_manager.GetVariables();
}
u32 ConvertAddressToNvidiaSpace(u32 address) const {
return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
}
@@ -190,6 +222,10 @@ public:
return num_custom_variables;
}
/// Returns a copy of the shared pointer stored in main_function.
/// NOTE(review): presumably null until Decode() has assigned it - confirm against callers.
std::shared_ptr<ShaderFunctionIR> GetMainFunction() const {
return main_function;
}
private:
friend class ASTDecoder;
@@ -453,6 +489,9 @@ private:
std::vector<Node> amend_code;
u32 num_custom_variables{};
std::shared_ptr<ShaderFunctionIR> main_function;
std::vector<std::shared_ptr<ShaderFunctionIR>> subfunctions;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;