Preliminary implementation of LDG

Works by approximating the final address from the last IADD_C operation and then reading the 16 KiB that follow that address. Currently a hack heuristic ("hackeuristic").
This commit is contained in:
Zach Hilman
2018-09-14 22:10:07 -04:00
committed by bunnei
parent b4a6ce02ce
commit 2b18ce1248
5 changed files with 146 additions and 2 deletions

View File

@@ -458,6 +458,15 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
return tex_info;
}
/// Registers a global memory region and returns the GLSL identifier used to access it.
///
/// @param iadd_data Tuple whose second and third elements form the region key that is stored in
///                  state.global_memory_uniforms. The first element is ignored.
/// @return "global_memory_region_N", where N is the position of the key within the ordered set
///         at the time of the call.
///
/// Registering a key that is already present returns the index of the existing entry instead of
/// the index of whichever entry happens to sort last.
///
/// NOTE: the set is ordered, so inserting a key that sorts before existing keys shifts their
/// positions; identifiers returned by earlier calls can then become stale.
std::string Maxwell3D::CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data) {
    const std::pair<u64, u64> key{std::get<1>(iadd_data), std::get<2>(iadd_data)};
    const auto& regions = state.global_memory_uniforms;
    state.global_memory_uniforms.insert(key);

    // Regions are numbered by iteration order of the set, so count how many keys precede ours.
    std::size_t region_index = 0;
    for (const auto& region : regions) {
        if (region == key) {
            break;
        }
        ++region_index;
    }
    return fmt::format("global_memory_region_{}", region_index);
}
/// Returns a copy of every global memory region key registered so far, in set order.
std::set<std::pair<u64, u64>> Maxwell3D::ListGlobalMemoryRegions() const {
    const auto& registered_regions = state.global_memory_uniforms;
    return registered_regions;
}
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -1007,6 +1008,8 @@ public:
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
std::set<std::pair<u64, u64>> global_memory_uniforms;
};
State state{};
@@ -1039,6 +1042,9 @@ public:
return macro_memory;
}
/// Records the second and third elements of iadd_data as a global memory region key (used by
/// the rasterizer as const buffer index and offset) and returns the numbered GLSL identifier
/// "global_memory_region_N" for it. The first tuple element is not used.
std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
/// Returns a copy of the set of global memory region keys recorded so far.
std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
private:
void InitializeRegisterDefaults();

View File

@@ -1,4 +1,4 @@
// Copyright 2018 yuzu Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -205,6 +205,8 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -771,6 +773,14 @@ union Instruction {
union {
BitField<44, 2, u64> unknown;
} st_l;
// Instruction fields for LDG, consumed by the decompiler's LDG handler.
// NOTE(review): BitField parameters are presumably <first bit, bit count, value type> — confirm.
union {
// Load width; the handler maps Single/Double/Quad/UnsignedQuad to 1/2/4 registers.
BitField<48, 3, UniformType> type;
// Not read by the LDG handler in this change.
BitField<46, 2, u64> cache_mode;
// Signed immediate added to the offset register to form the address.
BitField<20, 24, s64> offset_immediate;
// Register added to the immediate to form the address.
BitField<8, 8, Register> offset_register;
// First register written by the load; wider loads write consecutive registers.
BitField<0, 8, Register> output;
} ld_g;
union {
BitField<0, 3, u64> pred0;

View File

@@ -276,6 +276,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
u32 current_global_bindpoint = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
@@ -341,6 +342,45 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
const auto regions = maxwell3d.ListGlobalMemoryRegions();
size_t i = 0;
for (const auto& global_region : regions) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
cbufs.const_buffers[global_region.first].address + global_region.second)};
ASSERT(cbuf_addr != boost::none);
const auto actual_addr_gpu = Memory::Read64(cbuf_addr.get());
const auto size = Memory::Read32(cbuf_addr.get() + 8);
const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
ASSERT(actual_addr != boost::none);
const auto uniform_name = fmt::format("global_memory_region_declblock_{}", i);
const auto b_index = glGetProgramResourceIndex(shader->GetProgramHandle(),
GL_UNIFORM_BLOCK, uniform_name.c_str());
if (b_index != GL_INVALID_INDEX) {
std::vector<u8> new_data(size);
Memory::ReadBlock(actual_addr.get(), new_data.data(), new_data.size());
GLuint gm_ubo{};
glGenBuffers(1, &gm_ubo);
glBindBuffer(GL_UNIFORM_BUFFER, gm_ubo);
glBufferData(GL_UNIFORM_BUFFER, new_data.size(), new_data.data(), GL_STATIC_READ);
glBindBufferBase(GL_UNIFORM_BUFFER, current_constbuffer_bindpoint, gm_ubo);
glUniformBlockBinding(shader->GetProgramHandle(), b_index,
current_constbuffer_bindpoint);
++current_constbuffer_bindpoint;
}
++i;
}
}
}

View File

@@ -13,6 +13,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -679,6 +680,18 @@ private:
declarations.AddNewLine();
}
const auto& regions{
Core::System::GetInstance().GPU().Maxwell3D().ListGlobalMemoryRegions()};
for (size_t i = 0; i < regions.size(); ++i) {
declarations.AddLine("layout(std140) uniform " +
fmt::format("global_memory_region_declblock_{}", i));
declarations.AddLine('{');
declarations.AddLine(" vec4 global_memory_region_" + std::to_string(i) + "[0x400];");
declarations.AddLine("};");
declarations.AddNewLine();
}
declarations.AddNewLine();
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
@@ -1800,6 +1813,11 @@ private:
} else {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Integer);
if (opcode->GetId() == OpCode::Id::IADD_C) {
    // Keep a two-entry history of IADD_C operands for the LDG handler: the previous record
    // becomes the fallback, then the current instruction's gpr8 register and const buffer
    // index/offset become the newest record.
    s_last_iadd = last_iadd;
    last_iadd = std::tuple<Register, u64, u64>(instr.gpr8.Value(), instr.cbuf34.index,
                                               instr.cbuf34.offset);
}
}
}
@@ -3045,6 +3063,64 @@ private:
shader.AddLine('}');
break;
}
// LDG: load 1, 2 or 4 consecutive registers from a global memory region.
// The base address is not tracked exactly. The const buffer operand of a recent IADD_C is
// taken as the base, registered as a global memory region, and the load address is turned
// into an offset relative to that base.
case OpCode::Id::LDG: {
// Determine number of GPRs to fill with data
u64 count = 1;
switch (instr.ld_g.type) {
case Tegra::Shader::UniformType::Single:
count = 1;
break;
case Tegra::Shader::UniformType::Double:
count = 2;
break;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
count = 4;
break;
default:
// Other types are not handled; count keeps its initial value of 1 if execution continues.
UNREACHABLE_MSG("Unimplemented LDG size!");
}
// Start from the most recent IADD_C record: (register, const buffer index, const buffer offset).
auto [gpr_index, index, offset] = last_iadd;
// The last IADD might be the upper u32 of address, so instead take the one before
// that.
// NOTE(review): 0xFF is presumably the zero register index — confirm.
if (gpr_index == 0xFF)
std::tie(gpr_index, index, offset) = s_last_iadd;
// NOTE(review): gpr is not used below; confirm the call has no needed side effect before removing.
const auto gpr = regs.GetRegisterAsInteger(gpr_index);
// GLSL expression for the const buffer value that is treated as the region's base address.
const auto constbuffer =
regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);
// Register the region and get the name of the GLSL array backing it. The first tuple element
// is ignored by CreateGlobalMemoryRegion.
// NOTE(review): offset * 4 presumably converts a word offset to a byte offset — confirm.
const auto memory =
Core::System::GetInstance().GPU().Maxwell3D().CreateGlobalMemoryRegion(
{0, index, offset * 4});
// Address requested by the instruction: immediate + offset register.
const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
const auto o_register =
regs.GetRegisterAsInteger(instr.ld_g.offset_register, 0, false);
const auto address = "( " + immediate + " + " + o_register + " )";
// Offset into the region: requested address minus the assumed base.
const auto base_sub = address + " - " + constbuffer;
// New scope to prevent potential conflicts
shader.AddLine("{");
++shader.scope;
shader.AddLine("uint final_offset = " + base_sub + ";");
for (std::size_t out = 0; out < count; ++out) {
// Each successive output register reads 4 bytes further into the region.
const u64 reg_id = instr.ld_g.output.Value() + out;
// The region is an array of vec4: offset / 16 selects the vec4 and (offset / 4) % 4 selects
// the component within it.
const auto this_memory =
fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
memory, out * 4, out * 4);
regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
}
--shader.scope;
shader.AddLine("}");
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName());
UNREACHABLE();
@@ -3832,9 +3908,12 @@ private:
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
std::tuple<Register, u64, u64> last_iadd{};
std::tuple<Register, u64, u64> s_last_iadd{};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace OpenGL::GLShader::Decompiler
};
std::string GetCommonDeclarations() {
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",