Preliminary implementation of LDG
Works by approximating the value of the final address using the last IADD_C operation and then reading the 16 KB following that address. This is currently a heuristic hack rather than an exact implementation.
This commit is contained in:
@@ -458,6 +458,15 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
|
||||
return tex_info;
|
||||
}
|
||||
|
||||
/**
 * Registers a global memory region and returns the GLSL identifier that refers to it.
 *
 * Only elements 1 and 2 of the tuple are stored (as a pair in state.global_memory_uniforms);
 * element 0 is ignored.
 *
 * @param iadd_data Tuple whose second and third elements identify the region.
 * @return "global_memory_region_<N>", where N is the position of the region inside
 *         state.global_memory_uniforms in iteration order.
 */
std::string Maxwell3D::CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data) {
    auto& regions = state.global_memory_uniforms;

    // emplace() yields an iterator to the element whether it was inserted now or already present.
    const auto position = regions.emplace(std::get<1>(iadd_data), std::get<2>(iadd_data)).first;

    // The set is ordered, so "size() - 1" only names this region when it happens to be the
    // greatest element and was not a duplicate. Code that numbers regions by iterating the set
    // needs the element's rank instead, so count its predecessors.
    u64 region_index = 0;
    for (auto it = regions.begin(); it != position; ++it) {
        ++region_index;
    }

    // NOTE(review): a rank handed out earlier still shifts if a smaller pair is registered
    // later. A fully stable numbering needs an insertion-ordered container, which would change
    // the type returned by ListGlobalMemoryRegions().
    return fmt::format("global_memory_region_{}", region_index);
}
|
||||
|
||||
std::set<std::pair<u64, u64>> Maxwell3D::ListGlobalMemoryRegions() const {
|
||||
return state.global_memory_uniforms;
|
||||
}
|
||||
|
||||
u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
|
||||
return regs.reg_array[method];
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "common/assert.h"
|
||||
@@ -1007,6 +1008,8 @@ public:
|
||||
|
||||
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
|
||||
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
|
||||
|
||||
/// Global memory regions registered through CreateGlobalMemoryRegion, stored as
/// (const buffer index, offset) pairs.
std::set<std::pair<u64, u64>> global_memory_uniforms;
|
||||
};
|
||||
|
||||
State state{};
|
||||
@@ -1039,6 +1042,9 @@ public:
|
||||
return macro_memory;
|
||||
}
|
||||
|
||||
/// Records the region described by elements 1 and 2 of iadd_data in
/// state.global_memory_uniforms and returns a "global_memory_region_<N>" name for it.
std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
|
||||
/// Returns a copy of the set of regions recorded by CreateGlobalMemoryRegion.
std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
|
||||
|
||||
private:
|
||||
void InitializeRegisterDefaults();
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@@ -205,6 +205,8 @@ enum class UniformType : u64 {
|
||||
SignedShort = 3,
|
||||
Single = 4,
|
||||
Double = 5,
|
||||
Quad = 6,
|
||||
UnsignedQuad = 7,
|
||||
};
|
||||
|
||||
enum class StoreType : u64 {
|
||||
@@ -771,6 +773,14 @@ union Instruction {
|
||||
union {
|
||||
BitField<44, 2, u64> unknown;
|
||||
} st_l;
|
||||
|
||||
union {
|
||||
BitField<48, 3, UniformType> type;
|
||||
BitField<46, 2, u64> cache_mode;
|
||||
BitField<20, 24, s64> offset_immediate;
|
||||
BitField<8, 8, Register> offset_register;
|
||||
BitField<0, 8, Register> output;
|
||||
} ld_g;
|
||||
|
||||
union {
|
||||
BitField<0, 3, u64> pred0;
|
||||
|
||||
@@ -276,6 +276,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
|
||||
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
||||
u32 current_texture_bindpoint = 0;
|
||||
u32 current_global_bindpoint = 0;
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto& shader_config = gpu.regs.shader_config[index];
|
||||
@@ -341,6 +342,45 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
||||
index++;
|
||||
}
|
||||
|
||||
auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
|
||||
const auto regions = maxwell3d.ListGlobalMemoryRegions();
|
||||
size_t i = 0;
|
||||
for (const auto& global_region : regions) {
|
||||
auto& gpu{Core::System::GetInstance().GPU()};
|
||||
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
|
||||
const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
|
||||
cbufs.const_buffers[global_region.first].address + global_region.second)};
|
||||
|
||||
ASSERT(cbuf_addr != boost::none);
|
||||
|
||||
const auto actual_addr_gpu = Memory::Read64(cbuf_addr.get());
|
||||
const auto size = Memory::Read32(cbuf_addr.get() + 8);
|
||||
const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
|
||||
|
||||
ASSERT(actual_addr != boost::none);
|
||||
|
||||
const auto uniform_name = fmt::format("global_memory_region_declblock_{}", i);
|
||||
const auto b_index = glGetProgramResourceIndex(shader->GetProgramHandle(),
|
||||
GL_UNIFORM_BLOCK, uniform_name.c_str());
|
||||
if (b_index != GL_INVALID_INDEX) {
|
||||
|
||||
std::vector<u8> new_data(size);
|
||||
Memory::ReadBlock(actual_addr.get(), new_data.data(), new_data.size());
|
||||
|
||||
GLuint gm_ubo{};
|
||||
glGenBuffers(1, &gm_ubo);
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, gm_ubo);
|
||||
glBufferData(GL_UNIFORM_BUFFER, new_data.size(), new_data.data(), GL_STATIC_READ);
|
||||
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, current_constbuffer_bindpoint, gm_ubo);
|
||||
glUniformBlockBinding(shader->GetProgramHandle(), b_index,
|
||||
current_constbuffer_bindpoint);
|
||||
++current_constbuffer_bindpoint;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_header.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
@@ -679,6 +680,18 @@ private:
|
||||
declarations.AddNewLine();
|
||||
}
|
||||
|
||||
const auto& regions{
|
||||
Core::System::GetInstance().GPU().Maxwell3D().ListGlobalMemoryRegions()};
|
||||
for (size_t i = 0; i < regions.size(); ++i) {
|
||||
declarations.AddLine("layout(std140) uniform " +
|
||||
fmt::format("global_memory_region_declblock_{}", i));
|
||||
declarations.AddLine('{');
|
||||
declarations.AddLine(" vec4 global_memory_region_" + std::to_string(i) + "[0x400];");
|
||||
declarations.AddLine("};");
|
||||
declarations.AddNewLine();
|
||||
}
|
||||
declarations.AddNewLine();
|
||||
|
||||
/// Generates declarations for samplers.
|
||||
void GenerateSamplers() {
|
||||
const auto& samplers = GetSamplers();
|
||||
@@ -1800,6 +1813,11 @@ private:
|
||||
} else {
|
||||
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
|
||||
GLSLRegister::Type::Integer);
|
||||
// Remember the operands of the two most recent IADD_C instructions; the LDG decoder uses
// them to guess which const buffer entry holds the base address.
// `opcode` is dereferenced through get(), matching how the rest of this file accesses it
// (e.g. opcode->get().GetName()).
if (opcode->get().GetId() == OpCode::Id::IADD_C) {
    s_last_iadd = last_iadd;
    // Construct the tuple directly instead of forcing template arguments on std::make_tuple.
    last_iadd = std::tuple<Register, u64, u64>(instr.gpr8.Value(), instr.cbuf34.index,
                                               instr.cbuf34.offset);
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3045,6 +3063,64 @@ private:
|
||||
shader.AddLine('}');
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::LDG: {
    // Number of consecutive registers the load fills, selected by the load type.
    u64 count = 1;
    const Tegra::Shader::UniformType load_type = instr.ld_g.type;
    if (load_type == Tegra::Shader::UniformType::Single) {
        count = 1;
    } else if (load_type == Tegra::Shader::UniformType::Double) {
        count = 2;
    } else if (load_type == Tegra::Shader::UniformType::Quad ||
               load_type == Tegra::Shader::UniformType::UnsignedQuad) {
        count = 4;
    } else {
        UNREACHABLE_MSG("Unimplemented LDG size!");
    }

    // Start from the most recent IADD_C. When its register is 0xFF it might be the add for
    // the upper u32 of the address, so use the IADD_C recorded before it instead.
    auto tracked_iadd = last_iadd;
    if (std::get<0>(tracked_iadd) == 0xFF) {
        tracked_iadd = s_last_iadd;
    }
    const auto& tracked_register = std::get<0>(tracked_iadd);
    const u64 cbuf_index = std::get<1>(tracked_iadd);
    const u64 cbuf_offset = std::get<2>(tracked_iadd);

    // The calls below are kept in their original order. The first result is currently unused.
    [[maybe_unused]] const auto tracked_value = regs.GetRegisterAsInteger(tracked_register);
    const auto base_address =
        regs.GetUniform(cbuf_index, cbuf_offset, GLSLRegister::Type::UnsignedInteger);
    const auto region_name =
        Core::System::GetInstance().GPU().Maxwell3D().CreateGlobalMemoryRegion(
            {0, cbuf_index, cbuf_offset * 4});
    const auto offset_value =
        regs.GetRegisterAsInteger(instr.ld_g.offset_register, 0, false);

    // The emitted code lives in its own scope so that final_offset cannot clash with
    // other declarations.
    shader.AddLine("{");
    ++shader.scope;

    // final_offset = (immediate + offset register) - base address read from the const buffer
    shader.AddLine(fmt::format("uint final_offset = ( {} + {} ) - {};",
                               instr.ld_g.offset_immediate.Value(), offset_value,
                               base_address));

    // Each destination register reads one 32-bit component of the vec4 array.
    for (std::size_t word = 0; word < count; ++word) {
        const std::size_t byte_offset = word * 4;
        const u64 reg_id = instr.ld_g.output.Value() + word;
        regs.SetRegisterToFloat(
            reg_id, 0,
            fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
                        region_name, byte_offset, byte_offset),
            1, 1);
    }

    --shader.scope;
    shader.AddLine("}");

    break;
}
|
||||
default: {
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName());
|
||||
UNREACHABLE();
|
||||
@@ -3832,9 +3908,12 @@ private:
|
||||
ShaderWriter declarations;
|
||||
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
|
||||
|
||||
/// (instr.gpr8 register, const buffer index, const buffer offset) captured from the most
/// recently decoded IADD_C instruction.
std::tuple<Register, u64, u64> last_iadd{};
|
||||
/// Value last_iadd held before the most recent IADD_C; the LDG decoder falls back to it
/// when the register stored in last_iadd equals 0xFF.
std::tuple<Register, u64, u64> s_last_iadd{};
|
||||
|
||||
// Declarations
|
||||
std::set<std::string> declr_predicates;
|
||||
}; // namespace OpenGL::GLShader::Decompiler
|
||||
};
|
||||
|
||||
std::string GetCommonDeclarations() {
|
||||
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",
|
||||
|
||||
Reference in New Issue
Block a user