Compare commits

...

17 Commits

Author SHA1 Message Date
Mat M
b10ef35196 Merge c051624ed6 into 258a5cee84 2018-07-22 07:44:42 +00:00
Lioncash
c051624ed6 shader_bytecode: Add constexpr to default constructor of Attribute and Sampler
We already have a constexpr constructor that takes a parameter, so the
other constructor should have it too.
2018-07-22 03:43:29 -04:00
bunnei
258a5cee84 Merge pull request #765 from lioncash/file
file_util: Remove goto usages from Copy()
2018-07-22 00:03:35 -07:00
bunnei
af4bde8cd1 Merge pull request #767 from bunnei/shader-cleanup
gl_shader_decompiler: Remove unused state tracking and minor cleanup.
2018-07-22 00:03:17 -07:00
bunnei
2d563ec8d5 Merge pull request #766 from bunnei/shader-sel
gl_shader_decompiler: Implement SEL instruction.
2018-07-21 23:13:27 -07:00
bunnei
ef163c1a15 Merge pull request #764 from lioncash/move
file_util: Minor changes to ScanDirectoryTree() and ForeachDirectoryEntry()
2018-07-21 22:05:30 -07:00
bunnei
f5a2944ab6 gl_shader_decompiler: Remove unused state tracking and minor cleanup. 2018-07-22 01:00:44 -04:00
bunnei
c43eaa94f3 gl_shader_decompiler: Implement SEL instruction. 2018-07-22 00:37:12 -04:00
bunnei
4cd5df95d6 Merge pull request #761 from bunnei/improve-raster-cache
Improvements to rasterizer cache
2018-07-21 20:28:53 -07:00
Lioncash
c5de0a67a8 file_util: Remove goto usages from Copy()
We can just leverage std::unique_ptr to automatically close these for us
in error cases instead of jumping to the end of the function to call
fclose on them.
2018-07-21 23:08:55 -04:00
Lioncash
0ba7fe4ab1 file_util: Use a u64 to represent number of entries
This avoids a truncating cast on size. I doubt we'd ever traverse a
directory this large, however we also shouldn't truncate sizes away.
2018-07-21 22:42:08 -04:00
Lioncash
964154ce44 file_util: std::move FST entries in ScanDirectoryTree()
Avoids unnecessary copies when building up the FST entries.
2018-07-21 22:31:44 -04:00
bunnei
63fbf9a7d3 gl_rasterizer_cache: Blit surfaces on recreation instead of flush and load. 2018-07-21 21:51:06 -04:00
bunnei
4301f0b539 gl_rasterizer_cache: Use GPUVAddr as cache key, not parameter set. 2018-07-21 21:51:06 -04:00
bunnei
cd47391c2d gl_rasterizer_cache: Use zeta_width and zeta_height registers for depth buffer. 2018-07-21 21:51:06 -04:00
bunnei
d8c60029d6 gl_rasterizer: Use zeta_enable register to enable depth buffer. 2018-07-21 21:51:06 -04:00
bunnei
5287991a36 maxwell_3d: Add depth buffer enable, width, and height registers. 2018-07-21 21:51:05 -04:00
11 changed files with 227 additions and 195 deletions

View File

@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <memory>
#include <sstream>
#include <unordered_map>
#include "common/assert.h"
@@ -275,14 +277,10 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
GetLastErrorMsg());
return false;
#else
// buffer size
#define BSIZE 1024
char buffer[BSIZE];
using CFilePointer = std::unique_ptr<FILE, decltype(&std::fclose)>;
// Open input file
FILE* input = fopen(srcFilename.c_str(), "rb");
CFilePointer input{fopen(srcFilename.c_str(), "rb"), std::fclose};
if (!input) {
LOG_ERROR(Common_Filesystem, "opening input failed {} --> {}: {}", srcFilename,
destFilename, GetLastErrorMsg());
@@ -290,44 +288,36 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
}
// open output file
FILE* output = fopen(destFilename.c_str(), "wb");
CFilePointer output{fopen(destFilename.c_str(), "wb"), std::fclose};
if (!output) {
fclose(input);
LOG_ERROR(Common_Filesystem, "opening output failed {} --> {}: {}", srcFilename,
destFilename, GetLastErrorMsg());
return false;
}
// copy loop
while (!feof(input)) {
std::array<char, 1024> buffer;
while (!feof(input.get())) {
// read input
size_t rnum = fread(buffer, sizeof(char), BSIZE, input);
if (rnum != BSIZE) {
if (ferror(input) != 0) {
size_t rnum = fread(buffer.data(), sizeof(char), buffer.size(), input.get());
if (rnum != buffer.size()) {
if (ferror(input.get()) != 0) {
LOG_ERROR(Common_Filesystem, "failed reading from source, {} --> {}: {}",
srcFilename, destFilename, GetLastErrorMsg());
goto bail;
return false;
}
}
// write output
size_t wnum = fwrite(buffer, sizeof(char), rnum, output);
size_t wnum = fwrite(buffer.data(), sizeof(char), rnum, output.get());
if (wnum != rnum) {
LOG_ERROR(Common_Filesystem, "failed writing to output, {} --> {}: {}", srcFilename,
destFilename, GetLastErrorMsg());
goto bail;
return false;
}
}
// close files
fclose(input);
fclose(output);
return true;
bail:
if (input)
fclose(input);
if (output)
fclose(output);
return false;
#endif
}
@@ -396,12 +386,12 @@ bool CreateEmptyFile(const std::string& filename) {
return true;
}
bool ForeachDirectoryEntry(unsigned* num_entries_out, const std::string& directory,
bool ForeachDirectoryEntry(u64* num_entries_out, const std::string& directory,
DirectoryEntryCallable callback) {
LOG_TRACE(Common_Filesystem, "directory {}", directory);
// How many files + directories we found
unsigned found_entries = 0;
u64 found_entries = 0;
// Save the status of callback function
bool callback_error = false;
@@ -431,7 +421,7 @@ bool ForeachDirectoryEntry(unsigned* num_entries_out, const std::string& directo
if (virtual_name == "." || virtual_name == "..")
continue;
unsigned ret_entries = 0;
u64 ret_entries = 0;
if (!callback(&ret_entries, directory, virtual_name)) {
callback_error = true;
break;
@@ -455,9 +445,9 @@ bool ForeachDirectoryEntry(unsigned* num_entries_out, const std::string& directo
return true;
}
unsigned ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
unsigned int recursion) {
const auto callback = [recursion, &parent_entry](unsigned* num_entries_out,
u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
unsigned int recursion) {
const auto callback = [recursion, &parent_entry](u64* num_entries_out,
const std::string& directory,
const std::string& virtual_name) -> bool {
FSTEntry entry;
@@ -469,7 +459,7 @@ unsigned ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
// is a directory, let's go inside if we didn't recurse too often
if (recursion > 0) {
entry.size = ScanDirectoryTree(entry.physicalName, entry, recursion - 1);
*num_entries_out += (int)entry.size;
*num_entries_out += entry.size;
} else {
entry.size = 0;
}
@@ -480,16 +470,16 @@ unsigned ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
(*num_entries_out)++;
// Push into the tree
parent_entry.children.push_back(entry);
parent_entry.children.push_back(std::move(entry));
return true;
};
unsigned num_entries;
u64 num_entries;
return ForeachDirectoryEntry(&num_entries, directory, callback) ? num_entries : 0;
}
bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) {
const auto callback = [recursion](unsigned* num_entries_out, const std::string& directory,
const auto callback = [recursion](u64* num_entries_out, const std::string& directory,
const std::string& virtual_name) -> bool {
std::string new_path = directory + DIR_SEP_CHR + virtual_name;

View File

@@ -84,7 +84,7 @@ bool CreateEmptyFile(const std::string& filename);
* @return whether handling the entry succeeded
*/
using DirectoryEntryCallable = std::function<bool(
unsigned* num_entries_out, const std::string& directory, const std::string& virtual_name)>;
u64* num_entries_out, const std::string& directory, const std::string& virtual_name)>;
/**
* Scans a directory, calling the callback for each file/directory contained within.
@@ -95,7 +95,7 @@ using DirectoryEntryCallable = std::function<bool(
* @param callback The callback which will be called for each entry
* @return whether scanning the directory succeeded
*/
bool ForeachDirectoryEntry(unsigned* num_entries_out, const std::string& directory,
bool ForeachDirectoryEntry(u64* num_entries_out, const std::string& directory,
DirectoryEntryCallable callback);
/**
@@ -105,8 +105,8 @@ bool ForeachDirectoryEntry(unsigned* num_entries_out, const std::string& directo
* @param recursion Number of children directories to read before giving up.
* @return the total number of files/directories found
*/
unsigned ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
unsigned int recursion = 0);
u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
unsigned int recursion = 0);
// deletes the given directory and anything under it. Returns true on success.
bool DeleteDirRecursively(const std::string& directory, unsigned int recursion = 256);

View File

@@ -92,13 +92,13 @@ RealVfsDirectory::RealVfsDirectory(const std::string& path_, Mode perms_)
perms(perms_) {
if (!FileUtil::Exists(path) && (perms == Mode::Write || perms == Mode::Append))
FileUtil::CreateDir(path);
unsigned size;
if (perms == Mode::Append)
return;
FileUtil::ForeachDirectoryEntry(
&size, path,
[this](unsigned* entries_out, const std::string& directory, const std::string& filename) {
nullptr, path,
[this](u64* entries_out, const std::string& directory, const std::string& filename) {
std::string full_path = directory + DIR_SEP + filename;
if (FileUtil::IsDirectory(full_path))
subdirectories.emplace_back(std::make_shared<RealVfsDirectory>(full_path, perms));

View File

@@ -20,7 +20,7 @@ namespace Loader {
static std::string FindRomFS(const std::string& directory) {
std::string filepath_romfs;
const auto callback = [&filepath_romfs](unsigned*, const std::string& directory,
const auto callback = [&filepath_romfs](u64*, const std::string& directory,
const std::string& virtual_name) -> bool {
const std::string physical_name = directory + virtual_name;
if (FileUtil::IsDirectory(physical_name)) {

View File

@@ -487,7 +487,12 @@ public:
};
} rt_control;
INSERT_PADDING_WORDS(0x2B);
INSERT_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
INSERT_PADDING_WORDS(0x27);
u32 depth_test_enable;
@@ -540,7 +545,11 @@ public:
u32 vb_element_base;
INSERT_PADDING_WORDS(0x49);
INSERT_PADDING_WORDS(0x40);
u32 zeta_enable;
INSERT_PADDING_WORDS(0x8);
struct {
u32 tsc_address_high;
@@ -865,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -874,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil, 0x4E0);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side, 0x565);

View File

@@ -67,7 +67,7 @@ private:
};
union Attribute {
Attribute() = default;
constexpr Attribute() = default;
constexpr explicit Attribute(u64 value) : value(value) {}
@@ -96,7 +96,7 @@ union Attribute {
};
union Sampler {
Sampler() = default;
constexpr Sampler() = default;
constexpr explicit Sampler(u64 value) : value(value) {}
@@ -288,6 +288,11 @@ union Instruction {
BitField<49, 1, u64> negate_a;
} alu_integer;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> neg_pred;
} sel;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> negate_pred;
@@ -513,6 +518,9 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
SEL_C,
SEL_R,
SEL_IMM,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -713,6 +721,9 @@ private:
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100010100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),

View File

@@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
use_depth_fb = regs.zeta_enable != 0;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}
@@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context;
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);
SyncDepthTestState();
SyncBlendState();
@@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}

View File

@@ -65,9 +65,9 @@ struct FormatTuple {
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
@@ -77,9 +77,9 @@ struct FormatTuple {
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.size_in_bytes = params.SizeInBytes();
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.size_in_bytes = params.SizeInBytes();
return params;
}
@@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
cur_state.Apply();
}
/**
 * Copies a rectangle of one texture into a rectangle of another using glBlitFramebuffer.
 *
 * The textures are attached to the supplied read/draw framebuffers according to the surface
 * type, and the attachment points that are not needed are cleared to texture 0. The OpenGL
 * state that was current on entry is captured up front and re-applied through SCOPE_EXIT.
 *
 * @param src_tex        Texture to read from
 * @param src_rect       Region of src_tex to read
 * @param dst_tex        Texture to write to
 * @param dst_rect       Region of dst_tex to write
 * @param type           Surface type; selects the attachment points and the blit mask
 * @param read_fb_handle Framebuffer object bound as the read framebuffer for the blit
 * @param draw_fb_handle Framebuffer object bound as the draw framebuffer for the blit
 * @return always true
 */
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
                         GLuint read_fb_handle, GLuint draw_fb_handle) {
    // Remember the state we were called with so it can be put back when we leave
    OpenGLState saved_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({ saved_state.Apply(); });

    // Switch to a fresh state that only binds the two scratch framebuffers
    OpenGLState blit_state;
    blit_state.draw.read_framebuffer = read_fb_handle;
    blit_state.draw.draw_framebuffer = draw_fb_handle;
    blit_state.Apply();

    // Attaches (or, with texture 0, detaches) a 2D texture at mip level 0
    const auto attach = [](GLenum target, GLenum attachment, GLuint texture) {
        glFramebufferTexture2D(target, attachment, GL_TEXTURE_2D, texture, 0);
    };

    // Blit mask; stays zero when the surface type is none of the cases below
    u32 buffers{};

    switch (type) {
    case SurfaceType::ColorTexture:
        attach(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, src_tex);
        attach(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0);
        attach(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dst_tex);
        attach(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0);
        buffers = GL_COLOR_BUFFER_BIT;
        break;
    case SurfaceType::Depth:
        attach(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0);
        attach(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, src_tex);
        attach(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, 0);
        attach(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0);
        attach(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, dst_tex);
        attach(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, 0);
        buffers = GL_DEPTH_BUFFER_BIT;
        break;
    case SurfaceType::DepthStencil:
        attach(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0);
        attach(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, src_tex);
        attach(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0);
        attach(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, dst_tex);
        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
        break;
    default:
        break;
    }

    // Color-only blits are filtered linearly; every other mask uses nearest filtering
    const bool color_only = buffers == GL_COLOR_BUFFER_BIT;
    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
                      color_only ? GL_LINEAR : GL_NEAREST);

    return true;
}
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
texture.Create();
const auto& rect{params.GetRect()};
@@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
}
if (using_depth_fb) {
depth_params =
SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};
@@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
if (Settings::values.use_accurate_framebuffers) {
// If enabled, always flush dirty surfaces
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
} else {
// Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
// and flushes are very slow and do not seem to improve accuracy
const auto& params{surface->GetSurfaceParams()};
Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
}
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
return {};
// Check for an exact match in existing surfaces
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
// Look up surface in the cache based on address
const auto& search{surface_cache.find(params.addr)};
Surface surface;
if (search != surface_cache.end()) {
surface = search->second;
if (Settings::values.use_accurate_framebuffers) {
// Reload the surface from Switch memory
LoadSurface(surface);
// If use_accurate_framebuffers is enabled, always load from memory
FlushSurface(surface);
UnregisterSurface(surface);
} else if (surface->GetSurfaceParams() != params) {
// If surface parameters changed, recreate the surface from the old one
return RecreateSurface(surface, params);
} else {
// Use the cached surface as-is
return surface;
}
} else {
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
}
// No surface found - create a new one
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
return surface;
}
/**
 * Replaces a cached surface with a new one built from new_params, carrying the old contents
 * over with a texture blit.
 *
 * @param surface    The currently cached surface to replace
 * @param new_params Parameters for the replacement surface; type, pixel format and component
 *                   type are asserted to equal those of the existing surface
 * @return the newly created surface, which is registered in the cache in place of the old one
 */
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
                                               const SurfaceParams& new_params) {
    const auto& old_params{surface->GetSurfaceParams()};

    // The old contents are blitted into the new texture, so both must describe the same
    // kind of data
    ASSERT(old_params.type == new_params.type);
    ASSERT(old_params.pixel_format == new_params.pixel_format);
    ASSERT(old_params.component_type == new_params.component_type);

    // Build the replacement and copy the previous surface's texture into it
    Surface recreated{std::make_shared<CachedSurface>(new_params)};
    const GLuint src_handle = surface->Texture().handle;
    const GLuint dst_handle = recreated->Texture().handle;
    BlitTextures(src_handle, old_params.GetRect(), dst_handle,
                 recreated->GetSurfaceParams().GetRect(), old_params.type,
                 read_framebuffer.handle, draw_framebuffer.handle);

    // Swap the cache entry over to the replacement
    UnregisterSurface(surface);
    RegisterSurface(recreated);

    return recreated;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
@@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search != surface_cache.end()) {
// Registered already
return;
}
surface_cache[surface_key] = surface;
surface_cache[params.addr] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search == surface_cache.end()) {
// Unregistered already

View File

@@ -10,7 +10,6 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -137,6 +136,7 @@ struct SurfaceParams {
ASSERT(static_cast<size_t>(format) < bpp_table.size());
return bpp_table[static_cast<size_t>(format)];
}
u32 GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
@@ -365,9 +365,21 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format);
/// Two parameter sets are equal when every field listed below matches
bool operator==(const SurfaceParams& other) const {
    return addr == other.addr && is_tiled == other.is_tiled &&
           block_height == other.block_height && pixel_format == other.pixel_format &&
           component_type == other.component_type && type == other.type &&
           width == other.width && height == other.height &&
           unaligned_height == other.unaligned_height && size_in_bytes == other.size_in_bytes;
}
/// Inverse of operator==
bool operator!=(const SurfaceParams& other) const {
    return !(*this == other);
}
Tegra::GPUVAddr addr;
bool is_tiled;
@@ -381,24 +393,6 @@ struct SurfaceParams {
size_t size_in_bytes;
};
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
static SurfaceKey Create(const SurfaceParams& params) {
SurfaceKey res;
res.state = params;
return res;
}
};
namespace std {
template <>
struct hash<SurfaceKey> {
size_t operator()(const SurfaceKey& k) const {
return k.Hash();
}
};
} // namespace std
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
@@ -444,8 +438,8 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
void MarkSurfaceAsDirty(const Surface& surface);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
@@ -460,6 +454,9 @@ private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@@ -469,7 +466,7 @@ private:
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
std::unordered_map<SurfaceKey, Surface> surface_cache;
std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
PageMap cached_pages;
OGLFramebuffer read_framebuffer;

View File

@@ -191,48 +191,21 @@ public:
UnsignedInteger,
};
GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix)
: index{index}, shader{shader}, suffix{suffix} {}
GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString(Type type) {
switch (type) {
case Type::Float:
return "float";
case Type::Integer:
return "int";
case Type::UnsignedInteger:
return "uint";
}
UNREACHABLE();
return {};
static std::string GetTypeString() {
return "float";
}
/// Gets the GLSL register prefix string, used for declarations and referencing
static std::string GetPrefixString(Type type) {
return "reg_" + GetTypeString(type) + '_';
static std::string GetPrefixString() {
return "reg_";
}
/// Returns a GLSL string representing the current state of the register
std::string GetActiveString() {
declr_type.insert(active_type);
return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix;
}
/// Returns true if the active type is a float
bool IsFloat() const {
return active_type == Type::Float;
}
/// Returns true if the active type is an integer
bool IsInteger() const {
return active_type == Type::Integer;
}
/// Returns the current active type of the register
Type GetActiveType() const {
return active_type;
std::string GetString() const {
return GetPrefixString() + std::to_string(index) + '_' + suffix;
}
/// Returns the index of the register
@@ -240,18 +213,8 @@ public:
return index;
}
/// Returns a set of the declared types of the register
const std::set<Type>& DeclaredTypes() const {
return declr_type;
}
private:
const size_t index;
const std::string float_str;
const std::string integer_str;
ShaderWriter& shader;
Type active_type{Type::Float};
std::set<Type> declr_type;
const std::string& suffix;
};
@@ -297,7 +260,6 @@ public:
* @returns GLSL string corresponding to the register as a float.
*/
std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) {
ASSERT(regs[reg].IsFloat());
return GetRegister(reg, elem);
}
@@ -311,12 +273,8 @@ public:
*/
std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
Register::Size size = Register::Size::Word) {
const std::string func = GetGLSLConversionFunc(
GLSLRegister::Type::Float,
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
std::string value = func + '(' + GetRegister(reg, elem) + ')';
const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
const std::string value{func + '(' + GetRegister(reg, elem) + ')'};
return ConvertIntegerSize(value, size);
}
@@ -355,9 +313,7 @@ public:
u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
ASSERT_MSG(!is_saturated, "Unimplemented");
const std::string func = GetGLSLConversionFunc(
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
GLSLRegister::Type::Float);
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
dest_num_components, value_num_components, dest_elem);
@@ -373,14 +329,7 @@ public:
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
std::string dest = GetRegisterAsFloat(reg);
std::string src = GetInputAttribute(attribute) + GetSwizzle(elem);
if (regs[reg].IsFloat()) {
shader.AddLine(dest + " = " + src + ';');
} else if (regs[reg].IsInteger()) {
shader.AddLine(dest + " = floatBitsToInt(" + src + ");");
} else {
UNREACHABLE();
}
shader.AddLine(dest + " = " + src + ';');
}
/**
@@ -393,7 +342,6 @@ public:
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem);
std::string src = GetRegisterAsFloat(reg);
ASSERT_MSG(regs[reg].IsFloat(), "Output attributes must be set to a float");
shader.AddLine(dest + " = " + src + ';');
}
@@ -434,11 +382,8 @@ public:
/// Add declarations for registers
void GenerateDeclarations(const std::string& suffix) {
for (const auto& reg : regs) {
for (const auto& type : reg.DeclaredTypes()) {
declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' +
reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) +
'_' + suffix + " = 0;");
}
declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
}
declarations.AddNewLine();
@@ -516,21 +461,13 @@ public:
}
private:
/// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
const std::string src_type = GLSLRegister::GetTypeString(src);
std::string dest_type = GLSLRegister::GetTypeString(dest);
dest_type[0] = toupper(dest_type[0]);
return src_type + "BitsTo" + dest_type;
}
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg, unsigned elem) {
if (reg == Register::ZeroIndex) {
return "0";
}
return regs[reg.GetSwizzledIndex(elem)].GetActiveString();
return regs[reg.GetSwizzledIndex(elem)].GetString();
}
/**
@@ -560,7 +497,7 @@ private:
/// Build the GLSL register list.
void BuildRegisterList() {
for (size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, shader, suffix);
regs.emplace_back(index, suffix);
}
}
@@ -1139,6 +1076,15 @@ private:
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
std::string condition =
GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
regs.SetRegisterToInteger(instr.gpr0, true, 0,
'(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
break;
}
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {

View File

@@ -394,7 +394,7 @@ void GameList::RefreshGameDirectory() {
}
void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion) {
const auto callback = [this, recursion](unsigned* num_entries_out, const std::string& directory,
const auto callback = [this, recursion](u64* num_entries_out, const std::string& directory,
const std::string& virtual_name) -> bool {
std::string physical_name = directory + DIR_SEP + virtual_name;