Compare commits

...

113 Commits

Author SHA1 Message Date
Fernando Sahmkow
3b9d89839d texture_cache: Address Feedback 2019-07-05 09:46:53 -04:00
Fernando Sahmkow
30b176f92b texture_cache: Correct Texture Buffer Uploading 2019-07-04 19:38:19 -04:00
ReinUsesLisp
6e1db6b703 texture_cache: Pack sibling queries inside a method 2019-06-29 20:47:46 -03:00
ReinUsesLisp
8eae66907e texture_cache: Use std::vector reservation for sampled_textures 2019-06-29 20:10:31 -03:00
ReinUsesLisp
f6f1a8f26a texture_cache: Style changes 2019-06-29 19:52:37 -03:00
ReinUsesLisp
dd9ace502b texture_cache: Use std::array for siblings_table 2019-06-29 18:54:13 -03:00
ReinUsesLisp
3f3c3ca5f9 texture_cache: Address feedback 2019-06-29 17:29:39 -03:00
Fernando Sahmkow
223ca80753 texture_cache: Correct variable naming. 2019-06-25 19:35:08 -04:00
Fernando Sahmkow
5aeabd9a17 gl_texture_cache: Correct asserts 2019-06-25 19:26:59 -04:00
Fernando Sahmkow
88bc39374f texture_cache: Corrections, documentation and asserts 2019-06-25 18:36:19 -04:00
Fernando Sahmkow
c0abc7124d surface_params: Corrections, asserts and documentation. 2019-06-25 18:03:25 -04:00
Fernando Sahmkow
fb234560b0 copy_params: use constexpr for constructor 2019-06-25 17:42:50 -04:00
Fernando Sahmkow
18d24fbdd0 gl_texture_cache: Corrections and fixes 2019-06-25 17:40:08 -04:00
Fernando Sahmkow
36665ce0b2 gl_resource_manager: Correct MakeStreamCopy 2019-06-25 17:32:04 -04:00
Fernando Sahmkow
58c8a44e7a texture_cache: Query MemoryManager from the system 2019-06-25 17:26:00 -04:00
ReinUsesLisp
7565389700 texture_cache: Include "core/core.h" 2019-06-24 02:15:57 -03:00
ReinUsesLisp
e723441e37 gl_texture_cache: Explicitly add indirect include 2019-06-24 02:13:55 -03:00
ReinUsesLisp
34841a41c3 texture_cache/surface_view: Address feedback 2019-06-24 02:09:56 -03:00
ReinUsesLisp
0837290992 texture_cache/surface_base: Address feedback 2019-06-24 02:08:52 -03:00
ReinUsesLisp
75de730e28 video_core/surface: Address feedback 2019-06-24 02:07:11 -03:00
ReinUsesLisp
10a83653ee decode/texture: Address feedback 2019-06-24 02:05:05 -03:00
ReinUsesLisp
4504302abc renderer_opengl/utils: Remove unused includes and unused forward declaration 2019-06-24 02:03:37 -03:00
ReinUsesLisp
4b2ff1e00e gl_texture_cache: Address some feedback 2019-06-24 02:01:44 -03:00
ReinUsesLisp
0b6df52109 gl_shader_disk_cache: Address feedback 2019-06-24 01:59:32 -03:00
ReinUsesLisp
b8b05a484a gl_shader_decompiler: Address feedback 2019-06-24 01:56:38 -03:00
ReinUsesLisp
4d63f97945 shader_bytecode: Include missing <array> 2019-06-24 01:51:02 -03:00
ReinUsesLisp
de982deb25 common/alignment: Address feedback 2019-06-24 01:47:09 -03:00
Fernando Sahmkow
d1812316e1 texture_cache: Style and Corrections 2019-06-20 21:24:47 -04:00
Fernando Sahmkow
51ba60b27e shader_cache: Correct versioning and size calculation. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
97c8c9f49a texture_cache: Eliminate linear textures fallthrough 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
6acdae0e4c texture_cache: Correct format R16U as sibling 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
d7587842eb texture_cache: Implement texception detection and texture barriers. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
198a0395bb texture_cache: Corrections to buffers and shadow formats use. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
fed773a86c texture_cache: Implement Irregular Views in surfaces 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
082740d34d surface: Correct format S8Z24 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
03d489dcf5 texture_cache: Initialize all siblings to invalid pixel format. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
9422cf7c10 gl_texture_cache: Use Stream Buffers instead of Persistant for Buffer Copies. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
fac3706253 gl_texture_cache: Correct Image Blit 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
7232a1ed16 decoders: correct block calculation 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
3dd7643214 texture_cache: Use siblings textures on Rebuild and fix possible error on blitting 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
4db28f72f6 texture_cache: Remove old rasterizer cache 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
2d83553ea7 texture_cache: Implement siblings texture formats. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
cb728797b0 fermi2d: Correct Origin Mode 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
a56f687793 texture_cache: correct texture buffer on surface params 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
b01f9c8a70 texture_cache: eliminate accelerated depth->color/color->depth copies due to driver instability. 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
561ce29c98 texture_cache: correct mutex locks 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
b7de31ac97 shader_ir: Fix image copy rebase issues 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
6f69f06873 texture_cache: Don't Image Copy if component types differ 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
9f755218a1 texture_cache: move some large methods to cpp files 2019-06-20 21:38:34 -03:00
Fernando Sahmkow
3809041c24 texture_cache: Optimize GetSurface and use references on functions that don't change a surface. 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
60bf761afb texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
228f516bb4 texture_cache uncompress-compress is untopological.
This makes conflicts between non compress and compress textures to be 
auto recycled. It also limits the amount of mipmaps a texture can have 
if it goes above it's limit.
2019-06-20 21:38:33 -03:00
Fernando Sahmkow
9251354152 texture_cache: Correct copying between compressed and uncompressed formats 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
0966665fc2 texture_cache: Only load on recycle with accurate GPU.
Testing so far has proven this to be quite safe as texture memory read 
added a 2-5ms load to the current cache.
2019-06-20 21:38:33 -03:00
Fernando Sahmkow
ea1525dab1 Fix rebase errors 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
bdf9faab33 texture_cache: Handle uncontinuous surfaces. 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
e60ed2bb3e texture_cache: return null surface on invalid address 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
fcac55d5bf texture_cache: Add checks for texture buffers. 2019-06-20 21:38:33 -03:00
Fernando Sahmkow
175aa343ff texture_cache: Fermi2D reform and implement View Mirage
This also does some fixes on compressed textures reinterpret and on the
Fermi2D engine in general.
2019-06-20 21:38:33 -03:00
ReinUsesLisp
1bf4154e7d gl_shader_decompiler: Implement image binding settings 2019-06-20 21:38:33 -03:00
ReinUsesLisp
9097301d92 shader: Implement bindless images 2019-06-20 21:38:33 -03:00
ReinUsesLisp
06c4ce8645 shader: Decode SUST and implement backing image functionality 2019-06-20 21:38:33 -03:00
ReinUsesLisp
007ffbef1c gl_rasterizer: Track texture buffer usage 2019-06-20 21:38:33 -03:00
ReinUsesLisp
58c0d37422 video_core: Make ARB_buffer_storage a required extension 2019-06-20 21:36:12 -03:00
ReinUsesLisp
07f7ce1da2 gl_rasterizer_cache: Use texture buffers to emulate texture buffers 2019-06-20 21:36:12 -03:00
ReinUsesLisp
b8c75a845b maxwell_3d: Partially implement texture buffers as 1D textures 2019-06-20 21:36:12 -03:00
ReinUsesLisp
6c81c8f5b7 gl_shader_decompiler: Allow 1D textures to be texture buffers 2019-06-20 21:36:12 -03:00
ReinUsesLisp
4e81fc8296 shader: Implement texture buffers 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
d267948a73 texture_cache: loose TryReconstructSurface when accurate GPU is not on.
Also corrects some asserts.
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
6162cb922e texture_cache: Document the most important methods. 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
4530511ee4 texture_cache: Try to Reconstruct Surface on bigger than overlap.
This fixes clouds in SMO Cap Kingdom and lens on Cloud Kingdom.
Also moved accurate_gpu setting check to Pick Strategy
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
a79831d9d0 texture_cache: Implement Guard mechanism 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
7731a0e2d1 texture_cache: General Fixes
Fixed ASTC mipmaps loading
Fixed alignment on openGL upload/download
Fixed Block Height Calculation
Removed unalign_height
2019-06-20 21:36:12 -03:00
ReinUsesLisp
c2ed348bdd surface_params: Ensure pitch is always written to avoid surface leaks 2019-06-20 21:36:12 -03:00
ReinUsesLisp
9098905dd1 gl_framebuffer_cache: Use a hashed struct to cache framebuffers 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
d65a4af895 texture_cache return invalid buffer on deactivated color_mask 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
6bd034eae9 engine_upload: Addapt to new Texture Cache 2019-06-20 21:36:12 -03:00
ReinUsesLisp
2131f71573 surface_params: Optimize CreateForTexture
Instead of using Common::AlignUp, use Common::AlignBits to align the
texture compression factor.
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
41b4674458 gl_texture_cache: Make main views be proxy textures instead of a full view. 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
07cc7e0c12 texture_cache: Add ASync Protections 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
1bbc9debfb Remove Framebuffer reconfiguration and restrict rendertarget protection 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
5192521dc3 texture_cache: Implement GPU Dirty Flags 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
94f2be5473 texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
a4a58be2d4 texture_cache: Implement L1_Inner_cache 2019-06-20 21:36:12 -03:00
ReinUsesLisp
345e73f2fe video_core: Use un-shifted block sizes to avoid integer divisions
Instead of storing all block width, height and depths in their shifted
form:

block_width = 1U << block_shift;

Store them like they are provided by the emulated hardware (their
block_shift form). This way we can avoid doing the costly
Common::AlignUp operation to align texture sizes and drop CPU integer
divisions with bitwise logic (defined in Common::AlignBits).
2019-06-20 21:36:12 -03:00
ReinUsesLisp
28d7c2f5a5 texture_cache: Change internal cache from lists to vectors 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
b347543e83 Reduce amount of size calculations. 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
4e2071b6d9 texture_cache: Correct premature texceptions
Due to our current infrastructure, it is possible for a mipmap to be set 
on as a render target before a texception of that mipmap's superset be 
set afterwards. This is problematic as we rely on texture views to set 
up texceptions and protecting render targets targets for 3D texture 
rendering.

One simple solution is to configure framebuffers after texture setup but 
this brings other problems. This solution, forces a reconfiguration of 
the framebuffers after such event happens.
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
ba677ccb5a texture_cache: Implement guest flushing 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
de0b1cb2b2 Fixes to mipmap's process and reconstruct process 2019-06-20 21:36:12 -03:00
ReinUsesLisp
e0002599ac surface_base: Add parenthesis to EmplaceOverview's predicate 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
324e470879 Texture Cache: Implement Blitting and Fermi Copies 2019-06-20 21:36:12 -03:00
ReinUsesLisp
549fd18ac4 surface_view: Add constructor for ViewParams 2019-06-20 21:36:12 -03:00
ReinUsesLisp
16e8625a30 surface_base: Split BreakDown into layered and non-layered variants 2019-06-20 21:36:12 -03:00
ReinUsesLisp
2b30000a1e surface_base: Silence truncation warnings and minor renames and reordering 2019-06-20 21:36:12 -03:00
ReinUsesLisp
03d10ea3b4 copy_params: Use constructor instead of C-like initialization 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
1af4414861 Correct Mipmaps View method in Texture Cache 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
d86f9cd709 Change texture_cache chaching from GPUAddr to CacheAddr
This also reverses the changes to make invalidation and flushing through
the GPU address.
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
b711cdce78 Corrections to Structural Matching
The texture will now be reconstructed if the width only matches on GoB 
alignment.
2019-06-20 21:36:12 -03:00
Fernando Sahmkow
bc930754cc Implement Texture Cache V2 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
3d471e732d Correct Surface Base and Views for new Texture Cache 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
3b26206dbd Add OGLTextureView 2019-06-20 21:36:12 -03:00
Fernando Sahmkow
6b0695b3cd Deglobalize Memory Manager on texture cahe and Implement Invalidation and Flushing using GPUVAddr 2019-06-20 21:36:11 -03:00
ReinUsesLisp
6c410104f4 texture_cache: Remove execution context copies from the texture cache
This is done to simplify the OpenGL implementation, it is needed for
Vulkan.
2019-06-20 21:36:11 -03:00
ReinUsesLisp
fa59a7b4d8 gl_texture_cache: Implement fermi copies 2019-06-20 21:36:11 -03:00
ReinUsesLisp
1b4503c571 texture_cache: Split texture cache into different files 2019-06-20 21:36:11 -03:00
ReinUsesLisp
5f3aacdc37 texture_cache: Move staging buffer into a generic implementation 2019-06-20 21:36:11 -03:00
ReinUsesLisp
2787a0c287 texture_cache: Flush 3D textures in the order they are drawn 2019-06-20 21:36:11 -03:00
ReinUsesLisp
4b396f375c gl_texture_cache: Minor changes 2019-06-20 21:36:11 -03:00
ReinUsesLisp
0cefb7bcb4 gl_texture_cache: Add copy from multiple overlaps into a single surface 2019-06-20 21:36:11 -03:00
ReinUsesLisp
84139586c9 gl_texture_cache: Attach surface textures instead of views 2019-06-20 21:36:11 -03:00
ReinUsesLisp
fb94871791 gl_texture_cache: Add fast copy path 2019-06-20 21:36:11 -03:00
ReinUsesLisp
bab21e8cb3 gl_texture_cache: Initial implementation 2019-06-20 21:36:11 -03:00
63 changed files with 4196 additions and 3269 deletions

View File

@@ -70,6 +70,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
@@ -74,6 +75,7 @@ add_library(common STATIC
assert.h
detached_tasks.cpp
detached_tasks.h
binary_find.h
bit_field.h
bit_util.h
cityhash.cpp

View File

@@ -19,6 +19,12 @@ constexpr T AlignDown(T value, std::size_t size) {
return static_cast<T>(value - value % size);
}
template <typename T>
constexpr T AlignBits(T value, std::size_t align) {
static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align);
}
template <typename T>
constexpr bool Is4KBAligned(T value) {
static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");

21
src/common/binary_find.h Normal file
View File

@@ -0,0 +1,21 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
namespace Common {
template <class ForwardIt, class T, class Compare = std::less<>>
ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
// Note: BOTH type T and the type after ForwardIt is dereferenced
// must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
// This is stricter than lower_bound requirement (see above)
first = std::lower_bound(first, last, value, comp);
return first != last && !comp(value, *first) ? first : last;
}
} // namespace Common

View File

@@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) {
}
#endif
#ifdef _MSC_VER
inline u32 MostSignificantBit32(const u32 value) {
unsigned long result;
_BitScanReverse(&result, value);
return static_cast<u32>(result);
}
inline u32 MostSignificantBit64(const u64 value) {
unsigned long result;
_BitScanReverse64(&result, value);
return static_cast<u32>(result);
}
#else
inline u32 MostSignificantBit32(const u32 value) {
return 31U - static_cast<u32>(__builtin_clz(value));
}
inline u32 MostSignificantBit64(const u64 value) {
return 63U - static_cast<u32>(__builtin_clzll(value));
}
#endif
inline u32 Log2Floor32(const u32 value) {
return MostSignificantBit32(value);
}
inline u32 Log2Ceil32(const u32 value) {
const u32 log2_f = Log2Floor32(value);
return log2_f + ((value ^ (1U << log2_f)) != 0U);
}
inline u32 Log2Floor64(const u64 value) {
return MostSignificantBit64(value);
}
inline u32 Log2Ceil64(const u64 value) {
const u64 log2_f = static_cast<u64>(Log2Floor64(value));
return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL));
}
} // namespace Common

View File

@@ -4,6 +4,7 @@
#pragma once
#include <algorithm>
#include <string>
#if !defined(ARCHITECTURE_x86_64)

View File

@@ -41,12 +41,12 @@ add_library(video_core STATIC
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_framebuffer_cache.cpp
renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
renderer_opengl/gl_rasterizer_cache.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_sampler_cache.cpp
@@ -67,6 +67,8 @@ add_library(video_core STATIC
renderer_opengl/gl_state.h
renderer_opengl/gl_stream_buffer.cpp
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
@@ -88,6 +90,7 @@ add_library(video_core STATIC
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/image.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp
@@ -109,6 +112,13 @@ add_library(video_core STATIC
shader/track.cpp
surface.cpp
surface.h
texture_cache/surface_base.cpp
texture_cache/surface_base.h
texture_cache/surface_params.cpp
texture_cache/surface_params.h
texture_cache/surface_view.cpp
texture_cache/surface_view.h
texture_cache/texture_cache.h
textures/astc.cpp
textures/astc.h
textures/convert.cpp
@@ -116,8 +126,6 @@ add_library(video_core STATIC
textures/decoders.cpp
textures/decoders.h
textures/texture.h
texture_cache.cpp
texture_cache.h
video_core.cpp
video_core.h
)

View File

@@ -36,10 +36,10 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
tmp_buffer.resize(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,

View File

@@ -39,15 +39,15 @@ struct Registers {
}
u32 BlockWidth() const {
return 1U << block_width.Value();
return block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
return block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
return block_depth.Value();
}
} dest;
};

View File

@@ -4,7 +4,6 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -35,21 +34,31 @@ void Fermi2D::HandleSurfaceCopy() {
static_cast<u32>(regs.operation));
// TODO(Subv): Only raw copies are implemented.
ASSERT(regs.operation == Regs::Operation::SrcCopy);
ASSERT(regs.operation == Operation::SrcCopy);
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
const u32 src_blit_x2{
static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
const u32 src_blit_y2{
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
u32 src_blit_x2, src_blit_y2;
if (regs.blit_control.origin == Origin::Corner) {
src_blit_x2 =
static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
src_blit_y2 =
static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
} else {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
Config copy_config;
copy_config.operation = regs.operation;
copy_config.filter = regs.blit_control.filter;
copy_config.src_rect = src_rect;
copy_config.dst_rect = dst_rect;
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
UNIMPLEMENTED();
}
}

View File

@@ -9,6 +9,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/gpu.h"
namespace Tegra {
@@ -38,6 +39,26 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
enum class Origin : u32 {
Center = 0,
Corner = 1,
};
enum class Filter : u32 {
PointSample = 0, // Nearest
Linear = 1,
};
enum class Operation : u32 {
SrcCopyAnd = 0,
ROPAnd = 1,
Blend = 2,
SrcCopy = 3,
ROP = 4,
SrcCopyPremult = 5,
BlendPremult = 6,
};
struct Regs {
static constexpr std::size_t NUM_REGS = 0x258;
@@ -63,32 +84,19 @@ public:
}
u32 BlockWidth() const {
// The block width is stored in log2 format.
return 1 << block_width;
return block_width.Value();
}
u32 BlockHeight() const {
// The block height is stored in log2 format.
return 1 << block_height;
return block_height.Value();
}
u32 BlockDepth() const {
// The block depth is stored in log2 format.
return 1 << block_depth;
return block_depth.Value();
}
};
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
enum class Operation : u32 {
SrcCopyAnd = 0,
ROPAnd = 1,
Blend = 2,
SrcCopy = 3,
ROP = 4,
SrcCopyPremult = 5,
BlendPremult = 6,
};
union {
struct {
INSERT_PADDING_WORDS(0x80);
@@ -105,7 +113,11 @@ public:
INSERT_PADDING_WORDS(0x177);
u32 blit_control;
union {
u32 raw;
BitField<0, 1, Origin> origin;
BitField<4, 1, Filter> filter;
} blit_control;
INSERT_PADDING_WORDS(0x8);
@@ -124,6 +136,13 @@ public:
};
} regs{};
struct Config {
Operation operation;
Filter filter;
Common::Rectangle<u32> src_rect;
Common::Rectangle<u32> dst_rect;
};
private:
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;

View File

@@ -430,14 +430,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
const auto r_type = tic_entry.r_type.Value();
const auto g_type = tic_entry.g_type.Value();
const auto b_type = tic_entry.b_type.Value();
const auto a_type = tic_entry.a_type.Value();
const auto r_type{tic_entry.r_type.Value()};
const auto g_type{tic_entry.g_type.Value()};
const auto b_type{tic_entry.b_type.Value()};
const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);

View File

@@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() {
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.BlockDepth() == 1);
ASSERT(regs.dst_params.BlockDepth() == 0);
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;

View File

@@ -59,11 +59,11 @@ public:
};
u32 BlockHeight() const {
return 1 << block_height;
return block_height.Value();
}
u32 BlockDepth() const {
return 1 << block_depth;
return block_depth.Value();
}
};

View File

@@ -4,6 +4,7 @@
#pragma once
#include <array>
#include <bitset>
#include <optional>
#include <tuple>
@@ -126,6 +127,15 @@ union Sampler {
u64 value{};
};
union Image {
Image() = default;
constexpr explicit Image(u64 value) : value{value} {}
BitField<36, 13, u64> index;
u64 value;
};
} // namespace Tegra::Shader
namespace std {
@@ -344,6 +354,26 @@ enum class TextureMiscMode : u64 {
PTP,
};
enum class SurfaceDataMode : u64 {
P = 0,
D_BA = 1,
};
enum class OutOfBoundsStore : u64 {
Ignore = 0,
Clamp = 1,
Trap = 2,
};
enum class ImageType : u64 {
Texture1D = 0,
TextureBuffer = 1,
Texture1DArray = 2,
Texture2D = 3,
Texture2DArray = 4,
Texture3D = 5,
};
enum class IsberdMode : u64 {
None = 0,
Patch = 1,
@@ -398,7 +428,7 @@ enum class LmemLoadCacheManagement : u64 {
CV = 3,
};
enum class LmemStoreCacheManagement : u64 {
enum class StoreCacheManagement : u64 {
Default = 0,
CG = 1,
CS = 2,
@@ -811,7 +841,7 @@ union Instruction {
} ld_l;
union {
BitField<44, 2, LmemStoreCacheManagement> cache_management;
BitField<44, 2, StoreCacheManagement> cache_management;
} st_l;
union {
@@ -1231,6 +1261,20 @@ union Instruction {
}
} texs;
union {
BitField<28, 1, u64> is_array;
BitField<29, 2, TextureType> texture_type;
BitField<35, 1, u64> aoffi;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> ms; // Multisample?
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
union {
BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
@@ -1280,6 +1324,35 @@ union Instruction {
}
} tlds;
union {
BitField<24, 2, StoreCacheManagement> cache_management;
BitField<33, 3, ImageType> image_type;
BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
BitField<51, 1, u64> is_immediate;
BitField<52, 1, SurfaceDataMode> mode;
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
constexpr std::array<u8, 16> mask = {
0, (R), (G), (R | G), (B), (R | B),
(G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
(B | A), (R | B | A), (G | B | A), (R | G | B | A)};
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
} sust;
union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1371,6 +1444,7 @@ union Instruction {
Attribute attribute;
Sampler sampler;
Image image;
u64 value;
};
@@ -1408,11 +1482,13 @@ public:
TXQ, // Texture Query
TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
EXIT,
IPA,
OUT_R, // Emit vertex/primitive
@@ -1543,6 +1619,7 @@ public:
Synch,
Memory,
Texture,
Image,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1682,11 +1759,13 @@ private:
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),

View File

@@ -202,11 +202,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
}
bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
const GPUVAddr end = start + size;
const std::size_t inner_size = size - 1;
const GPUVAddr end = start + inner_size;
const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
return range == size;
return range == inner_size;
}
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {

View File

@@ -10,6 +10,10 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
enum class LoadCallbackStage {
@@ -46,8 +50,7 @@ public:
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
const Tegra::Engines::Fermi2D::Config& copy_config) {
return false;
}

View File

@@ -0,0 +1,75 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include "common/cityhash.h"
#include "common/scope_exit.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
const auto [entry, is_cache_miss] = cache.try_emplace(key);
auto& framebuffer{entry->second};
if (is_cache_miss) {
framebuffer = CreateFramebuffer(key);
}
return framebuffer.handle;
}
OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
OGLFramebuffer framebuffer;
framebuffer.Create();
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
local_state.draw.draw_framebuffer = framebuffer.handle;
local_state.ApplyFramebufferState();
if (key.is_single_buffer) {
if (key.color_attachments[0] != GL_NONE && key.colors[0]) {
key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER);
glDrawBuffer(key.color_attachments[0]);
} else {
glDrawBuffer(GL_NONE);
}
} else {
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
if (key.colors[index]) {
key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
GL_DRAW_FRAMEBUFFER);
}
}
glDrawBuffers(key.colors_count, key.color_attachments.data());
}
if (key.zeta) {
key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
GL_DRAW_FRAMEBUFFER);
}
return framebuffer;
}
std::size_t FramebufferCacheKey::Hash() const {
static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
return static_cast<std::size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors,
zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count,
rhs.color_attachments, rhs.colors, rhs.zeta);
}
} // namespace OpenGL

View File

@@ -0,0 +1,68 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <unordered_map>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
struct alignas(sizeof(u64)) FramebufferCacheKey {
bool is_single_buffer = false;
bool stencil_enable = false;
u16 colors_count = 0;
std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
View zeta;
std::size_t Hash() const;
bool operator==(const FramebufferCacheKey& rhs) const;
bool operator!=(const FramebufferCacheKey& rhs) const {
return !operator==(rhs);
}
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::FramebufferCacheKey> {
std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace OpenGL {
class FramebufferCacheOpenGL {
public:
FramebufferCacheOpenGL();
~FramebufferCacheOpenGL();
GLuint GetFramebuffer(const FramebufferCacheKey& key);
private:
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
OpenGLState local_state;
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};
} // namespace OpenGL

View File

@@ -29,8 +29,10 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using SurfaceType = VideoCore::Surface::SurfaceType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -78,29 +80,9 @@ struct DrawParameters {
}
};
struct FramebufferCacheKey {
bool is_single_buffer = false;
bool stencil_enable = false;
std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};
u32 colors_count = 0;
GLuint zeta = 0;
auto Tie() const {
return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
zeta);
}
bool operator<(const FramebufferCacheKey& rhs) const {
return Tie() < rhs.Tie();
}
};
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system, emu_window, device},
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
global_cache{*this}, system{system}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
OpenGLState::ApplyDefaultState();
@@ -121,11 +103,6 @@ void RasterizerOpenGL::CheckExtensions() {
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
@@ -302,8 +279,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
SetupDrawConstBuffers(stage_enum, shader);
SetupGlobalRegions(stage_enum, shader);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -321,11 +304,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
SetupGlobalRegions(stage_enum, shader);
SetupTextures(stage_enum, shader, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -351,44 +329,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
gpu.dirty_flags.shaders = false;
}
void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
OpenGLState& current_state) {
const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
auto& framebuffer = entry->second;
if (is_cache_miss)
framebuffer.Create();
current_state.draw.draw_framebuffer = framebuffer.handle;
current_state.ApplyFramebufferState();
if (!is_cache_miss)
return;
if (fbkey.is_single_buffer) {
if (fbkey.color_attachments[0] != GL_NONE) {
glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
0);
}
glDrawBuffer(fbkey.color_attachments[0]);
} else {
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
if (fbkey.colors[index]) {
glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
fbkey.colors[index], 0);
}
}
glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
}
if (fbkey.zeta) {
GLenum zeta_attachment =
fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
}
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = system.GPU().Maxwell3D().regs;
@@ -478,9 +418,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
}
current_framebuffer_config_state = fb_config_state;
Surface depth_surface;
texture_cache.GuardRenderTargets(true);
View depth_surface{};
if (using_depth_fb) {
depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
} else {
texture_cache.SetEmptyDepthBuffer();
}
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -493,13 +437,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
if (using_color_fb) {
if (single_color_target) {
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
Surface color_surface =
res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
View color_surface{
texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)};
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
texture_cache.MarkColorBufferInUse(*single_color_target);
// Workaround for and issue in nvidia drivers
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
@@ -508,16 +452,21 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
fbkey.is_single_buffer = true;
fbkey.color_attachments[0] =
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
fbkey.colors[0] = color_surface;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
if (index != *single_color_target) {
texture_cache.SetEmptyColorBuffer(index);
}
}
} else {
// Multiple color attachments are enabled
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)};
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
texture_cache.MarkColorBufferInUse(index);
// Enable sRGB only for supported formats
// Workaround for and issue in nvidia drivers
// https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
@@ -527,8 +476,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
fbkey.color_attachments[index] =
GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
fbkey.colors[index] =
color_surface != nullptr ? color_surface->Texture().handle : 0;
fbkey.colors[index] = color_surface;
}
fbkey.is_single_buffer = false;
fbkey.colors_count = regs.rt_control.count;
@@ -541,14 +489,16 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
depth_surface->MarkAsModified(true, res_cache);
texture_cache.MarkDepthBufferInUse();
fbkey.zeta = depth_surface->Texture().handle;
fbkey.zeta = depth_surface;
fbkey.stencil_enable = regs.stencil_enable &&
depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
}
SetupCachedFramebuffer(fbkey, current_state);
texture_cache.GuardRenderTargets(false);
current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
SyncViewport(current_state);
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
@@ -630,6 +580,7 @@ void RasterizerOpenGL::Clear() {
clear_state.ApplyDepth();
clear_state.ApplyStencilTest();
clear_state.ApplyViewport();
clear_state.ApplyFramebufferState();
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -652,7 +603,6 @@ void RasterizerOpenGL::DrawArrays() {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
ConfigureFramebuffers(state);
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -697,16 +647,22 @@ void RasterizerOpenGL::DrawArrays() {
SetupVertexBuffer(vao);
DrawParameters params = SetupDraw();
texture_cache.GuardSamplers(true);
SetupShaders(params.primitive_mode);
texture_cache.GuardSamplers(false);
ConfigureFramebuffers(state);
buffer_cache.Unmap();
shader_program_manager->ApplyTo(state);
state.Apply();
res_cache.SignalPreDrawCall();
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
params.DispatchDraw();
res_cache.SignalPostDrawCall();
accelerate_draw = AccelDraw::Disabled;
}
@@ -718,7 +674,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
if (!addr || !size) {
return;
}
res_cache.FlushRegion(addr, size);
texture_cache.FlushRegion(addr, size);
global_cache.FlushRegion(addr, size);
}
@@ -727,23 +683,24 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
if (!addr || !size) {
return;
}
res_cache.InvalidateRegion(addr, size);
texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
FlushRegion(addr, size);
if (Settings::values.use_accurate_gpu_emulation) {
FlushRegion(addr, size);
}
InvalidateRegion(addr, size);
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
texture_cache.DoFermiCopy(src, dst, copy_config);
return true;
}
@@ -755,7 +712,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
const auto surface{
texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}
@@ -771,7 +729,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
}
screen_info.display_texture = surface->Texture().handle;
screen_info.display_texture = surface->GetTexture();
return true;
}
@@ -837,8 +795,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -847,6 +805,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
Tegra::Texture::FullTextureInfo texture;
@@ -860,18 +820,26 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
}
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
auto& unit{state.texture_units[current_bindpoint]};
unit.sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
surface->Texture(entry.IsArray()).handle;
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) {
if (view->GetSurfaceParams().IsBuffer()) {
// Record that this texture is a texture buffer.
texture_buffer_usage.set(bindpoint);
} else {
// Apply swizzle to textures that are not buffers.
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
}
state.texture_units[current_bindpoint].texture = view->GetTexture();
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
state.texture_units[current_bindpoint].texture = 0;
unit.texture = 0;
}
}
return texture_buffer_usage;
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {

View File

@@ -23,14 +23,15 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
namespace Core {
@@ -41,11 +42,14 @@ namespace Core::Frontend {
class EmuWindow;
}
namespace Tegra {
class MemoryManager;
}
namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
@@ -61,8 +65,7 @@ public:
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) override;
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
@@ -95,6 +98,8 @@ private:
/**
* Configures the color and depth framebuffer states.
* @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure
* again. Used by the texture cache to solve texception conflicts
* @param use_color_fb If true, configure color framebuffers.
* @param using_depth_fb If true, configure the depth/stencil framebuffer.
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
@@ -118,9 +123,10 @@ private:
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings);
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
@@ -181,10 +187,11 @@ private:
const Device device;
OpenGLState state;
RasterizerCacheOpenGL res_cache;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
SamplerCacheOpenGL sampler_cache;
FramebufferCacheOpenGL framebuffer_cache;
Core::System& system;
ScreenInfo& screen_info;
@@ -195,7 +202,6 @@ private:
OGLVertexArray>
vertex_array_cache;
std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
FramebufferConfigState current_framebuffer_config_state;
std::pair<bool, bool> current_depth_stencil_usage{};
@@ -218,8 +224,6 @@ private:
void SetupShaders(GLenum primitive_mode);
void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;

File diff suppressed because it is too large Load Diff

View File

@@ -1,572 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace OpenGL {
class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct SurfaceParams {
enum class SurfaceClass {
Uploaded,
RenderTarget,
DepthBuffer,
Copy,
};
static std::string SurfaceTargetName(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
return "Texture1D";
case SurfaceTarget::Texture2D:
return "Texture2D";
case SurfaceTarget::Texture3D:
return "Texture3D";
case SurfaceTarget::Texture1DArray:
return "Texture1DArray";
case SurfaceTarget::Texture2DArray:
return "Texture2DArray";
case SurfaceTarget::TextureCubemap:
return "TextureCubemap";
case SurfaceTarget::TextureCubeArray:
return "TextureCubeArray";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return fmt::format("TextureUnknown({})", static_cast<u32>(target));
}
}
u32 GetFormatBpp() const {
return VideoCore::Surface::GetFormatBpp(pixel_format);
}
/// Returns the rectangle corresponding to this surface
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
const size_t uncompressed_size{
Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
height, depth, block_height, block_depth)};
// Divide by compression_factor^2, as height and width are factored by this
return uncompressed_size / (compression_factor * compression_factor);
}
/// Returns the size of this surface as an OpenGL texture in bytes
std::size_t SizeInBytesGL() const {
return SizeInBytesRaw(true);
}
/// Returns the size of this surface as a cube face in bytes
std::size_t SizeInBytesCubeFace() const {
return size_in_bytes / 6;
}
/// Returns the size of this surface as an OpenGL cube face in bytes
std::size_t SizeInBytesCubeFaceGL() const {
return size_in_bytes_gl / 6;
}
/// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
std::size_t MemorySize() const {
std::size_t size = InnerMemorySize(false, is_layered);
if (is_layered)
return size * depth;
return size;
}
/// Returns true if the parameters constitute a valid rasterizer surface.
bool IsValid() const {
return gpu_addr && host_ptr && height && width;
}
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
return InnerMemorySize(false, true);
}
/// Returns the size of a layer of this surface in OpenGL.
std::size_t LayerSizeGL(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, true, is_layered, false);
}
std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
if (is_layered)
return size * depth;
return size;
}
std::size_t GetMipmapLevelOffset(u32 mip_level) const {
std::size_t offset = 0;
for (u32 i = 0; i < mip_level; i++)
offset += InnerMipmapMemorySize(i, false, is_layered);
return offset;
}
std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
std::size_t offset = 0;
for (u32 i = 0; i < mip_level; i++)
offset += InnerMipmapMemorySize(i, true, is_layered);
return offset;
}
std::size_t GetMipmapSingleSize(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, false, is_layered);
}
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
u32 MipWidthGobAligned(u32 mip_level) const {
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
}
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
u32 MipDepth(u32 mip_level) const {
return is_layered ? depth : std::max(1U, depth >> mip_level);
}
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
u32 MipBlockHeight(u32 mip_level) const {
if (mip_level == 0)
return block_height;
u32 alt_height = MipHeight(mip_level);
u32 h = GetDefaultBlockHeight(pixel_format);
u32 blocks_in_y = (alt_height + h - 1) / h;
u32 bh = 16;
while (bh > 1 && blocks_in_y <= bh * 4) {
bh >>= 1;
}
return bh;
}
u32 MipBlockDepth(u32 mip_level) const {
if (mip_level == 0) {
return block_depth;
}
if (is_layered) {
return 1;
}
const u32 mip_depth = MipDepth(mip_level);
u32 bd = 32;
while (bd > 1 && mip_depth * 2 <= bd) {
bd >>= 1;
}
if (bd == 32) {
const u32 bh = MipBlockHeight(mip_level);
if (bh >= 4) {
return 16;
}
}
return bd;
}
u32 RowAlign(u32 mip_level) const {
const u32 m_width = MipWidth(mip_level);
const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
return (1U << l2);
}
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
/// Creates SurfaceParams from a framebuffer configuration
static SurfaceParams CreateForFramebuffer(std::size_t index);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
/// Creates SurfaceParams for a Fermi2D surface copy
static SurfaceParams CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config);
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
other.depth, other.is_tiled)) {
if (!is_tiled)
return true;
return std::tie(block_height, block_depth, tile_width_spacing) ==
std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
}
return false;
}
/// Initializes parameters for caching, should be called after everything has been initialized
void InitCacheParameters(GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
return "3D";
case SurfaceTarget::Texture1DArray:
return "1DArray";
case SurfaceTarget::Texture2DArray:
return "2DArray";
case SurfaceTarget::TextureCubemap:
return "Cube";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return fmt::format("TUK({})", static_cast<u32>(target));
}
}
std::string ClassName() const {
switch (identity) {
case SurfaceClass::Uploaded:
return "UP";
case SurfaceClass::RenderTarget:
return "RT";
case SurfaceClass::DepthBuffer:
return "DB";
case SurfaceClass::Copy:
return "CP";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
UNREACHABLE();
return fmt::format("CUK({})", static_cast<u32>(identity));
}
}
std::string IdentityString() const {
return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
}
bool is_tiled;
u32 block_width;
u32 block_height;
u32 block_depth;
u32 tile_width_spacing;
PixelFormat pixel_format;
ComponentType component_type;
SurfaceType type;
u32 width;
u32 height;
u32 depth;
u32 unaligned_height;
u32 pitch;
SurfaceTarget target;
SurfaceClass identity;
u32 max_mip_level;
bool is_layered;
bool is_array;
bool srgb_conversion;
// Parameters used for caching
u8* host_ptr;
GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
// Render target specific parameters, not used in caching
struct {
u32 index;
u32 array_mode;
u32 volume;
u32 layer_stride;
u32 base_layer;
} rt;
private:
std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
bool uncompressed = false) const;
std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
bool uncompressed = false) const;
};
}; // namespace OpenGL
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res;
res.state = params;
res.state.identity = {}; // Ignore the origin of the texture
res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
res.state.rt = {}; // Ignore rt config in caching
return res;
}
};
namespace std {
template <>
struct hash<SurfaceReserveKey> {
std::size_t operator()(const SurfaceReserveKey& k) const {
return k.Hash();
}
};
} // namespace std
namespace OpenGL {
class RasterizerOpenGL;
// This is used to store temporary big buffers,
// instead of creating/destroying all the time
struct RasterizerTemporaryMemory {
std::vector<std::vector<u8>> gl_buffer;
};
class CachedSurface final : public RasterizerCacheObject {
public:
explicit CachedSurface(const SurfaceParams& params);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return cached_size_in_bytes;
}
std::size_t GetMemorySize() const {
return memory_size;
}
const OGLTexture& Texture() const {
return texture;
}
const OGLTexture& Texture(bool as_array) {
if (params.is_array == as_array) {
return texture;
} else {
EnsureTextureDiscrepantView();
return discrepant_view;
}
}
GLenum Target() const {
return gl_target;
}
const SurfaceParams& GetSurfaceParams() const {
return params;
}
// Read/Write data in Switch memory to/from gl_buffer
void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
// Upload data in gl_buffer to this surface's texture
void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
GLuint draw_fb_handle);
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
void MarkReinterpreted() {
reinterpreted = true;
}
bool IsReinterpreted() const {
return reinterpreted;
}
void MarkForReload(bool reload) {
must_reload = reload;
}
bool MustReload() const {
return must_reload;
}
bool IsUploaded() const {
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
}
private:
void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
GLuint read_fb_handle, GLuint draw_fb_handle);
void EnsureTextureDiscrepantView();
OGLTexture texture;
OGLTexture discrepant_view;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
VAddr cpu_addr{};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
/// Get the depth surface based on the framebuffer configuration
Surface GetDepthBufferSurface(bool preserve_contents);
/// Get the color surface based on the framebuffer configuration and the specified render target
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Tries to find a framebuffer using on the provided CPU address
Surface TryFindFramebufferSurface(const u8* host_ptr) const;
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect);
void SignalPreDrawCall();
void SignalPostDrawCall();
protected:
void FlushObjectInner(const Surface& object) override {
object->FlushGLBuffer(temporal_memory);
}
private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
/// Gets an uncached surface, creating it if need be
Surface GetUncachedSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
/// Reserves a unique surface that can be reused later
void ReserveSurface(const Surface& surface);
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
// Partialy reinterpret a surface based on a triggering_surface that collides with it.
// returns true if the reinterpret was successful, false in case it was not.
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
bool texception = false;
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
RasterizerTemporaryMemory temporal_memory;
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
object->GetCacheAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
SurfaceIntervalCache reinterpreted_surfaces;
void RegisterReinterpretSurface(Surface reinterpret_surface) {
auto interval = GetReinterpretInterval(reinterpret_surface);
reinterpreted_surfaces.insert({interval, reinterpret_surface});
reinterpret_surface->MarkReinterpreted();
}
Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
return pair.second;
}
return nullptr;
}
void Register(const Surface& object) override {
RasterizerCache<Surface>::Register(object);
}
/// Unregisters an object from the cache
void Unregister(const Surface& object) override {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
RasterizerCache<Surface>::Unregister(object);
}
};
} // namespace OpenGL

View File

@@ -33,6 +33,24 @@ void OGLTexture::Release() {
handle = 0;
}
void OGLTextureView::Create() {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
}
void OGLTextureView::Release() {
if (handle == 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
void OGLSampler::Create() {
if (handle != 0)
return;
@@ -130,6 +148,12 @@ void OGLBuffer::Release() {
handle = 0;
}
void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
}
void OGLSync::Create() {
if (handle != 0)
return;

View File

@@ -36,6 +36,31 @@ public:
GLuint handle = 0;
};
class OGLTextureView : private NonCopyable {
public:
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
~OGLTextureView() {
Release();
}
OGLTextureView& operator=(OGLTextureView&& o) noexcept {
Release();
handle = std::exchange(o.handle, 0);
return *this;
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
/// Deletes the internal OpenGL resource
void Release();
GLuint handle = 0;
};
class OGLSampler : private NonCopyable {
public:
OGLSampler() = default;
@@ -161,6 +186,9 @@ public:
/// Deletes the internal OpenGL resource
void Release();
// Converts the buffer into a stream copy buffer with a fixed size
void MakeStreamCopy(std::size_t buffer_size);
GLuint handle = 0;
};

View File

@@ -103,15 +103,22 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
std::size_t size = start_offset * sizeof(u64);
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if (instruction == 0 || (instruction >> 52) == 0x50b) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
size += sizeof(u64);
offset++;
@@ -168,8 +175,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
@@ -186,6 +197,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
for (const auto& image : entries.images) {
source +=
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
}
// Transform 1D textures to texture samplers by declaring its preprocessor macros.
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
if (!texture_buffer_usage.test(i)) {
continue;
}
source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
}
if (program_type == Maxwell::ShaderProgram::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
@@ -261,20 +284,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(primitive_mode, base_bindings);
program = TryLoadProgram(variant);
if (!program) {
program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
@@ -283,6 +304,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
handle = program->handle;
}
auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -290,43 +312,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
return {handle, base_bindings};
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
auto& programs = entry->second;
switch (primitive_mode) {
switch (variant.primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines, variant);
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines_adjacency, variant);
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles, variant);
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles_adjacency, variant);
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
}
}
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode) {
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
const ProgramVariant& variant) {
if (target_program) {
return target_program->handle;
}
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
target_program = TryLoadProgram(primitive_mode, base_bindings);
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
target_program = TryLoadProgram(variant);
if (!target_program) {
target_program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
target_program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
@@ -334,18 +355,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
return target_program->handle;
};
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
BaseBindings base_bindings) const {
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
const auto found = precompiled_programs.find(GetUsage(variant));
if (found == precompiled_programs.end()) {
return {};
}
return found->second;
}
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
BaseBindings base_bindings) const {
return {unique_identifier, base_bindings, primitive_mode};
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
ShaderDiskCacheUsage usage;
usage.unique_identifier = unique_identifier;
usage.variant = variant;
return usage;
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -411,8 +433,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
if (!shader) {
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
unspecialized.program_type, usage.bindings,
usage.primitive, true);
unspecialized.program_type, usage.variant, true);
}
std::scoped_lock lock(mutex);

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <set>
#include <tuple>
@@ -67,8 +68,7 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -82,15 +82,14 @@ private:
CachedProgram triangles_adjacency;
};
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
GLuint GetGeometryShader(const ProgramVariant& variant);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode);
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
u8* host_ptr{};
VAddr cpu_addr{};
@@ -104,8 +103,8 @@ private:
std::string code;
std::unordered_map<BaseBindings, CachedProgram> programs;
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
std::unordered_map<ProgramVariant, CachedProgram> programs;
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
std::unordered_map<u32, GLuint> cbuf_resource_cache;
std::unordered_map<u32, GLuint> gmem_resource_cache;

View File

@@ -180,6 +180,7 @@ public:
DeclareGlobalMemory();
DeclareSamplers();
DeclarePhysicalAttributeReader();
DeclareImages();
code.AddLine("void execute_{}() {{", suffix);
++code.scope;
@@ -234,6 +235,9 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
}
for (const auto& gmem_pair : ir.GetGlobalMemory()) {
const auto& [base, usage] = gmem_pair;
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
@@ -453,9 +457,13 @@ private:
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
std::string sampler_type = [&sampler] {
const std::string name{GetSampler(sampler)};
const std::string description{"layout (binding = SAMPLER_BINDING_" +
std::to_string(sampler.GetIndex()) + ") uniform"};
std::string sampler_type = [&]() {
switch (sampler.GetType()) {
case Tegra::Shader::TextureType::Texture1D:
// Special cased, read below.
return "sampler1D";
case Tegra::Shader::TextureType::Texture2D:
return "sampler2D";
@@ -475,8 +483,19 @@ private:
sampler_type += "Shadow";
}
code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(),
sampler_type, GetSampler(sampler));
if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) {
// 1D textures can be aliased to texture buffers, hide the declarations behind a
// preprocessor flag and use one or the other from the GPU state. This has to be
// done because shaders don't have enough information to determine the texture type.
EmitIfdefIsBuffer(sampler);
code.AddLine("{} samplerBuffer {};", description, name);
code.AddLine("#else");
code.AddLine("{} {} {};", description, sampler_type, name);
code.AddLine("#endif");
} else {
// The other texture types (2D, 3D and cubes) don't have this issue.
code.AddLine("{} {} {};", description, sampler_type, name);
}
}
if (!samplers.empty()) {
code.AddNewLine();
@@ -516,6 +535,37 @@ private:
code.AddNewLine();
}
void DeclareImages() {
const auto& images{ir.GetImages()};
for (const auto& image : images) {
const std::string image_type = [&]() {
switch (image.GetType()) {
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
case Tegra::Shader::ImageType::TextureBuffer:
return "bufferImage";
case Tegra::Shader::ImageType::Texture1DArray:
return "image1DArray";
case Tegra::Shader::ImageType::Texture2D:
return "image2D";
case Tegra::Shader::ImageType::Texture2DArray:
return "image2DArray";
case Tegra::Shader::ImageType::Texture3D:
return "image3D";
default:
UNREACHABLE();
return "image1D";
}
}();
code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform "
"{} {};",
image.GetIndex(), image_type, GetImage(image));
}
if (!images.empty()) {
code.AddNewLine();
}
}
void VisitBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
@@ -1439,13 +1489,61 @@ private:
else if (next < count)
expr += ", ";
}
// Store a copy of the expression without the lod to be used with texture buffers
std::string expr_buffer = expr;
if (meta->lod) {
expr += ", ";
expr += CastOperand(Visit(meta->lod), Type::Int);
}
expr += ')';
expr += GetSwizzle(meta->element);
return expr + GetSwizzle(meta->element);
expr_buffer += ')';
expr_buffer += GetSwizzle(meta->element);
const std::string tmp{code.GenerateTemporary()};
EmitIfdefIsBuffer(meta->sampler);
code.AddLine("float {} = {};", tmp, expr_buffer);
code.AddLine("#else");
code.AddLine("float {} = {};", tmp, expr);
code.AddLine("#endif");
return tmp;
}
std::string ImageStore(Operation operation) {
constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
std::string expr = "imageStore(";
expr += GetImage(meta.image);
expr += ", ";
const std::size_t coords_count{operation.GetOperandsCount()};
expr += constructors.at(coords_count - 1);
for (std::size_t i = 0; i < coords_count; ++i) {
expr += VisitOperand(operation, i, Type::Int);
if (i + 1 < coords_count) {
expr += ", ";
}
}
expr += "), ";
const std::size_t values_count{meta.values.size()};
UNIMPLEMENTED_IF(values_count != 4);
expr += "vec4(";
for (std::size_t i = 0; i < values_count; ++i) {
expr += Visit(meta.values.at(i));
if (i + 1 < values_count) {
expr += ", ";
}
}
expr += "));";
code.AddLine(expr);
return {};
}
std::string Branch(Operation operation) {
@@ -1688,6 +1786,8 @@ private:
&GLSLDecompiler::TextureQueryLod,
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::ImageStore,
&GLSLDecompiler::Branch,
&GLSLDecompiler::PushFlowStack,
&GLSLDecompiler::PopFlowStack,
@@ -1756,6 +1856,14 @@ private:
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
}
void EmitIfdefIsBuffer(const Sampler& sampler) {
code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
}
std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
}

View File

@@ -27,6 +27,7 @@ struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
@@ -74,6 +75,7 @@ struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};

View File

@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
Dump,
};
constexpr u32 NativeVersion = 1;
constexpr u32 NativeVersion = 4;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 12);
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
namespace {
@@ -332,11 +332,28 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 images_count{};
if (!LoadObjectFromPrecompiled(images_count)) {
return {};
}
for (u32 i = 0; i < images_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
}
u32 global_memory_count{};
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
@@ -360,7 +377,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
@@ -400,6 +416,18 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
return false;
}
for (const auto& image : entries.images) {
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
return false;
}

View File

@@ -4,6 +4,7 @@
#pragma once
#include <bitset>
#include <optional>
#include <string>
#include <tuple>
@@ -30,22 +31,26 @@ class IOFile;
namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Allocated bindings used by an OpenGL shader program
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using TextureBufferUsage = std::bitset<64>;
/// Allocated bindings used by an OpenGL shader program.
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
u32 image{};
bool operator==(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
return std::tie(cbuf, gmem, sampler, image) ==
std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
}
bool operator!=(const BaseBindings& rhs) const {
@@ -53,15 +58,29 @@ struct BaseBindings {
}
};
/// Describes how a shader is used
/// Describes the different variants a single program can be compiled.
struct ProgramVariant {
BaseBindings base_bindings;
GLenum primitive_mode{};
TextureBufferUsage texture_buffer_usage{};
bool operator==(const ProgramVariant& rhs) const {
return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
}
bool operator!=(const ProgramVariant& rhs) const {
return !operator==(rhs);
}
};
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
BaseBindings bindings;
GLenum primitive{};
ProgramVariant variant;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, bindings, primitive) ==
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -76,7 +95,19 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
return static_cast<std::size_t>(bindings.cbuf) ^
(static_cast<std::size_t>(bindings.gmem) << 8) ^
(static_cast<std::size_t>(bindings.sampler) << 16) ^
(static_cast<std::size_t>(bindings.image) << 24);
}
};
template <>
struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
(static_cast<std::size_t>(variant.primitive_mode) << 6);
}
};
@@ -84,7 +115,7 @@ template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
std::hash<OpenGL::ProgramVariant>()(usage.variant);
}
};
@@ -275,26 +306,17 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
bool LoadObjectFromPrecompiled(bool& object) {
u8 value;
const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
if (!read_ok) {
return false;
}
object = value != 0;
return true;
}
// Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// Stores whole precompiled cache which will be read from/saved to the precompiled cache file
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
bool tried_to_load{};
};

View File

@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
bool use_persistent)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
if (GLAD_GL_ARB_buffer_storage) {
if (use_persistent) {
persistent = true;
coherent = prefer_coherent;
const GLbitfield flags =

View File

@@ -13,7 +13,8 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
bool use_persistent = true);
~OGLStreamBuffer();
GLuint GetHandle() const;

View File

@@ -0,0 +1,614 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/convert.h"
#include "video_core/textures/texture.h"
namespace OpenGL {
using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::ComponentType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
namespace {
struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
ComponentType component_type;
bool compressed;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
true}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
true}, // BC6H_SF16
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
{GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
{GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
{GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
// Compressed sRGB formats
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
false}, // Z16
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
ComponentType::Float, false}, // Z32FS8
}};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
ASSERT(component_type == format.component_type);
return format;
}
GLenum GetTextureTarget(const SurfaceTarget& target) {
switch (target) {
case SurfaceTarget::TextureBuffer:
return GL_TEXTURE_BUFFER;
case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
case SurfaceTarget::Texture3D:
return GL_TEXTURE_3D;
case SurfaceTarget::Texture1DArray:
return GL_TEXTURE_1D_ARRAY;
case SurfaceTarget::Texture2DArray:
return GL_TEXTURE_2D_ARRAY;
case SurfaceTarget::TextureCubemap:
return GL_TEXTURE_CUBE_MAP;
case SurfaceTarget::TextureCubeArray:
return GL_TEXTURE_CUBE_MAP_ARRAY;
}
UNREACHABLE();
return {};
}
GLint GetSwizzleSource(SwizzleSource source) {
switch (source) {
case SwizzleSource::Zero:
return GL_ZERO;
case SwizzleSource::R:
return GL_RED;
case SwizzleSource::G:
return GL_GREEN;
case SwizzleSource::B:
return GL_BLUE;
case SwizzleSource::A:
return GL_ALPHA;
case SwizzleSource::OneInt:
case SwizzleSource::OneFloat:
return GL_ONE;
}
UNREACHABLE();
return GL_NONE;
}
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
if (params.num_levels == 1) {
glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
}
}
OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
OGLBuffer& texture_buffer) {
OGLTexture texture;
texture.Create(target);
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
break;
case SurfaceTarget::TextureBuffer:
texture_buffer.Create();
glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
nullptr, GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
params.height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
params.height, params.depth);
break;
default:
UNREACHABLE();
}
ApplyTextureDefaults(params, texture.handle);
return texture;
}
} // Anonymous namespace
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
is_compressed = tuple.compressed;
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
main_view = CreateViewInner(
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
true);
}
CachedSurface::~CachedSurface() = default;
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Download);
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.emulated_levels; ++level) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
if (is_compressed) {
glGetCompressedTextureImage(texture.handle, level,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
} else {
glGetTextureImage(texture.handle, level, format, type,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
}
}
}
void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.emulated_levels; ++level) {
UploadTextureMipmap(level, staging_buffer);
}
}
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
auto compression_type = params.GetCompressionType();
const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level);
const u8* buffer{staging_buffer.data() + mip_offset};
if (is_compressed) {
const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
switch (params.target) {
case SurfaceTarget::Texture2D:
glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
internal_format, image_size, buffer);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
static_cast<GLsizei>(params.GetMipDepth(level)),
internal_format, image_size, buffer);
break;
case SurfaceTarget::TextureCubemap: {
const std::size_t layer_size{params.GetHostLayerSize(level)};
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)), 1,
internal_format, static_cast<GLsizei>(layer_size),
buffer);
buffer += layer_size;
}
break;
}
default:
UNREACHABLE();
}
} else {
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
buffer);
break;
case SurfaceTarget::TextureBuffer:
ASSERT(level == 0);
glNamedBufferSubData(texture_buffer.handle, 0,
params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
params.GetMipHeight(level), format, type, buffer);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureSubImage3D(
texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
static_cast<GLsizei>(params.GetMipHeight(level)),
static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
break;
case SurfaceTarget::TextureCubemap:
for (std::size_t face = 0; face < params.depth; ++face) {
glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
params.GetMipWidth(level), params.GetMipHeight(level), 1,
format, type, buffer);
buffer += params.GetHostLayerSize(level);
}
break;
default:
UNREACHABLE();
}
}
}
void CachedSurface::DecorateSurfaceName() {
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
}
View CachedSurface::CreateView(const ViewParams& view_key) {
return CreateViewInner(view_key, false);
}
View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
views[view_key] = view;
if (!is_proxy)
view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
return view;
}
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
const bool is_proxy)
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
target = GetTextureTarget(params.target);
if (!is_proxy) {
texture_view = CreateTextureView();
}
swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
}
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
ASSERT(params.num_layers == 1 && params.num_levels == 1);
const auto& owner_params = surface.GetSurfaceParams();
switch (owner_params.target) {
case SurfaceTarget::Texture1D:
glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
params.base_level);
break;
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
params.base_layer);
break;
default:
UNIMPLEMENTED();
}
}
void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
if (new_swizzle == swizzle)
return;
swizzle = new_swizzle;
const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
GetSwizzleSource(z_source),
GetSwizzleSource(w_source)};
const GLuint handle = GetTexture();
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
const auto& owner_params = surface.GetSurfaceParams();
OGLTextureView texture_view;
texture_view.Create();
const GLuint handle{texture_view.handle};
const FormatTuple& tuple{
GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
ApplyTextureDefaults(owner_params, handle);
return texture_view;
}
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
const Device& device)
: TextureCacheBase{system, rasterizer} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(gpu_addr, params);
}
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
const VideoCommon::CopyParams& copy_params) {
const auto& src_params = src_surface->GetSurfaceParams();
const auto& dst_params = dst_surface->GetSurfaceParams();
if (src_params.type != dst_params.type) {
// A fallback is needed
return;
}
const auto src_handle = src_surface->GetTexture();
const auto src_target = src_surface->GetTarget();
const auto dst_handle = dst_surface->GetTexture();
const auto dst_target = dst_surface->GetTarget();
glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
copy_params.dest_z, copy_params.width, copy_params.height,
copy_params.depth);
}
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle;
state.Apply();
u32 buffers{};
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
if (src_params.type == SurfaceType::ColorTexture) {
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
buffers = GL_COLOR_BUFFER_BIT;
} else if (src_params.type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT;
} else if (src_params.type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
const auto& src_params = src_surface->GetSurfaceParams();
const auto& dst_params = dst_surface->GetSurfaceParams();
UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
const std::size_t source_size = src_surface->GetHostSizeInBytes();
const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
const std::size_t buffer_size = std::max(source_size, dest_size);
GLuint copy_pbo_handle = FetchPBO(buffer_size);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
if (source_format.compressed) {
glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
nullptr);
} else {
glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
static_cast<GLsizei>(source_size), nullptr);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
const GLsizei width = static_cast<GLsizei>(dst_params.width);
const GLsizei height = static_cast<GLsizei>(dst_params.height);
const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
if (dest_format.compressed) {
LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
UNREACHABLE();
} else {
switch (dst_params.target) {
case SurfaceTarget::Texture1D:
glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
dest_format.type, nullptr);
break;
case SurfaceTarget::Texture2D:
glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
dest_format.format, dest_format.type, nullptr);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
dest_format.format, dest_format.type, nullptr);
break;
case SurfaceTarget::TextureCubemap:
glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
dest_format.format, dest_format.type, nullptr);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(dst_params.target));
UNREACHABLE();
}
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glTextureBarrier();
}
GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
OGLBuffer& cp = copy_pbo_cache[l2];
if (cp.handle == 0) {
const std::size_t ceil_size = 1ULL << l2;
cp.Create();
cp.MakeStreamCopy(ceil_size);
}
return cp.handle;
}
} // namespace OpenGL

View File

@@ -0,0 +1,143 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
using VideoCommon::SurfaceParams;
using VideoCommon::ViewParams;
class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
void DownloadTexture(std::vector<u8>& staging_buffer) override;
GLenum GetTarget() const {
return target;
}
GLuint GetTexture() const {
return texture.handle;
}
protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& view_key) override;
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
private:
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
GLenum internal_format{};
GLenum format{};
GLenum type{};
bool is_compressed{};
GLenum target{};
u32 view_count{};
OGLTexture texture;
OGLBuffer texture_buffer;
};
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
~CachedSurfaceView();
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
void Attach(GLenum attachment, GLenum target) const;
GLuint GetTexture() const {
if (is_proxy) {
return surface.GetTexture();
}
return texture_view.handle;
}
const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams();
}
void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
private:
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source) const {
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
}
OGLTextureView CreateTextureView() const;
CachedSurface& surface;
GLenum target{};
OGLTextureView texture_view;
u32 swizzle;
bool is_proxy;
};
class TextureCacheOpenGL final : public TextureCacheBase {
public:
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const Device& device);
~TextureCacheOpenGL();
protected:
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
void ImageCopy(Surface& src_surface, Surface& dst_surface,
const VideoCommon::CopyParams& copy_params) override;
void ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
private:
GLuint FetchPBO(std::size_t buffer_size);
OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
};
} // namespace OpenGL

View File

@@ -471,7 +471,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
}
}
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

View File

@@ -5,8 +5,10 @@
#include <string>
#include <fmt/format.h>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/scope_exit.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -63,4 +65,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
}
} // namespace OpenGL
} // namespace OpenGL

View File

@@ -32,4 +32,4 @@ private:
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL
} // namespace OpenGL

View File

@@ -935,6 +935,11 @@ private:
return {};
}
Id ImageStore(Operation operation) {
UNIMPLEMENTED();
return {};
}
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1326,6 +1331,8 @@ private:
&SPIRVDecompiler::TextureQueryLod,
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::PushFlowStack,
&SPIRVDecompiler::PopFlowStack,

View File

@@ -169,6 +169,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::Image, &ShaderIR::DecodeImage},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},

View File

@@ -0,0 +1,120 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
case Tegra::Shader::ImageType::TextureBuffer:
return 1;
case Tegra::Shader::ImageType::Texture1DArray:
case Tegra::Shader::ImageType::Texture2D:
return 2;
case Tegra::Shader::ImageType::Texture2DArray:
case Tegra::Shader::ImageType::Texture3D:
return 3;
}
UNREACHABLE();
return 1;
}
} // Anonymous namespace
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
switch (opcode->get().GetId()) {
case OpCode::Id::SUST: {
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
std::vector<Node> values;
constexpr std::size_t hardcoded_size{4};
for (std::size_t i = 0; i < hardcoded_size; ++i) {
values.push_back(GetRegister(instr.gpr0.Value() + i));
}
std::vector<Node> coords;
const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
for (std::size_t i = 0; i < num_coords; ++i) {
coords.push_back(GetRegister(instr.gpr8.Value() + i));
}
const auto type{instr.sust.image_type};
const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
: GetBindlessImage(instr.gpr39, type)};
MetaImage meta{image, values};
const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
bb.push_back(store);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
}
return pc;
}
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
const auto offset{static_cast<std::size_t>(image.index.Value())};
// If this image has already been used, return the existing mapping.
const auto itr{std::find_if(used_images.begin(), used_images.end(),
[=](const Image& entry) { return entry.GetOffset() == offset; })};
if (itr != used_images.end()) {
ASSERT(itr->GetType() == type);
return *itr;
}
// Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
const Image entry{offset, next_index, type};
return *used_images.emplace(entry).first;
}
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
Tegra::Shader::ImageType type) {
const Node image_register{GetRegister(reg)};
const Node base_image{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf{std::get_if<CbufNode>(&*base_image)};
const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
const auto cbuf_offset{cbuf_offset_imm->GetValue()};
const auto cbuf_index{cbuf->GetIndex()};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
// If this image has already been used, return the existing mapping.
const auto itr{std::find_if(used_images.begin(), used_images.end(),
[=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
if (itr != used_images.end()) {
ASSERT(itr->GetType() == type);
return *itr;
}
// Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
const Image entry{cbuf_index, cbuf_offset, next_index, type};
return *used_images.emplace(entry).first;
}
} // namespace VideoCommon::Shader

View File

@@ -245,6 +245,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TLD: {
UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
if (instr.tld.nodep_flag) {
LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
}
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
@@ -575,6 +587,39 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
return values;
}
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
const auto texture_type{instr.tld.texture_type};
const bool is_array{instr.tld.is_array};
const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
const std::size_t coord_count{GetCoordCount(texture_type)};
u64 gpr8_cursor{instr.gpr8.Value()};
const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
std::vector<Node> coords;
coords.reserve(coord_count);
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(gpr8_cursor++));
}
u64 gpr20_cursor{instr.gpr20.Value()};
// const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

View File

@@ -146,6 +146,8 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
ImageStore, /// (MetaImage, float[N] coords) -> void
Branch, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
PopFlowStack, /// () -> void
@@ -263,6 +265,48 @@ private:
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
class Image {
public:
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
: offset{offset}, index{index}, type{type}, is_bindless{false} {}
explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
Tegra::Shader::ImageType type)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
is_bindless{true} {}
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
bool is_bindless)
: offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
std::size_t GetOffset() const {
return offset;
}
std::size_t GetIndex() const {
return index;
}
Tegra::Shader::ImageType GetType() const {
return type;
}
bool IsBindless() const {
return is_bindless;
}
bool operator<(const Image& rhs) const {
return std::tie(offset, index, type, is_bindless) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
}
private:
std::size_t offset{};
std::size_t index{};
Tegra::Shader::ImageType type{};
bool is_bindless{};
};
struct GlobalMemoryBase {
u32 cbuf_index{};
u32 cbuf_offset{};
@@ -289,8 +333,14 @@ struct MetaTexture {
u32 element{};
};
struct MetaImage {
const Image& image;
std::vector<Node> values;
};
/// Parameters that modify an operation but are not part of any particular operand
using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>;
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {

View File

@@ -104,6 +104,10 @@ public:
return used_samplers;
}
const std::set<Image>& GetImages() const {
return used_images;
}
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
const {
return used_clip_distances;
@@ -154,6 +158,7 @@ private:
u32 DecodeConversion(NodeBlock& bb, u32 pc);
u32 DecodeMemory(NodeBlock& bb, u32 pc);
u32 DecodeTexture(NodeBlock& bb, u32 pc);
u32 DecodeImage(NodeBlock& bb, u32 pc);
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -254,6 +259,12 @@ private:
Tegra::Shader::TextureType type, bool is_array,
bool is_shadow);
/// Accesses an image.
const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
/// Access a bindless image sampler.
const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -277,6 +288,8 @@ private:
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -327,6 +340,7 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::set<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes

View File

@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
switch (texture_type) {
case Tegra::Texture::TextureType::Texture1D:
return SurfaceTarget::Texture1D;
case Tegra::Texture::TextureType::Texture1DBuffer:
return SurfaceTarget::TextureBuffer;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
bool SurfaceTargetIsLayered(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
return false;
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
bool SurfaceTargetIsArray(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubemap:
@@ -304,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z16:
return PixelFormat::Z16;
case Tegra::Texture::TextureFormat::Z24S8:
return PixelFormat::Z24S8;
case Tegra::Texture::TextureFormat::S8Z24:
return PixelFormat::S8Z24;
case Tegra::Texture::TextureFormat::ZF32_X24S8:
return PixelFormat::Z32FS8;
case Tegra::Texture::TextureFormat::DXT1:

View File

@@ -114,6 +114,7 @@ enum class SurfaceType {
enum class SurfaceTarget {
Texture1D,
TextureBuffer,
Texture2D,
Texture3D,
Texture1DArray,
@@ -122,71 +123,71 @@ enum class SurfaceTarget {
TextureCubeArray,
};
constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
1, // ABGR8U
1, // ABGR8S
1, // ABGR8UI
1, // B5G6R5U
1, // A2B10G10R10U
1, // A1B5G5R5U
1, // R8U
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
4, // DXN2UNORM
4, // DXN2SNORM
4, // BC7U
4, // BC6H_UF16
4, // BC6H_SF16
4, // ASTC_2D_4X4
1, // BGRA8
1, // RGBA32F
1, // RG32F
1, // R32F
1, // R16F
1, // R16U
1, // R16S
1, // R16UI
1, // R16I
1, // RG16
1, // RG16F
1, // RG16UI
1, // RG16I
1, // RG16S
1, // RGB32F
1, // RGBA8_SRGB
1, // RG8U
1, // RG8S
1, // RG32UI
1, // R32UI
4, // ASTC_2D_8X8
4, // ASTC_2D_8X5
4, // ASTC_2D_5X4
1, // BGRA8_SRGB
4, // DXT1_SRGB
4, // DXT23_SRGB
4, // DXT45_SRGB
4, // BC7U_SRGB
4, // ASTC_2D_4X4_SRGB
4, // ASTC_2D_8X8_SRGB
4, // ASTC_2D_8X5_SRGB
4, // ASTC_2D_5X4_SRGB
4, // ASTC_2D_5X5
4, // ASTC_2D_5X5_SRGB
4, // ASTC_2D_10X8
4, // ASTC_2D_10X8_SRGB
1, // Z32F
1, // Z16
1, // Z24S8
1, // S8Z24
1, // Z32FS8
constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // ABGR8U
0, // ABGR8S
0, // ABGR8UI
0, // B5G6R5U
0, // A2B10G10R10U
0, // A1B5G5R5U
0, // R8U
0, // R8UI
0, // RGBA16F
0, // RGBA16U
0, // RGBA16UI
0, // R11FG11FB10F
0, // RGBA32UI
2, // DXT1
2, // DXT23
2, // DXT45
2, // DXN1
2, // DXN2UNORM
2, // DXN2SNORM
2, // BC7U
2, // BC6H_UF16
2, // BC6H_SF16
2, // ASTC_2D_4X4
0, // BGRA8
0, // RGBA32F
0, // RG32F
0, // R32F
0, // R16F
0, // R16U
0, // R16S
0, // R16UI
0, // R16I
0, // RG16
0, // RG16F
0, // RG16UI
0, // RG16I
0, // RG16S
0, // RGB32F
0, // RGBA8_SRGB
0, // RG8U
0, // RG8S
0, // RG32UI
0, // R32UI
2, // ASTC_2D_8X8
2, // ASTC_2D_8X5
2, // ASTC_2D_5X4
0, // BGRA8_SRGB
2, // DXT1_SRGB
2, // DXT23_SRGB
2, // DXT45_SRGB
2, // BC7U_SRGB
2, // ASTC_2D_4X4_SRGB
2, // ASTC_2D_8X8_SRGB
2, // ASTC_2D_8X5_SRGB
2, // ASTC_2D_5X4_SRGB
2, // ASTC_2D_5X5
2, // ASTC_2D_5X5_SRGB
2, // ASTC_2D_10X8
2, // ASTC_2D_10X8_SRGB
0, // Z32F
0, // Z16
0, // Z24S8
0, // S8Z24
0, // Z32FS8
}};
/**
@@ -195,12 +196,14 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
* compressed image. This is used for maintaining proper surface sizes for compressed
* texture formats.
*/
static constexpr u32 GetCompressionFactor(PixelFormat format) {
if (format == PixelFormat::Invalid)
return 0;
inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
DEBUG_ASSERT(format != PixelFormat::Invalid);
DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
return compression_factor_shift_table[static_cast<std::size_t>(format)];
}
ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
return compression_factor_table[static_cast<std::size_t>(format)];
inline constexpr u32 GetCompressionFactor(PixelFormat format) {
return 1U << GetCompressionFactorShift(format);
}
constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
@@ -436,6 +439,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
return GetFormatBpp(pixel_format) / CHAR_BIT;
}
enum class SurfaceCompression {
None, // Not compressed
Compressed, // Texture is compressed
Converted, // Texture is converted before upload or after download
Rearranged, // Texture is swizzled before upload or after download
};
constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
SurfaceCompression::None, // ABGR8U
SurfaceCompression::None, // ABGR8S
SurfaceCompression::None, // ABGR8UI
SurfaceCompression::None, // B5G6R5U
SurfaceCompression::None, // A2B10G10R10U
SurfaceCompression::None, // A1B5G5R5U
SurfaceCompression::None, // R8U
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI
SurfaceCompression::Compressed, // DXT1
SurfaceCompression::Compressed, // DXT23
SurfaceCompression::Compressed, // DXT45
SurfaceCompression::Compressed, // DXN1
SurfaceCompression::Compressed, // DXN2UNORM
SurfaceCompression::Compressed, // DXN2SNORM
SurfaceCompression::Compressed, // BC7U
SurfaceCompression::Compressed, // BC6H_UF16
SurfaceCompression::Compressed, // BC6H_SF16
SurfaceCompression::Converted, // ASTC_2D_4X4
SurfaceCompression::None, // BGRA8
SurfaceCompression::None, // RGBA32F
SurfaceCompression::None, // RG32F
SurfaceCompression::None, // R32F
SurfaceCompression::None, // R16F
SurfaceCompression::None, // R16U
SurfaceCompression::None, // R16S
SurfaceCompression::None, // R16UI
SurfaceCompression::None, // R16I
SurfaceCompression::None, // RG16
SurfaceCompression::None, // RG16F
SurfaceCompression::None, // RG16UI
SurfaceCompression::None, // RG16I
SurfaceCompression::None, // RG16S
SurfaceCompression::None, // RGB32F
SurfaceCompression::None, // RGBA8_SRGB
SurfaceCompression::None, // RG8U
SurfaceCompression::None, // RG8S
SurfaceCompression::None, // RG32UI
SurfaceCompression::None, // R32UI
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5
SurfaceCompression::Converted, // ASTC_2D_5X4
SurfaceCompression::None, // BGRA8_SRGB
SurfaceCompression::Compressed, // DXT1_SRGB
SurfaceCompression::Compressed, // DXT23_SRGB
SurfaceCompression::Compressed, // DXT45_SRGB
SurfaceCompression::Compressed, // BC7U_SRGB
SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_5X5
SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_10X8
SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
SurfaceCompression::None, // Z32F
SurfaceCompression::None, // Z16
SurfaceCompression::None, // Z24S8
SurfaceCompression::Rearranged, // S8Z24
SurfaceCompression::None, // Z32FS8
}};
constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
if (format == PixelFormat::Invalid) {
return SurfaceCompression::None;
}
DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
return compression_type_table[static_cast<std::size_t>(format)];
}
SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
bool SurfaceTargetIsLayered(SurfaceTarget target);

View File

@@ -1,386 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/assert.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/surface.h"
#include "video_core/texture_cache.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::ComponentTypeFromDepthFormat;
using VideoCore::Surface::ComponentTypeFromRenderTarget;
using VideoCore::Surface::ComponentTypeFromTexture;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatFromTextureFormat;
using VideoCore::Surface::SurfaceTargetFromTextureType;
constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
}
SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
const Tegra::Texture::FullTextureInfo& config) {
SurfaceParams params;
params.is_tiled = config.tic.IsTiled();
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
params.pixel_format =
PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.depth = config.tic.Depth();
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
params.unaligned_height = config.tic.Height();
params.num_levels = config.tic.max_mip_level + 1;
params.CalculateCachedValues();
return params;
}
SurfaceParams SurfaceParams::CreateForDepthBuffer(
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params;
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U);
params.block_depth = 1 << std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;
params.CalculateCachedValues();
return params;
}
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << config.memory_layout.block_width;
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
if (params.is_tiled) {
params.width = config.width;
} else {
const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
params.pitch = config.width;
params.width = params.pitch / bpp;
}
params.height = config.height;
params.depth = 1;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.num_levels = 1;
params.CalculateCachedValues();
return params;
}
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
SurfaceParams params{};
params.is_tiled = !config.linear;
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
// TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;
params.CalculateCachedValues();
return params;
}
u32 SurfaceParams::GetMipWidth(u32 level) const {
return std::max(1U, width >> level);
}
u32 SurfaceParams::GetMipHeight(u32 level) const {
return std::max(1U, height >> level);
}
u32 SurfaceParams::GetMipDepth(u32 level) const {
return IsLayered() ? depth : std::max(1U, depth >> level);
}
bool SurfaceParams::IsLayered() const {
switch (target) {
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
case SurfaceTarget::TextureCubemap:
return true;
default:
return false;
}
}
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
if (level == 0) {
return block_height;
}
const u32 height{GetMipHeight(level)};
const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
u32 block_height = 16;
while (block_height > 1 && blocks_in_y <= block_height * 4) {
block_height >>= 1;
}
return block_height;
}
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
if (level == 0)
return block_depth;
if (target != SurfaceTarget::Texture3D)
return 1;
const u32 depth{GetMipDepth(level)};
u32 block_depth = 32;
while (block_depth > 1 && depth * 2 <= block_depth) {
block_depth >>= 1;
}
if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
return 16;
}
return block_depth;
}
std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false);
}
return offset;
}
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetInnerMipmapMemorySize(i, true, false, false);
}
return offset;
}
std::size_t SurfaceParams::GetGuestLayerSize() const {
return GetInnerMemorySize(false, true, false);
}
std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
return GetInnerMipmapMemorySize(level, true, IsLayered(), false);
}
bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) !=
std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
view_params.component_type, view_params.type)) {
return false;
}
const SurfaceTarget view_target{view_params.target};
if (view_target == target) {
return true;
}
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
return false;
case SurfaceTarget::Texture1DArray:
return view_target == SurfaceTarget::Texture1D;
case SurfaceTarget::Texture2DArray:
return view_target == SurfaceTarget::Texture2D;
case SurfaceTarget::TextureCubemap:
return view_target == SurfaceTarget::Texture2D ||
view_target == SurfaceTarget::Texture2DArray;
case SurfaceTarget::TextureCubeArray:
return view_target == SurfaceTarget::Texture2D ||
view_target == SurfaceTarget::Texture2DArray ||
view_target == SurfaceTarget::TextureCubemap;
default:
UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
return false;
}
}
bool SurfaceParams::IsPixelFormatZeta() const {
return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
}
void SurfaceParams::CalculateCachedValues() {
guest_size_in_bytes = GetInnerMemorySize(false, false, false);
// ASTC is uncompressed in software, in emulated as RGBA8
if (IsPixelFormatASTC(pixel_format)) {
host_size_in_bytes = width * height * depth * 4;
} else {
host_size_in_bytes = GetInnerMemorySize(true, false, false);
}
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
num_layers = 1;
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
num_layers = depth;
break;
default:
UNREACHABLE();
}
}
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
bool uncompressed) const {
const bool tiled{as_host_size ? false : is_tiled};
const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)};
const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)};
const u32 depth{layer_only ? 1U : GetMipDepth(level)};
return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height,
depth, GetMipBlockHeight(level), GetMipBlockDepth(level));
}
std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
bool uncompressed) const {
std::size_t size = 0;
for (u32 level = 0; level < num_levels; ++level) {
size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed);
}
if (!as_host_size && is_tiled) {
size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
}
return size;
}
std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
std::map<u64, std::pair<u32, u32>> view_offset_map;
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D: {
constexpr u32 layer = 0;
for (u32 level = 0; level < num_levels; ++level) {
const std::size_t offset{GetGuestMipmapLevelOffset(level)};
view_offset_map.insert({offset, {layer, level}});
}
break;
}
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray: {
const std::size_t layer_size{GetGuestLayerSize()};
for (u32 level = 0; level < num_levels; ++level) {
const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
for (u32 layer = 0; layer < num_layers; ++layer) {
const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
const std::size_t offset{level_offset + layer_offset};
view_offset_map.insert({offset, {layer, level}});
}
}
break;
}
default:
UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
}
return view_offset_map;
}
bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) &&
IsInBounds(view_params, layer, level);
}
bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level);
}
bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
if (view_params.target != SurfaceTarget::Texture3D) {
return true;
}
return view_params.depth == GetMipDepth(level);
}
bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
return layer + view_params.num_layers <= num_layers &&
level + view_params.num_levels <= num_levels;
}
std::size_t HasheableSurfaceParams::Hash() const {
return static_cast<std::size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
height, depth, pitch, unaligned_height, num_levels, pixel_format,
component_type, type, target) ==
std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type,
rhs.type, rhs.target);
}
std::size_t ViewKey::Hash() const {
return static_cast<std::size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
bool ViewKey::operator==(const ViewKey& rhs) const {
return std::tie(base_layer, num_layers, base_level, num_levels) ==
std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels);
}
} // namespace VideoCommon

View File

@@ -1,586 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <list>
#include <memory>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace Tegra::Texture {
struct FullTextureInfo;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace VideoCommon {
class HasheableSurfaceParams {
public:
std::size_t Hash() const;
bool operator==(const HasheableSurfaceParams& rhs) const;
protected:
// Avoid creation outside of a managed environment.
HasheableSurfaceParams() = default;
bool is_tiled;
u32 block_width;
u32 block_height;
u32 block_depth;
u32 tile_width_spacing;
u32 width;
u32 height;
u32 depth;
u32 pitch;
u32 unaligned_height;
u32 num_levels;
VideoCore::Surface::PixelFormat pixel_format;
VideoCore::Surface::ComponentType component_type;
VideoCore::Surface::SurfaceType type;
VideoCore::Surface::SurfaceTarget target;
};
class SurfaceParams final : public HasheableSurfaceParams {
public:
/// Creates SurfaceCachedParams from a texture configuration.
static SurfaceParams CreateForTexture(Core::System& system,
const Tegra::Texture::FullTextureInfo& config);
/// Creates SurfaceCachedParams for a depth buffer configuration.
static SurfaceParams CreateForDepthBuffer(
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
/// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
/// Creates SurfaceCachedParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config);
bool IsTiled() const {
return is_tiled;
}
u32 GetBlockWidth() const {
return block_width;
}
u32 GetTileWidthSpacing() const {
return tile_width_spacing;
}
u32 GetWidth() const {
return width;
}
u32 GetHeight() const {
return height;
}
u32 GetDepth() const {
return depth;
}
u32 GetPitch() const {
return pitch;
}
u32 GetNumLevels() const {
return num_levels;
}
VideoCore::Surface::PixelFormat GetPixelFormat() const {
return pixel_format;
}
VideoCore::Surface::ComponentType GetComponentType() const {
return component_type;
}
VideoCore::Surface::SurfaceTarget GetTarget() const {
return target;
}
VideoCore::Surface::SurfaceType GetType() const {
return type;
}
std::size_t GetGuestSizeInBytes() const {
return guest_size_in_bytes;
}
std::size_t GetHostSizeInBytes() const {
return host_size_in_bytes;
}
u32 GetNumLayers() const {
return num_layers;
}
/// Returns the width of a given mipmap level.
u32 GetMipWidth(u32 level) const;
/// Returns the height of a given mipmap level.
u32 GetMipHeight(u32 level) const;
/// Returns the depth of a given mipmap level.
u32 GetMipDepth(u32 level) const;
/// Returns true if these parameters are from a layered surface.
bool IsLayered() const;
/// Returns the block height of a given mipmap level.
u32 GetMipBlockHeight(u32 level) const;
/// Returns the block depth of a given mipmap level.
u32 GetMipBlockDepth(u32 level) const;
/// Returns the offset in bytes in guest memory of a given mipmap level.
std::size_t GetGuestMipmapLevelOffset(u32 level) const;
/// Returns the offset in bytes in host memory (linear) of a given mipmap level.
std::size_t GetHostMipmapLevelOffset(u32 level) const;
/// Returns the size of a layer in bytes in guest memory.
std::size_t GetGuestLayerSize() const;
/// Returns the size of a layer in bytes in host memory for a given mipmap level.
std::size_t GetHostLayerSize(u32 level) const;
/// Returns true if another surface can be familiar with this. This is a loosely defined term
/// that reflects the possibility of these two surface parameters potentially being part of a
/// bigger superset.
bool IsFamiliar(const SurfaceParams& view_params) const;
/// Returns true if the pixel format is a depth and/or stencil format.
bool IsPixelFormatZeta() const;
/// Creates a map that redirects an address difference to a layer and mipmap level.
std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;
/// Returns true if the passed surface view parameters is equal or a valid subset of this.
bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;
private:
/// Calculates values that can be deduced from HasheableSurfaceParams.
void CalculateCachedValues();
/// Returns the size of a given mipmap level.
std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
bool uncompressed) const;
/// Returns the size of all mipmap levels and aligns as needed.
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;
/// Returns true if the passed view width and height match the size of this params in a given
/// mipmap level.
bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;
/// Returns true if the passed view depth match the size of this params in a given mipmap level.
bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;
/// Returns true if the passed view layers and mipmap levels are in bounds.
bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;
std::size_t guest_size_in_bytes;
std::size_t host_size_in_bytes;
u32 num_layers;
};
struct ViewKey {
std::size_t Hash() const;
bool operator==(const ViewKey& rhs) const;
u32 base_layer{};
u32 num_layers{};
u32 base_level{};
u32 num_levels{};
};
} // namespace VideoCommon
namespace std {
template <>
struct hash<VideoCommon::SurfaceParams> {
std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<VideoCommon::ViewKey> {
std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace VideoCommon {
template <typename TView, typename TExecutionContext>
class SurfaceBase {
static_assert(std::is_trivially_copyable_v<TExecutionContext>);
public:
virtual void LoadBuffer() = 0;
virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;
virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;
TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
// It can't be a view if it's in a prior address.
return {};
}
const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
const auto it{view_offset_map.find(relative_offset)};
if (it == view_offset_map.end()) {
// Couldn't find an aligned view.
return {};
}
const auto [layer, level] = it->second;
if (!params.IsViewValid(view_params, layer, level)) {
return {};
}
return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
}
VAddr GetCpuAddr() const {
ASSERT(is_registered);
return cpu_addr;
}
u8* GetHostPtr() const {
ASSERT(is_registered);
return host_ptr;
}
CacheAddr GetCacheAddr() const {
ASSERT(is_registered);
return cache_addr;
}
std::size_t GetSizeInBytes() const {
return params.GetGuestSizeInBytes();
}
void MarkAsModified(bool is_modified_) {
is_modified = is_modified_;
}
const SurfaceParams& GetSurfaceParams() const {
return params;
}
TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
TView* view{TryGetView(view_addr, view_params)};
ASSERT(view != nullptr);
return view;
}
void Register(VAddr cpu_addr_, u8* host_ptr_) {
ASSERT(!is_registered);
is_registered = true;
cpu_addr = cpu_addr_;
host_ptr = host_ptr_;
cache_addr = ToCacheAddr(host_ptr_);
}
void Register(VAddr cpu_addr_) {
Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
}
void Unregister() {
ASSERT(is_registered);
is_registered = false;
}
bool IsRegistered() const {
return is_registered;
}
protected:
explicit SurfaceBase(const SurfaceParams& params)
: params{params}, view_offset_map{params.CreateViewOffsetMap()} {}
~SurfaceBase() = default;
virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;
bool IsModified() const {
return is_modified;
}
const SurfaceParams params;
private:
TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
const ViewKey key{base_layer, num_layers, base_level, num_levels};
const auto [entry, is_cache_miss] = views.try_emplace(key);
auto& view{entry->second};
if (is_cache_miss) {
view = CreateView(key);
}
return view.get();
}
const std::map<u64, std::pair<u32, u32>> view_offset_map;
VAddr cpu_addr{};
u8* host_ptr{};
CacheAddr cache_addr{};
bool is_modified{};
bool is_registered{};
std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};
template <typename TSurface, typename TView, typename TExecutionContext>
class TextureCache {
static_assert(std::is_trivially_copyable_v<TExecutionContext>);
using ResultType = std::tuple<TView*, TExecutionContext>;
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
using IntervalType = typename IntervalMap::interval_type;
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
if (!surface->IsRegistered()) {
// Skip duplicates
continue;
}
Unregister(surface);
}
}
ResultType GetTextureSurface(TExecutionContext exctx,
const Tegra::Texture::FullTextureInfo& config) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
if (!cpu_addr) {
return {{}, exctx};
}
const auto params{SurfaceParams::CreateForTexture(system, config)};
return GetSurfaceView(exctx, *cpu_addr, params, true);
}
ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
const auto& regs{system.GPU().Maxwell3D().regs};
if (!regs.zeta.Address() || !regs.zeta_enable) {
return {{}, exctx};
}
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
if (!cpu_addr) {
return {{}, exctx};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(
system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
}
ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
bool preserve_contents) {
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
const auto& regs{system.GPU().Maxwell3D().regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return {{}, exctx};
}
auto& memory_manager{system.GPU().MemoryManager()};
const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
const auto cpu_addr{memory_manager.GpuToCpuAddress(
config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
if (!cpu_addr) {
return {{}, exctx};
}
return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents);
}
ResultType GetFermiSurface(TExecutionContext exctx,
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
ASSERT(cpu_addr);
return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
true);
}
TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {}
~TextureCache() = default;
virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
const SurfaceParams& params, bool preserve_contents,
const std::vector<TSurface*>& overlaps) = 0;
virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;
void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
surface->Register(cpu_addr, host_ptr);
registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
}
void Unregister(TSurface* surface) {
registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
surface->Unregister();
}
TSurface* GetUncachedSurface(const SurfaceParams& params) {
if (TSurface* surface = TryGetReservedSurface(params); surface)
return surface;
// No reserved surface available, create a new one and reserve it
auto new_surface{CreateSurface(params)};
TSurface* surface{new_surface.get()};
ReserveSurface(params, std::move(new_surface));
return surface;
}
Core::System& system;
private:
ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
bool preserve_contents) {
const auto host_ptr{Memory::GetPointer(cpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
if (overlaps.empty()) {
return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
}
if (overlaps.size() == 1) {
if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
return {view, exctx};
}
TView* fast_view;
std::tie(fast_view, exctx) =
TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);
for (TSurface* surface : overlaps) {
if (!fast_view) {
// Flush even when we don't care about the contents, to preserve memory not written
// by the new surface.
exctx = surface->FlushBuffer(exctx);
}
Unregister(surface);
}
if (fast_view) {
return {fast_view, exctx};
}
return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
}
ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
const SurfaceParams& params, bool preserve_contents) {
TSurface* new_surface{GetUncachedSurface(params)};
Register(new_surface, cpu_addr, host_ptr);
if (preserve_contents) {
exctx = LoadSurface(exctx, new_surface);
}
return {new_surface->GetView(cpu_addr, params), exctx};
}
TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
surface->LoadBuffer();
exctx = surface->UploadTexture(exctx);
surface->MarkAsModified(false);
return exctx;
}
std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
if (size == 0) {
return {};
}
const IntervalType interval{cache_addr, cache_addr + size};
std::vector<TSurface*> surfaces;
for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
surfaces.push_back(*pair.second.begin());
}
return surfaces;
}
void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
surface_reserve[params].push_back(std::move(surface));
}
TSurface* TryGetReservedSurface(const SurfaceParams& params) {
auto search{surface_reserve.find(params)};
if (search == surface_reserve.end()) {
return {};
}
for (auto& surface : search->second) {
if (!surface->IsRegistered()) {
return surface.get();
}
}
return {};
}
IntervalType GetSurfaceInterval(TSurface* surface) const {
return IntervalType::right_open(surface->GetCacheAddr(),
surface->GetCacheAddr() + surface->GetSizeInBytes());
}
VideoCore::RasterizerInterface& rasterizer;
IntervalMap registered_surfaces;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
};
} // namespace VideoCommon

View File

@@ -0,0 +1,36 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace VideoCommon {
struct CopyParams {
constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
u32 depth)
: source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
dest_level{dest_level}, width{width}, height{height}, depth{depth} {}
constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
: source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
dest_level{level}, width{width}, height{height}, depth{depth} {}
u32 source_x;
u32 source_y;
u32 source_z;
u32 dest_x;
u32 dest_y;
u32 dest_z;
u32 source_level;
u32 dest_level;
u32 width;
u32 height;
u32 depth;
};
} // namespace VideoCommon

View File

@@ -0,0 +1,300 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/memory_manager.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/textures/convert.h"
namespace VideoCommon {
MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceCompression;
StagingCache::StagingCache() = default;
StagingCache::~StagingCache() = default;
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
: params{params}, mipmap_sizes(params.num_levels),
mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{
params.GetHostSizeInBytes()} {
std::size_t offset = 0;
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
mipmap_sizes[level] = mipmap_size;
mipmap_offsets[level] = offset;
offset += mipmap_size;
}
layer_size = offset;
if (params.is_layered) {
if (params.is_tiled) {
layer_size =
SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
}
guest_memory_size = layer_size * params.depth;
} else {
guest_memory_size = layer_size;
}
}
MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
const u32 src_bpp{params.GetBytesPerPixel()};
const u32 dst_bpp{rhs.GetBytesPerPixel()};
const bool ib1 = params.IsBuffer();
const bool ib2 = rhs.IsBuffer();
if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
const bool cb1 = params.IsCompressed();
const bool cb2 = rhs.IsCompressed();
if (cb1 == cb2) {
return MatchTopologyResult::FullMatch;
}
return MatchTopologyResult::CompressUnmatch;
}
return MatchTopologyResult::None;
}
MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
// Buffer surface Check
if (params.IsBuffer()) {
const std::size_t wd1 = params.width * params.GetBytesPerPixel();
const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
if (wd1 == wd2) {
return MatchStructureResult::FullMatch;
}
return MatchStructureResult::None;
}
// Linear Surface check
if (!params.is_tiled) {
if (std::tie(params.width, params.height, params.pitch) ==
std::tie(rhs.width, rhs.height, rhs.pitch)) {
return MatchStructureResult::FullMatch;
}
return MatchStructureResult::None;
}
// Tiled Surface check
if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
params.tile_width_spacing, params.num_levels) ==
std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
rhs.tile_width_spacing, rhs.num_levels)) {
if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
return MatchStructureResult::FullMatch;
}
const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
rhs.pixel_format);
const u32 hs =
SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
const u32 w1 = params.GetBlockAlignedWidth();
if (std::tie(w1, params.height) == std::tie(ws, hs)) {
return MatchStructureResult::SemiMatch;
}
}
return MatchStructureResult::None;
}
std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
const GPUVAddr candidate_gpu_addr) const {
if (gpu_addr == candidate_gpu_addr) {
return {{0, 0}};
}
if (candidate_gpu_addr < gpu_addr) {
return {};
}
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
if (mipmap_it == mipmap_offsets.end()) {
return {};
}
const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
return std::make_pair(layer, level);
}
std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
const u32 layers{params.depth};
const u32 mipmaps{params.num_levels};
std::vector<CopyParams> result;
result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
for (u32 layer = 0; layer < layers; layer++) {
for (u32 level = 0; level < mipmaps; level++) {
const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
result.emplace_back(width, height, layer, level);
}
}
return result;
}
std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
const u32 mipmaps{params.num_levels};
std::vector<CopyParams> result;
result.reserve(mipmaps);
for (u32 level = 0; level < mipmaps; level++) {
const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
result.emplace_back(width, height, depth, level);
}
return result;
}
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
u8* buffer, u32 level) {
const u32 width{params.GetMipWidth(level)};
const u32 height{params.GetMipHeight(level)};
const u32 block_height{params.GetMipBlockHeight(level)};
const u32 block_depth{params.GetMipBlockDepth(level)};
std::size_t guest_offset{mipmap_offsets[level]};
if (params.is_layered) {
std::size_t host_offset{0};
const std::size_t guest_stride = layer_size;
const std::size_t host_stride = params.GetHostLayerSize(level);
for (u32 layer = 0; layer < params.depth; ++layer) {
MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
guest_offset += guest_stride;
host_offset += host_stride;
}
} else {
MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
params.GetMipDepth(level), params.tile_width_spacing, buffer,
memory + guest_offset);
}
}
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
StagingCache& staging_cache) {
MICROPROFILE_SCOPE(GPU_Load_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
params.block_width, static_cast<u32>(params.target));
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
} else {
ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
const u32 bpp{params.GetBytesPerPixel()};
const u32 block_width{params.GetDefaultBlockWidth()};
const u32 block_height{params.GetDefaultBlockHeight()};
const u32 width{(params.width + block_width - 1) / block_width};
const u32 height{(params.height + block_height - 1) / block_height};
const u32 copy_size{width * bpp};
if (params.pitch == copy_size) {
std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
} else {
const u8* start{host_ptr};
u8* write_to{staging_buffer.data()};
for (u32 h = height; h > 0; --h) {
std::memcpy(write_to, start, copy_size);
start += params.pitch;
write_to += copy_size;
}
}
}
auto compression_type = params.GetCompressionType();
if (compression_type == SurfaceCompression::None ||
compression_type == SurfaceCompression::Compressed)
return;
for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
const u32 level = level_up - 1;
const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
? in_host_offset
: params.GetConvertedMipmapOffset(level);
u8* in_buffer = staging_buffer.data() + in_host_offset;
u8* out_buffer = staging_buffer.data() + out_host_offset;
ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
params.GetMipWidth(level), params.GetMipHeight(level),
params.GetMipDepth(level), true, true);
}
}
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
StagingCache& staging_cache) {
MICROPROFILE_SCOPE(GPU_Flush_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
}
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
} else {
ASSERT(params.target == SurfaceTarget::Texture2D);
ASSERT(params.num_levels == 1);
const u32 bpp{params.GetBytesPerPixel()};
const u32 copy_size{params.width * bpp};
if (params.pitch == copy_size) {
std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
} else {
u8* start{host_ptr};
const u8* read_to{staging_buffer.data()};
for (u32 h = params.height; h > 0; --h) {
std::memcpy(start, read_to, copy_size);
start += params.pitch;
read_to += copy_size;
}
}
}
if (!is_continuous) {
memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
}
} // namespace VideoCommon

View File

@@ -0,0 +1,317 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/binary_find.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/morton.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"
namespace Tegra {
class MemoryManager;
}
namespace VideoCommon {
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceTarget;
enum class MatchStructureResult : u32 {
FullMatch = 0,
SemiMatch = 1,
None = 2,
};
enum class MatchTopologyResult : u32 {
FullMatch = 0,
CompressUnmatch = 1,
None = 2,
};
class StagingCache {
public:
explicit StagingCache();
~StagingCache();
std::vector<u8>& GetBuffer(std::size_t index) {
return staging_buffer[index];
}
const std::vector<u8>& GetBuffer(std::size_t index) const {
return staging_buffer[index];
}
void SetSize(std::size_t size) {
staging_buffer.resize(size);
}
private:
std::vector<std::vector<u8>> staging_buffer;
};
class SurfaceBaseImpl {
public:
void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
bool Overlaps(const CacheAddr start, const CacheAddr end) const {
return (cache_addr < end) && (cache_addr_end > start);
}
bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
return (gpu_addr <= other_start && other_end <= gpu_addr_end);
}
// Use only when recycling a surface
void SetGpuAddr(const GPUVAddr new_addr) {
gpu_addr = new_addr;
}
VAddr GetCpuAddr() const {
return cpu_addr;
}
void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr;
}
CacheAddr GetCacheAddr() const {
return cache_addr;
}
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + guest_memory_size;
}
const SurfaceParams& GetSurfaceParams() const {
return params;
}
std::size_t GetSizeInBytes() const {
return guest_memory_size;
}
std::size_t GetHostSizeInBytes() const {
return host_memory_size;
}
std::size_t GetMipmapSize(const u32 level) const {
return mipmap_sizes[level];
}
void MarkAsContinuous(const bool is_continuous) {
this->is_continuous = is_continuous;
}
bool IsContinuous() const {
return is_continuous;
}
bool IsLinear() const {
return !params.is_tiled;
}
bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
return params.pixel_format == pixel_format;
}
VideoCore::Surface::PixelFormat GetFormat() const {
return params.pixel_format;
}
bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
return params.target == target;
}
MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
return std::tie(gpu_addr, params.target, params.num_levels) ==
std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
}
std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
}
protected:
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
~SurfaceBaseImpl() = default;
virtual void DecorateSurfaceName() = 0;
const SurfaceParams params;
std::size_t layer_size;
std::size_t guest_memory_size;
const std::size_t host_memory_size;
GPUVAddr gpu_addr{};
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
VAddr cpu_addr{};
bool is_continuous{};
std::vector<std::size_t> mipmap_sizes;
std::vector<std::size_t> mipmap_offsets;
private:
void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
u32 level);
std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
};
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
void MarkAsModified(const bool is_modified_, const u64 tick) {
is_modified = is_modified_ || is_target;
modification_tick = tick;
}
void MarkAsRenderTarget(const bool is_target) {
this->is_target = is_target;
}
void MarkAsPicked(const bool is_picked) {
this->is_picked = is_picked;
}
bool IsModified() const {
return is_modified;
}
bool IsProtected() const {
// Only 3D Slices are to be protected
return is_target && params.block_depth > 0;
}
bool IsRenderTarget() const {
return is_target;
}
bool IsRegistered() const {
return is_registered;
}
bool IsPicked() const {
return is_picked;
}
void MarkAsRegistered(bool is_reg) {
is_registered = is_reg;
}
u64 GetModificationTick() const {
return modification_tick;
}
TView EmplaceOverview(const SurfaceParams& overview_params) {
const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
}
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
const GPUVAddr view_addr,
const std::size_t candidate_size, const u32 mipmap,
const u32 layer) {
const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
if (!layer_mipmap) {
return {};
}
const u32 end_layer{layer_mipmap->first};
const u32 end_mipmap{layer_mipmap->second};
if (layer != end_layer) {
if (mipmap == 0 && end_mipmap == 0) {
return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
}
return {};
} else {
return GetView(
ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
}
}
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
const std::size_t candidate_size) {
if (params.target == SurfaceTarget::Texture3D ||
(params.num_levels == 1 && !params.is_layered) ||
view_params.target == SurfaceTarget::Texture3D) {
return {};
}
const auto layer_mipmap{GetLayerMipmap(view_addr)};
if (!layer_mipmap) {
return {};
}
const u32 layer{layer_mipmap->first};
const u32 mipmap{layer_mipmap->second};
if (GetMipmapSize(mipmap) != candidate_size) {
return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
}
return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
}
TView GetMainView() const {
return main_view;
}
protected:
explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
: SurfaceBaseImpl(gpu_addr, params) {}
~SurfaceBase() = default;
virtual TView CreateView(const ViewParams& view_key) = 0;
TView main_view;
std::unordered_map<ViewParams, TView> views;
private:
TView GetView(const ViewParams& key) {
const auto [entry, is_cache_miss] = views.try_emplace(key);
auto& view{entry->second};
if (is_cache_miss) {
view = CreateView(key);
}
return view;
}
bool is_modified{};
bool is_target{};
bool is_registered{};
bool is_picked{};
u64 modification_tick{};
};
} // namespace VideoCommon

View File

@@ -0,0 +1,334 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <map>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/surface_params.h"
namespace VideoCommon {
using VideoCore::Surface::ComponentTypeFromDepthFormat;
using VideoCore::Surface::ComponentTypeFromRenderTarget;
using VideoCore::Surface::ComponentTypeFromTexture;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::PixelFormatFromTextureFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceTargetFromTextureType;
using VideoCore::Surface::SurfaceType;
SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
switch (type) {
case Tegra::Shader::TextureType::Texture1D: {
if (is_array)
return SurfaceTarget::Texture1DArray;
else
return SurfaceTarget::Texture1D;
}
case Tegra::Shader::TextureType::Texture2D: {
if (is_array)
return SurfaceTarget::Texture2DArray;
else
return SurfaceTarget::Texture2D;
}
case Tegra::Shader::TextureType::Texture3D: {
ASSERT(!is_array);
return SurfaceTarget::Texture3D;
}
case Tegra::Shader::TextureType::TextureCube: {
if (is_array)
return SurfaceTarget::TextureCubeArray;
else
return SurfaceTarget::TextureCubemap;
}
default: {
UNREACHABLE();
return SurfaceTarget::Texture2D;
}
}
}
namespace {
constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
}
} // Anonymous namespace
SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
const Tegra::Texture::FullTextureInfo& config,
const VideoCommon::Shader::Sampler& entry) {
SurfaceParams params;
params.is_tiled = config.tic.IsTiled();
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
params.type = GetFormatType(params.pixel_format);
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
case PixelFormat::R16U:
case PixelFormat::R16F: {
params.pixel_format = PixelFormat::Z16;
break;
}
case PixelFormat::R32F: {
params.pixel_format = PixelFormat::Z32F;
break;
}
default: {
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
static_cast<u32>(params.pixel_format));
}
}
params.type = GetFormatType(params.pixel_format);
}
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
// TODO: on 1DBuffer we should use the tic info.
if (!config.tic.IsBuffer()) {
params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray());
params.width = config.tic.Width();
params.height = config.tic.Height();
params.depth = config.tic.Depth();
params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = config.tic.max_mip_level + 1;
params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
params.is_layered = params.IsLayered();
} else {
params.target = SurfaceTarget::TextureBuffer;
params.width = config.tic.Width();
params.pitch = params.width * params.GetBytesPerPixel();
params.height = 1;
params.depth = 1;
params.num_levels = 1;
params.emulated_levels = 1;
params.is_layered = false;
}
return params;
}
SurfaceParams SurfaceParams::CreateForDepthBuffer(
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params;
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.srgb_conversion = false;
params.block_width = std::min(block_width, 5U);
params.block_height = std::min(block_height, 5U);
params.block_depth = std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.width = zeta_width;
params.height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.pitch = 0;
params.num_levels = 1;
params.emulated_levels = 1;
params.is_layered = false;
return params;
}
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.block_width = config.memory_layout.block_width;
params.block_height = config.memory_layout.block_height;
params.block_depth = config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
if (params.is_tiled) {
params.pitch = 0;
params.width = config.width;
} else {
const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
params.pitch = config.width;
params.width = params.pitch / bpp;
}
params.height = config.height;
params.depth = 1;
params.target = SurfaceTarget::Texture2D;
params.num_levels = 1;
params.emulated_levels = 1;
params.is_layered = false;
return params;
}
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
SurfaceParams params{};
params.is_tiled = !config.linear;
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
params.height = config.height;
params.pitch = config.pitch;
// TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;
params.emulated_levels = 1;
params.is_layered = params.IsLayered();
return params;
}
bool SurfaceParams::IsLayered() const {
switch (target) {
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
return true;
default:
return false;
}
}
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
if (level == 0) {
return this->block_height;
}
const u32 height_new{GetMipHeight(level)};
const u32 default_block_height{GetDefaultBlockHeight()};
const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
return std::clamp(block_height_new, 3U, 7U) - 3U;
}
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
if (level == 0) {
return this->block_depth;
}
if (is_layered) {
return 0;
}
const u32 depth_new{GetMipDepth(level)};
const u32 block_depth_new = Common::Log2Ceil32(depth_new);
if (block_depth_new > 4) {
return 5 - (GetMipBlockHeight(level) >= 2);
}
return block_depth_new;
}
std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetInnerMipmapMemorySize(i, false, false);
}
return offset;
}
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
}
return offset;
}
std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetConvertedMipmapSize(i);
}
return offset;
}
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
constexpr std::size_t rgba8_bpp = 4ULL;
const std::size_t width_t = GetMipWidth(level);
const std::size_t height_t = GetMipHeight(level);
const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
return width_t * height_t * depth_t * rgba8_bpp;
}
std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
std::size_t size = 0;
for (u32 level = 0; level < num_levels; ++level) {
size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
}
if (is_tiled && is_layered) {
return Common::AlignBits(size,
Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
}
return size;
}
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
bool uncompressed) const {
const bool tiled{as_host_size ? false : is_tiled};
const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
const u32 depth{is_layered ? 1U : GetMipDepth(level)};
return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
GetMipBlockHeight(level), GetMipBlockDepth(level));
}
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
}
std::string SurfaceParams::TargetName() const {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::TextureBuffer:
return "TexBuffer";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
return "3D";
case SurfaceTarget::Texture1DArray:
return "1DArray";
case SurfaceTarget::Texture2DArray:
return "2DArray";
case SurfaceTarget::TextureCubemap:
return "Cube";
case SurfaceTarget::TextureCubeArray:
return "CubeArray";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return fmt::format("TUK({})", static_cast<u32>(target));
}
}
} // namespace VideoCommon

View File

@@ -0,0 +1,286 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon {
using VideoCore::Surface::SurfaceCompression;
class SurfaceParams {
public:
/// Creates SurfaceCachedParams from a texture configuration.
static SurfaceParams CreateForTexture(Core::System& system,
const Tegra::Texture::FullTextureInfo& config,
const VideoCommon::Shader::Sampler& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
static SurfaceParams CreateForDepthBuffer(
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
/// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
/// Creates SurfaceCachedParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config);
std::size_t Hash() const {
return static_cast<std::size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
bool operator==(const SurfaceParams& rhs) const;
bool operator!=(const SurfaceParams& rhs) const {
return !operator==(rhs);
}
std::size_t GetGuestSizeInBytes() const {
return GetInnerMemorySize(false, false, false);
}
std::size_t GetHostSizeInBytes() const {
std::size_t host_size_in_bytes;
if (GetCompressionType() == SurfaceCompression::Converted) {
constexpr std::size_t rgb8_bpp = 4ULL;
// ASTC is uncompressed in software, in emulated as RGBA8
host_size_in_bytes = 0;
for (u32 level = 0; level < num_levels; ++level) {
host_size_in_bytes += GetConvertedMipmapSize(level);
}
} else {
host_size_in_bytes = GetInnerMemorySize(true, false, false);
}
return host_size_in_bytes;
}
u32 GetBlockAlignedWidth() const {
return Common::AlignUp(width, 64 / GetBytesPerPixel());
}
/// Returns the width of a given mipmap level.
u32 GetMipWidth(u32 level) const {
return std::max(1U, width >> level);
}
/// Returns the height of a given mipmap level.
u32 GetMipHeight(u32 level) const {
return std::max(1U, height >> level);
}
/// Returns the depth of a given mipmap level.
u32 GetMipDepth(u32 level) const {
return is_layered ? depth : std::max(1U, depth >> level);
}
/// Returns the block height of a given mipmap level.
u32 GetMipBlockHeight(u32 level) const;
/// Returns the block depth of a given mipmap level.
u32 GetMipBlockDepth(u32 level) const;
/// Returns the best possible row/pitch alignment for the surface.
u32 GetRowAlignment(u32 level) const {
const u32 bpp =
GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
}
/// Returns the offset in bytes in guest memory of a given mipmap level.
std::size_t GetGuestMipmapLevelOffset(u32 level) const;
/// Returns the offset in bytes in host memory (linear) of a given mipmap level.
std::size_t GetHostMipmapLevelOffset(u32 level) const;
/// Returns the offset in bytes in host memory (linear) of a given mipmap level
/// for a texture that is converted in host gpu.
std::size_t GetConvertedMipmapOffset(u32 level) const;
/// Returns the size in bytes in guest memory of a given mipmap level.
std::size_t GetGuestMipmapSize(u32 level) const {
return GetInnerMipmapMemorySize(level, false, false);
}
/// Returns the size in bytes in host memory (linear) of a given mipmap level.
std::size_t GetHostMipmapSize(u32 level) const {
return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
}
std::size_t GetConvertedMipmapSize(u32 level) const;
/// Returns the size of a layer in bytes in guest memory.
std::size_t GetGuestLayerSize() const {
return GetLayerSize(false, false);
}
/// Returns the size of a layer in bytes in host memory for a given mipmap level.
std::size_t GetHostLayerSize(u32 level) const {
ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
return GetInnerMipmapMemorySize(level, true, false);
}
/// Returns the max possible mipmap that the texture can have in host gpu
u32 MaxPossibleMipmap() const {
const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
return max_mipmap;
return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
}
/// Returns if the guest surface is a compressed surface.
bool IsCompressed() const {
return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
}
/// Returns the default block width.
u32 GetDefaultBlockWidth() const {
return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
}
/// Returns the default block height.
u32 GetDefaultBlockHeight() const {
return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
}
/// Returns the bits per pixel.
u32 GetBitsPerPixel() const {
return VideoCore::Surface::GetFormatBpp(pixel_format);
}
/// Returns the bytes per pixel.
u32 GetBytesPerPixel() const {
return VideoCore::Surface::GetBytesPerPixel(pixel_format);
}
/// Returns true if the pixel format is a depth and/or stencil format.
bool IsPixelFormatZeta() const {
return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
}
/// Returns how the compression should be handled for this texture.
SurfaceCompression GetCompressionType() const {
return VideoCore::Surface::GetFormatCompressionType(pixel_format);
}
/// Returns is the surface is a TextureBuffer type of surface.
bool IsBuffer() const {
return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
}
/// Returns the debug name of the texture for use in graphic debuggers.
std::string TargetName() const;
// Helper used for out of class size calculations
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
const u32 block_depth) {
return Common::AlignBits(out_size,
Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
}
/// Converts a width from a type of surface into another. This helps represent the
/// equivalent value between compressed/non-compressed textures.
static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
VideoCore::Surface::PixelFormat pixel_format_to) {
const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
return (width * bw2 + bw1 - 1) / bw1;
}
/// Converts a height from a type of surface into another. This helps represent the
/// equivalent value between compressed/non-compressed textures.
static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
VideoCore::Surface::PixelFormat pixel_format_to) {
const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
return (height * bh2 + bh1 - 1) / bh1;
}
// Finds the maximun possible width between 2 2D layers of different formats
static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
const u32 src_level, const u32 dst_level) {
const u32 bw1 = src_params.GetDefaultBlockWidth();
const u32 bw2 = dst_params.GetDefaultBlockWidth();
const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
return std::min(t_src_width, t_dst_width);
}
// Finds the maximun possible height between 2 2D layers of different formats
static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
const u32 src_level, const u32 dst_level) {
const u32 bh1 = src_params.GetDefaultBlockHeight();
const u32 bh2 = dst_params.GetDefaultBlockHeight();
const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
return std::min(t_src_height, t_dst_height);
}
bool is_tiled;
bool srgb_conversion;
bool is_layered;
u32 block_width;
u32 block_height;
u32 block_depth;
u32 tile_width_spacing;
u32 width;
u32 height;
u32 depth;
u32 pitch;
u32 num_levels;
u32 emulated_levels;
VideoCore::Surface::PixelFormat pixel_format;
VideoCore::Surface::ComponentType component_type;
VideoCore::Surface::SurfaceType type;
VideoCore::Surface::SurfaceTarget target;
private:
/// Returns the size of a given mipmap level inside a layer.
std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
/// Returns the size of all mipmap levels and aligns as needed.
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
}
/// Returns the size of a layer
std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
std::size_t GetNumLayers() const {
return is_layered ? depth : 1;
}
/// Returns true if these parameters are from a layered surface.
bool IsLayered() const;
};
} // namespace VideoCommon
namespace std {
template <>
struct hash<VideoCommon::SurfaceParams> {
std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@@ -0,0 +1,23 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include "common/common_types.h"
#include "video_core/texture_cache/surface_view.h"
namespace VideoCommon {
std::size_t ViewParams::Hash() const {
return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^
(static_cast<std::size_t>(base_level) << 24) ^
(static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
}
bool ViewParams::operator==(const ViewParams& rhs) const {
return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
}
} // namespace VideoCommon

View File

@@ -0,0 +1,67 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "common/common_types.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/surface_params.h"
namespace VideoCommon {
struct ViewParams {
ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers,
u32 base_level, u32 num_levels)
: target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
num_levels{num_levels} {}
std::size_t Hash() const;
bool operator==(const ViewParams& rhs) const;
VideoCore::Surface::SurfaceTarget target{};
u32 base_layer{};
u32 num_layers{};
u32 base_level{};
u32 num_levels{};
bool IsLayered() const {
switch (target) {
case VideoCore::Surface::SurfaceTarget::Texture1DArray:
case VideoCore::Surface::SurfaceTarget::Texture2DArray:
case VideoCore::Surface::SurfaceTarget::TextureCubemap:
case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
return true;
default:
return false;
}
}
};
class ViewBase {
public:
ViewBase(const ViewParams& params) : params{params} {}
const ViewParams& GetViewParams() const {
return params;
}
protected:
ViewParams params;
};
} // namespace VideoCommon
namespace std {
template <>
struct hash<VideoCommon::ViewParams> {
std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@@ -0,0 +1,814 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <set>
#include <tuple>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"
namespace Tegra::Texture {
struct FullTextureInfo;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace VideoCommon {
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) {
Unregister(surface);
}
}
/***
* `Guard` guarantees that rendertargets don't unregister themselves if the
* collide. Protection is currently only done on 3D slices.
***/
void GuardRenderTargets(bool new_guard) {
guard_render_targets = new_guard;
}
void GuardSamplers(bool new_guard) {
guard_samplers = new_guard;
}
void FlushRegion(CacheAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
auto surfaces = GetSurfacesInRegion(addr, size);
if (surfaces.empty()) {
return;
}
std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
return a->GetModificationTick() < b->GetModificationTick();
});
for (const auto& surface : surfaces) {
FlushSurface(surface);
}
}
TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
const VideoCommon::Shader::Sampler& entry) {
std::lock_guard lock{mutex};
const auto gpu_addr{config.tic.Address()};
if (!gpu_addr) {
return {};
}
const auto params{SurfaceParams::CreateForTexture(system, config, entry)};
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
return view;
}
bool TextureBarrier() {
const bool any_rt =
std::any_of(sampled_textures.begin(), sampled_textures.end(),
[](const auto& surface) { return surface->IsRenderTarget(); });
sampled_textures.clear();
return any_rt;
}
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty_flags.zeta_buffer) {
return depth_buffer.view;
}
maxwell3d.dirty_flags.zeta_buffer = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
if (!gpu_addr || !regs.zeta_enable) {
SetEmptyDepthBuffer();
return {};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(
system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false);
depth_buffer.target = surface_view.first;
depth_buffer.view = surface_view.second;
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(true);
return surface_view.second;
}
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty_flags.color_buffer[index]) {
return render_targets[index].view;
}
maxwell3d.dirty_flags.color_buffer.reset(index);
const auto& regs{maxwell3d.regs};
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
SetEmptyColorBuffer(index);
return {};
}
const auto& config{regs.rt[index]};
const auto gpu_addr{config.Address()};
if (!gpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true);
if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false);
render_targets[index].target = surface_view.first;
render_targets[index].view = surface_view.second;
if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(true);
return surface_view.second;
}
void MarkColorBufferInUse(std::size_t index) {
if (auto& render_target = render_targets[index].target) {
render_target->MarkAsModified(true, Tick());
}
}
void MarkDepthBufferInUse() {
if (depth_buffer.target) {
depth_buffer.target->MarkAsModified(true, Tick());
}
}
void SetEmptyDepthBuffer() {
if (depth_buffer.target == nullptr) {
return;
}
depth_buffer.target->MarkAsRenderTarget(false);
depth_buffer.target = nullptr;
depth_buffer.view = nullptr;
}
void SetEmptyColorBuffer(std::size_t index) {
if (render_targets[index].target == nullptr) {
return;
}
render_targets[index].target->MarkAsRenderTarget(false);
render_targets[index].target = nullptr;
render_targets[index].view = nullptr;
}
void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Tegra::Engines::Fermi2D::Config& copy_config) {
std::lock_guard lock{mutex};
std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr);
if (!cache_addr) {
return nullptr;
}
const CacheAddr page = cache_addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page];
for (auto& surface : list) {
if (surface->GetCacheAddr() == cache_addr) {
return surface;
}
}
return nullptr;
}
u64 Tick() {
return ++ticks;
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
SetEmptyDepthBuffer();
staging_cache.SetSize(2);
const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
siblings_table[static_cast<std::size_t>(a)] = b;
siblings_table[static_cast<std::size_t>(b)] = a;
};
std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
make_siblings(PixelFormat::Z16, PixelFormat::R16U);
make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
sampled_textures.reserve(64);
}
~TextureCache() = default;
virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
const CopyParams& copy_params) = 0;
virtual void ImageBlit(TView& src_view, TView& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
// and reading it from a sepparate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_ptr || !cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
return;
}
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
surface->MarkAsContinuous(continuous);
surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface);
surface->MarkAsRegistered(true);
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
}
void Unregister(TSurface surface) {
if (guard_render_targets && surface->IsProtected()) {
return;
}
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = surface->GetCacheAddr();
const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr();
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
UnregisterInnerCache(surface);
surface->MarkAsRegistered(false);
ReserveSurface(surface->GetSurfaceParams(), surface);
}
TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
if (const auto surface = TryGetReservedSurface(params); surface) {
surface->SetGpuAddr(gpu_addr);
return surface;
}
// No reserved surface available, create a new one and reserve it
auto new_surface{CreateSurface(gpu_addr, params)};
return new_surface;
}
std::pair<TSurface, TView> GetFermiSurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config);
const GPUVAddr gpu_addr = config.Address();
return GetSurface(gpu_addr, params, true, false);
}
Core::System& system;
private:
enum class RecycleStrategy : u32 {
Ignore = 0,
Flush = 1,
BufferCopy = 3,
};
/**
* `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
* @param overlaps, the overlapping surfaces registered in the cache.
* @param params, the paremeters on the new surface.
* @param gpu_addr, the starting address of the new surface.
* @param untopological, tells the recycler that the texture has no way to match the overlaps
* due to topological reasons.
**/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::values.use_accurate_gpu_emulation) {
return RecycleStrategy::Flush;
}
// 3D Textures decision
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (auto s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
// Untopological decision
if (untopological == MatchTopologyResult::CompressUnmatch) {
return RecycleStrategy::Flush;
}
if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
return RecycleStrategy::Flush;
}
return RecycleStrategy::Ignore;
}
/**
* `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in
*the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
*overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the
*new surface from that data.
* @param overlaps, the overlapping surfaces registered in the cache.
* @param params, the paremeters on the new surface.
* @param gpu_addr, the starting address of the new surface.
* @param preserve_contents, tells if the new surface should be loaded from meory or left blank
* @param untopological, tells the recycler that the texture has no way to match the overlaps
* due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
for (auto& surface : overlaps) {
Unregister(surface);
}
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: {
return InitializeSurface(gpu_addr, params, do_load);
}
case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(),
[](const TSurface& a, const TSurface& b) -> bool {
return a->GetModificationTick() < b->GetModificationTick();
});
for (auto& surface : overlaps) {
FlushSurface(surface);
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
case RecycleStrategy::BufferCopy: {
auto new_surface = GetUncachedSurface(gpu_addr, params);
BufferCopy(overlaps[0], new_surface);
return {new_surface, new_surface->GetMainView()};
}
default: {
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
return InitializeSurface(gpu_addr, params, do_load);
}
}
}
/**
* `RebuildSurface` this method takes a single surface and recreates into another that
* may differ in format, target or width alingment.
* @param current_surface, the registered surface in the cache which we want to convert.
* @param params, the new surface params which we'll use to recreate the surface.
**/
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
bool is_render) {
const auto gpu_addr = current_surface->GetGpuAddr();
const auto& cr_params = current_surface->GetSurfaceParams();
TSurface new_surface;
if (cr_params.pixel_format != params.pixel_format && !is_render &&
GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
SurfaceParams new_params = params;
new_params.pixel_format = cr_params.pixel_format;
new_params.component_type = cr_params.component_type;
new_params.type = cr_params.type;
new_surface = GetUncachedSurface(gpu_addr, new_params);
} else {
new_surface = GetUncachedSurface(gpu_addr, params);
}
const auto& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type ||
(cr_params.component_type != final_params.component_type)) {
BufferCopy(current_surface, new_surface);
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
for (auto& brick : bricks) {
ImageCopy(current_surface, new_surface, brick);
}
}
Unregister(current_surface);
Register(new_surface);
new_surface->MarkAsModified(current_surface->IsModified(), Tick());
return {new_surface, new_surface->GetMainView()};
}
/**
* `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
* params if it's an exact match, we return the main view of the registered surface. If it's
* formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
* match but the targets don't, we create an overview View of the registered surface.
* @param current_surface, the registered surface in the cache which we want to convert.
* @param params, the new surface params which we want to check.
**/
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
const SurfaceParams& params, bool is_render) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
const bool matches_target = current_surface->MatchTarget(params.target);
const auto match_check = [&]() -> std::pair<TSurface, TView> {
if (matches_target) {
return {current_surface, current_surface->GetMainView()};
}
return {current_surface, current_surface->EmplaceOverview(params)};
};
if (!is_mirage) {
return match_check();
}
if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
return match_check();
}
return RebuildSurface(current_surface, params, is_render);
}
/**
* `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
* matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
* are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
* for them, else we return nothing.
* @param overlaps, the overlapping surfaces registered in the cache.
* @param params, the paremeters on the new surface.
* @param gpu_addr, the starting address of the new surface.
**/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
return {};
}
bool modified = false;
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
u32 passed_tests = 0;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
if (src_params.is_layered || src_params.num_levels > 1) {
// We send this cases to recycle as they are more complex to handle
return {};
}
const std::size_t candidate_size = surface->GetSizeInBytes();
auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
if (!mipmap_layer) {
continue;
}
const auto [layer, mipmap] = *mipmap_layer;
if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
continue;
}
modified |= surface->IsModified();
// Now we got all the data set up
const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
passed_tests++;
ImageCopy(surface, new_surface, copy_params);
}
if (passed_tests == 0) {
return {};
// In Accurate GPU all tests should pass, else we recycle
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
return {};
}
for (auto surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
return {{new_surface, new_surface->GetMainView()}};
}
/**
* `GetSurface` gets the starting address and parameters of a candidate surface and tries
* to find a matching surface within the cache. This is done in 3 big steps. The first is to
* check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
* Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
* memory else we move to step 3. Step 3 consists on figuring the relationship between the
* candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
* overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
* candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
* check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
* is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
* @param gpu_addr, the starting address of the candidate surface.
* @param params, the paremeters on the candidate surface.
* @param preserve_contents, tells if the new surface should be loaded from meory or left blank.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents, bool is_render) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
// Step 0: guarantee a valid surface
if (!cache_addr) {
// Return a null surface if it's invalid
SurfaceParams new_params = params;
new_params.width = 1;
new_params.height = 1;
new_params.depth = 1;
new_params.block_height = 0;
new_params.block_depth = 0;
return InitializeSurface(gpu_addr, new_params, false);
}
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
const auto struct_result = current_surface->MatchesStructure(params);
if (struct_result != MatchStructureResult::None &&
(params.target != SurfaceTarget::Texture3D ||
current_surface->MatchTarget(params.target))) {
if (struct_result == MatchStructureResult::FullMatch) {
return ManageStructuralMatch(current_surface, params, is_render);
} else {
return RebuildSurface(current_surface, params, is_render);
}
}
}
// Step 2
// Obtain all possible overlaps in the memory region
const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
// If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params, preserve_contents);
}
// Step 3
// Now we need to figure the relationship between the texture and its overlaps
// we do a topological test to ensure we can find some relationship. If it fails
// inmediatly recycle the texture
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
}
// Split cases between 1 overlap or many.
if (overlaps.size() == 1) {
TSurface current_surface = overlaps[0];
// First check if the surface is within the overlap. If not, it means
// two things either the candidate surface is a supertexture of the overlap
// or they don't match in any known way.
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
if (current_surface->GetGpuAddr() == gpu_addr) {
std::optional<std::pair<TSurface, TView>> view =
TryReconstructSurface(overlaps, params, gpu_addr);
if (view) {
return *view;
}
}
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
// Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view =
current_surface->EmplaceView(params, gpu_addr, candidate_size);
if (view) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
if (is_mirage) {
// On a mirage view, we need to recreate the surface under this new view
// and then obtain a view again.
SurfaceParams new_params = current_surface->GetSurfaceParams();
const u32 wh = SurfaceParams::ConvertWidth(
new_params.width, new_params.pixel_format, params.pixel_format);
const u32 hh = SurfaceParams::ConvertHeight(
new_params.height, new_params.pixel_format, params.pixel_format);
new_params.width = wh;
new_params.height = hh;
new_params.pixel_format = params.pixel_format;
std::pair<TSurface, TView> pair =
RebuildSurface(current_surface, new_params, is_render);
std::optional<TView> mirage_view =
pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view)
return {pair.first, *mirage_view};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
return {current_surface, *view};
}
// The next case is unsafe, so if we r in accurate GPU, just skip it
if (Settings::values.use_accurate_gpu_emulation) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
// This is the case the texture is a part of the parent.
if (current_surface->MatchesSubTexture(params, gpu_addr)) {
return RebuildSurface(current_surface, params, is_render);
}
} else {
// If there are many overlaps, odds are they are subtextures of the candidate
// surface. We try to construct a new surface based on the candidate parameters,
// using the overlaps. If a single overlap fails, this will fail.
std::optional<std::pair<TSurface, TView>> view =
TryReconstructSurface(overlaps, params, gpu_addr);
if (view) {
return *view;
}
}
// We failed all the tests, recycle the overlaps into a new texture.
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
}
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface);
if (preserve_contents) {
LoadSurface(new_surface);
}
return {new_surface, new_surface->GetMainView()};
}
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick());
}
void FlushSurface(const TSurface& surface) {
if (!surface->IsModified()) {
return;
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
surface->MarkAsModified(false, Tick());
}
void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface;
while (start <= end) {
registry[start].push_back(surface);
start++;
}
}
void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr);
while (start <= end) {
auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface));
start++;
}
}
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const CacheAddr cache_addr_end = cache_addr + size;
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces;
while (start <= end) {
std::vector<TSurface>& list = registry[start];
for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
surface->MarkAsPicked(true);
surfaces.push_back(surface);
}
}
start++;
}
for (auto& surface : surfaces) {
surface->MarkAsPicked(false);
}
return surfaces;
}
void ReserveSurface(const SurfaceParams& params, TSurface surface) {
surface_reserve[params].push_back(std::move(surface));
}
TSurface TryGetReservedSurface(const SurfaceParams& params) {
auto search{surface_reserve.find(params)};
if (search == surface_reserve.end()) {
return {};
}
for (auto& surface : search->second) {
if (!surface->IsRegistered()) {
return surface;
}
}
return {};
}
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
struct FramebufferTargetInfo {
TSurface target;
TView view;
};
VideoCore::RasterizerInterface& rasterizer;
u64 ticks{};
// Guards the cache for protection conflicts.
bool guard_render_targets{};
bool guard_samplers{};
// The siblings table is for formats that can inter exchange with one another
// without causing issues. This is only valid when a conflict occurs on a non
// rendering use.
std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
// The internal Cache is different for the Texture Cache. It's based on buckets
// of 1MB. This fits better for the purpose of this cache as textures are normaly
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
// The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuffs.
std::unordered_map<CacheAddr, TSurface> l1_cache;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
render_targets;
FramebufferTargetInfo depth_buffer;
std::vector<TSurface> sampled_textures;
StagingCache staging_cache;
std::recursive_mutex mutex;
};
} // namespace VideoCommon

View File

@@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
SwapS8Z24ToZ24S8<true>(data, width, height);
}
void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
bool convert_astc, bool convert_s8z24) {
void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
if (convert_astc && IsPixelFormatASTC(pixel_format)) {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
const std::vector<u8> rgba8_data =
Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
std::copy(rgba8_data.begin(), rgba8_data.end(), data);
const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
in_data, width, height, depth, block_width, block_height);
std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
} else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
}
}
@@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h
}
}
} // namespace Tegra::Texture
} // namespace Tegra::Texture

View File

@@ -12,10 +12,11 @@ enum class PixelFormat;
namespace Tegra::Texture {
void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
u32 width, u32 height, u32 depth, bool convert_astc,
bool convert_s8z24);
void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
} // namespace Tegra::Texture
} // namespace Tegra::Texture

View File

@@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable {
std::array<std::array<u16, M>, N> values{};
};
constexpr u32 gob_size_x = 64;
constexpr u32 gob_size_y = 8;
constexpr u32 gob_size_z = 1;
constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
constexpr u32 gob_size_x_shift = 6;
constexpr u32 gob_size_y_shift = 3;
constexpr u32 gob_size_z_shift = 0;
constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift;
constexpr u32 gob_size_x = 1U << gob_size_x_shift;
constexpr u32 gob_size_y = 1U << gob_size_y_shift;
constexpr u32 gob_size_z = 1U << gob_size_z_shift;
constexpr u32 gob_size = 1U << gob_size_shift;
constexpr u32 fast_swizzle_align = 16;
constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>();
@@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
const u32 block_height_size{1U << block_height};
const u32 block_depth_size{1U << block_depth};
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
bytes_per_pixel, out_bytes_per_pixel, block_height_size,
block_depth_size, width_spacing);
} else {
SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
bytes_per_pixel, out_bytes_per_pixel, block_height_size,
block_depth_size, width_spacing);
}
}
@@ -248,7 +256,9 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
u32 block_height_bit) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) {
@@ -269,8 +279,9 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
}
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
u32 offset_x, u32 offset_y) {
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
for (u32 line = 0; line < subrect_height; ++line) {
const u32 y2 = line + offset_y;
const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -289,8 +300,9 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
}
void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
const u32 block_height, const std::size_t copy_size, const u8* source_data,
const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
u8* swizzle_data) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
std::size_t count = 0;
for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
@@ -356,9 +368,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x);
const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height);
const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth);
const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift);
const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height);
const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth);
return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;

View File

@@ -12,8 +12,8 @@ namespace Tegra::Texture {
// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
// an small rect of (64/bytes_per_pixel)X8.
inline std::size_t GetGOBSize() {
return 512;
inline std::size_t GetGOBSizeShift() {
return 9;
}
/// Unswizzles a swizzled texture without changing its format.

View File

@@ -52,9 +52,9 @@ enum class TextureFormat : u32 {
DXT45 = 0x26,
DXN1 = 0x27,
DXN2 = 0x28,
Z24S8 = 0x29,
S8Z24 = 0x29,
X8Z24 = 0x2a,
S8Z24 = 0x2b,
Z24S8 = 0x2b,
X4V4Z24__COV4R4V = 0x2c,
X4V4Z24__COV8R8V = 0x2d,
V8Z24__COV4R12V = 0x2e,
@@ -172,12 +172,16 @@ struct TICEntry {
BitField<26, 1, u32> use_header_opt_control;
BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
BitField<0, 16, u32> buffer_high_width_minus_one;
};
union {
BitField<0, 16, u32> width_minus_1;
BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
BitField<29, 3, u32> border_size;
BitField<0, 16, u32> buffer_low_width_minus_one;
};
union {
BitField<0, 16, u32> height_minus_1;
@@ -206,7 +210,10 @@ struct TICEntry {
}
u32 Width() const {
return width_minus_1 + 1;
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_1 + 1;
}
return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
}
u32 Height() const {
@@ -219,20 +226,17 @@ struct TICEntry {
u32 BlockWidth() const {
ASSERT(IsTiled());
// The block height is stored in log2 format.
return 1 << block_width;
return block_width;
}
u32 BlockHeight() const {
ASSERT(IsTiled());
// The block height is stored in log2 format.
return 1 << block_height;
return block_height;
}
u32 BlockDepth() const {
ASSERT(IsTiled());
// The block height is stored in log2 format.
return 1 << block_depth;
return block_depth;
}
bool IsTiled() const {
@@ -240,6 +244,15 @@ struct TICEntry {
header_version == TICHeaderVersion::BlockLinearColorKey;
}
bool IsLineal() const {
return header_version == TICHeaderVersion::Pitch ||
header_version == TICHeaderVersion::PitchColorKey;
}
bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
bool IsSrgbConversionEnabled() const {
return srgb_conversion != 0;
}

View File

@@ -750,6 +750,9 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
QStringList GMainWindow::GetUnsupportedGLExtensions() {
QStringList unsupported_ext;
if (!GLAD_GL_ARB_buffer_storage) {
unsupported_ext.append(QStringLiteral("ARB_buffer_storage"));
}
if (!GLAD_GL_ARB_direct_state_access) {
unsupported_ext.append(QStringLiteral("ARB_direct_state_access"));
}

View File

@@ -52,6 +52,10 @@ private:
bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() {
std::vector<std::string> unsupported_ext;
if (!GLAD_GL_ARB_buffer_storage)
unsupported_ext.push_back("ARB_buffer_storage");
if (!GLAD_GL_ARB_direct_state_access)
unsupported_ext.push_back("ARB_direct_state_access");
if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev");
if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)