Compare commits

...

16 Commits

Author SHA1 Message Date
ReinUsesLisp
87011a97f9 gl_arb_decompiler: Implement FSwizzleAdd 2020-06-11 22:12:07 -03:00
ReinUsesLisp
a63a0daa5e gl_arb_decompiler: Implement an assembly shader decompiler
Emit code compatible with NV_gpu_program5.
This should emit code compatible with Fermi, but it wasn't tested on
that architecture. Pascal has some issues not present on Turing GPUs.
2020-06-11 22:12:07 -03:00
ReinUsesLisp
d89888389d yuzu/configuration: Show assembly shaders check box 2020-06-10 19:04:53 -03:00
bunnei
83e3b77ed7 Merge pull request #4027 from ReinUsesLisp/3d-slices
texture_cache: Implement rendering to 3D textures
2020-06-09 21:52:15 -04:00
bunnei
3626254f48 Merge pull request #4040 from ReinUsesLisp/nv-transform-feedback
gl_rasterizer: Use NV_transform_feedback for XFB on assembly shaders
2020-06-08 16:18:33 -04:00
bunnei
98d2461529 Merge pull request #4052 from ReinUsesLisp/debug-output
renderer_opengl: Only enable DEBUG_OUTPUT when graphics debugging is enabled
2020-06-08 10:16:41 -04:00
ReinUsesLisp
bd43c05470 texture_cache: Port original code management for 2D vs 3D textures
Handle blits to images as 2D, even when they have block depth.

- Fixes rendering issues on Luigi's Mansion 3
2020-06-08 05:02:22 -03:00
ReinUsesLisp
c99f5d405b texture_cache: Simplify blit code 2020-06-08 05:01:44 -03:00
ReinUsesLisp
3c2ae53b4c texture_cache: Handle 3D texture blits with one layer 2020-06-08 05:01:00 -03:00
ReinUsesLisp
c95c254f3e texture_cache: Implement rendering to 3D textures
This allows rendering to 3D textures with more than one slice.
Applications are allowed to render to more than one slice of a texture
using gl_Layer from a VTG shader.

This also requires reworking how 3D texture collisions are handled. For
now, this commit allows rendering to slices but not to miplevels. When a
render target attempts to write to a mipmap, we fall back to the previous
implementation (copying or flushing as needed).

- Fixes color correction 3D textures on UE4 games (rainbow effects).
- Allows Xenoblade games to render to 3D textures directly.
2020-06-08 05:01:00 -03:00
Rodrigo Locatti
2293e8a11a Merge pull request #4034 from ReinUsesLisp/storage-texels
vk_rasterizer: Implement storage texels and atomic image operations
2020-06-07 18:43:24 -03:00
bunnei
03fd5aa384 Merge pull request #4055 from ReinUsesLisp/nvidia-443-24
gl_device: Black list NVIDIA 443.24 for fast buffer uploads
2020-06-06 02:37:24 -04:00
ReinUsesLisp
354fbe701e renderer_opengl: Only enable DEBUG_OUTPUT when graphics debugging is enabled
Avoids logging when it's not relevant. This can potentially reduce
the driver's internal thread overhead.
2020-06-05 21:21:12 -03:00
ReinUsesLisp
3d99b449d3 gl_rasterizer: Use NV_transform_feedback for XFB on assembly shaders
NV_transform_feedback, NV_transform_feedback2 and
ARB_transform_feedback3 with NV_transform_feedback interactions allow
implementing transform feedbacks as dynamic state.

Maxwell implements transform feedbacks as dynamic state, so using these
extensions with TransformFeedbackStreamAttribsNV allows us to properly
emulate transform feedbacks without having to recompile shaders when the
state changes.
2020-06-03 20:22:12 -03:00
ReinUsesLisp
866c1165af vk_shader_decompiler: Implement atomic image operations
Implement atomic operations on images.
On GLSL these are atomicImage* functions (e.g. atomicImageAdd).
2020-06-02 02:20:02 -03:00
ReinUsesLisp
4a6b9a1a71 vk_rasterizer: Implement storage texels
This is the equivalent of an image buffer on OpenGL.

- Used by Octopath Traveler
2020-06-02 02:16:33 -03:00
29 changed files with 2567 additions and 234 deletions

View File

@@ -51,6 +51,8 @@ endif()
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"

View File

@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"

View File

@@ -54,6 +54,8 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
renderer_opengl/gl_arb_decompiler.cpp
renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp

View File

@@ -598,6 +598,7 @@ public:
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <string_view>
#include "common/common_types.h"
// Forward declaration of the shader stage enumeration; only the type name is needed by the
// prototype below.
namespace Tegra::Engines {
enum class ShaderType : u32;
}
// Forward declarations of the shader types that the prototype below takes by reference.
namespace VideoCommon::Shader {
class ShaderIR;
class Registry;
} // namespace VideoCommon::Shader
namespace OpenGL {
class Device;
/// Decompiles a shader into assembly shader source text
/// @param device OpenGL device the shader is decompiled for
/// @param ir Shader intermediate representation to decompile
/// @param registry Shader registry associated with the IR
/// @param stage Shader stage being decompiled
/// @param identifier Text identifying the shader
/// NOTE(review): the shader cache passes its shader id string here; how the decompiler uses it
/// is not visible from this header - confirm in gl_arb_decompiler.cpp
/// @return String holding the generated assembly shader
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier);
} // namespace OpenGL

View File

@@ -213,8 +213,10 @@ Device::Device()
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5;
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
GLAD_GL_NV_transform_feedback2;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);

View File

@@ -88,6 +88,10 @@ public:
return has_fast_buffer_sub_data;
}
/// Returns the NV_viewport_array2 availability flag recorded when the device was constructed
bool HasNvViewportArray2() const {
return has_nv_viewport_array2;
}
/// Returns whether assembly shaders are in use. The flag is computed in the constructor from
/// the user setting combined with the availability of the required NV extensions.
bool UseAssemblyShaders() const {
return use_assembly_shaders;
}
@@ -111,6 +115,7 @@ private:
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
bool use_assembly_shaders{};
};

View File

@@ -93,6 +93,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
/// Converts a hardware transform feedback location into an attribute token and its index
/// @param location Hardware location; it is divided by four to obtain the attribute index
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
    const u8 index = location / 4;

    // Attributes that map to a single fixed token
    switch (index) {
    case 7:
        return {GL_POSITION, 0};
    case 40:
        return {GL_PRIMARY_COLOR_NV, 0};
    case 41:
        return {GL_SECONDARY_COLOR_NV, 0};
    case 42:
        return {GL_BACK_PRIMARY_COLOR_NV, 0};
    case 43:
        return {GL_BACK_SECONDARY_COLOR_NV, 0};
    default:
        break;
    }

    // Indexed attribute ranges; the returned index is relative to the start of the range
    if (index >= 48 && index <= 55) {
        return {GL_TEXTURE_COORD_NV, index - 48};
    }
    if (index >= 8 && index <= 39) {
        return {GL_GENERIC_ATTRIB_NV, index - 8};
    }

    // Anything else is reported and treated as the position attribute
    UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
    return {GL_POSITION, 0};
}
/// Enables the capability when state is true and disables it otherwise
void oglEnable(GLenum cap, bool state) {
    if (state) {
        glEnable(cap);
    } else {
        glDisable(cap);
    }
}
@@ -1547,12 +1575,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
// Builds the transform feedback attribute list from the guest registers and submits it with
// glTransformFeedbackStreamAttribsNV. The list is an array of triplets:
// {attribute token, component count, attribute index}.
void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
const auto& regs = system.GPU().Maxwell3D().regs;
// Number of GLint values that make up one entry of the attribute list
static constexpr std::size_t STRIDE = 3;
// NOTE(review): sized for 128 triplets per feedback buffer. The GL_NEXT_BUFFER_NV separators
// pushed below also consume triplets and varying_count is not range-checked in this function;
// confirm that the register limits keep the writes in bounds.
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
// Write positions into attribs and streams respectively
GLint* cursor = attribs.data();
GLint* current_stream = streams.data();
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
const auto& layout = regs.tfb_layouts[feedback];
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
// Feedback buffers without varyings contribute neither a stream nor attributes
if (layout.varying_count == 0) {
continue;
}
*current_stream = static_cast<GLint>(feedback);
// Every stream after the first recorded one is preceded by a separator triplet
if (current_stream != streams.data()) {
// When stepping one stream, push the expected token
cursor[0] = GL_NEXT_BUFFER_NV;
cursor[1] = 0;
cursor[2] = 0;
cursor += STRIDE;
}
++current_stream;
const auto& locations = regs.tfb_varying_locs[feedback];
// Attribute index of the triplet written last for this buffer (empty before the first one)
std::optional<u8> current_index;
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
const u8 location = locations[offset];
const u8 index = location / 4;
if (current_index == index) {
// Increase number of components of the previous attachment
// (cursor already points past that triplet, so cursor[-2] is its component count)
++cursor[-2];
continue;
}
current_index = index;
// Start a new triplet with a single component
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
cursor[1] = 1;
cursor += STRIDE;
}
}
// Number of triplets written and number of streams recorded
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
GL_INTERLEAVED_ATTRIBS);
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
if (device.UseAssemblyShaders()) {
SyncTransformFeedback();
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1579,6 +1665,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
static_cast<GLsizeiptr>(size));
}
// We may have to call BeginTransformFeedbackNV here since they seem to call different
// implementations on Nvidia's driver (the pointer is different) but we are using
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
glBeginTransformFeedback(GL_POINTS);
}

View File

@@ -202,6 +202,10 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Syncs transform feedback state to match guest state
/// @note Only valid on assembly shaders
void SyncTransformFeedback();
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);

View File

@@ -20,6 +20,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -147,7 +148,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
auto program = std::make_shared<ProgramHandle>();
if (device.UseAssemblyShaders()) {
const std::string arb = "Not implemented";
const std::string arb =
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
GLuint& arb_prog = program->assembly_program.handle;

View File

@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
main_view = CreateViewInner(
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
true);
u32 num_layers = 1;
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
num_layers = params.depth;
}
main_view =
CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
CachedSurface::~CachedSurface() = default;
@@ -413,37 +418,40 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
ASSERT(params.num_levels == 1);
if (params.num_layers > 1) {
// Layered framebuffer attachments
UNIMPLEMENTED_IF(params.base_layer != 0);
switch (params.target) {
case SurfaceTarget::Texture2DArray:
glFramebufferTexture(target, attachment, GetTexture(), 0);
break;
default:
UNIMPLEMENTED();
if (params.target == SurfaceTarget::Texture3D) {
if (params.num_layers > 1) {
ASSERT(params.base_layer == 0);
glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
} else {
glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
params.base_level, params.base_layer);
}
return;
}
if (params.num_layers > 1) {
UNIMPLEMENTED_IF(params.base_layer != 0);
glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
return;
}
const GLenum view_target = surface.GetTarget();
const GLuint texture = surface.GetTexture();
switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
glFramebufferTextureLayer(target, attachment, texture, params.base_level,
glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
@@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
OGLTextureView texture_view;
texture_view.Create();
glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
if (target == GL_TEXTURE_3D) {
glTextureView(texture_view.handle, target, surface.texture.handle, format,
params.base_level, params.num_levels, 0, 1);
} else {
glTextureView(texture_view.handle, target, surface.texture.handle, format,
params.base_level, params.num_levels, params.base_layer, params.num_layers);
}
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
@@ -544,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
UNIMPLEMENTED_IF(src_params.depth != 1);
UNIMPLEMENTED_IF(dst_params.depth != 1);
state_tracker.NotifyScissor0();
state_tracker.NotifyFramebuffer();

View File

@@ -80,8 +80,10 @@ public:
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
~CachedSurfaceView();
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
void Attach(GLenum attachment, GLenum target) const;
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
/// @param attachment Attachment to bind textures to
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
void Attach(GLenum attachment, GLenum fb_target) const;
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,

View File

@@ -751,11 +751,9 @@ void RendererOpenGL::RenderScreenshot() {
}
bool RendererOpenGL::Init() {
if (GLAD_GL_KHR_debug) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
if (Settings::values.renderer_debug) {
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
}
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}

View File

@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
VkDescriptorSetLayoutCreateInfo ci;

View File

@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
VkDescriptorPoolCreateInfo ci;

View File

@@ -45,6 +45,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@@ -104,8 +105,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding;
}
@@ -377,16 +379,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
return;
}
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash.
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
descriptor_type == STORAGE_TEXEL_BUFFER) {
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
entry.dstBinding = binding + i;
entry.dstArrayElement = 0;
entry.descriptorCount = 1;
entry.descriptorType = descriptor_type;
entry.offset = offset + i * entry_size;
entry.offset = static_cast<std::size_t>(offset + i * entry_size);
entry.stride = entry_size;
}
} else if (count > 0) {
@@ -407,8 +410,9 @@ void FillDescriptorUpdateTemplateEntries(
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
}

View File

@@ -468,8 +468,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const auto& entries = pipeline.GetEntries();
SetupComputeConstBuffers(entries);
SetupComputeGlobalBuffers(entries);
SetupComputeTexelBuffers(entries);
SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
SetupComputeStorageTexels(entries);
SetupComputeImages(entries);
buffer_cache.Unmap();
@@ -715,7 +716,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
if (!view) {
return false;
}
key.views.push_back(view->GetHandle());
key.views.push_back(view->GetAttachment());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
key.layers = std::min(key.layers, view->GetNumLayers());
@@ -787,8 +788,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
const auto& entries = shader->GetEntries();
SetupGraphicsConstBuffers(entries, stage);
SetupGraphicsGlobalBuffers(entries, stage);
SetupGraphicsTexelBuffers(entries, stage);
SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
}
texture_cache.GuardSamplers(false);
@@ -983,12 +985,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
}
}
void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.texel_buffers) {
for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic;
SetupTexelBuffer(image, entry);
SetupUniformTexels(image, entry);
}
}
@@ -1003,6 +1005,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
}
}
// Sets up every storage texel entry of a graphics shader stage, reading the texture
// information for each entry from the Maxwell3D engine
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
    MICROPROFILE_SCOPE(Vulkan_Textures);
    const auto& maxwell3d = system.GPU().Maxwell3D();
    for (const auto& texel : entries.storage_texels) {
        // Only the TIC part of the texture information is needed for a texel buffer
        const auto tic = GetTextureInfo(maxwell3d, texel, stage).tic;
        SetupStorageTexel(tic, texel);
    }
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().Maxwell3D();
@@ -1035,12 +1046,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
}
}
void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.texel_buffers) {
for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
SetupTexelBuffer(image, entry);
SetupUniformTexels(image, entry);
}
}
@@ -1055,6 +1066,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
}
}
// Sets up every storage texel entry of a compute shader, reading the texture information
// for each entry from the KeplerCompute engine
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
    MICROPROFILE_SCOPE(Vulkan_Textures);
    const auto& kepler_compute = system.GPU().KeplerCompute();
    for (const auto& texel : entries.storage_texels) {
        // Only the TIC part of the texture information is needed for a texel buffer
        const auto tic = GetTextureInfo(kepler_compute, texel, ComputeShaderIndex).tic;
        SetupStorageTexel(tic, texel);
    }
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute();
@@ -1104,8 +1124,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
update_descriptor_queue.AddBuffer(buffer, offset, size);
}
void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
const TexelBufferEntry& entry) {
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
const UniformTexelEntry& entry) {
const auto view = texture_cache.GetTextureSurface(tic, entry);
ASSERT(view->IsBufferView());
@@ -1117,8 +1137,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
ASSERT(!view->IsBufferView());
const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
texture.tic.z_source, texture.tic.w_source);
const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
texture.tic.z_source, texture.tic.w_source);
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
@@ -1127,6 +1147,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
sampled_views.push_back(ImageView{std::move(view), image_layout});
}
// Queues the descriptor update for one storage texel entry.
// The view is requested from the texture cache through GetImageSurface, is asserted to be a
// buffer view, and its buffer view is pushed to the update descriptor queue as a texel buffer.
void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
const StorageTexelEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
ASSERT(view->IsBufferView());
update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
}
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
auto view = texture_cache.GetImageSurface(tic, entry);
@@ -1136,7 +1164,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
UNIMPLEMENTED_IF(tic.IsBuffer());
const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
const VkImageView image_view =
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
const auto image_layout = update_descriptor_queue.GetLastImageLayout();

View File

@@ -193,12 +193,15 @@ private:
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup texel buffers in the graphics pipeline.
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
/// Setup uniform texels in the graphics pipeline.
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
/// Setup storage texels in the graphics pipeline.
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
@@ -209,11 +212,14 @@ private:
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup uniform texels in the compute pipeline.
void SetupComputeTexelBuffers(const ShaderEntries& entries);
void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
/// Setup storage texels in the compute pipeline.
void SetupComputeStorageTexels(const ShaderEntries& entries);
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
@@ -222,10 +228,12 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);

View File

@@ -400,8 +400,9 @@ private:
u32 binding = specialization.base_binding;
binding = DeclareConstantBuffers(binding);
binding = DeclareGlobalBuffers(binding);
binding = DeclareTexelBuffers(binding);
binding = DeclareUniformTexels(binding);
binding = DeclareSamplers(binding);
binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@@ -889,7 +890,7 @@ private:
return binding;
}
u32 DeclareTexelBuffers(u32 binding) {
u32 DeclareUniformTexels(u32 binding) {
for (const auto& sampler : ir.GetSamplers()) {
if (!sampler.is_buffer) {
continue;
@@ -910,7 +911,7 @@ private:
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
}
return binding;
}
@@ -945,31 +946,48 @@ private:
return binding;
}
u32 DeclareImages(u32 binding) {
u32 DeclareStorageTexels(u32 binding) {
for (const auto& image : ir.GetImages()) {
const auto [dim, arrayed] = GetImageDim(image);
constexpr int depth = 0;
constexpr bool ms = false;
constexpr int sampled = 2; // This won't be accessed with a sampler
constexpr auto format = spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
if (image.is_read && !image.is_written) {
Decorate(id, spv::Decoration::NonWritable);
} else if (image.is_written && !image.is_read) {
Decorate(id, spv::Decoration::NonReadable);
if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
continue;
}
images.emplace(image.index, StorageImage{image_type, id});
DeclareImage(image, binding);
}
return binding;
}
/// Declares every image of the shader IR that is not a texture buffer
/// @param binding First binding index to use
/// @return Binding index following the last declared image
u32 DeclareImages(u32 binding) {
    for (const auto& image : ir.GetImages()) {
        // Texture buffer images are skipped here
        if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
            DeclareImage(image, binding);
        }
    }
    return binding;
}
void DeclareImage(const Image& image, u32& binding) {
const auto [dim, arrayed] = GetImageDim(image);
constexpr int depth = 0;
constexpr bool ms = false;
constexpr int sampled = 2; // This won't be accessed with a sampler
const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
if (image.is_read && !image.is_written) {
Decorate(id, spv::Decoration::NonWritable);
} else if (image.is_written && !image.is_read) {
Decorate(id, spv::Decoration::NonReadable);
}
images.emplace(image.index, StorageImage{image_type, id});
}
bool IsRenderTargetEnabled(u32 rt) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@@ -1256,7 +1274,7 @@ private:
} else {
UNREACHABLE_MSG("Unmanaged offset node type");
}
pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
buffer_element);
}
return {OpLoad(t_float, pointer), Type::Float};
@@ -1611,7 +1629,7 @@ private:
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
const Id carry = OpCompositeExtract(t_uint, result, 1);
return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
}
Expression LogicalAssign(Operation operation) {
@@ -1674,7 +1692,7 @@ private:
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 index = meta.sampler.index;
if (meta.sampler.is_buffer) {
const auto& entry = texel_buffers.at(index);
const auto& entry = uniform_texels.at(index);
return OpLoad(entry.image_type, entry.image);
} else {
const auto& entry = sampled_images.at(index);
@@ -1951,39 +1969,20 @@ private:
return {};
}
Expression AtomicImageAdd(Operation operation) {
UNIMPLEMENTED();
return {};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Expression AtomicImage(Operation operation) {
const auto& meta{std::get<MetaImage>(operation.GetMeta())};
ASSERT(meta.values.size() == 1);
Expression AtomicImageMin(Operation operation) {
UNIMPLEMENTED();
return {};
}
const Id coordinate = GetCoordinates(operation, Type::Int);
const Id image = images.at(meta.image.index).image;
const Id sample = v_uint_zero;
const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
Expression AtomicImageMax(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageAnd(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageOr(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageXor(Operation operation) {
UNIMPLEMENTED();
return {};
}
Expression AtomicImageExchange(Operation operation) {
UNIMPLEMENTED();
return {};
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(meta.values[0]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@@ -1998,7 +1997,7 @@ private:
return {v_float_zero, Type::Float};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(t_uint, 0);
const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@@ -2622,11 +2621,11 @@ private:
&SPIRVDecompiler::ImageLoad,
&SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::AtomicImageAdd,
&SPIRVDecompiler::AtomicImageAnd,
&SPIRVDecompiler::AtomicImageOr,
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@@ -2768,8 +2767,11 @@ private:
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
const Id v_float_zero = Constant(t_float, 0.0f);
const Id v_float_one = Constant(t_float, 1.0f);
const Id v_uint_zero = Constant(t_uint, 0);
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
const Id v_varying_default =
@@ -2794,15 +2796,16 @@ private:
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images;
std::map<u32, TexelBuffer> storage_texels;
std::map<u32, StorageImage> images;
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id instance_index{};
Id vertex_index{};
Id base_instance{};
Id base_vertex{};
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id frag_depth{};
Id frag_coord{};
Id front_facing{};
@@ -3058,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
entries.texel_buffers.emplace_back(sampler);
entries.uniform_texels.emplace_back(sampler);
} else {
entries.samplers.emplace_back(sampler);
}
}
for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
entries.storage_texels.emplace_back(image);
} else {
entries.images.emplace_back(image);
}
}
for (const auto& attribute : ir.GetInputAttributes()) {
if (IsGenericAttribute(attribute)) {

View File

@@ -21,8 +21,9 @@ class VKDevice;
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using TexelBufferEntry = VideoCommon::Shader::Sampler;
using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -66,13 +67,15 @@ private:
struct ShaderEntries {
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
texel_buffers.size() + samplers.size() + images.size());
uniform_texels.size() + samplers.size() + storage_texels.size() +
images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
std::vector<TexelBufferEntry> texel_buffers;
std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers;
std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};

View File

@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
ci.pNext = nullptr;
ci.flags = 0;
ci.size = static_cast<VkDeviceSize>(host_memory_size);
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
ci.queueFamilyIndexCount = 0;
ci.pQueueFamilyIndices = nullptr;
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
ci.extent = {params.width, params.height, 1};
break;
case SurfaceTarget::Texture3D:
ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
ci.extent = {params.width, params.height, params.depth};
break;
case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
return ci;
}
/// Packs four swizzle sources into a single u32: x is shifted left by 24 bits, y by 16,
/// z by 8 and w is left unshifted. The four parts are OR-ed together.
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
                  Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
    const u32 x_bits = static_cast<u32>(x_source) << 24;
    const u32 y_bits = static_cast<u32>(y_source) << 16;
    const u32 z_bits = static_cast<u32>(z_source) << 8;
    const u32 w_bits = static_cast<u32>(w_source);
    return x_bits | y_bits | z_bits | w_bits;
}
} // Anonymous namespace
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
}
// TODO(Rodrigo): Move this to a virtual function.
main_view = CreateViewInner(
ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
true);
u32 num_layers = 1;
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
num_layers = params.depth;
}
main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
}
View CachedSurface::CreateView(const ViewParams& params) {
return CreateViewInner(params, false);
}
View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
// TODO(Rodrigo): Add name decorations
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
}
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
}
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
const ViewParams& params, bool is_proxy)
const ViewParams& params)
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
: VK_IMAGE_VIEW_TYPE_1D} {}
base_level{params.base_level}, num_levels{params.num_levels},
image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
base_layer = 0;
num_layers = 1;
base_slice = params.base_layer;
num_slices = params.num_layers;
} else {
base_layer = params.base_layer;
num_layers = params.num_layers;
}
}
CachedSurfaceView::~CachedSurfaceView() = default;
VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
});
}
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
ASSERT(base_slice == 0);
ASSERT(num_slices == params.depth);
}
VkImageViewCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
ci.pNext = nullptr;
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
return last_image_view = *image_view;
}
/// Returns the image view stored in `render_target`, creating it on the first call.
/// The view uses identity swizzles. For a 3D view type the selected slices are exposed as
/// the layers of a 2D view (or a 2D array view when there is more than one slice);
/// otherwise the view's own type and layer range are used.
VkImageView CachedSurfaceView::GetAttachment() {
    if (!render_target) {
        const bool is_3d = image_view_type == VK_IMAGE_VIEW_TYPE_3D;

        VkImageViewCreateInfo view_ci;
        view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
        view_ci.pNext = nullptr;
        view_ci.flags = 0;
        view_ci.image = surface.GetImageHandle();
        view_ci.format = surface.GetImage().GetFormat();
        view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                              VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};

        auto& range = view_ci.subresourceRange;
        range.aspectMask = aspect_mask;
        range.baseMipLevel = base_level;
        range.levelCount = num_levels;
        // Slices of a 3D image take the place of array layers
        range.baseArrayLayer = is_3d ? base_slice : base_layer;
        range.layerCount = is_3d ? num_slices : num_layers;

        if (is_3d) {
            view_ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
        } else {
            view_ci.viewType = image_view_type;
        }

        render_target = device.GetLogical().CreateImageView(view_ci);
    }
    return *render_target;
}
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,

View File

@@ -91,7 +91,6 @@ protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& params) override;
View CreateViewInner(const ViewParams& params, bool is_proxy);
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
const ViewParams& params, bool is_proxy);
const ViewParams& params);
~CachedSurfaceView();
VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
VkImageView GetHandle() {
return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
}
/// Returns the mip width that the surface parameters report for this view's base level.
u32 GetWidth() const {
    const u32 level = base_level;
    return params.GetMipWidth(level);
}
@@ -180,14 +176,6 @@ public:
}
private:
static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source) {
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
}
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams params;
const VkImage image;
@@ -196,15 +184,18 @@ private:
const VKDevice& device;
CachedSurface& surface;
const u32 base_layer;
const u32 num_layers;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
u32 base_layer = 0;
u32 num_layers = 0;
u32 base_slice = 0;
u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};

View File

@@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
// Use an extra temporary buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
// Special case for 3D Texture Segments
const bool must_read_current_data =
params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
if (must_read_current_data) {
if (params.target == SurfaceTarget::Texture3D) {
// Special case for 3D texture segments
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}

View File

@@ -217,8 +217,8 @@ public:
}
bool IsProtected() const {
// Only 3D Slices are to be protected
return is_target && params.block_depth > 0;
// Only 3D slices are to be protected
return is_target && params.target == SurfaceTarget::Texture3D;
}
bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
}
/// Requests a Texture3D view from GetView.
/// @param slice      Passed in the base-layer position of ViewParams.
/// @param depth      Passed in the layer-count position of ViewParams.
/// @param base_level First mip level of the view.
/// @param num_levels Number of mip levels in the view.
TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
    const auto target = VideoCore::Surface::SurfaceTarget::Texture3D;
    return GetView(ViewParams(target, slice, depth, base_level, num_levels));
}
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
const GPUVAddr view_addr,
const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
const std::size_t candidate_size) {
if (params.target == SurfaceTarget::Texture3D ||
(params.num_levels == 1 && !params.is_layered) ||
view_params.target == SurfaceTarget::Texture3D) {
view_params.target == SurfaceTarget::Texture3D ||
(params.num_levels == 1 && !params.is_layered)) {
return {};
}
const auto layer_mipmap{GetLayerMipmap(view_addr)};

View File

@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.num_levels = 1;
params.emulated_levels = 1;
const bool is_layered = config.layers > 1 && params.block_depth == 0;
params.is_layered = is_layered;
params.depth = is_layered ? config.layers.Value() : 1;
params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
if (config.memory_layout.is_3d != 0) {
params.depth = config.layers.Value();
params.is_layered = false;
params.target = SurfaceTarget::Texture3D;
} else if (config.layers > 1) {
params.depth = config.layers.Value();
params.is_layered = true;
params.target = SurfaceTarget::Texture2DArray;
} else {
params.depth = 1;
params.is_layered = false;
params.target = SurfaceTarget::Texture2D;
}
return params;
}
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
params.width = config.width;
params.height = config.height;
params.pitch = config.pitch;
// TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
// TODO(Rodrigo): Try to guess texture arrays from parameters
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;

View File

@@ -298,15 +298,13 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const std::optional<VAddr> dst_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
const std::optional<VAddr> src_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
const auto& memory_manager = system.GPU().MemoryManager();
const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
ImageBlit(src_surface, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
@@ -508,12 +506,12 @@ private:
return RecycleStrategy::Flush;
}
// 3D Textures decision
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
if (params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
if (s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
@@ -731,51 +729,9 @@ private:
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
const VAddr cpu_addr,
GPUVAddr gpu_addr, VAddr cpu_addr,
bool preserve_contents) {
if (params.target == SurfaceTarget::Texture3D) {
bool failed = false;
if (params.num_levels > 1) {
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
return std::nullopt;
}
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
bool modified = false;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
if (src_params.target != SurfaceTarget::Texture2D) {
failed = true;
break;
}
if (src_params.height != params.height) {
failed = true;
break;
}
if (src_params.block_depth != params.block_depth ||
src_params.block_height != params.block_height) {
failed = true;
break;
}
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
const auto offsets = params.GetBlockOffsetXYZ(offset);
const auto z = std::get<2>(offsets);
modified |= surface->IsModified();
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
1);
ImageCopy(surface, new_surface, copy_params);
}
if (failed) {
return std::nullopt;
}
for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
auto view = new_surface->GetMainView();
return {{std::move(new_surface), view}};
} else {
if (params.target != SurfaceTarget::Texture3D) {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -791,11 +747,60 @@ private:
continue;
}
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
return {{surface, surface->GetMainView()}};
return std::make_pair(surface, surface->GetMainView());
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
if (params.num_levels > 1) {
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
return std::nullopt;
}
if (overlaps.size() == 1) {
const auto& surface = overlaps[0];
const SurfaceParams& overlap_params = surface->GetSurfaceParams();
// Don't attempt to render to textures with more than one level for now
// The requested address has to be at or after the overlap's base address if we want to render to it
if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
if (slice < overlap_params.depth) {
auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
return std::make_pair(std::move(surface), std::move(view));
}
}
}
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
bool modified = false;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
if (src_params.target != SurfaceTarget::Texture2D ||
src_params.height != params.height ||
src_params.block_depth != params.block_depth ||
src_params.block_height != params.block_height) {
return std::nullopt;
}
modified |= surface->IsModified();
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
const u32 width = params.width;
const u32 height = params.height;
const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
ImageCopy(surface, new_surface, copy_params);
}
for (const auto& surface : overlaps) {
Unregister(surface);
}
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
TView view = new_surface->GetMainView();
return std::make_pair(std::move(new_surface), std::move(view));
}
/**
@@ -873,7 +878,7 @@ private:
}
}
// Check if it's a 3D texture
// Manage 3D textures
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);

View File

@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
ui->setupUi(this);
// TODO: Remove this after assembly shaders are fully integrated
ui->use_assembly_shaders->setVisible(false);
SetConfiguration();
}