Compare commits
16 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
87011a97f9 | ||
|
|
a63a0daa5e | ||
|
|
d89888389d | ||
|
|
83e3b77ed7 | ||
|
|
3626254f48 | ||
|
|
98d2461529 | ||
|
|
bd43c05470 | ||
|
|
c99f5d405b | ||
|
|
3c2ae53b4c | ||
|
|
c95c254f3e | ||
|
|
2293e8a11a | ||
|
|
03fd5aa384 | ||
|
|
354fbe701e | ||
|
|
3d99b449d3 | ||
|
|
866c1165af | ||
|
|
4a6b9a1a71 |
@@ -51,6 +51,8 @@ endif()
|
||||
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
|
||||
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
|
||||
set(HASH_FILES
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
2
externals/sirit
vendored
2
externals/sirit
vendored
Submodule externals/sirit updated: a62c5bbc10...eefca56afd
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
DEPENDS
|
||||
# WARNING! It was too much work to try and make a common location for this list,
|
||||
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
@@ -54,6 +54,8 @@ add_library(video_core STATIC
|
||||
rasterizer_interface.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
renderer_opengl/gl_arb_decompiler.cpp
|
||||
renderer_opengl/gl_arb_decompiler.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_device.cpp
|
||||
|
||||
@@ -598,6 +598,7 @@ public:
|
||||
BitField<4, 3, u32> block_height;
|
||||
BitField<8, 3, u32> block_depth;
|
||||
BitField<12, 1, InvMemoryLayout> type;
|
||||
BitField<16, 1, u32> is_3d;
|
||||
} memory_layout;
|
||||
union {
|
||||
BitField<0, 16, u32> layers;
|
||||
|
||||
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
File diff suppressed because it is too large
Load Diff
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
enum class ShaderType : u32;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
class Registry;
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
/// Decompiles a shader IR into the text of an assembly shader program
/// @param device OpenGL device the program is generated for
/// @param ir Shader intermediate representation to decompile
/// @param registry Shader registry passed along with the IR
/// @param stage Shader stage of the program
/// @param identifier Identifier of the shader
///        NOTE(review): how it is used is defined in gl_arb_decompiler.cpp - confirm there
/// @return Generated program source
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -213,8 +213,10 @@ Device::Device()
|
||||
has_component_indexing_bug = is_amd;
|
||||
has_precise_bug = TestPreciseBug();
|
||||
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||
GLAD_GL_NV_compute_program5;
|
||||
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
||||
GLAD_GL_NV_transform_feedback2;
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
||||
|
||||
@@ -88,6 +88,10 @@ public:
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
/// Returns the GL_NV_viewport_array2 availability flag stored by the Device constructor
bool HasNvViewportArray2() const {
|
||||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
/// Returns the flag computed by the Device constructor from the use_assembly_shaders setting
/// and the presence of the NV extensions it checks for
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
@@ -111,6 +115,7 @@ private:
|
||||
bool has_component_indexing_bug{};
|
||||
bool has_precise_bug{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool use_assembly_shaders{};
|
||||
};
|
||||
|
||||
|
||||
@@ -93,6 +93,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
return buffer.size;
|
||||
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
|
||||
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
|
||||
const u8 index = location / 4;
|
||||
if (index >= 8 && index <= 39) {
|
||||
return {GL_GENERIC_ATTRIB_NV, index - 8};
|
||||
}
|
||||
if (index >= 48 && index <= 55) {
|
||||
return {GL_TEXTURE_COORD_NV, index - 48};
|
||||
}
|
||||
switch (index) {
|
||||
case 7:
|
||||
return {GL_POSITION, 0};
|
||||
case 40:
|
||||
return {GL_PRIMARY_COLOR_NV, 0};
|
||||
case 41:
|
||||
return {GL_SECONDARY_COLOR_NV, 0};
|
||||
case 42:
|
||||
return {GL_BACK_PRIMARY_COLOR_NV, 0};
|
||||
case 43:
|
||||
return {GL_BACK_SECONDARY_COLOR_NV, 0};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
|
||||
return {GL_POSITION, 0};
|
||||
}
|
||||
|
||||
/// Enables the OpenGL capability when state is true and disables it otherwise
void oglEnable(GLenum cap, bool state) {
    if (state) {
        glEnable(cap);
    } else {
        glDisable(cap);
    }
}
|
||||
@@ -1547,12 +1575,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
||||
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncTransformFeedback() {
|
||||
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
|
||||
// when this is required.
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
|
||||
static constexpr std::size_t STRIDE = 3;
|
||||
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
|
||||
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
|
||||
|
||||
GLint* cursor = attribs.data();
|
||||
GLint* current_stream = streams.data();
|
||||
|
||||
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
|
||||
const auto& layout = regs.tfb_layouts[feedback];
|
||||
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
|
||||
if (layout.varying_count == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
*current_stream = static_cast<GLint>(feedback);
|
||||
if (current_stream != streams.data()) {
|
||||
// When stepping one stream, push the expected token
|
||||
cursor[0] = GL_NEXT_BUFFER_NV;
|
||||
cursor[1] = 0;
|
||||
cursor[2] = 0;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
++current_stream;
|
||||
|
||||
const auto& locations = regs.tfb_varying_locs[feedback];
|
||||
std::optional<u8> current_index;
|
||||
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
|
||||
const u8 location = locations[offset];
|
||||
const u8 index = location / 4;
|
||||
|
||||
if (current_index == index) {
|
||||
// Increase number of components of the previous attachment
|
||||
++cursor[-2];
|
||||
continue;
|
||||
}
|
||||
current_index = index;
|
||||
|
||||
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
|
||||
cursor[1] = 1;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
}
|
||||
|
||||
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
|
||||
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
|
||||
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
|
||||
GL_INTERLEAVED_ATTRIBS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
SyncTransformFeedback();
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
@@ -1579,6 +1665,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
|
||||
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
}
|
||||
|
||||
|
||||
@@ -202,6 +202,10 @@ private:
|
||||
/// Syncs the framebuffer sRGB state to match the guest state
|
||||
void SyncFramebufferSRGB();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
|
||||
/// Begin a transform feedback
|
||||
void BeginTransformFeedback(GLenum primitive_mode);
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
@@ -147,7 +148,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
|
||||
auto program = std::make_shared<ProgramHandle>();
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
const std::string arb = "Not implemented";
|
||||
const std::string arb =
|
||||
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
|
||||
|
||||
GLuint& arb_prog = program->assembly_program.handle;
|
||||
|
||||
|
||||
@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
|
||||
target = GetTextureTarget(params.target);
|
||||
texture = CreateTexture(params, target, internal_format, texture_buffer);
|
||||
DecorateSurfaceName();
|
||||
main_view = CreateViewInner(
|
||||
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
|
||||
true);
|
||||
|
||||
u32 num_layers = 1;
|
||||
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
|
||||
num_layers = params.depth;
|
||||
}
|
||||
|
||||
main_view =
|
||||
CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
|
||||
}
|
||||
|
||||
CachedSurface::~CachedSurface() = default;
|
||||
@@ -413,37 +418,40 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
|
||||
ASSERT(params.num_levels == 1);
|
||||
|
||||
if (params.num_layers > 1) {
|
||||
// Layered framebuffer attachments
|
||||
UNIMPLEMENTED_IF(params.base_layer != 0);
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
glFramebufferTexture(target, attachment, GetTexture(), 0);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
if (params.num_layers > 1) {
|
||||
ASSERT(params.base_layer == 0);
|
||||
glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
|
||||
} else {
|
||||
glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
|
||||
params.base_level, params.base_layer);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (params.num_layers > 1) {
|
||||
UNIMPLEMENTED_IF(params.base_layer != 0);
|
||||
glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const GLenum view_target = surface.GetTarget();
|
||||
const GLuint texture = surface.GetTexture();
|
||||
switch (surface.GetSurfaceParams().target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
|
||||
glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
|
||||
glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glFramebufferTextureLayer(target, attachment, texture, params.base_level,
|
||||
glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
|
||||
params.base_layer);
|
||||
break;
|
||||
default:
|
||||
@@ -500,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||
OGLTextureView texture_view;
|
||||
texture_view.Create();
|
||||
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
|
||||
params.num_levels, params.base_layer, params.num_layers);
|
||||
if (target == GL_TEXTURE_3D) {
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||
params.base_level, params.num_levels, 0, 1);
|
||||
} else {
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||
params.base_level, params.num_levels, params.base_layer, params.num_layers);
|
||||
}
|
||||
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
|
||||
|
||||
return texture_view;
|
||||
@@ -544,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
const auto& src_params{src_view->GetSurfaceParams()};
|
||||
const auto& dst_params{dst_view->GetSurfaceParams()};
|
||||
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
|
||||
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
|
||||
UNIMPLEMENTED_IF(src_params.depth != 1);
|
||||
UNIMPLEMENTED_IF(dst_params.depth != 1);
|
||||
|
||||
state_tracker.NotifyScissor0();
|
||||
state_tracker.NotifyFramebuffer();
|
||||
|
||||
@@ -80,8 +80,10 @@ public:
|
||||
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
|
||||
~CachedSurfaceView();
|
||||
|
||||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
|
||||
void Attach(GLenum attachment, GLenum target) const;
|
||||
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
|
||||
/// @param attachment Attachment to bind textures to
|
||||
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
|
||||
void Attach(GLenum attachment, GLenum fb_target) const;
|
||||
|
||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
|
||||
@@ -751,11 +751,9 @@ void RendererOpenGL::RenderScreenshot() {
|
||||
}
|
||||
|
||||
bool RendererOpenGL::Init() {
|
||||
if (GLAD_GL_KHR_debug) {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
if (Settings::values.renderer_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
}
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
|
||||
};
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo ci;
|
||||
|
||||
@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
|
||||
|
||||
VkDescriptorPoolCreateInfo ci;
|
||||
|
||||
@@ -45,6 +45,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
||||
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
|
||||
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
|
||||
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
|
||||
@@ -104,8 +105,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
|
||||
u32 binding = base_binding;
|
||||
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
|
||||
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
|
||||
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
|
||||
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
|
||||
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
|
||||
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
|
||||
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
|
||||
return binding;
|
||||
}
|
||||
@@ -377,16 +379,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
|
||||
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
|
||||
// crash.
|
||||
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
|
||||
descriptor_type == STORAGE_TEXEL_BUFFER) {
|
||||
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
|
||||
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
|
||||
entry.dstBinding = binding + i;
|
||||
entry.dstArrayElement = 0;
|
||||
entry.descriptorCount = 1;
|
||||
entry.descriptorType = descriptor_type;
|
||||
entry.offset = offset + i * entry_size;
|
||||
entry.offset = static_cast<std::size_t>(offset + i * entry_size);
|
||||
entry.stride = entry_size;
|
||||
}
|
||||
} else if (count > 0) {
|
||||
@@ -407,8 +410,9 @@ void FillDescriptorUpdateTemplateEntries(
|
||||
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
|
||||
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
|
||||
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
|
||||
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
|
||||
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
|
||||
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
|
||||
AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
|
||||
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
|
||||
}
|
||||
|
||||
|
||||
@@ -468,8 +468,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
const auto& entries = pipeline.GetEntries();
|
||||
SetupComputeConstBuffers(entries);
|
||||
SetupComputeGlobalBuffers(entries);
|
||||
SetupComputeTexelBuffers(entries);
|
||||
SetupComputeUniformTexels(entries);
|
||||
SetupComputeTextures(entries);
|
||||
SetupComputeStorageTexels(entries);
|
||||
SetupComputeImages(entries);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
@@ -715,7 +716,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
|
||||
if (!view) {
|
||||
return false;
|
||||
}
|
||||
key.views.push_back(view->GetHandle());
|
||||
key.views.push_back(view->GetAttachment());
|
||||
key.width = std::min(key.width, view->GetWidth());
|
||||
key.height = std::min(key.height, view->GetHeight());
|
||||
key.layers = std::min(key.layers, view->GetNumLayers());
|
||||
@@ -787,8 +788,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
|
||||
const auto& entries = shader->GetEntries();
|
||||
SetupGraphicsConstBuffers(entries, stage);
|
||||
SetupGraphicsGlobalBuffers(entries, stage);
|
||||
SetupGraphicsTexelBuffers(entries, stage);
|
||||
SetupGraphicsUniformTexels(entries, stage);
|
||||
SetupGraphicsTextures(entries, stage);
|
||||
SetupGraphicsStorageTexels(entries, stage);
|
||||
SetupGraphicsImages(entries, stage);
|
||||
}
|
||||
texture_cache.GuardSamplers(false);
|
||||
@@ -983,12 +985,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
|
||||
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
for (const auto& entry : entries.texel_buffers) {
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||
SetupTexelBuffer(image, entry);
|
||||
SetupUniformTexels(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1003,6 +1005,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||
SetupStorageTexel(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
@@ -1035,12 +1046,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
|
||||
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
for (const auto& entry : entries.texel_buffers) {
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||
SetupTexelBuffer(image, entry);
|
||||
SetupUniformTexels(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1055,6 +1066,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||
SetupStorageTexel(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
@@ -1104,8 +1124,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
|
||||
const TexelBufferEntry& entry) {
|
||||
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
||||
const UniformTexelEntry& entry) {
|
||||
const auto view = texture_cache.GetTextureSurface(tic, entry);
|
||||
ASSERT(view->IsBufferView());
|
||||
|
||||
@@ -1117,8 +1137,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
|
||||
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
||||
ASSERT(!view->IsBufferView());
|
||||
|
||||
const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
const auto sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
update_descriptor_queue.AddSampledImage(sampler, image_view);
|
||||
|
||||
@@ -1127,6 +1147,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
|
||||
sampled_views.push_back(ImageView{std::move(view), image_layout});
|
||||
}
|
||||
|
||||
/// Queues the buffer view of a storage texel for the next descriptor update.
/// @param tic Texture image control entry of the texel
/// @param entry Shader storage texel entry used for the surface lookup
void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
                                         const StorageTexelEntry& entry) {
    const auto surface_view = texture_cache.GetImageSurface(tic, entry);
    // Storage texels are expected to resolve to buffer views
    ASSERT(surface_view->IsBufferView());

    update_descriptor_queue.AddTexelBuffer(surface_view->GetBufferView());
}
|
||||
|
||||
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
|
||||
auto view = texture_cache.GetImageSurface(tic, entry);
|
||||
|
||||
@@ -1136,7 +1164,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
|
||||
|
||||
UNIMPLEMENTED_IF(tic.IsBuffer());
|
||||
|
||||
const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
const VkImageView image_view =
|
||||
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
update_descriptor_queue.AddImage(image_view);
|
||||
|
||||
const auto image_layout = update_descriptor_queue.GetLastImageLayout();
|
||||
|
||||
@@ -193,12 +193,15 @@ private:
|
||||
/// Setup global buffers in the graphics pipeline.
|
||||
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup texel buffers in the graphics pipeline.
|
||||
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
/// Setup uniform texels in the graphics pipeline.
|
||||
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup textures in the graphics pipeline.
|
||||
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup storage texels in the graphics pipeline.
|
||||
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup images in the graphics pipeline.
|
||||
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
@@ -209,11 +212,14 @@ private:
|
||||
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup texel buffers in the compute pipeline.
|
||||
void SetupComputeTexelBuffers(const ShaderEntries& entries);
|
||||
void SetupComputeUniformTexels(const ShaderEntries& entries);
|
||||
|
||||
/// Setup textures in the compute pipeline.
|
||||
void SetupComputeTextures(const ShaderEntries& entries);
|
||||
|
||||
/// Setup storage texels in the compute pipeline.
|
||||
void SetupComputeStorageTexels(const ShaderEntries& entries);
|
||||
|
||||
/// Setup images in the compute pipeline.
|
||||
void SetupComputeImages(const ShaderEntries& entries);
|
||||
|
||||
@@ -222,10 +228,12 @@ private:
|
||||
|
||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||
|
||||
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
|
||||
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
|
||||
|
||||
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
|
||||
|
||||
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
|
||||
|
||||
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
@@ -400,8 +400,9 @@ private:
|
||||
u32 binding = specialization.base_binding;
|
||||
binding = DeclareConstantBuffers(binding);
|
||||
binding = DeclareGlobalBuffers(binding);
|
||||
binding = DeclareTexelBuffers(binding);
|
||||
binding = DeclareUniformTexels(binding);
|
||||
binding = DeclareSamplers(binding);
|
||||
binding = DeclareStorageTexels(binding);
|
||||
binding = DeclareImages(binding);
|
||||
|
||||
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
|
||||
@@ -889,7 +890,7 @@ private:
|
||||
return binding;
|
||||
}
|
||||
|
||||
u32 DeclareTexelBuffers(u32 binding) {
|
||||
u32 DeclareUniformTexels(u32 binding) {
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
if (!sampler.is_buffer) {
|
||||
continue;
|
||||
@@ -910,7 +911,7 @@ private:
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
|
||||
texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
|
||||
uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
|
||||
}
|
||||
return binding;
|
||||
}
|
||||
@@ -945,31 +946,48 @@ private:
|
||||
return binding;
|
||||
}
|
||||
|
||||
u32 DeclareImages(u32 binding) {
|
||||
u32 DeclareStorageTexels(u32 binding) {
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
const auto [dim, arrayed] = GetImageDim(image);
|
||||
constexpr int depth = 0;
|
||||
constexpr bool ms = false;
|
||||
constexpr int sampled = 2; // This won't be accessed with a sampler
|
||||
constexpr auto format = spv::ImageFormat::Unknown;
|
||||
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
||||
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
||||
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
||||
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
if (image.is_read && !image.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
} else if (image.is_written && !image.is_read) {
|
||||
Decorate(id, spv::Decoration::NonReadable);
|
||||
if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
images.emplace(image.index, StorageImage{image_type, id});
|
||||
DeclareImage(image, binding);
|
||||
}
|
||||
return binding;
|
||||
}
|
||||
|
||||
/// Declares every image reported by the IR whose type is not TextureBuffer.
/// Texture buffer images are skipped by this pass.
/// @param binding First binding index available to the declared images.
/// @return The binding value after all declarations (DeclareImage takes it by reference).
u32 DeclareImages(u32 binding) {
    for (const auto& entry : ir.GetImages()) {
        const bool is_texture_buffer = entry.type == Tegra::Shader::ImageType::TextureBuffer;
        if (!is_texture_buffer) {
            DeclareImage(entry, binding);
        }
    }
    return binding;
}
|
||||
|
||||
void DeclareImage(const Image& image, u32& binding) {
|
||||
const auto [dim, arrayed] = GetImageDim(image);
|
||||
constexpr int depth = 0;
|
||||
constexpr bool ms = false;
|
||||
constexpr int sampled = 2; // This won't be accessed with a sampler
|
||||
const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
|
||||
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
||||
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
||||
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
||||
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
if (image.is_read && !image.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
} else if (image.is_written && !image.is_read) {
|
||||
Decorate(id, spv::Decoration::NonReadable);
|
||||
}
|
||||
|
||||
images.emplace(image.index, StorageImage{image_type, id});
|
||||
}
|
||||
|
||||
bool IsRenderTargetEnabled(u32 rt) const {
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
|
||||
@@ -1256,7 +1274,7 @@ private:
|
||||
} else {
|
||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||
}
|
||||
pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
|
||||
pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
|
||||
buffer_element);
|
||||
}
|
||||
return {OpLoad(t_float, pointer), Type::Float};
|
||||
@@ -1611,7 +1629,7 @@ private:
|
||||
|
||||
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
|
||||
const Id carry = OpCompositeExtract(t_uint, result, 1);
|
||||
return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
|
||||
return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
|
||||
}
|
||||
|
||||
Expression LogicalAssign(Operation operation) {
|
||||
@@ -1674,7 +1692,7 @@ private:
|
||||
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
|
||||
const u32 index = meta.sampler.index;
|
||||
if (meta.sampler.is_buffer) {
|
||||
const auto& entry = texel_buffers.at(index);
|
||||
const auto& entry = uniform_texels.at(index);
|
||||
return OpLoad(entry.image_type, entry.image);
|
||||
} else {
|
||||
const auto& entry = sampled_images.at(index);
|
||||
@@ -1951,39 +1969,20 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageAdd(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||
Expression AtomicImage(Operation operation) {
|
||||
const auto& meta{std::get<MetaImage>(operation.GetMeta())};
|
||||
ASSERT(meta.values.size() == 1);
|
||||
|
||||
Expression AtomicImageMin(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
const Id coordinate = GetCoordinates(operation, Type::Int);
|
||||
const Id image = images.at(meta.image.index).image;
|
||||
const Id sample = v_uint_zero;
|
||||
const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
|
||||
|
||||
Expression AtomicImageMax(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageAnd(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageOr(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageXor(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageExchange(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = v_uint_zero;
|
||||
const Id value = AsUint(Visit(meta.values[0]));
|
||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||
}
|
||||
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||
@@ -1998,7 +1997,7 @@ private:
|
||||
return {v_float_zero, Type::Float};
|
||||
}
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = Constant(t_uint, 0);
|
||||
const Id semantics = v_uint_zero;
|
||||
const Id value = AsUint(Visit(operation[1]));
|
||||
|
||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||
@@ -2622,11 +2621,11 @@ private:
|
||||
|
||||
&SPIRVDecompiler::ImageLoad,
|
||||
&SPIRVDecompiler::ImageStore,
|
||||
&SPIRVDecompiler::AtomicImageAdd,
|
||||
&SPIRVDecompiler::AtomicImageAnd,
|
||||
&SPIRVDecompiler::AtomicImageOr,
|
||||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
|
||||
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||
@@ -2768,8 +2767,11 @@ private:
|
||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||
|
||||
const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
|
||||
|
||||
const Id v_float_zero = Constant(t_float, 0.0f);
|
||||
const Id v_float_one = Constant(t_float, 1.0f);
|
||||
const Id v_uint_zero = Constant(t_uint, 0);
|
||||
|
||||
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
|
||||
const Id v_varying_default =
|
||||
@@ -2794,15 +2796,16 @@ private:
|
||||
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
|
||||
std::map<u32, Id> constant_buffers;
|
||||
std::map<GlobalMemoryBase, Id> global_buffers;
|
||||
std::map<u32, TexelBuffer> texel_buffers;
|
||||
std::map<u32, TexelBuffer> uniform_texels;
|
||||
std::map<u32, SampledImage> sampled_images;
|
||||
std::map<u32, TexelBuffer> storage_texels;
|
||||
std::map<u32, StorageImage> images;
|
||||
|
||||
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
||||
Id instance_index{};
|
||||
Id vertex_index{};
|
||||
Id base_instance{};
|
||||
Id base_vertex{};
|
||||
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
||||
Id frag_depth{};
|
||||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
@@ -3058,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
}
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
if (sampler.is_buffer) {
|
||||
entries.texel_buffers.emplace_back(sampler);
|
||||
entries.uniform_texels.emplace_back(sampler);
|
||||
} else {
|
||||
entries.samplers.emplace_back(sampler);
|
||||
}
|
||||
}
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
entries.images.emplace_back(image);
|
||||
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
|
||||
entries.storage_texels.emplace_back(image);
|
||||
} else {
|
||||
entries.images.emplace_back(image);
|
||||
}
|
||||
}
|
||||
for (const auto& attribute : ir.GetInputAttributes()) {
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
|
||||
@@ -21,8 +21,9 @@ class VKDevice;
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using TexelBufferEntry = VideoCommon::Shader::Sampler;
|
||||
using UniformTexelEntry = VideoCommon::Shader::Sampler;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using StorageTexelEntry = VideoCommon::Shader::Image;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SET = 0;
|
||||
@@ -66,13 +67,15 @@ private:
|
||||
struct ShaderEntries {
|
||||
u32 NumBindings() const {
|
||||
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
||||
texel_buffers.size() + samplers.size() + images.size());
|
||||
uniform_texels.size() + samplers.size() + storage_texels.size() +
|
||||
images.size());
|
||||
}
|
||||
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalBufferEntry> global_buffers;
|
||||
std::vector<TexelBufferEntry> texel_buffers;
|
||||
std::vector<UniformTexelEntry> uniform_texels;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<StorageTexelEntry> storage_texels;
|
||||
std::vector<ImageEntry> images;
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
|
||||
@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
|
||||
ci.pNext = nullptr;
|
||||
ci.flags = 0;
|
||||
ci.size = static_cast<VkDeviceSize>(host_memory_size);
|
||||
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
ci.queueFamilyIndexCount = 0;
|
||||
ci.pQueueFamilyIndices = nullptr;
|
||||
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
|
||||
ci.extent = {params.width, params.height, 1};
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||
ci.extent = {params.width, params.height, params.depth};
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
|
||||
return ci;
|
||||
}
|
||||
|
||||
/// Packs four swizzle sources into a single 32-bit key.
/// Layout, from most to least significant byte: x, y, z, w.
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
                  Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
    const u32 x_bits = static_cast<u32>(x_source) << 24;
    const u32 y_bits = static_cast<u32>(y_source) << 16;
    const u32 z_bits = static_cast<u32>(z_source) << 8;
    const u32 w_bits = static_cast<u32>(w_source);
    return x_bits | y_bits | z_bits | w_bits;
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
|
||||
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
|
||||
}
|
||||
|
||||
// TODO(Rodrigo): Move this to a virtual function.
|
||||
main_view = CreateViewInner(
|
||||
ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
|
||||
true);
|
||||
u32 num_layers = 1;
|
||||
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
|
||||
num_layers = params.depth;
|
||||
}
|
||||
main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
|
||||
}
|
||||
|
||||
CachedSurface::~CachedSurface() = default;
|
||||
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
|
||||
}
|
||||
|
||||
View CachedSurface::CreateView(const ViewParams& params) {
|
||||
return CreateViewInner(params, false);
|
||||
}
|
||||
|
||||
View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
|
||||
// TODO(Rodrigo): Add name decorations
|
||||
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
|
||||
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
|
||||
}
|
||||
|
||||
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
|
||||
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
|
||||
}
|
||||
|
||||
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
|
||||
const ViewParams& params, bool is_proxy)
|
||||
const ViewParams& params)
|
||||
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
|
||||
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
|
||||
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
|
||||
base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
|
||||
num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
|
||||
: VK_IMAGE_VIEW_TYPE_1D} {}
|
||||
base_level{params.base_level}, num_levels{params.num_levels},
|
||||
image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
|
||||
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
base_layer = 0;
|
||||
num_layers = 1;
|
||||
base_slice = params.base_layer;
|
||||
num_slices = params.num_layers;
|
||||
} else {
|
||||
base_layer = params.base_layer;
|
||||
num_layers = params.num_layers;
|
||||
}
|
||||
}
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (last_image_view && last_swizzle == new_swizzle) {
|
||||
return last_image_view;
|
||||
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
});
|
||||
}
|
||||
|
||||
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
ASSERT(base_slice == 0);
|
||||
ASSERT(num_slices == params.depth);
|
||||
}
|
||||
|
||||
VkImageViewCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
ci.pNext = nullptr;
|
||||
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
return last_image_view = *image_view;
|
||||
}
|
||||
|
||||
/// Returns the image view cached in `render_target`, creating it on the first call.
/// The view uses identity swizzles and this view's mip range. When the view type is 3D,
/// the slice range is exposed as array layers of a 2D view (2D array if more than one slice);
/// otherwise the view's own type and layer range are used.
VkImageView CachedSurfaceView::GetAttachment() {
    if (!render_target) {
        VkImageViewCreateInfo view_ci;
        view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
        view_ci.pNext = nullptr;
        view_ci.flags = 0;
        view_ci.image = surface.GetImageHandle();
        view_ci.format = surface.GetImage().GetFormat();
        view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                              VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
        view_ci.subresourceRange.aspectMask = aspect_mask;
        view_ci.subresourceRange.baseMipLevel = base_level;
        view_ci.subresourceRange.levelCount = num_levels;

        const bool is_3d = image_view_type == VK_IMAGE_VIEW_TYPE_3D;
        if (is_3d) {
            // Slices of the 3D image are addressed as array layers of a 2D view.
            if (num_slices > 1) {
                view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
            } else {
                view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
            }
            view_ci.subresourceRange.baseArrayLayer = base_slice;
            view_ci.subresourceRange.layerCount = num_slices;
        } else {
            view_ci.viewType = image_view_type;
            view_ci.subresourceRange.baseArrayLayer = base_layer;
            view_ci.subresourceRange.layerCount = num_layers;
        }

        render_target = device.GetLogical().CreateImageView(view_ci);
    }
    return *render_target;
}
|
||||
|
||||
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
const VKDevice& device, VKResourceManager& resource_manager,
|
||||
VKMemoryManager& memory_manager, VKScheduler& scheduler,
|
||||
|
||||
@@ -91,7 +91,6 @@ protected:
|
||||
void DecorateSurfaceName();
|
||||
|
||||
View CreateView(const ViewParams& params) override;
|
||||
View CreateViewInner(const ViewParams& params, bool is_proxy);
|
||||
|
||||
private:
|
||||
void UploadBuffer(const std::vector<u8>& staging_buffer);
|
||||
@@ -120,23 +119,20 @@ private:
|
||||
class CachedSurfaceView final : public VideoCommon::ViewBase {
|
||||
public:
|
||||
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
|
||||
const ViewParams& params, bool is_proxy);
|
||||
const ViewParams& params);
|
||||
~CachedSurfaceView();
|
||||
|
||||
VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
|
||||
VkImageView GetAttachment();
|
||||
|
||||
bool IsSameSurface(const CachedSurfaceView& rhs) const {
|
||||
return &surface == &rhs.surface;
|
||||
}
|
||||
|
||||
VkImageView GetHandle() {
|
||||
return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
|
||||
Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
|
||||
}
|
||||
|
||||
u32 GetWidth() const {
|
||||
return params.GetMipWidth(base_level);
|
||||
}
|
||||
@@ -180,14 +176,6 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source) {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
// Store a copy of these values to avoid double dereference when reading them
|
||||
const SurfaceParams params;
|
||||
const VkImage image;
|
||||
@@ -196,15 +184,18 @@ private:
|
||||
|
||||
const VKDevice& device;
|
||||
CachedSurface& surface;
|
||||
const u32 base_layer;
|
||||
const u32 num_layers;
|
||||
const u32 base_level;
|
||||
const u32 num_levels;
|
||||
const VkImageViewType image_view_type;
|
||||
u32 base_layer = 0;
|
||||
u32 num_layers = 0;
|
||||
u32 base_slice = 0;
|
||||
u32 num_slices = 0;
|
||||
|
||||
VkImageView last_image_view = nullptr;
|
||||
u32 last_swizzle = 0;
|
||||
|
||||
vk::ImageView render_target;
|
||||
std::unordered_map<u32, vk::ImageView> view_cache;
|
||||
};
|
||||
|
||||
|
||||
@@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
|
||||
|
||||
// Use an extra temporal buffer
|
||||
auto& tmp_buffer = staging_cache.GetBuffer(1);
|
||||
// Special case for 3D Texture Segments
|
||||
const bool must_read_current_data =
|
||||
params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
|
||||
tmp_buffer.resize(guest_memory_size);
|
||||
host_ptr = tmp_buffer.data();
|
||||
if (must_read_current_data) {
|
||||
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
// Special case for 3D texture segments
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -217,8 +217,8 @@ public:
|
||||
}
|
||||
|
||||
bool IsProtected() const {
|
||||
// Only 3D Slices are to be protected
|
||||
return is_target && params.block_depth > 0;
|
||||
// Only 3D slices are to be protected
|
||||
return is_target && params.target == SurfaceTarget::Texture3D;
|
||||
}
|
||||
|
||||
bool IsRenderTarget() const {
|
||||
@@ -250,6 +250,11 @@ public:
|
||||
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
|
||||
}
|
||||
|
||||
/// Obtains a view with a Texture3D target through GetView.
/// `slice` and `depth` are passed as the second and third ViewParams arguments,
/// followed by the mip range (`base_level`, `num_levels`).
TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
    constexpr auto target = VideoCore::Surface::SurfaceTarget::Texture3D;
    const ViewParams view_params(target, slice, depth, base_level, num_levels);
    return GetView(view_params);
}
|
||||
|
||||
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
|
||||
const GPUVAddr view_addr,
|
||||
const std::size_t candidate_size, const u32 mipmap,
|
||||
@@ -272,8 +277,8 @@ public:
|
||||
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
|
||||
const std::size_t candidate_size) {
|
||||
if (params.target == SurfaceTarget::Texture3D ||
|
||||
(params.num_levels == 1 && !params.is_layered) ||
|
||||
view_params.target == SurfaceTarget::Texture3D) {
|
||||
view_params.target == SurfaceTarget::Texture3D ||
|
||||
(params.num_levels == 1 && !params.is_layered)) {
|
||||
return {};
|
||||
}
|
||||
const auto layer_mipmap{GetLayerMipmap(view_addr)};
|
||||
|
||||
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
|
||||
params.num_levels = 1;
|
||||
params.emulated_levels = 1;
|
||||
|
||||
const bool is_layered = config.layers > 1 && params.block_depth == 0;
|
||||
params.is_layered = is_layered;
|
||||
params.depth = is_layered ? config.layers.Value() : 1;
|
||||
params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
|
||||
if (config.memory_layout.is_3d != 0) {
|
||||
params.depth = config.layers.Value();
|
||||
params.is_layered = false;
|
||||
params.target = SurfaceTarget::Texture3D;
|
||||
} else if (config.layers > 1) {
|
||||
params.depth = config.layers.Value();
|
||||
params.is_layered = true;
|
||||
params.target = SurfaceTarget::Texture2DArray;
|
||||
} else {
|
||||
params.depth = 1;
|
||||
params.is_layered = false;
|
||||
params.target = SurfaceTarget::Texture2D;
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
|
||||
params.width = config.width;
|
||||
params.height = config.height;
|
||||
params.pitch = config.pitch;
|
||||
// TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
|
||||
// TODO(Rodrigo): Try to guess texture arrays from parameters
|
||||
params.target = SurfaceTarget::Texture2D;
|
||||
params.depth = 1;
|
||||
params.num_levels = 1;
|
||||
|
||||
@@ -298,15 +298,13 @@ public:
|
||||
const GPUVAddr src_gpu_addr = src_config.Address();
|
||||
const GPUVAddr dst_gpu_addr = dst_config.Address();
|
||||
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
|
||||
const std::optional<VAddr> dst_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
|
||||
const std::optional<VAddr> src_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
|
||||
std::pair<TSurface, TView> dst_surface =
|
||||
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
|
||||
std::pair<TSurface, TView> src_surface =
|
||||
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
|
||||
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
||||
|
||||
const auto& memory_manager = system.GPU().MemoryManager();
|
||||
const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
|
||||
const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
|
||||
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
|
||||
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
|
||||
ImageBlit(src_surface, dst_surface.second, copy_config);
|
||||
dst_surface.first->MarkAsModified(true, Tick());
|
||||
}
|
||||
|
||||
@@ -508,12 +506,12 @@ private:
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
// 3D Textures decision
|
||||
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
for (const auto& s : overlaps) {
|
||||
const auto& s_params = s->GetSurfaceParams();
|
||||
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
|
||||
if (s_params.target == SurfaceTarget::Texture3D) {
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
}
|
||||
@@ -731,51 +729,9 @@ private:
|
||||
*/
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr,
|
||||
const VAddr cpu_addr,
|
||||
GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
bool preserve_contents) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
bool failed = false;
|
||||
if (params.num_levels > 1) {
|
||||
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
|
||||
return std::nullopt;
|
||||
}
|
||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
bool modified = false;
|
||||
for (auto& surface : overlaps) {
|
||||
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||
if (src_params.target != SurfaceTarget::Texture2D) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
if (src_params.height != params.height) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
if (src_params.block_depth != params.block_depth ||
|
||||
src_params.block_height != params.block_height) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
|
||||
const auto offsets = params.GetBlockOffsetXYZ(offset);
|
||||
const auto z = std::get<2>(offsets);
|
||||
modified |= surface->IsModified();
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
|
||||
1);
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
}
|
||||
if (failed) {
|
||||
return std::nullopt;
|
||||
}
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
new_surface->MarkAsModified(modified, Tick());
|
||||
Register(new_surface);
|
||||
auto view = new_surface->GetMainView();
|
||||
return {{std::move(new_surface), view}};
|
||||
} else {
|
||||
if (params.target != SurfaceTarget::Texture3D) {
|
||||
for (const auto& surface : overlaps) {
|
||||
if (!surface->MatchTarget(params.target)) {
|
||||
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
|
||||
@@ -791,11 +747,60 @@ private:
|
||||
continue;
|
||||
}
|
||||
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
|
||||
return {{surface, surface->GetMainView()}};
|
||||
return std::make_pair(surface, surface->GetMainView());
|
||||
}
|
||||
}
|
||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||
}
|
||||
|
||||
if (params.num_levels > 1) {
|
||||
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (overlaps.size() == 1) {
|
||||
const auto& surface = overlaps[0];
|
||||
const SurfaceParams& overlap_params = surface->GetSurfaceParams();
|
||||
// Don't attempt to render to textures with more than one level for now
|
||||
// The texture has to be to the right or the sample address if we want to render to it
|
||||
if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
|
||||
const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
|
||||
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
|
||||
if (slice < overlap_params.depth) {
|
||||
auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
|
||||
return std::make_pair(std::move(surface), std::move(view));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
bool modified = false;
|
||||
|
||||
for (auto& surface : overlaps) {
|
||||
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||
if (src_params.target != SurfaceTarget::Texture2D ||
|
||||
src_params.height != params.height ||
|
||||
src_params.block_depth != params.block_depth ||
|
||||
src_params.block_height != params.block_height) {
|
||||
return std::nullopt;
|
||||
}
|
||||
modified |= surface->IsModified();
|
||||
|
||||
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
|
||||
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
|
||||
const u32 width = params.width;
|
||||
const u32 height = params.height;
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
}
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
new_surface->MarkAsModified(modified, Tick());
|
||||
Register(new_surface);
|
||||
|
||||
TView view = new_surface->GetMainView();
|
||||
return std::make_pair(std::move(new_surface), std::move(view));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -873,7 +878,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Check if it's a 3D texture
|
||||
// Manage 3D textures
|
||||
if (params.block_depth > 0) {
|
||||
auto surface =
|
||||
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
|
||||
|
||||
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
|
||||
|
||||
ui->setupUi(this);
|
||||
|
||||
// TODO: Remove this after assembly shaders are fully integrated
|
||||
ui->use_assembly_shaders->setVisible(false);
|
||||
|
||||
SetConfiguration();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user