Compare commits

..

30 Commits

Author SHA1 Message Date
Lioncash
24620bc4ea decode/image: Fix typo in assert in GetComponentSize() 2020-04-15 22:29:51 -04:00
Lioncash
b178c9a349 decoder/image: Fix incorrect G24R8 component sizes in GetComponentSize()
The components' sizes were mismatched. This corrects that.
2020-04-15 22:10:44 -04:00
Fernando Sahmkow
e33196d4e7 Merge pull request #3612 from ReinUsesLisp/red
shader/memory: Implement RED.E.ADD and minor changes to ATOM
2020-04-15 15:03:49 -04:00
Mat M
4398bdb4c7 Merge pull request #3670 from lioncash/reorder
CMakeLists: Make -Wreorder a compile-time error
2020-04-15 14:40:05 -04:00
Lioncash
213fff67bc CMakeLists: Make -Wreorder a compile-time error
This can result in silent logic bugs within code, and given the amount
of times these kind of warnings are caused, they should be flagged at
compile-time so no new code is submitted with them.
2020-04-15 14:14:41 -04:00
Mat M
64b5985f0a Merge pull request #3662 from ReinUsesLisp/constant-attrs
gl_rasterizer: Implement constant vertex attributes
2020-04-15 11:54:50 -04:00
Mat M
9208d555b7 Merge pull request #3668 from ReinUsesLisp/vtx-format-16ui
maxwell_to_vk: Add uint16 vertex formats
2020-04-15 11:43:52 -04:00
Mat M
ab72696beb Merge pull request #3656 from ReinUsesLisp/glsl-full-decompile
gl_shader_cache: Use CompileDepth::FullDecompile on GLSL
2020-04-15 03:17:46 -04:00
Mat M
4878d6bb49 Merge pull request #3654 from ReinUsesLisp/fix-fb-attach
gl_texture_cache: Fix layered texture attachment base level
2020-04-15 03:17:18 -04:00
Mat M
50c0a92db8 Merge pull request #3663 from ReinUsesLisp/fcmp-rc
shader/arithmetic: Add FCMP_CR variant
2020-04-15 03:16:56 -04:00
Mat M
13331a3a32 Merge pull request #3664 from ReinUsesLisp/fe3h-black-squares
Revert "gl_shader_decompiler: Implement merges with bitfieldInsert"
2020-04-15 03:14:28 -04:00
Mat M
3a759d2352 Merge pull request #3667 from ReinUsesLisp/viewport-trash
vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo
2020-04-15 03:10:34 -04:00
ReinUsesLisp
3036067047 maxwell_to_vk: Add uint16 vertex formats 2020-04-15 04:06:30 -03:00
ReinUsesLisp
b4e43c64c8 maxwell_to_vk: Add missing breaks
Avoid invalid fallbacks.
2020-04-15 04:05:33 -03:00
ReinUsesLisp
0ca456830f vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo
When the dynamic state is specified, pViewports and pScissors are
ignored, quoting the specification:

  pViewports is a pointer to an array of VkViewport structures, defining
  the viewport transforms. If the viewport state is dynamic, this member
  is ignored.

That said, AMD's proprietary driver itself seem to read it regardless of
what the specification says.
2020-04-15 03:30:08 -03:00
Rodrigo Locatti
0b132e8cc1 Merge pull request #3657 from ReinUsesLisp/viewport-zero
vk_rasterizer: Default to 1 viewports with a size of 0
2020-04-15 01:51:17 -03:00
Fernando Sahmkow
daddbeffd1 Texture Cache: Only do buffer copies on accurate GPU. (#3634)
This is a simple optimization as Buffer Copies are mostly used for texture recycling. They are, however, useful when games abuse undefined behavior but most 3D APIs forbid it.
2020-04-14 23:21:00 -04:00
ReinUsesLisp
fd6371eba7 Revert "gl_shader_decompiler: Implement merges with bitfieldInsert"
This reverts commit 05cf270836.

Apparently the first approach using floats instead of bitfieldInert
worked better for Fire Emblem: Three Houses. Reverting to get that
behavior back.
2020-04-14 21:24:33 -03:00
ReinUsesLisp
fefe7f18f9 shader/arithmetic: Add FCMP_CR variant
Adds another variant of FCMP.
2020-04-14 19:11:04 -03:00
Zach Hilman
e366b4ee1f Merge pull request #3660 from bunnei/friend-blocked-users
service: friend: Stub IFriendService::GetBlockedUserListIds.
2020-04-14 16:59:46 -04:00
Zach Hilman
8040f6d544 Merge pull request #3661 from bunnei/patch-manager-fix
file_sys: patch_manager: Return early when there are no layers to apply.
2020-04-14 16:59:25 -04:00
ReinUsesLisp
6dfcabc800 gl_rasterizer: Implement constant vertex attributes
Credits go to gdkchan from Ryujinx for finding constant attributes are
used in retail games.
2020-04-14 17:58:53 -03:00
bunnei
fc35803f91 file_sys: patch_manager: Return early when there are no layers to apply. 2020-04-14 16:25:55 -04:00
bunnei
598740f1dd service: friend: Stub IFriendService::GetBlockedUserListIds.
- This is safe to stub, as there should be no adverse consequences from reporting no blocked users.
2020-04-14 16:20:51 -04:00
ReinUsesLisp
37e5c4fa7c vk_rasterizer: Default to 1 viewports with a size of 0
Silence validation layer errors.
2020-04-14 04:44:34 -03:00
ReinUsesLisp
453d7419d9 gl_shader_cache: Use CompileDepth::FullDecompile on GLSL
From my testing on a Splatoon 2 shader that takes 3800ms on average to
compile changing to FullDecompile reduces it to 900ms on average.

The shader decoder will automatically fallback to a more naive method if
it can't use full decompile.
2020-04-14 01:34:20 -03:00
ReinUsesLisp
21dc842171 gl_texture_cache: Fix layered texture attachment base level
The base level is already included in the texture view. If we specify
the base level in the texture again, this will end up in the incorrect
level and potentially out of bounds.
2020-04-13 18:24:56 -03:00
ReinUsesLisp
3185245845 shader/memory: Implement RED.E.ADD
Implements a reduction operation. It's an atomic operation that doesn't
return a value.

This commit introduces another primitive because some shading languages
might have a primitive for reduction operations.
2020-04-06 02:24:47 -03:00
ReinUsesLisp
fd0a2b5151 shader/memory: Add "using std::move" 2020-04-06 02:18:14 -03:00
ReinUsesLisp
79970c9174 shader/memory: Minor fixes in ATOM 2020-04-06 00:54:22 -03:00
32 changed files with 286 additions and 482 deletions

View File

@@ -53,6 +53,7 @@ if (MSVC)
else()
add_compile_options(
-Wall
-Werror=reorder
-Wno-attributes
)

View File

@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
if (ext_dir != nullptr)
layers_ext.push_back(std::move(ext_dir));
}
// When there are no layers to apply, return early as there is no need to rebuild the RomFS
if (layers.empty() && layers_ext.empty()) {
return;
}
layers.push_back(std::move(extracted));
auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));

View File

@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
struct KernelCore::Impl {
explicit Impl(Core::System& system, KernelCore& kernel)
: system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
: global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();

View File

@@ -27,7 +27,7 @@ public:
{10110, nullptr, "GetFriendProfileImage"},
{10200, nullptr, "SendFriendRequestForApplication"},
{10211, nullptr, "AddFacedFriendRequestForApplication"},
{10400, nullptr, "GetBlockedUserListIds"},
{10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
{10500, nullptr, "GetProfileList"},
{10600, nullptr, "DeclareOpenOnlinePlaySession"},
{10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
};
static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
// This is safe to stub, as there should be no adverse consequences from reporting no
// blocked users.
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0); // Indicates there are no blocked users
}
void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
// Stub used by Splatoon 2
LOG_WARNING(Service_ACC, "(STUBBED) called");

View File

@@ -160,8 +160,6 @@ if (ENABLE_VULKAN)
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/nsight_aftermath_tracker.cpp
renderer_vulkan/nsight_aftermath_tracker.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
@@ -215,30 +213,19 @@ if (ENABLE_VULKAN)
renderer_vulkan/wrapper.cpp
renderer_vulkan/wrapper.h
)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad)
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
target_link_libraries(video_core PRIVATE sirit)
endif()
if (ENABLE_NSIGHT_AFTERMATH)
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
endif()
if (NOT WIN32)
message(ERROR "Nsight Aftermath doesn't support non-Windows platforms")
endif()
target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH)
target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include")
endif()
if (MSVC)
target_compile_options(video_core PRIVATE /we4267)
else()

View File

@@ -303,6 +303,10 @@ public:
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
}
bool IsConstant() const {
return constant;
}
bool IsValid() const {
return size != Size::Invalid;
}

View File

@@ -1005,6 +1005,12 @@ union Instruction {
BitField<46, 2, u64> cache_mode;
} stg;
union {
BitField<23, 3, AtomicOp> operation;
BitField<48, 1, u64> extended;
BitField<20, 3, GlobalAtomicType> type;
} red;
union {
BitField<52, 4, AtomicOp> operation;
BitField<49, 3, GlobalAtomicType> type;
@@ -1787,6 +1793,7 @@ public:
ST_S,
ST, // Store in generic memory
STG, // Store in global memory
RED, // Reduction operation
ATOM, // Atomic operation on global memory
ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory
@@ -1871,7 +1878,8 @@ public:
ICMP_R,
ICMP_CR,
ICMP_IMM,
FCMP_R,
FCMP_RR,
FCMP_RC,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -2096,6 +2104,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110101111111---", Id::RED, Type::Memory, "RED"),
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
@@ -2179,7 +2188,8 @@ private:
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),

View File

@@ -12,8 +12,9 @@ namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
: GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
: GPU(system, std::move(renderer_), true), gpu_thread{system},
cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
gpu_context(std::move(context)) {}
GPUAsynch::~GPUAsynch() = default;

View File

@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
const auto attrib = gpu.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Ignore invalid attributes.
if (!attrib.IsValid()) {
// Disable constant attributes.
if (attrib.IsConstant()) {
glDisableVertexAttribArray(gl_index);
continue;
}

View File

@@ -34,6 +34,8 @@
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::CompileDepth;
using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
@@ -43,7 +45,7 @@ namespace {
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {

View File

@@ -1821,13 +1821,15 @@ private:
Expression HMergeH0(Operation operation) {
const std::string dest = VisitOperand(operation, 0).AsUint();
const std::string src = VisitOperand(operation, 1).AsUint();
return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
Type::HalfFloat};
}
Expression HMergeH1(Operation operation) {
const std::string dest = VisitOperand(operation, 0).AsUint();
const std::string src = VisitOperand(operation, 1).AsUint();
return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
Type::HalfFloat};
}
Expression HPack2(Operation operation) {
@@ -2117,8 +2119,14 @@ private:
return {};
}
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
Visit(operation[1]).As(type)),
type};
Visit(operation[1]).AsUint()),
Type::Uint};
}
template <const std::string_view& opname, Type type>
Expression Reduce(Operation operation) {
code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
return {};
}
Expression Branch(Operation operation) {
@@ -2477,6 +2485,20 @@ private:
&GLSLDecompiler::Atomic<Func::Or, Type::Int>,
&GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
&GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
&GLSLDecompiler::Reduce<Func::And, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Add, Type::Int>,
&GLSLDecompiler::Reduce<Func::Min, Type::Int>,
&GLSLDecompiler::Reduce<Func::Max, Type::Int>,
&GLSLDecompiler::Reduce<Func::And, Type::Int>,
&GLSLDecompiler::Reduce<Func::Or, Type::Int>,
&GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack,

View File

@@ -417,7 +417,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
switch (params.target) {
case SurfaceTarget::Texture2DArray:
glFramebufferTexture(target, attachment, GetTexture(), params.base_level);
glFramebufferTexture(target, attachment, GetTexture(), 0);
break;
default:
UNIMPLEMENTED();

View File

@@ -315,8 +315,8 @@ public:
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
Core::Frontend::GraphicsContext& context)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default;

View File

@@ -360,6 +360,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -370,6 +371,14 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R8G8B8_UINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return VK_FORMAT_R8G8B8A8_UINT;
case Maxwell::VertexAttribute::Size::Size_16:
return VK_FORMAT_R16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
return VK_FORMAT_R16G16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
return VK_FORMAT_R16G16B16_UINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return VK_FORMAT_R16G16B16A16_UINT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -381,6 +390,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
default:
break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:

View File

@@ -1,220 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#ifdef HAS_NSIGHT_AFTERMATH
#include <mutex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include <fmt/format.h>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#include <GFSDK_Aftermath.h>
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
namespace Vulkan {
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
NsightAftermathTracker::NsightAftermathTracker() = default;
NsightAftermathTracker::~NsightAftermathTracker() {
if (initialized) {
(void)GFSDK_Aftermath_DisableGpuCrashDumps();
}
}
bool NsightAftermathTracker::Initialize() {
if (!dl.Open(AFTERMATH_LIB_NAME)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
return false;
}
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
&GFSDK_Aftermath_EnableGpuCrashDumps) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
&GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
&GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
&GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
&GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
return false;
}
dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
(void)FileUtil::DeleteDirRecursively(dump_dir);
if (!FileUtil::CreateDir(dump_dir)) {
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
return false;
}
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
return false;
}
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
initialized = true;
return true;
}
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
if (!initialized) {
return;
}
std::vector<u32> spirv_copy = spirv;
GFSDK_Aftermath_SpirvCode shader;
shader.pData = spirv_copy.data();
shader.size = static_cast<u32>(spirv_copy.size() * 4);
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderHash hash;
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
return;
}
FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
return;
}
if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
return;
}
}
void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size) {
std::scoped_lock lock{mutex};
LOG_CRITICAL(Render_Vulkan, "called");
GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
LOG_ERROR(Render_Vulkan, "Failed to create decoder");
return;
}
SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
u32 json_size = 0;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
this, &json_size))) {
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
return;
}
std::vector<char> json(json_size);
if (!GFSDK_Aftermath_SUCCEED(
GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
LOG_ERROR(Render_Vulkan, "Failed to query JSON");
return;
}
const std::string base_name = [this] {
const int id = dump_id++;
if (id == 0) {
return fmt::format("{}/crash.nv-gpudmp", dump_dir);
} else {
return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
}
}();
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
return;
}
const std::string_view json_view(json.data(), json.size());
if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
return;
}
}
void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size) {
std::scoped_lock lock{mutex};
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
return;
}
const std::string path =
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
FileUtil::IOFile file(path, "wb");
if (!file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
return;
}
if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
shader_debug_info_size) {
LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
return;
}
}
void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu");
}
void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
u32 gpu_crash_dump_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
gpu_crash_dump_size);
}
void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
u32 shader_debug_info_size, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
shader_debug_info, shader_debug_info_size);
}
void NsightAftermathTracker::CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback(
add_description);
}
} // namespace Vulkan
#endif // HAS_NSIGHT_AFTERMATH

View File

@@ -1,87 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <string>
#include <vector>
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
#ifdef HAS_NSIGHT_AFTERMATH
#include <GFSDK_Aftermath_Defines.h>
#include <GFSDK_Aftermath_GpuCrashDump.h>
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
#endif
#include "common/common_types.h"
#include "common/dynamic_library.h"
namespace Vulkan {
class NsightAftermathTracker {
public:
NsightAftermathTracker();
~NsightAftermathTracker();
NsightAftermathTracker(const NsightAftermathTracker&) = delete;
NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
// Delete move semantics because Aftermath initialization uses a pointer to this.
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
bool Initialize();
void SaveShader(const std::vector<u32>& spirv) const;
private:
#ifdef HAS_NSIGHT_AFTERMATH
static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
void* user_data);
static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
void* user_data);
static void CrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
void OnCrashDumpDescriptionCallback(
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
mutable std::mutex mutex;
std::string dump_dir;
int dump_id = 0;
bool initialized = false;
Common::DynamicLibrary dl;
PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
#endif
};
#ifndef HAS_NSIGHT_AFTERMATH
inline NsightAftermathTracker::NsightAftermathTracker() = default;
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
inline bool NsightAftermathTracker::Initialize() {
return false;
}
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
#endif
} // namespace Vulkan

View File

@@ -535,7 +535,9 @@ void VKBlitScreen::CreateGraphicsPipeline() {
viewport_state_ci.pNext = nullptr;
viewport_state_ci.flags = 0;
viewport_state_ci.viewportCount = 1;
viewport_state_ci.pViewports = nullptr;
viewport_state_ci.scissorCount = 1;
viewport_state_ci.pScissors = nullptr;
VkPipelineRasterizationStateCreateInfo rasterization_ci;
rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;

View File

@@ -105,8 +105,6 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
}
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
device.SaveShader(code);
VkShaderModuleCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
ci.pNext = nullptr;

View File

@@ -9,7 +9,6 @@
#include <string_view>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/assert.h"
@@ -168,7 +167,6 @@ bool VKDevice::Create() {
VkPhysicalDeviceFeatures2 features2;
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
features2.pNext = nullptr;
const void* first_next = &features2;
void** next = &features2.pNext;
auto& features = features2.features;
@@ -298,19 +296,7 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
if (nv_device_diagnostics_config) {
nsight_aftermath_tracker.Initialize();
diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
diagnostics_nv.pNext = &features2;
diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
first_next = &diagnostics_nv;
}
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld);
if (!logical) {
LOG_ERROR(Render_Vulkan, "Failed to create logical device");
return false;
@@ -358,12 +344,17 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
// Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{3});
}
// Wait some time to let the log flush
std::this_thread::sleep_for(std::chrono::seconds{1});
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
nsight_aftermath_tracker.SaveShader(spirv);
if (!nv_device_diagnostic_checkpoints) {
return;
}
[[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld);
// Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be
// executed. It can be done on a debugger by evaluating the expression:
// *(VKGraphicsPipeline*)data[0]
}
bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
@@ -536,8 +527,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostics_config,
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
Test(extension, nv_device_diagnostic_checkpoints,
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
}
}

View File

@@ -10,7 +10,6 @@
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
@@ -44,9 +43,6 @@ public:
/// Reports a device loss.
void ReportLoss() const;
/// Reports a shader to Nsight Aftermath.
void SaveShader(const std::vector<u32>& spirv) const;
/// Returns the dispatch loader with direct function pointers of the device.
const vk::DeviceDispatch& GetDispatchLoader() const {
return dld;
@@ -177,6 +173,11 @@ public:
return ext_transform_feedback;
}
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
}
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -232,7 +233,7 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
@@ -240,9 +241,6 @@ private:
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
/// Nsight Aftermath GPU crash tracker
NsightAftermathTracker nsight_aftermath_tracker;
};
} // namespace Vulkan

View File

@@ -147,8 +147,6 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
continue;
}
device.SaveShader(stage->code);
ci.codeSize = stage->code.size() * sizeof(u32);
ci.pCode = stage->code.data();
modules.push_back(device.GetLogical().CreateShaderModule(ci));

View File

@@ -113,19 +113,8 @@ u64 HostCounter::BlockingQuery() const {
if (ticks >= cache.Scheduler().Ticks()) {
cache.Scheduler().Flush();
}
u64 data;
const VkResult result = cache.Device().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
switch (result) {
case VK_SUCCESS:
return data;
case VK_ERROR_DEVICE_LOST:
cache.Device().ReportLoss();
[[fallthrough]];
default:
throw vk::Exception(result);
}
return cache.Device().GetLogical().GetQueryResult<u64>(
query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
}
} // namespace Vulkan

View File

@@ -62,13 +62,16 @@ constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::Sha
VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
const auto& src = regs.viewport_transform[index];
const float width = src.scale_x * 2.0f;
const float height = src.scale_y * 2.0f;
VkViewport viewport;
viewport.x = src.translate_x - src.scale_x;
viewport.y = src.translate_y - src.scale_y;
viewport.width = src.scale_x * 2.0f;
viewport.height = src.scale_y * 2.0f;
viewport.width = width != 0.0f ? width : 1.0f;
viewport.height = height != 0.0f ? height : 1.0f;
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
viewport.maxDepth = src.translate_z + src.scale_z;
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
@@ -347,6 +350,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_bindings.Bind(scheduler);
if (device.IsNvDeviceDiagnosticCheckpoints()) {
scheduler.Record(
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); });
}
BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout();
@@ -473,6 +481,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
if (device.IsNvDeviceDiagnosticCheckpoints()) {
scheduler.Record(
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); });
}
scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
layout = pipeline.GetLayout(),

View File

@@ -166,15 +166,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
submit_info.pCommandBuffers = current_cmdbuf.address();
submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
submit_info.pSignalSemaphores = &semaphore;
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
}
device.GetGraphicsQueue().Submit(submit_info, *current_fence);
}
void VKScheduler::AllocateNewContext() {

View File

@@ -1938,11 +1938,8 @@ private:
return {};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
Type value_type = result_type>
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Expression Atomic(Operation operation) {
const Id type_def = GetTypeDefinition(result_type);
Id pointer;
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
pointer = GetSharedMemoryPointer(*smem);
@@ -1950,15 +1947,19 @@ private:
pointer = GetGlobalMemoryPointer(*gmem);
} else {
UNREACHABLE();
return {Constant(type_def, 0), result_type};
return {v_float_zero, Type::Float};
}
const Id value = As(Visit(operation[1]), value_type);
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(type_def, 0);
const Id semantics = Constant(t_uint, 0);
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Expression Reduce(Operation operation) {
Atomic<func>(operation);
return {};
}
Expression Branch(Operation operation) {
@@ -2547,21 +2548,35 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,

View File

@@ -61,6 +61,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdPipelineBarrier);
X(vkCmdPushConstants);
X(vkCmdSetBlendConstants);
X(vkCmdSetCheckpointNV);
X(vkCmdSetDepthBias);
X(vkCmdSetDepthBounds);
X(vkCmdSetScissor);
@@ -115,6 +116,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults);
X(vkGetQueueCheckpointDataNV);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
@@ -407,6 +409,17 @@ DebugCallback Instance::TryCreateDebugCallback(
return DebugCallback(messenger, handle, *dld);
}
std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const {
if (!dld.vkGetQueueCheckpointDataNV) {
return {};
}
u32 num;
dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr);
std::vector<VkCheckpointDataNV> checkpoints(num);
dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data());
return checkpoints;
}
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
}
@@ -456,11 +469,12 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
}
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
Span<const char*> enabled_extensions,
const VkPhysicalDeviceFeatures2& enabled_features,
DeviceDispatch& dld) noexcept {
VkDeviceCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
ci.pNext = next;
ci.pNext = &enabled_features;
ci.flags = 0;
ci.queueCreateInfoCount = queues_ci.size();
ci.pQueueCreateInfos = queues_ci.data();

View File

@@ -197,6 +197,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
PFN_vkCmdPushConstants vkCmdPushConstants;
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV;
PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
PFN_vkCmdSetScissor vkCmdSetScissor;
@@ -251,6 +252,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkGetFenceStatus vkGetFenceStatus;
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV;
PFN_vkMapMemory vkMapMemory;
PFN_vkQueueSubmit vkQueueSubmit;
PFN_vkResetFences vkResetFences;
@@ -565,8 +567,12 @@ public:
/// Construct a queue handle.
constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
/// Returns the checkpoint data.
/// @note Returns an empty vector when the function pointer is not present.
std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const;
void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const {
Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence));
}
VkResult Present(const VkPresentInfoKHR& present_info) const noexcept {
@@ -653,7 +659,8 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
public:
static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
Span<const char*> enabled_extensions, const void* next,
Span<const char*> enabled_extensions,
const VkPhysicalDeviceFeatures2& enabled_features,
DeviceDispatch& dld) noexcept;
Queue GetQueue(u32 family_index) const noexcept;
@@ -727,11 +734,18 @@ public:
dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
}
VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
noexcept {
return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
flags);
void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
void* data, VkDeviceSize stride, VkQueryResultFlags flags) const {
Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
flags));
}
template <typename T>
T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const {
static_assert(std::is_trivially_copyable_v<T>);
T value;
GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags);
return value;
}
};
@@ -906,6 +920,10 @@ public:
dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
}
void SetCheckpointNV(const void* checkpoint_marker) const noexcept {
dld->vkCmdSetCheckpointNV(handle, checkpoint_marker);
}
void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
}

View File

@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FCMP_R: {
case OpCode::Id::FCMP_RR:
case OpCode::Id::FCMP_RC: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));

View File

@@ -119,7 +119,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
}
break;
}
UNIMPLEMENTED_MSG("texture format not implement={}", format);
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return ComponentType::FLOAT;
}
@@ -201,10 +201,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 0;
case TextureFormat::G24R8:
if (component == 0) {
return 8;
return 24;
}
if (component == 1) {
return 24;
return 8;
}
return 0;
case TextureFormat::G8R8:
@@ -212,7 +212,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
default:
UNIMPLEMENTED_MSG("texture format not implement={}", format);
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return 0;
}
}
@@ -249,7 +249,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R1:
return std::size_t{R};
default:
UNIMPLEMENTED_MSG("texture format not implement={}", format);
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return std::size_t{R | G | B | A};
}
}

View File

@@ -3,7 +3,9 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <vector>
#include <fmt/format.h>
#include "common/alignment.h"
@@ -16,6 +18,7 @@
namespace VideoCommon::Shader {
using std::move;
using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
namespace {
Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
const OperationCode operation_code = [op] {
switch (op) {
case AtomicOp::Add:
return OperationCode::AtomicIAdd;
case AtomicOp::Min:
return OperationCode::AtomicIMin;
case AtomicOp::Max:
return OperationCode::AtomicIMax;
case AtomicOp::And:
return OperationCode::AtomicIAnd;
case AtomicOp::Or:
return OperationCode::AtomicIOr;
case AtomicOp::Xor:
return OperationCode::AtomicIXor;
case AtomicOp::Exch:
return OperationCode::AtomicIExchange;
default:
UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
return OperationCode::AtomicIAdd;
}
}();
return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
OperationCode GetAtomOperation(AtomicOp op) {
switch (op) {
case AtomicOp::Add:
return OperationCode::AtomicIAdd;
case AtomicOp::Min:
return OperationCode::AtomicIMin;
case AtomicOp::Max:
return OperationCode::AtomicIMax;
case AtomicOp::And:
return OperationCode::AtomicIAnd;
case AtomicOp::Or:
return OperationCode::AtomicIOr;
case AtomicOp::Xor:
return OperationCode::AtomicIXor;
case AtomicOp::Exch:
return OperationCode::AtomicIExchange;
default:
UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
return OperationCode::AtomicIAdd;
}
}
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
Immediate(size));
offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
}
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
std::move(offset), Immediate(size));
Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
Immediate(size));
}
Node Sign16Extend(Node value) {
Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
}
} // Anonymous namespace
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
if (IsUnaligned(type)) {
const u32 mask = GetUnalignedMask(type);
value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
value = InsertUnaligned(gmem, move(value), real_address, mask, size);
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
case OpCode::Id::RED: {
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
// Tracking failed, skip atomic.
break;
}
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
Node value = GetRegister(instr.gpr0);
bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
break;
}
case OpCode::Id::ATOM: {
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
instr.atom.operation == AtomicOp::Dec ||
instr.atom.operation == AtomicOp::SafeAdd,
"operation={}", static_cast<int>(instr.atom.operation.Value()));
UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
instr.atom.type == GlobalAtomicType::U64,
instr.atom.type == GlobalAtomicType::U64 ||
instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
"type={}", static_cast<int>(instr.atom.type.Value()));
const auto [real_address, base_address, descriptor] =
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
const bool is_signed =
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
GetRegister(instr.gpr20));
SetRegister(bb, instr.gpr0, std::move(value));
SetRegister(bb, instr.gpr0,
SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
GetRegister(instr.gpr20)));
break;
}
case OpCode::Id::ATOMS: {
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
const s32 offset = instr.atoms.GetImmediateOffset();
Node address = GetRegister(instr.gpr8);
address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
Node value =
GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
SetRegister(bb, instr.gpr0, std::move(value));
address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
SetRegister(bb, instr.gpr0,
SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
break;
}
case OpCode::Id::AL2P: {

View File

@@ -178,6 +178,20 @@ enum class OperationCode {
AtomicIOr, /// (memory, int) -> int
AtomicIXor, /// (memory, int) -> int
ReduceUAdd, /// (memory, uint) -> void
ReduceUMin, /// (memory, uint) -> void
ReduceUMax, /// (memory, uint) -> void
ReduceUAnd, /// (memory, uint) -> void
ReduceUOr, /// (memory, uint) -> void
ReduceUXor, /// (memory, uint) -> void
ReduceIAdd, /// (memory, int) -> void
ReduceIMin, /// (memory, int) -> void
ReduceIMax, /// (memory, int) -> void
ReduceIAnd, /// (memory, int) -> void
ReduceIOr, /// (memory, int) -> void
ReduceIXor, /// (memory, int) -> void
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void

View File

@@ -509,7 +509,9 @@ private:
}
const auto& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type) {
BufferCopy(current_surface, new_surface);
if (Settings::values.use_accurate_gpu_emulation) {
BufferCopy(current_surface, new_surface);
}
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
for (auto& brick : bricks) {