Compare commits
30 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24620bc4ea | ||
|
|
b178c9a349 | ||
|
|
e33196d4e7 | ||
|
|
4398bdb4c7 | ||
|
|
213fff67bc | ||
|
|
64b5985f0a | ||
|
|
9208d555b7 | ||
|
|
ab72696beb | ||
|
|
4878d6bb49 | ||
|
|
50c0a92db8 | ||
|
|
13331a3a32 | ||
|
|
3a759d2352 | ||
|
|
3036067047 | ||
|
|
b4e43c64c8 | ||
|
|
0ca456830f | ||
|
|
0b132e8cc1 | ||
|
|
daddbeffd1 | ||
|
|
fd6371eba7 | ||
|
|
fefe7f18f9 | ||
|
|
e366b4ee1f | ||
|
|
8040f6d544 | ||
|
|
6dfcabc800 | ||
|
|
fc35803f91 | ||
|
|
598740f1dd | ||
|
|
37e5c4fa7c | ||
|
|
453d7419d9 | ||
|
|
21dc842171 | ||
|
|
3185245845 | ||
|
|
fd0a2b5151 | ||
|
|
79970c9174 |
@@ -53,6 +53,7 @@ if (MSVC)
|
||||
else()
|
||||
add_compile_options(
|
||||
-Wall
|
||||
-Werror=reorder
|
||||
-Wno-attributes
|
||||
)
|
||||
|
||||
|
||||
@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
|
||||
if (ext_dir != nullptr)
|
||||
layers_ext.push_back(std::move(ext_dir));
|
||||
}
|
||||
|
||||
// When there are no layers to apply, return early as there is no need to rebuild the RomFS
|
||||
if (layers.empty() && layers_ext.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
layers.push_back(std::move(extracted));
|
||||
|
||||
auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
|
||||
|
||||
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
|
||||
|
||||
struct KernelCore::Impl {
|
||||
explicit Impl(Core::System& system, KernelCore& kernel)
|
||||
: system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
|
||||
: global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
|
||||
|
||||
void Initialize(KernelCore& kernel) {
|
||||
Shutdown();
|
||||
|
||||
@@ -27,7 +27,7 @@ public:
|
||||
{10110, nullptr, "GetFriendProfileImage"},
|
||||
{10200, nullptr, "SendFriendRequestForApplication"},
|
||||
{10211, nullptr, "AddFacedFriendRequestForApplication"},
|
||||
{10400, nullptr, "GetBlockedUserListIds"},
|
||||
{10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
|
||||
{10500, nullptr, "GetProfileList"},
|
||||
{10600, nullptr, "DeclareOpenOnlinePlaySession"},
|
||||
{10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
|
||||
@@ -121,6 +121,15 @@ private:
|
||||
};
|
||||
static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
|
||||
|
||||
void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
|
||||
// This is safe to stub, as there should be no adverse consequences from reporting no
|
||||
// blocked users.
|
||||
LOG_WARNING(Service_ACC, "(STUBBED) called");
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u32>(0); // Indicates there are no blocked users
|
||||
}
|
||||
|
||||
void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
|
||||
// Stub used by Splatoon 2
|
||||
LOG_WARNING(Service_ACC, "(STUBBED) called");
|
||||
|
||||
@@ -160,8 +160,6 @@ if (ENABLE_VULKAN)
|
||||
renderer_vulkan/fixed_pipeline_state.h
|
||||
renderer_vulkan/maxwell_to_vk.cpp
|
||||
renderer_vulkan/maxwell_to_vk.h
|
||||
renderer_vulkan/nsight_aftermath_tracker.cpp
|
||||
renderer_vulkan/nsight_aftermath_tracker.h
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/renderer_vulkan.cpp
|
||||
renderer_vulkan/vk_blit_screen.cpp
|
||||
@@ -215,30 +213,19 @@ if (ENABLE_VULKAN)
|
||||
renderer_vulkan/wrapper.cpp
|
||||
renderer_vulkan/wrapper.h
|
||||
)
|
||||
|
||||
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
|
||||
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
|
||||
endif()
|
||||
|
||||
create_target_directory_groups(video_core)
|
||||
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE glad)
|
||||
|
||||
if (ENABLE_VULKAN)
|
||||
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
|
||||
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
|
||||
target_link_libraries(video_core PRIVATE sirit)
|
||||
endif()
|
||||
|
||||
if (ENABLE_NSIGHT_AFTERMATH)
|
||||
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
|
||||
message(ERROR "Environment variable NSIGHT_AFTERMATH_SDK has to be provided")
|
||||
endif()
|
||||
if (NOT WIN32)
|
||||
message(ERROR "Nsight Aftermath doesn't support non-Windows platforms")
|
||||
endif()
|
||||
target_compile_definitions(video_core PRIVATE HAS_NSIGHT_AFTERMATH)
|
||||
target_include_directories(video_core PRIVATE "$ENV{NSIGHT_AFTERMATH_SDK}/include")
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
target_compile_options(video_core PRIVATE /we4267)
|
||||
else()
|
||||
|
||||
@@ -303,6 +303,10 @@ public:
|
||||
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
|
||||
}
|
||||
|
||||
bool IsConstant() const {
|
||||
return constant;
|
||||
}
|
||||
|
||||
bool IsValid() const {
|
||||
return size != Size::Invalid;
|
||||
}
|
||||
|
||||
@@ -1005,6 +1005,12 @@ union Instruction {
|
||||
BitField<46, 2, u64> cache_mode;
|
||||
} stg;
|
||||
|
||||
union {
|
||||
BitField<23, 3, AtomicOp> operation;
|
||||
BitField<48, 1, u64> extended;
|
||||
BitField<20, 3, GlobalAtomicType> type;
|
||||
} red;
|
||||
|
||||
union {
|
||||
BitField<52, 4, AtomicOp> operation;
|
||||
BitField<49, 3, GlobalAtomicType> type;
|
||||
@@ -1787,6 +1793,7 @@ public:
|
||||
ST_S,
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
RED, // Reduction operation
|
||||
ATOM, // Atomic operation on global memory
|
||||
ATOMS, // Atomic operation on shared memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
@@ -1871,7 +1878,8 @@ public:
|
||||
ICMP_R,
|
||||
ICMP_CR,
|
||||
ICMP_IMM,
|
||||
FCMP_R,
|
||||
FCMP_RR,
|
||||
FCMP_RC,
|
||||
MUFU, // Multi-Function Operator
|
||||
RRO_C, // Range Reduction Operator
|
||||
RRO_R,
|
||||
@@ -2096,6 +2104,7 @@ private:
|
||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("1110101111111---", Id::RED, Type::Memory, "RED"),
|
||||
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||
@@ -2179,7 +2188,8 @@ private:
|
||||
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
|
||||
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
|
||||
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
|
||||
INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
|
||||
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
|
||||
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
|
||||
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
|
||||
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
|
||||
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
|
||||
|
||||
@@ -12,8 +12,9 @@ namespace VideoCommon {
|
||||
|
||||
GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
|
||||
: GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
|
||||
cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
|
||||
: GPU(system, std::move(renderer_), true), gpu_thread{system},
|
||||
cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
|
||||
gpu_context(std::move(context)) {}
|
||||
|
||||
GPUAsynch::~GPUAsynch() = default;
|
||||
|
||||
|
||||
@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
|
||||
const auto attrib = gpu.regs.vertex_attrib_format[index];
|
||||
const auto gl_index = static_cast<GLuint>(index);
|
||||
|
||||
// Ignore invalid attributes.
|
||||
if (!attrib.IsValid()) {
|
||||
// Disable constant attributes.
|
||||
if (attrib.IsConstant()) {
|
||||
glDisableVertexAttribArray(gl_index);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
namespace OpenGL {
|
||||
|
||||
using Tegra::Engines::ShaderType;
|
||||
using VideoCommon::Shader::CompileDepth;
|
||||
using VideoCommon::Shader::CompilerSettings;
|
||||
using VideoCommon::Shader::ProgramCode;
|
||||
using VideoCommon::Shader::Registry;
|
||||
using VideoCommon::Shader::ShaderIR;
|
||||
@@ -43,7 +45,7 @@ namespace {
|
||||
constexpr u32 STAGE_MAIN_OFFSET = 10;
|
||||
constexpr u32 KERNEL_MAIN_OFFSET = 0;
|
||||
|
||||
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
|
||||
constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};
|
||||
|
||||
/// Gets the address for the specified shader stage program
|
||||
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
|
||||
|
||||
@@ -1821,13 +1821,15 @@ private:
|
||||
Expression HMergeH0(Operation operation) {
|
||||
const std::string dest = VisitOperand(operation, 0).AsUint();
|
||||
const std::string src = VisitOperand(operation, 1).AsUint();
|
||||
return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
|
||||
return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
|
||||
Type::HalfFloat};
|
||||
}
|
||||
|
||||
Expression HMergeH1(Operation operation) {
|
||||
const std::string dest = VisitOperand(operation, 0).AsUint();
|
||||
const std::string src = VisitOperand(operation, 1).AsUint();
|
||||
return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
|
||||
return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
|
||||
Type::HalfFloat};
|
||||
}
|
||||
|
||||
Expression HPack2(Operation operation) {
|
||||
@@ -2117,8 +2119,14 @@ private:
|
||||
return {};
|
||||
}
|
||||
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
||||
Visit(operation[1]).As(type)),
|
||||
type};
|
||||
Visit(operation[1]).AsUint()),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
template <const std::string_view& opname, Type type>
|
||||
Expression Reduce(Operation operation) {
|
||||
code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression Branch(Operation operation) {
|
||||
@@ -2477,6 +2485,20 @@ private:
|
||||
&GLSLDecompiler::Atomic<Func::Or, Type::Int>,
|
||||
&GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
|
||||
|
||||
&GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
|
||||
&GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
|
||||
&GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
|
||||
&GLSLDecompiler::Reduce<Func::And, Type::Uint>,
|
||||
&GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
|
||||
&GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
|
||||
|
||||
&GLSLDecompiler::Reduce<Func::Add, Type::Int>,
|
||||
&GLSLDecompiler::Reduce<Func::Min, Type::Int>,
|
||||
&GLSLDecompiler::Reduce<Func::Max, Type::Int>,
|
||||
&GLSLDecompiler::Reduce<Func::And, Type::Int>,
|
||||
&GLSLDecompiler::Reduce<Func::Or, Type::Int>,
|
||||
&GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
|
||||
|
||||
&GLSLDecompiler::Branch,
|
||||
&GLSLDecompiler::BranchIndirect,
|
||||
&GLSLDecompiler::PushFlowStack,
|
||||
|
||||
@@ -417,7 +417,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
glFramebufferTexture(target, attachment, GetTexture(), params.base_level);
|
||||
glFramebufferTexture(target, attachment, GetTexture(), 0);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
|
||||
@@ -315,8 +315,8 @@ public:
|
||||
|
||||
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
|
||||
Core::Frontend::GraphicsContext& context)
|
||||
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
|
||||
frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
|
||||
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
|
||||
has_debug_tool{HasDebugTool()} {}
|
||||
|
||||
RendererOpenGL::~RendererOpenGL() = default;
|
||||
|
||||
|
||||
@@ -360,6 +360,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedInt:
|
||||
switch (size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
@@ -370,6 +371,14 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
|
||||
return VK_FORMAT_R8G8B8_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
|
||||
return VK_FORMAT_R8G8B8A8_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_16:
|
||||
return VK_FORMAT_R16_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16:
|
||||
return VK_FORMAT_R16G16_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||
return VK_FORMAT_R16G16B16_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||
return VK_FORMAT_R16G16B16A16_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_32:
|
||||
return VK_FORMAT_R32_UINT;
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32:
|
||||
@@ -381,6 +390,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
||||
switch (size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
|
||||
@@ -1,220 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#define VK_NO_PROTOTYPES
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include <GFSDK_Aftermath.h>
|
||||
#include <GFSDK_Aftermath_Defines.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDump.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
|
||||
|
||||
#include "common/common_paths.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
|
||||
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
|
||||
|
||||
NsightAftermathTracker::NsightAftermathTracker() = default;
|
||||
|
||||
NsightAftermathTracker::~NsightAftermathTracker() {
|
||||
if (initialized) {
|
||||
(void)GFSDK_Aftermath_DisableGpuCrashDumps();
|
||||
}
|
||||
}
|
||||
|
||||
bool NsightAftermathTracker::Initialize() {
|
||||
if (!dl.Open(AFTERMATH_LIB_NAME)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
|
||||
&GFSDK_Aftermath_DisableGpuCrashDumps) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
|
||||
&GFSDK_Aftermath_EnableGpuCrashDumps) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderDebugInfoIdentifier",
|
||||
&GFSDK_Aftermath_GetShaderDebugInfoIdentifier) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GetShaderHashSpirv", &GFSDK_Aftermath_GetShaderHashSpirv) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_CreateDecoder",
|
||||
&GFSDK_Aftermath_GpuCrashDump_CreateDecoder) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_DestroyDecoder",
|
||||
&GFSDK_Aftermath_GpuCrashDump_DestroyDecoder) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GenerateJSON",
|
||||
&GFSDK_Aftermath_GpuCrashDump_GenerateJSON) ||
|
||||
!dl.GetSymbol("GFSDK_Aftermath_GpuCrashDump_GetJSON",
|
||||
&GFSDK_Aftermath_GpuCrashDump_GetJSON)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
|
||||
return false;
|
||||
}
|
||||
|
||||
dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash";
|
||||
|
||||
(void)FileUtil::DeleteDirRecursively(dump_dir);
|
||||
if (!FileUtil::CreateDir(dump_dir)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
|
||||
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
|
||||
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
|
||||
ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
|
||||
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
|
||||
|
||||
initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<u32> spirv_copy = spirv;
|
||||
GFSDK_Aftermath_SpirvCode shader;
|
||||
shader.pData = spirv_copy.data();
|
||||
shader.size = static_cast<u32>(spirv_copy.size() * 4);
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
GFSDK_Aftermath_ShaderHash hash;
|
||||
if (!GFSDK_Aftermath_SUCCEED(
|
||||
GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &hash))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to hash SPIR-V module");
|
||||
return;
|
||||
}
|
||||
|
||||
FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
|
||||
return;
|
||||
}
|
||||
if (file.WriteArray(spirv.data(), spirv.size()) != spirv.size()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump,
|
||||
u32 gpu_crash_dump_size) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
LOG_CRITICAL(Render_Vulkan, "called");
|
||||
|
||||
GFSDK_Aftermath_GpuCrashDump_Decoder decoder;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(
|
||||
GFSDK_Aftermath_Version_API, gpu_crash_dump, gpu_crash_dump_size, &decoder))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create decoder");
|
||||
return;
|
||||
}
|
||||
SCOPE_EXIT({ GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder); });
|
||||
|
||||
u32 json_size = 0;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
|
||||
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO,
|
||||
GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE, nullptr, nullptr, nullptr, nullptr,
|
||||
this, &json_size))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to generate JSON");
|
||||
return;
|
||||
}
|
||||
std::vector<char> json(json_size);
|
||||
if (!GFSDK_Aftermath_SUCCEED(
|
||||
GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, json_size, json.data()))) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to query JSON");
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string base_name = [this] {
|
||||
const int id = dump_id++;
|
||||
if (id == 0) {
|
||||
return fmt::format("{}/crash.nv-gpudmp", dump_dir);
|
||||
} else {
|
||||
return fmt::format("{}/crash_{}.nv-gpudmp", dump_dir, id);
|
||||
}
|
||||
}();
|
||||
|
||||
std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size);
|
||||
if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write dump file");
|
||||
return;
|
||||
}
|
||||
const std::string_view json_view(json.data(), json.size());
|
||||
if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write JSON");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_info,
|
||||
u32 shader_debug_info_size) {
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier;
|
||||
if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(
|
||||
GFSDK_Aftermath_Version_API, shader_debug_info, shader_debug_info_size, &identifier))) {
|
||||
LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_GetShaderDebugInfoIdentifier failed");
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string path =
|
||||
fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]);
|
||||
FileUtil::IOFile file(path, "wb");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create file {}", path);
|
||||
return;
|
||||
}
|
||||
if (file.WriteBytes(static_cast<const u8*>(shader_debug_info), shader_debug_info_size) !=
|
||||
shader_debug_info_size) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to write file {}", path);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::OnCrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description) {
|
||||
add_description(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, "yuzu");
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::GpuCrashDumpCallback(const void* gpu_crash_dump,
|
||||
u32 gpu_crash_dump_size, void* user_data) {
|
||||
static_cast<NsightAftermathTracker*>(user_data)->OnGpuCrashDumpCallback(gpu_crash_dump,
|
||||
gpu_crash_dump_size);
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::ShaderDebugInfoCallback(const void* shader_debug_info,
|
||||
u32 shader_debug_info_size, void* user_data) {
|
||||
static_cast<NsightAftermathTracker*>(user_data)->OnShaderDebugInfoCallback(
|
||||
shader_debug_info, shader_debug_info_size);
|
||||
}
|
||||
|
||||
void NsightAftermathTracker::CrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data) {
|
||||
static_cast<NsightAftermathTracker*>(user_data)->OnCrashDumpDescriptionCallback(
|
||||
add_description);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
#endif // HAS_NSIGHT_AFTERMATH
|
||||
@@ -1,87 +0,0 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define VK_NO_PROTOTYPES
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
#include <GFSDK_Aftermath_Defines.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDump.h>
|
||||
#include <GFSDK_Aftermath_GpuCrashDumpDecoding.h>
|
||||
#endif
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/dynamic_library.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class NsightAftermathTracker {
|
||||
public:
|
||||
NsightAftermathTracker();
|
||||
~NsightAftermathTracker();
|
||||
|
||||
NsightAftermathTracker(const NsightAftermathTracker&) = delete;
|
||||
NsightAftermathTracker& operator=(const NsightAftermathTracker&) = delete;
|
||||
|
||||
// Delete move semantics because Aftermath initialization uses a pointer to this.
|
||||
NsightAftermathTracker(NsightAftermathTracker&&) = delete;
|
||||
NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
|
||||
|
||||
bool Initialize();
|
||||
|
||||
void SaveShader(const std::vector<u32>& spirv) const;
|
||||
|
||||
private:
|
||||
#ifdef HAS_NSIGHT_AFTERMATH
|
||||
static void GpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size,
|
||||
void* user_data);
|
||||
|
||||
static void ShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size,
|
||||
void* user_data);
|
||||
|
||||
static void CrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description, void* user_data);
|
||||
|
||||
void OnGpuCrashDumpCallback(const void* gpu_crash_dump, u32 gpu_crash_dump_size);
|
||||
|
||||
void OnShaderDebugInfoCallback(const void* shader_debug_info, u32 shader_debug_info_size);
|
||||
|
||||
void OnCrashDumpDescriptionCallback(
|
||||
PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription add_description);
|
||||
|
||||
mutable std::mutex mutex;
|
||||
|
||||
std::string dump_dir;
|
||||
int dump_id = 0;
|
||||
|
||||
bool initialized = false;
|
||||
|
||||
Common::DynamicLibrary dl;
|
||||
PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps;
|
||||
PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps;
|
||||
PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier;
|
||||
PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON;
|
||||
PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifndef HAS_NSIGHT_AFTERMATH
|
||||
inline NsightAftermathTracker::NsightAftermathTracker() = default;
|
||||
inline NsightAftermathTracker::~NsightAftermathTracker() = default;
|
||||
inline bool NsightAftermathTracker::Initialize() {
|
||||
return false;
|
||||
}
|
||||
inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
|
||||
#endif
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -535,7 +535,9 @@ void VKBlitScreen::CreateGraphicsPipeline() {
|
||||
viewport_state_ci.pNext = nullptr;
|
||||
viewport_state_ci.flags = 0;
|
||||
viewport_state_ci.viewportCount = 1;
|
||||
viewport_state_ci.pViewports = nullptr;
|
||||
viewport_state_ci.scissorCount = 1;
|
||||
viewport_state_ci.pScissors = nullptr;
|
||||
|
||||
VkPipelineRasterizationStateCreateInfo rasterization_ci;
|
||||
rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
|
||||
|
||||
@@ -105,8 +105,6 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat
|
||||
}
|
||||
|
||||
vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
|
||||
device.SaveShader(code);
|
||||
|
||||
VkShaderModuleCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
ci.pNext = nullptr;
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
@@ -168,7 +167,6 @@ bool VKDevice::Create() {
|
||||
VkPhysicalDeviceFeatures2 features2;
|
||||
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||
features2.pNext = nullptr;
|
||||
const void* first_next = &features2;
|
||||
void** next = &features2.pNext;
|
||||
|
||||
auto& features = features2.features;
|
||||
@@ -298,19 +296,7 @@ bool VKDevice::Create() {
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
||||
}
|
||||
|
||||
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
|
||||
if (nv_device_diagnostics_config) {
|
||||
nsight_aftermath_tracker.Initialize();
|
||||
|
||||
diagnostics_nv.sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV;
|
||||
diagnostics_nv.pNext = &features2;
|
||||
diagnostics_nv.flags = VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV |
|
||||
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV |
|
||||
VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV;
|
||||
first_next = &diagnostics_nv;
|
||||
}
|
||||
|
||||
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
|
||||
logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld);
|
||||
if (!logical) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create logical device");
|
||||
return false;
|
||||
@@ -358,12 +344,17 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
|
||||
void VKDevice::ReportLoss() const {
|
||||
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
|
||||
|
||||
// Wait for the log to flush and for Nsight Aftermath to dump the results
|
||||
std::this_thread::sleep_for(std::chrono::seconds{3});
|
||||
}
|
||||
// Wait some time to let the log flush
|
||||
std::this_thread::sleep_for(std::chrono::seconds{1});
|
||||
|
||||
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
|
||||
nsight_aftermath_tracker.SaveShader(spirv);
|
||||
if (!nv_device_diagnostic_checkpoints) {
|
||||
return;
|
||||
}
|
||||
|
||||
[[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld);
|
||||
// Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be
|
||||
// executed. It can be done on a debugger by evaluating the expression:
|
||||
// *(VKGraphicsPipeline*)data[0]
|
||||
}
|
||||
|
||||
bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
|
||||
@@ -536,8 +527,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
|
||||
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
|
||||
false);
|
||||
if (Settings::values.renderer_debug) {
|
||||
Test(extension, nv_device_diagnostics_config,
|
||||
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
|
||||
Test(extension, nv_device_diagnostic_checkpoints,
|
||||
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@@ -44,9 +43,6 @@ public:
|
||||
/// Reports a device loss.
|
||||
void ReportLoss() const;
|
||||
|
||||
/// Reports a shader to Nsight Aftermath.
|
||||
void SaveShader(const std::vector<u32>& spirv) const;
|
||||
|
||||
/// Returns the dispatch loader with direct function pointers of the device.
|
||||
const vk::DeviceDispatch& GetDispatchLoader() const {
|
||||
return dld;
|
||||
@@ -177,6 +173,11 @@ public:
|
||||
return ext_transform_feedback;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
|
||||
bool IsNvDeviceDiagnosticCheckpoints() const {
|
||||
return nv_device_diagnostic_checkpoints;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return vendor_name;
|
||||
@@ -232,7 +233,7 @@ private:
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
|
||||
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
|
||||
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
|
||||
|
||||
// Telemetry parameters
|
||||
std::string vendor_name; ///< Device's driver name.
|
||||
@@ -240,9 +241,6 @@ private:
|
||||
|
||||
/// Format properties dictionary.
|
||||
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
||||
|
||||
/// Nsight Aftermath GPU crash tracker
|
||||
NsightAftermathTracker nsight_aftermath_tracker;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -147,8 +147,6 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
|
||||
continue;
|
||||
}
|
||||
|
||||
device.SaveShader(stage->code);
|
||||
|
||||
ci.codeSize = stage->code.size() * sizeof(u32);
|
||||
ci.pCode = stage->code.data();
|
||||
modules.push_back(device.GetLogical().CreateShaderModule(ci));
|
||||
|
||||
@@ -113,19 +113,8 @@ u64 HostCounter::BlockingQuery() const {
|
||||
if (ticks >= cache.Scheduler().Ticks()) {
|
||||
cache.Scheduler().Flush();
|
||||
}
|
||||
u64 data;
|
||||
const VkResult result = cache.Device().GetLogical().GetQueryResults(
|
||||
query.first, query.second, 1, sizeof(data), &data, sizeof(data),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
switch (result) {
|
||||
case VK_SUCCESS:
|
||||
return data;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
cache.Device().ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
throw vk::Exception(result);
|
||||
}
|
||||
return cache.Device().GetLogical().GetQueryResult<u64>(
|
||||
query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -62,13 +62,16 @@ constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::Sha
|
||||
|
||||
VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
|
||||
const auto& src = regs.viewport_transform[index];
|
||||
const float width = src.scale_x * 2.0f;
|
||||
const float height = src.scale_y * 2.0f;
|
||||
|
||||
VkViewport viewport;
|
||||
viewport.x = src.translate_x - src.scale_x;
|
||||
viewport.y = src.translate_y - src.scale_y;
|
||||
viewport.width = src.scale_x * 2.0f;
|
||||
viewport.height = src.scale_y * 2.0f;
|
||||
viewport.width = width != 0.0f ? width : 1.0f;
|
||||
viewport.height = height != 0.0f ? height : 1.0f;
|
||||
|
||||
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
|
||||
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
|
||||
viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
|
||||
viewport.maxDepth = src.translate_z + src.scale_z;
|
||||
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
|
||||
@@ -347,6 +350,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
|
||||
buffer_bindings.Bind(scheduler);
|
||||
|
||||
if (device.IsNvDeviceDiagnosticCheckpoints()) {
|
||||
scheduler.Record(
|
||||
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); });
|
||||
}
|
||||
|
||||
BeginTransformFeedback();
|
||||
|
||||
const auto pipeline_layout = pipeline.GetLayout();
|
||||
@@ -473,6 +481,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
||||
|
||||
if (device.IsNvDeviceDiagnosticCheckpoints()) {
|
||||
scheduler.Record(
|
||||
[&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); });
|
||||
}
|
||||
|
||||
scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
|
||||
grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
|
||||
layout = pipeline.GetLayout(),
|
||||
|
||||
@@ -166,15 +166,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
|
||||
submit_info.pCommandBuffers = current_cmdbuf.address();
|
||||
submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
|
||||
submit_info.pSignalSemaphores = &semaphore;
|
||||
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
|
||||
case VK_SUCCESS:
|
||||
break;
|
||||
case VK_ERROR_DEVICE_LOST:
|
||||
device.ReportLoss();
|
||||
[[fallthrough]];
|
||||
default:
|
||||
vk::Check(result);
|
||||
}
|
||||
device.GetGraphicsQueue().Submit(submit_info, *current_fence);
|
||||
}
|
||||
|
||||
void VKScheduler::AllocateNewContext() {
|
||||
|
||||
@@ -1938,11 +1938,8 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
|
||||
Type value_type = result_type>
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||
Expression Atomic(Operation operation) {
|
||||
const Id type_def = GetTypeDefinition(result_type);
|
||||
|
||||
Id pointer;
|
||||
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||
pointer = GetSharedMemoryPointer(*smem);
|
||||
@@ -1950,15 +1947,19 @@ private:
|
||||
pointer = GetGlobalMemoryPointer(*gmem);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
return {Constant(type_def, 0), result_type};
|
||||
return {v_float_zero, Type::Float};
|
||||
}
|
||||
|
||||
const Id value = As(Visit(operation[1]), value_type);
|
||||
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = Constant(type_def, 0);
|
||||
const Id semantics = Constant(t_uint, 0);
|
||||
const Id value = AsUint(Visit(operation[1]));
|
||||
|
||||
return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
|
||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||
}
|
||||
|
||||
/// Emits the atomic instruction selected by `func` purely for its effect on memory.
/// The value produced by the atomic is intentionally dropped and an empty expression is
/// returned to the caller.
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Expression Reduce(Operation operation) {
    // Only the side effect matters; make the discard explicit.
    static_cast<void>(Atomic<func>(operation));
    return Expression{};
}
|
||||
|
||||
Expression Branch(Operation operation) {
|
||||
@@ -2547,21 +2548,35 @@ private:
|
||||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
|
||||
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
|
||||
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
|
||||
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::BranchIndirect,
|
||||
|
||||
@@ -61,6 +61,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
X(vkCmdPipelineBarrier);
|
||||
X(vkCmdPushConstants);
|
||||
X(vkCmdSetBlendConstants);
|
||||
X(vkCmdSetCheckpointNV);
|
||||
X(vkCmdSetDepthBias);
|
||||
X(vkCmdSetDepthBounds);
|
||||
X(vkCmdSetScissor);
|
||||
@@ -115,6 +116,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
X(vkGetFenceStatus);
|
||||
X(vkGetImageMemoryRequirements);
|
||||
X(vkGetQueryPoolResults);
|
||||
X(vkGetQueueCheckpointDataNV);
|
||||
X(vkMapMemory);
|
||||
X(vkQueueSubmit);
|
||||
X(vkResetFences);
|
||||
@@ -407,6 +409,17 @@ DebugCallback Instance::TryCreateDebugCallback(
|
||||
return DebugCallback(messenger, handle, *dld);
|
||||
}
|
||||
|
||||
std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const {
|
||||
if (!dld.vkGetQueueCheckpointDataNV) {
|
||||
return {};
|
||||
}
|
||||
u32 num;
|
||||
dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr);
|
||||
std::vector<VkCheckpointDataNV> checkpoints(num);
|
||||
dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data());
|
||||
return checkpoints;
|
||||
}
|
||||
|
||||
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
|
||||
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
|
||||
}
|
||||
@@ -456,11 +469,12 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
|
||||
}
|
||||
|
||||
Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
|
||||
Span<const char*> enabled_extensions, const void* next,
|
||||
Span<const char*> enabled_extensions,
|
||||
const VkPhysicalDeviceFeatures2& enabled_features,
|
||||
DeviceDispatch& dld) noexcept {
|
||||
VkDeviceCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
ci.pNext = next;
|
||||
ci.pNext = &enabled_features;
|
||||
ci.flags = 0;
|
||||
ci.queueCreateInfoCount = queues_ci.size();
|
||||
ci.pQueueCreateInfos = queues_ci.data();
|
||||
|
||||
@@ -197,6 +197,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
|
||||
PFN_vkCmdPushConstants vkCmdPushConstants;
|
||||
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
|
||||
PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV;
|
||||
PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
|
||||
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
|
||||
PFN_vkCmdSetScissor vkCmdSetScissor;
|
||||
@@ -251,6 +252,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||
PFN_vkGetFenceStatus vkGetFenceStatus;
|
||||
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
|
||||
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
|
||||
PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV;
|
||||
PFN_vkMapMemory vkMapMemory;
|
||||
PFN_vkQueueSubmit vkQueueSubmit;
|
||||
PFN_vkResetFences vkResetFences;
|
||||
@@ -565,8 +567,12 @@ public:
|
||||
/// Construct a queue handle.
|
||||
constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
|
||||
|
||||
VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
|
||||
return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
|
||||
/// Returns the checkpoint data.
|
||||
/// @note Returns an empty vector when the function pointer is not present.
|
||||
std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const;
|
||||
|
||||
void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const {
|
||||
Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence));
|
||||
}
|
||||
|
||||
VkResult Present(const VkPresentInfoKHR& present_info) const noexcept {
|
||||
@@ -653,7 +659,8 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
|
||||
|
||||
public:
|
||||
static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
|
||||
Span<const char*> enabled_extensions, const void* next,
|
||||
Span<const char*> enabled_extensions,
|
||||
const VkPhysicalDeviceFeatures2& enabled_features,
|
||||
DeviceDispatch& dld) noexcept;
|
||||
|
||||
Queue GetQueue(u32 family_index) const noexcept;
|
||||
@@ -727,11 +734,18 @@ public:
|
||||
dld->vkResetQueryPoolEXT(handle, query_pool, first, count);
|
||||
}
|
||||
|
||||
VkResult GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
|
||||
void* data, VkDeviceSize stride, VkQueryResultFlags flags) const
|
||||
noexcept {
|
||||
return dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
|
||||
flags);
|
||||
void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size,
|
||||
void* data, VkDeviceSize stride, VkQueryResultFlags flags) const {
|
||||
Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride,
|
||||
flags));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const {
|
||||
static_assert(std::is_trivially_copyable_v<T>);
|
||||
T value;
|
||||
GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags);
|
||||
return value;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -906,6 +920,10 @@ public:
|
||||
dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
|
||||
}
|
||||
|
||||
/// Records a checkpoint marker into this command buffer by forwarding `checkpoint_marker`
/// to vkCmdSetCheckpointNV through the dispatch table.
/// @param checkpoint_marker Opaque pointer passed through unchanged; it is not dereferenced here.
/// NOTE(review): the function pointer is not null-checked in this wrapper - callers presumably
/// guard on extension support before recording; confirm at call sites.
void SetCheckpointNV(const void* checkpoint_marker) const noexcept {
|
||||
dld->vkCmdSetCheckpointNV(handle, checkpoint_marker);
|
||||
}
|
||||
|
||||
void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
|
||||
dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
|
||||
}
|
||||
|
||||
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::FCMP_R: {
|
||||
case OpCode::Id::FCMP_RR:
|
||||
case OpCode::Id::FCMP_RC: {
|
||||
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
|
||||
Node op_c = GetRegister(instr.gpr39);
|
||||
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
|
||||
|
||||
@@ -119,7 +119,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
|
||||
}
|
||||
break;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("texture format not implement={}", format);
|
||||
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
|
||||
return ComponentType::FLOAT;
|
||||
}
|
||||
|
||||
@@ -201,10 +201,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
|
||||
return 0;
|
||||
case TextureFormat::G24R8:
|
||||
if (component == 0) {
|
||||
return 8;
|
||||
return 24;
|
||||
}
|
||||
if (component == 1) {
|
||||
return 24;
|
||||
return 8;
|
||||
}
|
||||
return 0;
|
||||
case TextureFormat::G8R8:
|
||||
@@ -212,7 +212,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
|
||||
case TextureFormat::G4R4:
|
||||
return (component == 0 || component == 1) ? 4 : 0;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("texture format not implement={}", format);
|
||||
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -249,7 +249,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
|
||||
case TextureFormat::R1:
|
||||
return std::size_t{R};
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("texture format not implement={}", format);
|
||||
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
|
||||
return std::size_t{R | G | B | A};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
@@ -16,6 +18,7 @@
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using std::move;
|
||||
using Tegra::Shader::AtomicOp;
|
||||
using Tegra::Shader::AtomicType;
|
||||
using Tegra::Shader::Attribute;
|
||||
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
|
||||
|
||||
namespace {
|
||||
|
||||
Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
|
||||
const OperationCode operation_code = [op] {
|
||||
switch (op) {
|
||||
case AtomicOp::Add:
|
||||
return OperationCode::AtomicIAdd;
|
||||
case AtomicOp::Min:
|
||||
return OperationCode::AtomicIMin;
|
||||
case AtomicOp::Max:
|
||||
return OperationCode::AtomicIMax;
|
||||
case AtomicOp::And:
|
||||
return OperationCode::AtomicIAnd;
|
||||
case AtomicOp::Or:
|
||||
return OperationCode::AtomicIOr;
|
||||
case AtomicOp::Xor:
|
||||
return OperationCode::AtomicIXor;
|
||||
case AtomicOp::Exch:
|
||||
return OperationCode::AtomicIExchange;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
|
||||
return OperationCode::AtomicIAdd;
|
||||
}
|
||||
}();
|
||||
return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
|
||||
/// Maps a hardware atomic operation to the matching integer atomic IR operation code.
/// Operations without a mapping are reported as unimplemented and fall back to
/// OperationCode::AtomicIAdd.
OperationCode GetAtomOperation(AtomicOp op) {
    if (op == AtomicOp::Add) {
        return OperationCode::AtomicIAdd;
    }
    if (op == AtomicOp::Min) {
        return OperationCode::AtomicIMin;
    }
    if (op == AtomicOp::Max) {
        return OperationCode::AtomicIMax;
    }
    if (op == AtomicOp::And) {
        return OperationCode::AtomicIAnd;
    }
    if (op == AtomicOp::Or) {
        return OperationCode::AtomicIOr;
    }
    if (op == AtomicOp::Xor) {
        return OperationCode::AtomicIXor;
    }
    if (op == AtomicOp::Exch) {
        return OperationCode::AtomicIExchange;
    }

    // Anything else has no IR equivalent yet.
    UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
    return OperationCode::AtomicIAdd;
}
|
||||
|
||||
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
|
||||
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
|
||||
|
||||
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
|
||||
Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
|
||||
return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
|
||||
Immediate(size));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
|
||||
return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
|
||||
}
|
||||
|
||||
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
|
||||
Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
|
||||
return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
|
||||
std::move(offset), Immediate(size));
|
||||
Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
|
||||
return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
|
||||
Immediate(size));
|
||||
}
|
||||
|
||||
Node Sign16Extend(Node value) {
|
||||
Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
|
||||
Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
|
||||
Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
|
||||
Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
|
||||
return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
|
||||
return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
|
||||
if (IsUnaligned(type)) {
|
||||
const u32 mask = GetUnalignedMask(type);
|
||||
value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
|
||||
value = InsertUnaligned(gmem, move(value), real_address, mask, size);
|
||||
}
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::RED: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
|
||||
UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
|
||||
const auto [real_address, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, true, true);
|
||||
if (!real_address || !base_address) {
|
||||
// Tracking failed, skip atomic.
|
||||
break;
|
||||
}
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
Node value = GetRegister(instr.gpr0);
|
||||
bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOM: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
|
||||
instr.atom.operation == AtomicOp::Dec ||
|
||||
instr.atom.operation == AtomicOp::SafeAdd,
|
||||
"operation={}", static_cast<int>(instr.atom.operation.Value()));
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
|
||||
instr.atom.type == GlobalAtomicType::U64,
|
||||
instr.atom.type == GlobalAtomicType::U64 ||
|
||||
instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
|
||||
instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
|
||||
"type={}", static_cast<int>(instr.atom.type.Value()));
|
||||
|
||||
const auto [real_address, base_address, descriptor] =
|
||||
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
|
||||
const bool is_signed =
|
||||
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
|
||||
instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
|
||||
GetRegister(instr.gpr20));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
SetRegister(bb, instr.gpr0,
|
||||
SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
|
||||
GetRegister(instr.gpr20)));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOMS: {
|
||||
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
|
||||
const s32 offset = instr.atoms.GetImmediateOffset();
|
||||
Node address = GetRegister(instr.gpr8);
|
||||
address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
|
||||
Node value =
|
||||
GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
|
||||
GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
|
||||
SetRegister(bb, instr.gpr0,
|
||||
SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
|
||||
GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::AL2P: {
|
||||
|
||||
@@ -178,6 +178,20 @@ enum class OperationCode {
|
||||
AtomicIOr, /// (memory, int) -> int
|
||||
AtomicIXor, /// (memory, int) -> int
|
||||
|
||||
ReduceUAdd, /// (memory, uint) -> void
|
||||
ReduceUMin, /// (memory, uint) -> void
|
||||
ReduceUMax, /// (memory, uint) -> void
|
||||
ReduceUAnd, /// (memory, uint) -> void
|
||||
ReduceUOr, /// (memory, uint) -> void
|
||||
ReduceUXor, /// (memory, uint) -> void
|
||||
|
||||
ReduceIAdd, /// (memory, int) -> void
|
||||
ReduceIMin, /// (memory, int) -> void
|
||||
ReduceIMax, /// (memory, int) -> void
|
||||
ReduceIAnd, /// (memory, int) -> void
|
||||
ReduceIOr, /// (memory, int) -> void
|
||||
ReduceIXor, /// (memory, int) -> void
|
||||
|
||||
Branch, /// (uint branch_target) -> void
|
||||
BranchIndirect, /// (uint branch_target) -> void
|
||||
PushFlowStack, /// (uint branch_target) -> void
|
||||
|
||||
@@ -509,7 +509,9 @@ private:
|
||||
}
|
||||
const auto& final_params = new_surface->GetSurfaceParams();
|
||||
if (cr_params.type != final_params.type) {
|
||||
BufferCopy(current_surface, new_surface);
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
BufferCopy(current_surface, new_surface);
|
||||
}
|
||||
} else {
|
||||
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
|
||||
for (auto& brick : bricks) {
|
||||
|
||||
Reference in New Issue
Block a user