Compare commits
35 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19ce0d4f1a | ||
|
|
faf5ae6a50 | ||
|
|
116a940dbb | ||
|
|
7ea362e134 | ||
|
|
e54699565a | ||
|
|
f73e569ba8 | ||
|
|
c3e43c7e81 | ||
|
|
f632d00eb1 | ||
|
|
36651f215a | ||
|
|
707bf41c6f | ||
|
|
d2b50c5ebd | ||
|
|
4bbb22a477 | ||
|
|
d49ed4a421 | ||
|
|
74f515e8b6 | ||
|
|
e36814d6d5 | ||
|
|
ef2b6733d0 | ||
|
|
dc70a36b44 | ||
|
|
40cd4df584 | ||
|
|
2f79cc3ef5 | ||
|
|
2883cc1658 | ||
|
|
560cfbc21a | ||
|
|
be9f80ef56 | ||
|
|
526e533e90 | ||
|
|
3c1b6b5723 | ||
|
|
5c7253f8d3 | ||
|
|
930b7c18a6 | ||
|
|
b2c7636710 | ||
|
|
ff64c3951a | ||
|
|
fb6cf12a17 | ||
|
|
c34da106ed | ||
|
|
cc81c0ce64 | ||
|
|
c8473f399e | ||
|
|
cd0f5dfc17 | ||
|
|
f3d1b370aa | ||
|
|
95137a04e1 |
@@ -39,6 +39,7 @@ mkdir "artifacts"
|
||||
# Build a tar.xz for the source of the release
|
||||
Copy-Item .\license.txt -Destination $MSVC_SOURCE
|
||||
Copy-Item .\README.md -Destination $MSVC_SOURCE
|
||||
Copy-Item .\CMakeLists.txt -Destination $MSVC_SOURCE
|
||||
Copy-Item .\src -Recurse -Destination $MSVC_SOURCE
|
||||
Copy-Item .\externals -Recurse -Destination $MSVC_SOURCE
|
||||
Copy-Item .\dist -Recurse -Destination $MSVC_SOURCE
|
||||
@@ -60,4 +61,4 @@ Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Ite
|
||||
|
||||
Get-ChildItem . -Filter "*.zip" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.7z" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
|
||||
Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
|
||||
|
||||
@@ -29,7 +29,7 @@ option(ENABLE_VULKAN "Enables Vulkan backend" ON)
|
||||
|
||||
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
|
||||
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/hooks/pre-commit AND NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
|
||||
message(STATUS "Copying pre-commit hook")
|
||||
file(COPY hooks/pre-commit
|
||||
DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
|
||||
@@ -49,7 +49,10 @@ function(check_submodules_present)
|
||||
endif()
|
||||
endforeach()
|
||||
endfunction()
|
||||
check_submodules_present()
|
||||
|
||||
if(EXISTS ${PROJECT_SOURCE_DIR}/.gitmodules)
|
||||
check_submodules_present()
|
||||
endif()
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/dist/compatibility_list/compatibility_list.qrc
|
||||
${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
|
||||
|
||||
2
externals/Vulkan-Headers
vendored
2
externals/Vulkan-Headers
vendored
Submodule externals/Vulkan-Headers updated: fd568d51ed...d42d0747ee
@@ -96,6 +96,8 @@ void Cpu::RunLoop(bool tight_loop) {
|
||||
} else {
|
||||
arm_interface->Step();
|
||||
}
|
||||
// We are stopping a run, exclusive state must be cleared
|
||||
arm_interface->ClearExclusiveState();
|
||||
}
|
||||
core_timing.Advance();
|
||||
|
||||
|
||||
@@ -11,8 +11,6 @@
|
||||
#include "core/core_cpu.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/scheduler.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
union ResultCode;
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "core/hle/kernel/hle_ipc.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/server_port.h"
|
||||
#include "core/hle/kernel/server_session.h"
|
||||
#include "core/hle/kernel/session.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
#include "core/hle/kernel/address_arbiter.h"
|
||||
#include "core/hle/kernel/client_port.h"
|
||||
#include "core/hle/kernel/errors.h"
|
||||
#include "core/hle/kernel/handle_table.h"
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
|
||||
@@ -458,7 +458,6 @@ void Scheduler::SwitchContext() {
|
||||
cpu_core.LoadContext(new_thread->GetContext());
|
||||
cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
|
||||
cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
|
||||
cpu_core.ClearExclusiveState();
|
||||
} else {
|
||||
current_thread = nullptr;
|
||||
// Note: We do not reset the current process and current page table when idling because
|
||||
|
||||
@@ -4,11 +4,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/multi_level_queue.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "core/hle/kernel/wait_object.h"
|
||||
#include "core/hle/result.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/physical_memory.h"
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <boost/smart_ptr/intrusive_ptr.hpp>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "core/hle/kernel/object.h"
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
@@ -165,24 +165,20 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
|
||||
Telemetry::AppendOSInfo(field_collection);
|
||||
|
||||
// Log user configuration information
|
||||
AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
|
||||
Settings::values.enable_audio_stretching);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
|
||||
Settings::values.use_multi_core);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
|
||||
Settings::values.resolution_factor);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
|
||||
Settings::values.use_frame_limit);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
|
||||
Settings::values.use_disk_shader_cache);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
|
||||
constexpr auto field_type = Telemetry::FieldType::UserConfig;
|
||||
AddField(field_type, "Audio_SinkId", Settings::values.sink_id);
|
||||
AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
|
||||
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core);
|
||||
AddField(field_type, "Renderer_Backend", "OpenGL");
|
||||
AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor);
|
||||
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
||||
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
|
||||
AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
||||
AddField(field_type, "Renderer_UseAccurateGpuEmulation",
|
||||
Settings::values.use_accurate_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
|
||||
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation);
|
||||
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
|
||||
Settings::values.use_docked_mode);
|
||||
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
|
||||
}
|
||||
|
||||
bool TelemetrySession::SubmitTestcase() {
|
||||
|
||||
@@ -491,6 +491,23 @@ public:
|
||||
INSERT_UNION_PADDING_WORDS(1);
|
||||
};
|
||||
|
||||
enum class DepthMode : u32 {
|
||||
MinusOneToOne = 0,
|
||||
ZeroToOne = 1,
|
||||
};
|
||||
|
||||
enum class TessellationPrimitive : u32 {
|
||||
Isolines = 0,
|
||||
Triangles = 1,
|
||||
Quads = 2,
|
||||
};
|
||||
|
||||
enum class TessellationSpacing : u32 {
|
||||
Equal = 0,
|
||||
FractionalOdd = 1,
|
||||
FractionalEven = 2,
|
||||
};
|
||||
|
||||
struct RenderTargetConfig {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
@@ -628,7 +645,19 @@ public:
|
||||
};
|
||||
} sync_info;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x11E);
|
||||
INSERT_UNION_PADDING_WORDS(0x15);
|
||||
|
||||
union {
|
||||
BitField<0, 2, TessellationPrimitive> prim;
|
||||
BitField<4, 2, TessellationSpacing> spacing;
|
||||
BitField<8, 1, u32> cw;
|
||||
BitField<9, 1, u32> connected;
|
||||
} tess_mode;
|
||||
|
||||
std::array<f32, 4> tess_level_outer;
|
||||
std::array<f32, 2> tess_level_inner;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x102);
|
||||
|
||||
u32 tfb_enabled;
|
||||
|
||||
@@ -662,7 +691,9 @@ public:
|
||||
u32 polygon_offset_line_enable;
|
||||
u32 polygon_offset_fill_enable;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xD);
|
||||
u32 patch_vertices;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
|
||||
std::array<ScissorTest, NumViewports> scissor_test;
|
||||
|
||||
@@ -1386,6 +1417,9 @@ ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(sync_info, 0xB2);
|
||||
ASSERT_REG_POSITION(tess_mode, 0xC8);
|
||||
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
|
||||
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
|
||||
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
|
||||
ASSERT_REG_POSITION(rt, 0x200);
|
||||
ASSERT_REG_POSITION(viewport_transform, 0x280);
|
||||
@@ -1397,6 +1431,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
|
||||
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
|
||||
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
|
||||
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
|
||||
ASSERT_REG_POSITION(patch_vertices, 0x373);
|
||||
ASSERT_REG_POSITION(scissor_test, 0x380);
|
||||
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
|
||||
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
|
||||
|
||||
@@ -799,6 +799,12 @@ union Instruction {
|
||||
BitField<40, 1, u64> invert;
|
||||
} popc;
|
||||
|
||||
union {
|
||||
BitField<41, 1, u64> sh;
|
||||
BitField<40, 1, u64> invert;
|
||||
BitField<48, 1, u64> is_signed;
|
||||
} flo;
|
||||
|
||||
union {
|
||||
BitField<39, 3, u64> pred;
|
||||
BitField<42, 1, u64> neg_pred;
|
||||
@@ -1439,6 +1445,26 @@ union Instruction {
|
||||
}
|
||||
} tlds;
|
||||
|
||||
union {
|
||||
BitField<28, 1, u64> is_array;
|
||||
BitField<29, 2, TextureType> texture_type;
|
||||
BitField<35, 1, u64> aoffi_flag;
|
||||
BitField<49, 1, u64> nodep_flag;
|
||||
|
||||
bool UsesMiscMode(TextureMiscMode mode) const {
|
||||
switch (mode) {
|
||||
case TextureMiscMode::AOFFI:
|
||||
return aoffi_flag != 0;
|
||||
case TextureMiscMode::NODEP:
|
||||
return nodep_flag != 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} txd;
|
||||
|
||||
union {
|
||||
BitField<24, 2, StoreCacheManagement> cache_management;
|
||||
BitField<33, 3, ImageType> image_type;
|
||||
@@ -1632,6 +1658,8 @@ public:
|
||||
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
|
||||
TMML_B, // Texture Mip Map Level
|
||||
TMML, // Texture Mip Map Level
|
||||
TXD, // Texture Gradient/Load with Derivates
|
||||
TXD_B, // Texture Gradient/Load with Derivates Bindless
|
||||
SUST, // Surface Store
|
||||
SULD, // Surface Load
|
||||
SUATOM, // Surface Atomic Operation
|
||||
@@ -1664,6 +1692,9 @@ public:
|
||||
ISCADD_C, // Scale and Add
|
||||
ISCADD_R,
|
||||
ISCADD_IMM,
|
||||
FLO_R,
|
||||
FLO_C,
|
||||
FLO_IMM,
|
||||
LEA_R1,
|
||||
LEA_R2,
|
||||
LEA_RZ,
|
||||
@@ -1727,6 +1758,10 @@ public:
|
||||
SHR_C,
|
||||
SHR_R,
|
||||
SHR_IMM,
|
||||
SHF_RIGHT_R,
|
||||
SHF_RIGHT_IMM,
|
||||
SHF_LEFT_R,
|
||||
SHF_LEFT_IMM,
|
||||
FMNMX_C,
|
||||
FMNMX_R,
|
||||
FMNMX_IMM,
|
||||
@@ -1924,6 +1959,8 @@ private:
|
||||
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
|
||||
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
|
||||
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
|
||||
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
|
||||
INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
|
||||
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
|
||||
INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
|
||||
INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
|
||||
@@ -1965,6 +2002,9 @@ private:
|
||||
INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
|
||||
INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
|
||||
INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
|
||||
INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
|
||||
INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
|
||||
INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
|
||||
INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
|
||||
INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
|
||||
INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
|
||||
@@ -2022,6 +2062,10 @@ private:
|
||||
INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
|
||||
INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
|
||||
INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
|
||||
INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
|
||||
INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
|
||||
INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
|
||||
INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
|
||||
INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
|
||||
INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
|
||||
INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
|
||||
|
||||
@@ -3,9 +3,12 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
@@ -13,6 +16,7 @@
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
|
||||
|
||||
@@ -35,36 +39,49 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
|
||||
local_state.draw.draw_framebuffer = framebuffer.handle;
|
||||
local_state.ApplyFramebufferState();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (key.colors[index]) {
|
||||
key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
|
||||
GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
}
|
||||
if (key.colors_count) {
|
||||
glDrawBuffers(key.colors_count, key.color_attachments.data());
|
||||
} else {
|
||||
glDrawBuffer(GL_NONE);
|
||||
if (key.zeta) {
|
||||
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
|
||||
const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
|
||||
key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
|
||||
if (key.zeta) {
|
||||
key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
|
||||
GL_DRAW_FRAMEBUFFER);
|
||||
std::size_t num_buffers = 0;
|
||||
std::array<GLenum, Maxwell::NumRenderTargets> targets;
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (!key.colors[index]) {
|
||||
targets[index] = GL_NONE;
|
||||
continue;
|
||||
}
|
||||
const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
|
||||
key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
|
||||
const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
|
||||
targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
|
||||
num_buffers = index + 1;
|
||||
}
|
||||
|
||||
if (num_buffers > 0) {
|
||||
glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
|
||||
} else {
|
||||
glDrawBuffer(GL_NONE);
|
||||
}
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
std::size_t FramebufferCacheKey::Hash() const {
|
||||
static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
|
||||
return static_cast<std::size_t>(
|
||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
|
||||
std::size_t FramebufferCacheKey::Hash() const noexcept {
|
||||
std::size_t hash = std::hash<View>{}(zeta);
|
||||
for (const auto& color : colors) {
|
||||
hash ^= std::hash<View>{}(color);
|
||||
}
|
||||
hash ^= static_cast<std::size_t>(color_attachments) << 16;
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
|
||||
return std::tie(stencil_enable, colors_count, color_attachments, colors, zeta) ==
|
||||
std::tie(rhs.stencil_enable, rhs.colors_count, rhs.color_attachments, rhs.colors,
|
||||
rhs.zeta);
|
||||
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return std::tie(colors, zeta, color_attachments) ==
|
||||
std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -18,21 +18,24 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
struct alignas(sizeof(u64)) FramebufferCacheKey {
|
||||
bool stencil_enable = false;
|
||||
u16 colors_count = 0;
|
||||
constexpr std::size_t BitsPerAttachment = 4;
|
||||
|
||||
std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
|
||||
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
|
||||
struct FramebufferCacheKey {
|
||||
View zeta;
|
||||
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
|
||||
u32 color_attachments = 0;
|
||||
|
||||
std::size_t Hash() const;
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FramebufferCacheKey& rhs) const;
|
||||
bool operator==(const FramebufferCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const {
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
void SetAttachment(std::size_t index, u32 attachment) {
|
||||
color_attachments |= attachment << (BitsPerAttachment * index);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -93,7 +93,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||
state.draw.shader_program = 0;
|
||||
state.Apply();
|
||||
clear_framebuffer.Create();
|
||||
|
||||
LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
|
||||
CheckExtensions();
|
||||
@@ -373,78 +372,58 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
|
||||
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
|
||||
|
||||
// Bind the framebuffer surfaces
|
||||
FramebufferCacheKey fbkey;
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
FramebufferCacheKey key;
|
||||
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
|
||||
for (std::size_t index = 0; index < colors_count; ++index) {
|
||||
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
|
||||
|
||||
if (color_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
if (!color_surface) {
|
||||
continue;
|
||||
}
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
|
||||
fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
|
||||
fbkey.colors[index] = std::move(color_surface);
|
||||
key.SetAttachment(index, regs.rt_control.GetMap(index));
|
||||
key.colors[index] = std::move(color_surface);
|
||||
}
|
||||
fbkey.colors_count = static_cast<u16>(regs.rt_control.count);
|
||||
|
||||
if (depth_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
|
||||
fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
|
||||
fbkey.zeta = std::move(depth_surface);
|
||||
key.zeta = std::move(depth_surface);
|
||||
}
|
||||
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
|
||||
state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
|
||||
SyncViewport(state);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
|
||||
bool using_depth_fb, bool using_stencil_fb) {
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
auto& gpu = system.GPU().Maxwell3D();
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
View color_surface{};
|
||||
View color_surface;
|
||||
if (using_color_fb) {
|
||||
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
|
||||
}
|
||||
View depth_surface{};
|
||||
View depth_surface;
|
||||
if (using_depth_fb || using_stencil_fb) {
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(false);
|
||||
}
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
current_state.draw.draw_framebuffer = clear_framebuffer.handle;
|
||||
FramebufferCacheKey key;
|
||||
key.colors[0] = color_surface;
|
||||
key.zeta = depth_surface;
|
||||
|
||||
current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
|
||||
current_state.ApplyFramebufferState();
|
||||
|
||||
if (color_surface) {
|
||||
color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
|
||||
} else {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
}
|
||||
|
||||
if (depth_surface) {
|
||||
const auto& params = depth_surface->GetSurfaceParams();
|
||||
switch (params.type) {
|
||||
case VideoCore::Surface::SurfaceType::Depth:
|
||||
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
break;
|
||||
case VideoCore::Surface::SurfaceType::DepthStencil:
|
||||
depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
} else {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
|
||||
@@ -223,8 +223,6 @@ private:
|
||||
|
||||
enum class AccelDraw { Disabled, Arrays, Indexed };
|
||||
AccelDraw accelerate_draw = AccelDraw::Disabled;
|
||||
|
||||
OGLFramebuffer clear_framebuffer;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -49,8 +49,9 @@ class ExprDecompiler;
|
||||
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||
|
||||
struct TextureAoffi {};
|
||||
struct TextureDerivates {};
|
||||
using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
|
||||
using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
|
||||
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
||||
@@ -1112,6 +1113,8 @@ private:
|
||||
expr += GenerateTextureArgument(*argument);
|
||||
} else if (std::holds_alternative<TextureAoffi>(variant)) {
|
||||
expr += GenerateTextureAoffi(meta->aoffi);
|
||||
} else if (std::holds_alternative<TextureDerivates>(variant)) {
|
||||
expr += GenerateTextureDerivates(meta->derivates);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -1181,6 +1184,36 @@ private:
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
|
||||
if (derivates.empty()) {
|
||||
return {};
|
||||
}
|
||||
constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
|
||||
std::string expr = ", ";
|
||||
const std::size_t components = derivates.size() / 2;
|
||||
std::string dx = coord_constructors.at(components - 1);
|
||||
std::string dy = coord_constructors.at(components - 1);
|
||||
dx += '(';
|
||||
dy += '(';
|
||||
|
||||
for (std::size_t index = 0; index < components; ++index) {
|
||||
const auto operand_x{derivates.at(index * 2)};
|
||||
const auto operand_y{derivates.at(index * 2 + 1)};
|
||||
dx += Visit(operand_x).AsFloat();
|
||||
dy += Visit(operand_y).AsFloat();
|
||||
|
||||
if (index + 1 < components) {
|
||||
dx += ", ";
|
||||
dy += ", ";
|
||||
}
|
||||
}
|
||||
dx += ')';
|
||||
dy += ')';
|
||||
expr += dx + ", " + dy;
|
||||
|
||||
return expr;
|
||||
}
|
||||
|
||||
std::string BuildIntegerCoordinates(Operation operation) {
|
||||
constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
|
||||
const std::size_t coords_count{operation.GetOperandsCount()};
|
||||
@@ -1450,6 +1483,11 @@ private:
|
||||
return GenerateUnary(operation, "bitCount", type, type);
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
Expression BitMSB(Operation operation) {
|
||||
return GenerateUnary(operation, "findMSB", type, type);
|
||||
}
|
||||
|
||||
Expression HNegate(Operation operation) {
|
||||
const auto GetNegate = [&](std::size_t index) {
|
||||
return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
|
||||
@@ -1738,6 +1776,14 @@ private:
|
||||
return {std::move(expr), Type::Float};
|
||||
}
|
||||
|
||||
Expression TextureGradient(Operation operation) {
|
||||
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
|
||||
ASSERT(meta);
|
||||
|
||||
std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
|
||||
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
|
||||
}
|
||||
|
||||
Expression ImageLoad(Operation operation) {
|
||||
if (!device.HasImageLoadFormatted()) {
|
||||
LOG_ERROR(Render_OpenGL,
|
||||
@@ -2003,6 +2049,7 @@ private:
|
||||
&GLSLDecompiler::BitfieldInsert<Type::Int>,
|
||||
&GLSLDecompiler::BitfieldExtract<Type::Int>,
|
||||
&GLSLDecompiler::BitCount<Type::Int>,
|
||||
&GLSLDecompiler::BitMSB<Type::Int>,
|
||||
|
||||
&GLSLDecompiler::Add<Type::Uint>,
|
||||
&GLSLDecompiler::Mul<Type::Uint>,
|
||||
@@ -2021,6 +2068,7 @@ private:
|
||||
&GLSLDecompiler::BitfieldInsert<Type::Uint>,
|
||||
&GLSLDecompiler::BitfieldExtract<Type::Uint>,
|
||||
&GLSLDecompiler::BitCount<Type::Uint>,
|
||||
&GLSLDecompiler::BitMSB<Type::Uint>,
|
||||
|
||||
&GLSLDecompiler::Add<Type::HalfFloat>,
|
||||
&GLSLDecompiler::Mul<Type::HalfFloat>,
|
||||
@@ -2084,6 +2132,7 @@ private:
|
||||
&GLSLDecompiler::TextureQueryDimensions,
|
||||
&GLSLDecompiler::TextureQueryLod,
|
||||
&GLSLDecompiler::TexelFetch,
|
||||
&GLSLDecompiler::TextureGradient,
|
||||
|
||||
&GLSLDecompiler::ImageLoad,
|
||||
&GLSLDecompiler::ImageStore,
|
||||
|
||||
@@ -4,6 +4,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace vk {
|
||||
class DispatchLoaderDynamic;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
|
||||
}
|
||||
|
||||
#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
|
||||
#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
namespace Vulkan {
|
||||
@@ -41,5 +52,7 @@ using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
|
||||
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
|
||||
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
|
||||
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
|
||||
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
|
||||
using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bitset>
|
||||
#include <cstdlib>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string_view>
|
||||
@@ -15,6 +16,15 @@ namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eD16UnormS8Uint, vk::Format{}};
|
||||
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint, vk::Format{}};
|
||||
|
||||
} // namespace Alternatives
|
||||
|
||||
template <typename T>
|
||||
void SetNext(void**& next, T& data) {
|
||||
*next = &data;
|
||||
@@ -22,7 +32,7 @@ void SetNext(void**& next, T& data) {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
|
||||
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
|
||||
vk::PhysicalDeviceFeatures2 features;
|
||||
T extension_features;
|
||||
features.pNext = &extension_features;
|
||||
@@ -30,17 +40,14 @@ T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
|
||||
return extension_features;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
namespace Alternatives {
|
||||
|
||||
constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eD16UnormS8Uint, vk::Format{}};
|
||||
constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint, vk::Format{}};
|
||||
constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
|
||||
|
||||
} // namespace Alternatives
|
||||
template <typename T>
|
||||
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
|
||||
vk::PhysicalDeviceProperties2 properties;
|
||||
T extension_properties;
|
||||
properties.pNext = &extension_properties;
|
||||
physical.getProperties2(&properties, dldi);
|
||||
return extension_properties;
|
||||
}
|
||||
|
||||
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
|
||||
switch (format) {
|
||||
@@ -53,8 +60,7 @@ constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
|
||||
}
|
||||
}
|
||||
|
||||
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
|
||||
FormatType format_type) {
|
||||
vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) {
|
||||
switch (format_type) {
|
||||
case FormatType::Linear:
|
||||
return properties.linearTilingFeatures;
|
||||
@@ -67,11 +73,13 @@ constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properti
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface)
|
||||
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
|
||||
: physical{physical}, properties{physical.getProperties(dldi)},
|
||||
format_properties{GetFormatProperties(dldi, physical)} {
|
||||
SetupFamilies(dldi, surface);
|
||||
SetupProperties(dldi);
|
||||
SetupFeatures(dldi);
|
||||
}
|
||||
|
||||
@@ -89,12 +97,22 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
features.depthClamp = true;
|
||||
features.samplerAnisotropy = true;
|
||||
features.largePoints = true;
|
||||
features.multiViewport = true;
|
||||
features.depthBiasClamp = true;
|
||||
features.geometryShader = true;
|
||||
features.tessellationShader = true;
|
||||
features.fragmentStoresAndAtomics = true;
|
||||
features.shaderImageGatherExtended = true;
|
||||
features.shaderStorageImageWriteWithoutFormat = true;
|
||||
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
|
||||
|
||||
vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
|
||||
vertex_divisor.vertexAttributeInstanceRateDivisor = true;
|
||||
vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
|
||||
SetNext(next, vertex_divisor);
|
||||
vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage;
|
||||
bit16_storage.uniformAndStorageBuffer16BitAccess = true;
|
||||
SetNext(next, bit16_storage);
|
||||
|
||||
vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage;
|
||||
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
||||
SetNext(next, bit8_storage);
|
||||
|
||||
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
||||
if (is_float16_supported) {
|
||||
@@ -120,6 +138,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
|
||||
}
|
||||
|
||||
if (!ext_depth_range_unrestricted) {
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
||||
}
|
||||
|
||||
vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
|
||||
nullptr, static_cast<u32>(extensions.size()), extensions.data(),
|
||||
nullptr);
|
||||
@@ -135,16 +157,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||
logical = UniqueDevice(
|
||||
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
|
||||
|
||||
if (khr_driver_properties) {
|
||||
vk::PhysicalDeviceDriverPropertiesKHR driver;
|
||||
vk::PhysicalDeviceProperties2 properties;
|
||||
properties.pNext = &driver;
|
||||
physical.getProperties2(&properties, dld);
|
||||
driver_id = driver.driverID;
|
||||
LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan, "Driver: Unknown");
|
||||
}
|
||||
CollectTelemetryParameters();
|
||||
|
||||
graphics_queue = logical->getQueue(graphics_family, 0, dld);
|
||||
present_queue = logical->getQueue(present_family, 0, dld);
|
||||
@@ -190,6 +203,18 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
|
||||
|
||||
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
|
||||
const vk::DispatchLoaderDynamic& dldi) const {
|
||||
// Disable for now to avoid converting ASTC twice.
|
||||
return false;
|
||||
static constexpr std::array astc_formats = {
|
||||
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
|
||||
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
|
||||
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
|
||||
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
|
||||
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
|
||||
if (!features.textureCompressionASTC_LDR) {
|
||||
return false;
|
||||
}
|
||||
@@ -197,12 +222,6 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
|
||||
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
|
||||
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
|
||||
vk::FormatFeatureFlagBits::eTransferDst};
|
||||
constexpr std::array astc_formats = {
|
||||
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock};
|
||||
for (const auto format : astc_formats) {
|
||||
const auto format_properties{physical.getFormatProperties(format, dldi)};
|
||||
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
|
||||
@@ -225,11 +244,17 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
|
||||
|
||||
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface) {
|
||||
LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
|
||||
bool is_suitable = true;
|
||||
|
||||
constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
|
||||
constexpr std::array required_extensions = {
|
||||
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
|
||||
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
|
||||
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
|
||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||
};
|
||||
std::bitset<required_extensions.size()> available_extensions{};
|
||||
|
||||
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
||||
@@ -246,7 +271,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
if (available_extensions[i]) {
|
||||
continue;
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
|
||||
LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
|
||||
is_suitable = false;
|
||||
}
|
||||
}
|
||||
@@ -263,7 +288,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
|
||||
}
|
||||
if (!has_graphics || !has_present) {
|
||||
LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
|
||||
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
@@ -273,8 +298,15 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
|
||||
constexpr u32 required_ubo_size = 65536;
|
||||
if (limits.maxUniformBufferRange < required_ubo_size) {
|
||||
LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
|
||||
limits.maxUniformBufferRange, required_ubo_size);
|
||||
LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
|
||||
limits.maxUniformBufferRange, required_ubo_size);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
constexpr u32 required_num_viewports = 16;
|
||||
if (limits.maxViewports < required_num_viewports) {
|
||||
LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
|
||||
limits.maxViewports, required_num_viewports);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
@@ -285,24 +317,32 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||
std::make_pair(features.depthClamp, "depthClamp"),
|
||||
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
||||
std::make_pair(features.largePoints, "largePoints"),
|
||||
std::make_pair(features.multiViewport, "multiViewport"),
|
||||
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
||||
std::make_pair(features.geometryShader, "geometryShader"),
|
||||
std::make_pair(features.tessellationShader, "tessellationShader"),
|
||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||
"shaderStorageImageWriteWithoutFormat"),
|
||||
};
|
||||
for (const auto& [supported, name] : feature_report) {
|
||||
if (supported) {
|
||||
continue;
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
|
||||
LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
|
||||
is_suitable = false;
|
||||
}
|
||||
|
||||
if (!is_suitable) {
|
||||
LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
|
||||
}
|
||||
|
||||
return is_suitable;
|
||||
}
|
||||
|
||||
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
|
||||
std::vector<const char*> extensions;
|
||||
extensions.reserve(7);
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||
|
||||
const auto Test = [&](const vk::ExtensionProperties& extension,
|
||||
std::optional<std::reference_wrapper<bool>> status, const char* name,
|
||||
bool push) {
|
||||
@@ -317,13 +357,30 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||
}
|
||||
};
|
||||
|
||||
extensions.reserve(13);
|
||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
||||
extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
||||
|
||||
[[maybe_unused]] const bool nsight =
|
||||
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||
bool khr_shader_float16_int8{};
|
||||
bool ext_subgroup_size_control{};
|
||||
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
||||
Test(extension, khr_uniform_buffer_standard_layout,
|
||||
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
||||
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
|
||||
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||
Test(extension, ext_depth_range_unrestricted,
|
||||
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
Test(extension, ext_shader_viewport_index_layer,
|
||||
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
|
||||
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
|
||||
false);
|
||||
}
|
||||
|
||||
if (khr_shader_float16_int8) {
|
||||
@@ -332,6 +389,23 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
||||
}
|
||||
|
||||
if (ext_subgroup_size_control) {
|
||||
const auto features =
|
||||
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
|
||||
const auto properties =
|
||||
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
|
||||
|
||||
is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
|
||||
|
||||
if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
|
||||
properties.maxSubgroupSize >= GuestWarpSize) {
|
||||
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
|
||||
guest_warp_stages = properties.requiredSubgroupSizeStages;
|
||||
}
|
||||
} else {
|
||||
is_warp_potentially_bigger = true;
|
||||
}
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
@@ -358,19 +432,23 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
|
||||
present_family = *present_family_;
|
||||
}
|
||||
|
||||
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
|
||||
const auto props = physical.getProperties(dldi);
|
||||
device_type = props.deviceType;
|
||||
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
|
||||
storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
|
||||
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
|
||||
}
|
||||
|
||||
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
|
||||
const auto supported_features{physical.getFeatures(dldi)};
|
||||
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
|
||||
}
|
||||
|
||||
void VKDevice::CollectTelemetryParameters() {
|
||||
const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
|
||||
driver_id = driver.driverID;
|
||||
vendor_name = driver.driverName;
|
||||
|
||||
const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
|
||||
reported_extensions.reserve(std::size(extensions));
|
||||
for (const auto& extension : extensions) {
|
||||
reported_extensions.push_back(extension.extensionName);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
|
||||
static const float QUEUE_PRIORITY = 1.0f;
|
||||
|
||||
@@ -385,50 +463,70 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
|
||||
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
|
||||
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
|
||||
constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
|
||||
vk::Format::eA8B8G8R8SnormPack32,
|
||||
vk::Format::eA8B8G8R8SrgbPack32,
|
||||
vk::Format::eB5G6R5UnormPack16,
|
||||
vk::Format::eA2B10G10R10UnormPack32,
|
||||
vk::Format::eR32G32B32A32Sfloat,
|
||||
vk::Format::eR16G16B16A16Uint,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16Snorm,
|
||||
vk::Format::eR16G16Sfloat,
|
||||
vk::Format::eR16Unorm,
|
||||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8Unorm,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8Unorm,
|
||||
vk::Format::eB10G11R11UfloatPack32,
|
||||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR16Sfloat,
|
||||
vk::Format::eR16G16B16A16Sfloat,
|
||||
vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eD32Sfloat,
|
||||
vk::Format::eD16Unorm,
|
||||
vk::Format::eD16UnormS8Uint,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eBc1RgbaUnormBlock,
|
||||
vk::Format::eBc2UnormBlock,
|
||||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eBc1RgbaSrgbBlock,
|
||||
vk::Format::eBc3SrgbBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eAstc4x4UnormBlock,
|
||||
vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock};
|
||||
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
|
||||
vk::Format::eA8B8G8R8SnormPack32,
|
||||
vk::Format::eA8B8G8R8SrgbPack32,
|
||||
vk::Format::eB5G6R5UnormPack16,
|
||||
vk::Format::eA2B10G10R10UnormPack32,
|
||||
vk::Format::eA1R5G5B5UnormPack16,
|
||||
vk::Format::eR32G32B32A32Sfloat,
|
||||
vk::Format::eR32G32B32A32Uint,
|
||||
vk::Format::eR32G32Sfloat,
|
||||
vk::Format::eR32G32Uint,
|
||||
vk::Format::eR16G16B16A16Uint,
|
||||
vk::Format::eR16G16B16A16Unorm,
|
||||
vk::Format::eR16G16Unorm,
|
||||
vk::Format::eR16G16Snorm,
|
||||
vk::Format::eR16G16Sfloat,
|
||||
vk::Format::eR16Unorm,
|
||||
vk::Format::eR8G8B8A8Srgb,
|
||||
vk::Format::eR8G8Unorm,
|
||||
vk::Format::eR8G8Snorm,
|
||||
vk::Format::eR8Unorm,
|
||||
vk::Format::eR8Uint,
|
||||
vk::Format::eB10G11R11UfloatPack32,
|
||||
vk::Format::eR32Sfloat,
|
||||
vk::Format::eR32Uint,
|
||||
vk::Format::eR16Sfloat,
|
||||
vk::Format::eR16G16B16A16Sfloat,
|
||||
vk::Format::eB8G8R8A8Unorm,
|
||||
vk::Format::eR4G4B4A4UnormPack16,
|
||||
vk::Format::eD32Sfloat,
|
||||
vk::Format::eD16Unorm,
|
||||
vk::Format::eD16UnormS8Uint,
|
||||
vk::Format::eD24UnormS8Uint,
|
||||
vk::Format::eD32SfloatS8Uint,
|
||||
vk::Format::eBc1RgbaUnormBlock,
|
||||
vk::Format::eBc2UnormBlock,
|
||||
vk::Format::eBc3UnormBlock,
|
||||
vk::Format::eBc4UnormBlock,
|
||||
vk::Format::eBc5UnormBlock,
|
||||
vk::Format::eBc5SnormBlock,
|
||||
vk::Format::eBc7UnormBlock,
|
||||
vk::Format::eBc6HUfloatBlock,
|
||||
vk::Format::eBc6HSfloatBlock,
|
||||
vk::Format::eBc1RgbaSrgbBlock,
|
||||
vk::Format::eBc3SrgbBlock,
|
||||
vk::Format::eBc7SrgbBlock,
|
||||
vk::Format::eAstc4x4SrgbBlock,
|
||||
vk::Format::eAstc8x8SrgbBlock,
|
||||
vk::Format::eAstc8x5SrgbBlock,
|
||||
vk::Format::eAstc5x4SrgbBlock,
|
||||
vk::Format::eAstc5x5UnormBlock,
|
||||
vk::Format::eAstc5x5SrgbBlock,
|
||||
vk::Format::eAstc10x8UnormBlock,
|
||||
vk::Format::eAstc10x8SrgbBlock,
|
||||
vk::Format::eAstc6x6UnormBlock,
|
||||
vk::Format::eAstc6x6SrgbBlock,
|
||||
vk::Format::eAstc10x10UnormBlock,
|
||||
vk::Format::eAstc10x10SrgbBlock,
|
||||
vk::Format::eAstc12x12UnormBlock,
|
||||
vk::Format::eAstc12x12SrgbBlock,
|
||||
vk::Format::eAstc8x6UnormBlock,
|
||||
vk::Format::eAstc8x6SrgbBlock,
|
||||
vk::Format::eAstc6x5UnormBlock,
|
||||
vk::Format::eAstc6x5SrgbBlock,
|
||||
vk::Format::eE5B9G9R9UfloatPack32};
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
|
||||
for (const auto format : formats) {
|
||||
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
@@ -14,6 +16,9 @@ namespace Vulkan {
|
||||
/// Format usage descriptor.
|
||||
enum class FormatType { Linear, Optimal, Buffer };
|
||||
|
||||
/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
|
||||
const u32 GuestWarpSize = 32;
|
||||
|
||||
/// Handles data specific to a physical device.
|
||||
class VKDevice final {
|
||||
public:
|
||||
@@ -71,7 +76,22 @@ public:
|
||||
|
||||
/// Returns true if the device is integrated with the host CPU.
|
||||
bool IsIntegrated() const {
|
||||
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||
return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||
}
|
||||
|
||||
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
|
||||
u32 GetApiVersion() const {
|
||||
return properties.apiVersion;
|
||||
}
|
||||
|
||||
/// Returns the current driver version provided in Vulkan-formatted version numbers.
|
||||
u32 GetDriverVersion() const {
|
||||
return properties.driverVersion;
|
||||
}
|
||||
|
||||
/// Returns the device name.
|
||||
std::string_view GetModelName() const {
|
||||
return properties.deviceName;
|
||||
}
|
||||
|
||||
/// Returns the driver ID.
|
||||
@@ -80,18 +100,23 @@ public:
|
||||
}
|
||||
|
||||
/// Returns uniform buffer alignment requeriment.
|
||||
u64 GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
vk::DeviceSize GetUniformBufferAlignment() const {
|
||||
return properties.limits.minUniformBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns storage alignment requeriment.
|
||||
u64 GetStorageBufferAlignment() const {
|
||||
return storage_buffer_alignment;
|
||||
vk::DeviceSize GetStorageBufferAlignment() const {
|
||||
return properties.limits.minStorageBufferOffsetAlignment;
|
||||
}
|
||||
|
||||
/// Returns the maximum range for storage buffers.
|
||||
u64 GetMaxStorageBufferRange() const {
|
||||
return max_storage_buffer_range;
|
||||
vk::DeviceSize GetMaxStorageBufferRange() const {
|
||||
return properties.limits.maxStorageBufferRange;
|
||||
}
|
||||
|
||||
/// Returns the maximum size for push constants.
|
||||
vk::DeviceSize GetMaxPushConstantsSize() const {
|
||||
return properties.limits.maxPushConstantsSize;
|
||||
}
|
||||
|
||||
/// Returns true if ASTC is natively supported.
|
||||
@@ -104,6 +129,16 @@ public:
|
||||
return is_float16_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
|
||||
bool IsWarpSizePotentiallyBiggerThanGuest() const {
|
||||
return is_warp_potentially_bigger;
|
||||
}
|
||||
|
||||
/// Returns true if the device can be forced to use the guest warp size.
|
||||
bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
|
||||
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_scalar_block_layout.
|
||||
bool IsKhrUniformBufferStandardLayoutSupported() const {
|
||||
return khr_uniform_buffer_standard_layout;
|
||||
@@ -114,6 +149,26 @@ public:
|
||||
return ext_index_type_uint8;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
|
||||
bool IsExtDepthRangeUnrestrictedSupported() const {
|
||||
return ext_depth_range_unrestricted;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
|
||||
bool IsExtShaderViewportIndexLayerSupported() const {
|
||||
return ext_shader_viewport_index_layer;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return vendor_name;
|
||||
}
|
||||
|
||||
/// Returns the list of available extensions.
|
||||
const std::vector<std::string>& GetAvailableExtensions() const {
|
||||
return reported_extensions;
|
||||
}
|
||||
|
||||
/// Checks if the physical device is suitable.
|
||||
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
|
||||
vk::SurfaceKHR surface);
|
||||
@@ -125,12 +180,12 @@ private:
|
||||
/// Sets up queue families.
|
||||
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
|
||||
|
||||
/// Sets up device properties.
|
||||
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
|
||||
|
||||
/// Sets up device features.
|
||||
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
|
||||
|
||||
/// Collects telemetry information from the device.
|
||||
void CollectTelemetryParameters();
|
||||
|
||||
/// Returns a list of queue initialization descriptors.
|
||||
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
|
||||
|
||||
@@ -148,23 +203,28 @@ private:
|
||||
|
||||
const vk::PhysicalDevice physical; ///< Physical device.
|
||||
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
|
||||
vk::PhysicalDeviceProperties properties; ///< Device properties.
|
||||
UniqueDevice logical; ///< Logical device.
|
||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||
vk::Queue present_queue; ///< Main present queue.
|
||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||
u32 present_family{}; ///< Main present queue family index.
|
||||
vk::PhysicalDeviceType device_type; ///< Physical device type.
|
||||
vk::DriverIdKHR driver_id{}; ///< Driver ID.
|
||||
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
|
||||
u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
|
||||
u64 max_storage_buffer_range{}; ///< Max storage buffer size.
|
||||
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||
bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
|
||||
std::unordered_map<vk::Format, vk::FormatProperties>
|
||||
format_properties; ///< Format properties dictionary.
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
|
||||
// Telemetry parameters
|
||||
std::string vendor_name; ///< Device's driver name.
|
||||
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
|
||||
|
||||
/// Format properties dictionary.
|
||||
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -983,6 +983,11 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Id TextureGradient(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Id ImageLoad(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
@@ -1391,6 +1396,7 @@ private:
|
||||
&SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
|
||||
&SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
|
||||
&SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
|
||||
&SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
|
||||
|
||||
&SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
|
||||
&SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
|
||||
@@ -1409,6 +1415,7 @@ private:
|
||||
&SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
|
||||
&SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
|
||||
&SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
|
||||
&SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
|
||||
|
||||
&SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
|
||||
&SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
|
||||
@@ -1473,6 +1480,7 @@ private:
|
||||
&SPIRVDecompiler::TextureQueryDimensions,
|
||||
&SPIRVDecompiler::TextureQueryLod,
|
||||
&SPIRVDecompiler::TexelFetch,
|
||||
&SPIRVDecompiler::TextureGradient,
|
||||
|
||||
&SPIRVDecompiler::ImageLoad,
|
||||
&SPIRVDecompiler::ImageStore,
|
||||
|
||||
@@ -19,12 +19,18 @@
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) {
|
||||
|
||||
vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats,
|
||||
bool srgb) {
|
||||
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
|
||||
return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear};
|
||||
vk::SurfaceFormatKHR format;
|
||||
format.format = vk::Format::eB8G8R8A8Unorm;
|
||||
format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||
return format;
|
||||
}
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
|
||||
return format.format == vk::Format::eB8G8R8A8Unorm &&
|
||||
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
|
||||
const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm;
|
||||
return format.format == request_format &&
|
||||
format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||
});
|
||||
return found != formats.end() ? *found : formats[0];
|
||||
@@ -51,28 +57,26 @@ vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u3
|
||||
std::min(capabilities.maxImageExtent.height, extent.height));
|
||||
return extent;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device)
|
||||
: surface{surface}, device{device} {}
|
||||
|
||||
VKSwapchain::~VKSwapchain() = default;
|
||||
|
||||
void VKSwapchain::Create(u32 width, u32 height) {
|
||||
const auto dev = device.GetLogical();
|
||||
void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
|
||||
const auto& dld = device.GetDispatchLoader();
|
||||
const auto physical_device = device.GetPhysical();
|
||||
|
||||
const vk::SurfaceCapabilitiesKHR capabilities{
|
||||
physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
|
||||
const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)};
|
||||
if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dev.waitIdle(dld);
|
||||
device.GetLogical().waitIdle(dld);
|
||||
Destroy();
|
||||
|
||||
CreateSwapchain(capabilities, width, height);
|
||||
CreateSwapchain(capabilities, width, height, srgb);
|
||||
CreateSemaphores();
|
||||
CreateImageViews();
|
||||
|
||||
@@ -107,7 +111,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
|
||||
break;
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
if (current_width > 0 && current_height > 0) {
|
||||
Create(current_width, current_height);
|
||||
Create(current_width, current_height, current_srgb);
|
||||
recreated = true;
|
||||
}
|
||||
break;
|
||||
@@ -129,23 +133,19 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
|
||||
}
|
||||
|
||||
void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width,
|
||||
u32 height) {
|
||||
const auto dev{device.GetLogical()};
|
||||
u32 height, bool srgb) {
|
||||
const auto& dld{device.GetDispatchLoader()};
|
||||
const auto physical_device{device.GetPhysical()};
|
||||
const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)};
|
||||
const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)};
|
||||
|
||||
const std::vector<vk::SurfaceFormatKHR> formats{
|
||||
physical_device.getSurfaceFormatsKHR(surface, dld)};
|
||||
|
||||
const std::vector<vk::PresentModeKHR> present_modes{
|
||||
physical_device.getSurfacePresentModesKHR(surface, dld)};
|
||||
|
||||
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
|
||||
const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
|
||||
const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
|
||||
extent = ChooseSwapExtent(capabilities, width, height);
|
||||
|
||||
current_width = extent.width;
|
||||
current_height = extent.height;
|
||||
current_srgb = srgb;
|
||||
|
||||
u32 requested_image_count{capabilities.minImageCount + 1};
|
||||
if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
|
||||
@@ -169,6 +169,7 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
|
||||
swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
|
||||
}
|
||||
|
||||
const auto dev{device.GetLogical()};
|
||||
swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
|
||||
|
||||
images = dev.getSwapchainImagesKHR(*swapchain, dld);
|
||||
|
||||
@@ -24,7 +24,7 @@ public:
|
||||
~VKSwapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height);
|
||||
void Create(u32 width, u32 height, bool srgb);
|
||||
|
||||
/// Acquires the next image in the swapchain, waits as needed.
|
||||
void AcquireNextImage();
|
||||
@@ -60,8 +60,13 @@ public:
|
||||
return image_format;
|
||||
}
|
||||
|
||||
bool GetSrgbState() const {
|
||||
return current_srgb;
|
||||
}
|
||||
|
||||
private:
|
||||
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height);
|
||||
void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
|
||||
bool srgb);
|
||||
void CreateSemaphores();
|
||||
void CreateImageViews();
|
||||
|
||||
@@ -87,6 +92,7 @@ private:
|
||||
|
||||
u32 current_width{};
|
||||
u32 current_height{};
|
||||
bool current_srgb{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::FLO_R:
|
||||
case OpCode::Id::FLO_C:
|
||||
case OpCode::Id::FLO_IMM: {
|
||||
Node value;
|
||||
if (instr.flo.invert) {
|
||||
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
|
||||
}
|
||||
if (instr.flo.is_signed) {
|
||||
value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
|
||||
} else {
|
||||
value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
|
||||
}
|
||||
if (instr.flo.sh) {
|
||||
value =
|
||||
Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
|
||||
}
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::SEL_C:
|
||||
case OpCode::Id::SEL_R:
|
||||
case OpCode::Id::SEL_IMM: {
|
||||
|
||||
@@ -134,13 +134,52 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
WriteTexsInstructionFloat(bb, instr, values, true);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXD_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
case OpCode::Id::TXD: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
|
||||
"AOFFI is not implemented");
|
||||
UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented");
|
||||
|
||||
u64 base_reg = instr.gpr8.Value();
|
||||
const auto derivate_reg = instr.gpr20.Value();
|
||||
const auto texture_type = instr.txd.texture_type.Value();
|
||||
const auto coord_count = GetCoordCount(texture_type);
|
||||
|
||||
const auto& sampler = is_bindless
|
||||
? GetBindlessSampler(base_reg, {{texture_type, false, false}})
|
||||
: GetSampler(instr.sampler, {{texture_type, false, false}});
|
||||
if (is_bindless) {
|
||||
base_reg++;
|
||||
}
|
||||
|
||||
std::vector<Node> coords;
|
||||
std::vector<Node> derivates;
|
||||
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||
coords.push_back(GetRegister(base_reg + i));
|
||||
const std::size_t derivate = i * 2;
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate));
|
||||
derivates.push_back(GetRegister(derivate_reg + derivate + 1));
|
||||
}
|
||||
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
|
||||
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
|
||||
}
|
||||
|
||||
WriteTexInstructionFloat(bb, instr, values);
|
||||
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::TXQ_B:
|
||||
is_bindless = true;
|
||||
[[fallthrough]];
|
||||
@@ -158,7 +197,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
if (!instr.txq.IsComponentEnabled(element)) {
|
||||
continue;
|
||||
}
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value =
|
||||
Operation(OperationCode::TextureQueryDimensions, meta,
|
||||
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
|
||||
@@ -212,7 +251,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
continue;
|
||||
}
|
||||
auto params = coords;
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
|
||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||
SetTemporary(bb, indexer++, value);
|
||||
}
|
||||
@@ -442,7 +481,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto copy_coords = coords;
|
||||
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
|
||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||
}
|
||||
|
||||
@@ -574,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
|
||||
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
|
||||
element};
|
||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||
}
|
||||
@@ -608,7 +647,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
|
||||
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
|
||||
@@ -653,7 +692,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
||||
Node4 values;
|
||||
for (u32 element = 0; element < values.size(); ++element) {
|
||||
auto coords_copy = coords;
|
||||
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
|
||||
MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
|
||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||
}
|
||||
return values;
|
||||
|
||||
@@ -68,6 +68,7 @@ enum class OperationCode {
|
||||
IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
|
||||
IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int
|
||||
IBitCount, /// (MetaArithmetic, int) -> int
|
||||
IBitMSB, /// (MetaArithmetic, int) -> int
|
||||
|
||||
UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
|
||||
UMul, /// (MetaArithmetic, uint a, uint b) -> uint
|
||||
@@ -86,6 +87,7 @@ enum class OperationCode {
|
||||
UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
|
||||
UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
|
||||
UBitCount, /// (MetaArithmetic, uint) -> uint
|
||||
UBitMSB, /// (MetaArithmetic, uint) -> uint
|
||||
|
||||
HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||
@@ -149,6 +151,7 @@ enum class OperationCode {
|
||||
TextureQueryDimensions, /// (MetaTexture, float a) -> float4
|
||||
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
|
||||
TexelFetch, /// (MetaTexture, int[N], int) -> float4
|
||||
TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
|
||||
|
||||
ImageLoad, /// (MetaImage, int[N] coords) -> void
|
||||
ImageStore, /// (MetaImage, int[N] coords) -> void
|
||||
@@ -367,6 +370,7 @@ struct MetaTexture {
|
||||
Node array;
|
||||
Node depth_compare;
|
||||
std::vector<Node> aoffi;
|
||||
std::vector<Node> derivates;
|
||||
Node bias;
|
||||
Node lod;
|
||||
Node component{};
|
||||
|
||||
@@ -254,16 +254,14 @@ public:
|
||||
if (!layer_mipmap) {
|
||||
return {};
|
||||
}
|
||||
const u32 end_layer{layer_mipmap->first};
|
||||
const u32 end_mipmap{layer_mipmap->second};
|
||||
const auto [end_layer, end_mipmap] = *layer_mipmap;
|
||||
if (layer != end_layer) {
|
||||
if (mipmap == 0 && end_mipmap == 0) {
|
||||
return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
|
||||
return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
|
||||
}
|
||||
return {};
|
||||
} else {
|
||||
return GetView(
|
||||
ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
|
||||
return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -278,8 +276,7 @@ public:
|
||||
if (!layer_mipmap) {
|
||||
return {};
|
||||
}
|
||||
const u32 layer{layer_mipmap->first};
|
||||
const u32 mipmap{layer_mipmap->second};
|
||||
const auto [layer, mipmap] = *layer_mipmap;
|
||||
if (GetMipmapSize(mipmap) != candidate_size) {
|
||||
return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
|
||||
}
|
||||
|
||||
@@ -246,6 +246,16 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
|
||||
return params;
|
||||
}
|
||||
|
||||
VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
|
||||
const VideoCommon::Shader::Sampler& entry) {
|
||||
return TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray());
|
||||
}
|
||||
|
||||
VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
|
||||
const VideoCommon::Shader::Image& entry) {
|
||||
return ImageTypeToSurfaceTarget(entry.GetType());
|
||||
}
|
||||
|
||||
bool SurfaceParams::IsLayered() const {
|
||||
switch (target) {
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
|
||||
@@ -45,6 +45,14 @@ public:
|
||||
static SurfaceParams CreateForFermiCopySurface(
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& config);
|
||||
|
||||
/// Obtains the texture target from a shader's sampler entry.
|
||||
static VideoCore::Surface::SurfaceTarget ExpectedTarget(
|
||||
const VideoCommon::Shader::Sampler& entry);
|
||||
|
||||
/// Obtains the texture target from a shader's sampler entry.
|
||||
static VideoCore::Surface::SurfaceTarget ExpectedTarget(
|
||||
const VideoCommon::Shader::Image& entry);
|
||||
|
||||
std::size_t Hash() const {
|
||||
return static_cast<std::size_t>(
|
||||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
|
||||
|
||||
@@ -95,10 +95,16 @@ public:
|
||||
std::lock_guard lock{mutex};
|
||||
const auto gpu_addr{tic.Address()};
|
||||
if (!gpu_addr) {
|
||||
return {};
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
|
||||
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
|
||||
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
|
||||
if (guard_samplers) {
|
||||
sampled_textures.push_back(surface);
|
||||
}
|
||||
@@ -110,10 +116,15 @@ public:
|
||||
std::lock_guard lock{mutex};
|
||||
const auto gpu_addr{tic.Address()};
|
||||
if (!gpu_addr) {
|
||||
return {};
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
|
||||
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
|
||||
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
|
||||
if (guard_samplers) {
|
||||
sampled_textures.push_back(surface);
|
||||
}
|
||||
@@ -143,11 +154,17 @@ public:
|
||||
SetEmptyDepthBuffer();
|
||||
return {};
|
||||
}
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
SetEmptyDepthBuffer();
|
||||
return {};
|
||||
}
|
||||
const auto depth_params{SurfaceParams::CreateForDepthBuffer(
|
||||
system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
|
||||
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
|
||||
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
|
||||
auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
|
||||
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
|
||||
if (depth_buffer.target)
|
||||
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
|
||||
depth_buffer.target = surface_view.first;
|
||||
@@ -180,8 +197,16 @@ public:
|
||||
return {};
|
||||
}
|
||||
|
||||
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
|
||||
preserve_contents, true);
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
SetEmptyColorBuffer(index);
|
||||
return {};
|
||||
}
|
||||
|
||||
auto surface_view =
|
||||
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
|
||||
preserve_contents, true);
|
||||
if (render_targets[index].target)
|
||||
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
|
||||
render_targets[index].target = surface_view.first;
|
||||
@@ -230,8 +255,14 @@ public:
|
||||
const GPUVAddr src_gpu_addr = src_config.Address();
|
||||
const GPUVAddr dst_gpu_addr = dst_config.Address();
|
||||
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
|
||||
std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
|
||||
std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
|
||||
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
|
||||
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
|
||||
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
|
||||
const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
|
||||
std::pair<TSurface, TView> dst_surface =
|
||||
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
|
||||
std::pair<TSurface, TView> src_surface =
|
||||
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
|
||||
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
||||
dst_surface.first->MarkAsModified(true, Tick());
|
||||
}
|
||||
@@ -347,13 +378,6 @@ protected:
|
||||
return new_surface;
|
||||
}
|
||||
|
||||
std::pair<TSurface, TView> GetFermiSurface(
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& config) {
|
||||
SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config);
|
||||
const GPUVAddr gpu_addr = config.Address();
|
||||
return GetSurface(gpu_addr, params, true, false);
|
||||
}
|
||||
|
||||
Core::System& system;
|
||||
|
||||
private:
|
||||
@@ -614,22 +638,9 @@ private:
|
||||
* left blank.
|
||||
* @param is_render Whether or not the surface is a render target.
|
||||
**/
|
||||
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
|
||||
bool preserve_contents, bool is_render) {
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
|
||||
// Step 0: guarantee a valid surface
|
||||
if (!cache_addr) {
|
||||
// Return a null surface if it's invalid
|
||||
SurfaceParams new_params = params;
|
||||
new_params.width = 1;
|
||||
new_params.height = 1;
|
||||
new_params.depth = 1;
|
||||
new_params.block_height = 0;
|
||||
new_params.block_depth = 0;
|
||||
return InitializeSurface(gpu_addr, new_params, false);
|
||||
}
|
||||
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
|
||||
const SurfaceParams& params, bool preserve_contents,
|
||||
bool is_render) {
|
||||
|
||||
// Step 1
|
||||
// Check Level 1 Cache for a fast structural match. If candidate surface
|
||||
@@ -793,6 +804,41 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a null surface based on a target texture.
|
||||
* @param target The target of the null surface.
|
||||
*/
|
||||
TView GetNullSurface(SurfaceTarget target) {
|
||||
const u32 i_target = static_cast<u32>(target);
|
||||
if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
|
||||
return it->second->GetMainView();
|
||||
}
|
||||
SurfaceParams params{};
|
||||
params.target = target;
|
||||
params.is_tiled = false;
|
||||
params.srgb_conversion = false;
|
||||
params.is_layered = false;
|
||||
params.block_width = 0;
|
||||
params.block_height = 0;
|
||||
params.block_depth = 0;
|
||||
params.tile_width_spacing = 1;
|
||||
params.width = 1;
|
||||
params.height = 1;
|
||||
params.depth = 1;
|
||||
params.pitch = 4;
|
||||
params.num_levels = 1;
|
||||
params.emulated_levels = 1;
|
||||
params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
|
||||
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
|
||||
auto surface = CreateSurface(0ULL, params);
|
||||
invalid_memory.clear();
|
||||
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
|
||||
surface->UploadTexture(invalid_memory);
|
||||
surface->MarkAsModified(false, Tick());
|
||||
invalid_cache.emplace(i_target, surface);
|
||||
return surface->GetMainView();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the a source and destination starting address and parameters,
|
||||
* and tries to deduce if they are supposed to be depth textures. If so, their
|
||||
@@ -991,6 +1037,11 @@ private:
|
||||
|
||||
std::vector<TSurface> sampled_textures;
|
||||
|
||||
/// This cache stores null surfaces in order to be used as a placeholder
|
||||
/// for invalid texture calls.
|
||||
std::unordered_map<u32, TSurface> invalid_cache;
|
||||
std::vector<u8> invalid_memory;
|
||||
|
||||
StagingCache staging_cache;
|
||||
std::recursive_mutex mutex;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user