Compare commits
130 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6e5b84d1f | ||
|
|
0f7822acb1 | ||
|
|
5a092fb61e | ||
|
|
de644d506f | ||
|
|
bfa6193eb9 | ||
|
|
778043a44c | ||
|
|
2f420618ea | ||
|
|
be660e7749 | ||
|
|
8868fb745f | ||
|
|
edb2114bac | ||
|
|
1394a581f2 | ||
|
|
52bcfac116 | ||
|
|
109df7705f | ||
|
|
32a127faaa | ||
|
|
a8ac99b619 | ||
|
|
c409722435 | ||
|
|
a6ddd7c382 | ||
|
|
b660ef6c8a | ||
|
|
0f57bbfa3f | ||
|
|
2a3d4cad63 | ||
|
|
798ec003ce | ||
|
|
f22d02083c | ||
|
|
e2f5d16540 | ||
|
|
ed2cd9d8f3 | ||
|
|
0bd9bc7201 | ||
|
|
256cb2979b | ||
|
|
cf0aad7d6a | ||
|
|
1799f4e774 | ||
|
|
c09a9e5cc7 | ||
|
|
79aa7b3ace | ||
|
|
35db6e1c68 | ||
|
|
389549b80d | ||
|
|
a6a43a5ae0 | ||
|
|
7c6203dc5e | ||
|
|
36362e9695 | ||
|
|
d563017dfe | ||
|
|
4417770ba9 | ||
|
|
5c9dee2c94 | ||
|
|
6e5d8aac4d | ||
|
|
55ebf68636 | ||
|
|
89d11f2268 | ||
|
|
92021a344c | ||
|
|
c2ea1e1bcb | ||
|
|
42250427c5 | ||
|
|
5633887569 | ||
|
|
e1911e5c8b | ||
|
|
87011a97f9 | ||
|
|
a63a0daa5e | ||
|
|
d89888389d | ||
|
|
b15cbf9bcf | ||
|
|
74ff1db758 | ||
|
|
83e3b77ed7 | ||
|
|
6508cdd003 | ||
|
|
7646f2c21d | ||
|
|
6e122f0b2c | ||
|
|
3626254f48 | ||
|
|
98d2461529 | ||
|
|
bd43c05470 | ||
|
|
c99f5d405b | ||
|
|
3c2ae53b4c | ||
|
|
c95c254f3e | ||
|
|
2293e8a11a | ||
|
|
abcea1bb18 | ||
|
|
678f95e4f8 | ||
|
|
b96f65b62b | ||
|
|
dc27252352 | ||
|
|
03fad5ebe8 | ||
|
|
03fd5aa384 | ||
|
|
e78d681a6c | ||
|
|
354fbe701e | ||
|
|
98671b4cfe | ||
|
|
5b2b6d594c | ||
|
|
e1438f8e91 | ||
|
|
22369df357 | ||
|
|
624def4f38 | ||
|
|
34d4abc4f9 | ||
|
|
c0d2e3212f | ||
|
|
eca3d16e54 | ||
|
|
1b64fae912 | ||
|
|
3d99b449d3 | ||
|
|
c647999c61 | ||
|
|
411f5527d4 | ||
|
|
aaa4822fcb | ||
|
|
623b93a2b3 | ||
|
|
23d3e4a3c4 | ||
|
|
597d8b4bd4 | ||
|
|
9a0c1456e3 | ||
|
|
c5de3c1059 | ||
|
|
3a20e74f40 | ||
|
|
866c1165af | ||
|
|
4a6b9a1a71 | ||
|
|
3a59e724c9 | ||
|
|
4511502ca6 | ||
|
|
bb6d93630f | ||
|
|
74f2e5f1a4 | ||
|
|
70188d69b0 | ||
|
|
3a6714ab7f | ||
|
|
6c0b1a9ee2 | ||
|
|
8c84a7e7ec | ||
|
|
4d10d3113f | ||
|
|
0ee310ebdc | ||
|
|
ee21e4ecd3 | ||
|
|
e68ee43a1a | ||
|
|
104b334e40 | ||
|
|
0ac8848eae | ||
|
|
edbf3144d2 | ||
|
|
f7debcaa04 | ||
|
|
a280822c82 | ||
|
|
bb8ef38152 | ||
|
|
f2d1aa97ad | ||
|
|
1ee1a5d3d6 | ||
|
|
8118ea160b | ||
|
|
b032ebdfee | ||
|
|
e454f7e7a7 | ||
|
|
dd70e097cc | ||
|
|
5616be12be | ||
|
|
5b37cecd76 | ||
|
|
43bf860b22 | ||
|
|
fc153f6bcd | ||
|
|
3b2dee88e6 | ||
|
|
b8b6f94ba9 | ||
|
|
d2b2557542 | ||
|
|
f3f056c3b6 | ||
|
|
31eb658fea | ||
|
|
606a62d4c7 | ||
|
|
efe7b7483b | ||
|
|
ea14af2164 | ||
|
|
4cff5dd194 | ||
|
|
91dddca26e | ||
|
|
cf6a40fc12 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -13,6 +13,9 @@
|
||||
[submodule "soundtouch"]
|
||||
path = externals/soundtouch
|
||||
url = https://github.com/citra-emu/ext-soundtouch.git
|
||||
[submodule "libressl"]
|
||||
path = externals/libressl
|
||||
url = https://github.com/citra-emu/ext-libressl-portable.git
|
||||
[submodule "discord-rpc"]
|
||||
path = externals/discord-rpc
|
||||
url = https://github.com/discordapp/discord-rpc.git
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.11)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
@@ -13,7 +13,7 @@ project(yuzu)
|
||||
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
|
||||
|
||||
option(ENABLE_QT "Enable the Qt frontend" ON)
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF)
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
|
||||
|
||||
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
||||
|
||||
@@ -152,7 +152,6 @@ macro(yuzu_find_packages)
|
||||
"Boost 1.71 boost/1.72.0"
|
||||
"Catch2 2.11 catch2/2.11.0"
|
||||
"fmt 6.2 fmt/6.2.0"
|
||||
"OpenSSL 1.1 openssl/1.1.1f"
|
||||
# can't use until https://github.com/bincrafters/community/issues/1173
|
||||
#"libzip 1.5 libzip/1.5.2@bincrafters/stable"
|
||||
"lz4 1.8 lz4/1.9.2"
|
||||
@@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
|
||||
add_library(boost ALIAS Boost::boost)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET OpenSSL::SSL)
|
||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
|
||||
endif()
|
||||
if (NOT TARGET OpenSSL::Crypto)
|
||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
|
||||
endif()
|
||||
|
||||
if (TARGET sdl2::sdl2)
|
||||
# imported from the conan generated sdl2Config.cmake
|
||||
set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
|
||||
@@ -51,6 +51,8 @@ endif()
|
||||
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
|
||||
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
|
||||
set(HASH_FILES
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
4
dist/qt_themes/qdarkstyle/style.qss
vendored
4
dist/qt_themes/qdarkstyle/style.qss
vendored
@@ -673,10 +673,6 @@ QTabWidget::pane {
|
||||
border-bottom-left-radius: 2px;
|
||||
}
|
||||
|
||||
QTabWidget::tab-bar {
|
||||
overflow: visible;
|
||||
}
|
||||
|
||||
QTabBar {
|
||||
qproperty-drawBase: 0;
|
||||
border-radius: 3px;
|
||||
|
||||
26
externals/CMakeLists.txt
vendored
26
externals/CMakeLists.txt
vendored
@@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
|
||||
# xbyak
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
|
||||
# Catch
|
||||
add_library(catch-single-include INTERFACE)
|
||||
target_include_directories(catch-single-include INTERFACE catch/single_include)
|
||||
@@ -66,6 +73,15 @@ if (NOT LIBZIP_FOUND)
|
||||
endif()
|
||||
|
||||
if (ENABLE_WEB_SERVICE)
|
||||
# LibreSSL
|
||||
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||
target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
|
||||
get_directory_property(OPENSSL_LIBRARIES
|
||||
DIRECTORY libressl
|
||||
DEFINITION OPENSSL_LIBS)
|
||||
|
||||
# lurlparser
|
||||
add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
|
||||
|
||||
@@ -73,13 +89,5 @@ if (ENABLE_WEB_SERVICE)
|
||||
add_library(httplib INTERFACE)
|
||||
target_include_directories(httplib INTERFACE ./httplib)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET xbyak)
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
|
||||
endif()
|
||||
|
||||
1
externals/libressl
vendored
Submodule
1
externals/libressl
vendored
Submodule
Submodule externals/libressl added at 7d01cb01cb
2
externals/sirit
vendored
2
externals/sirit
vendored
Submodule externals/sirit updated: a62c5bbc10...eefca56afd
@@ -180,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
|
||||
|
||||
// Copy output header
|
||||
UpdateDataHeader response_data{worker_params};
|
||||
std::vector<u8> output_params(response_data.total_size);
|
||||
if (behavior_info.IsElapsedFrameCountSupported()) {
|
||||
response_data.frame_count = 0x10;
|
||||
response_data.total_size += 0x10;
|
||||
response_data.render_info = sizeof(RendererInfo);
|
||||
response_data.total_size += sizeof(RendererInfo);
|
||||
}
|
||||
|
||||
std::vector<u8> output_params(response_data.total_size);
|
||||
std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
|
||||
|
||||
// Copy output memory pool entries
|
||||
@@ -219,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
|
||||
return Audren::ERR_INVALID_PARAMETERS;
|
||||
}
|
||||
|
||||
if (behavior_info.IsElapsedFrameCountSupported()) {
|
||||
const std::size_t renderer_info_offset{
|
||||
sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
|
||||
response_data.effects_size + response_data.sinks_size +
|
||||
response_data.performance_manager_size + response_data.behavior_size};
|
||||
RendererInfo renderer_info{};
|
||||
renderer_info.elasped_frame_count = elapsed_frame_count;
|
||||
std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
|
||||
sizeof(RendererInfo));
|
||||
}
|
||||
|
||||
return MakeResult(output_params);
|
||||
}
|
||||
|
||||
@@ -447,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
|
||||
}
|
||||
}
|
||||
audio_out->QueueBuffer(stream, tag, std::move(buffer));
|
||||
elapsed_frame_count++;
|
||||
}
|
||||
|
||||
void AudioRenderer::ReleaseAndQueueBuffers() {
|
||||
|
||||
@@ -196,6 +196,12 @@ struct EffectOutStatus {
|
||||
};
|
||||
static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
|
||||
|
||||
struct RendererInfo {
|
||||
u64_le elasped_frame_count{};
|
||||
INSERT_PADDING_WORDS(2);
|
||||
};
|
||||
static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
|
||||
|
||||
struct UpdateDataHeader {
|
||||
UpdateDataHeader() {}
|
||||
|
||||
@@ -209,7 +215,7 @@ struct UpdateDataHeader {
|
||||
mixes_size = 0x0;
|
||||
sinks_size = config.sink_count * 0x20;
|
||||
performance_manager_size = 0x10;
|
||||
frame_count = 0;
|
||||
render_info = 0;
|
||||
total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
|
||||
effects_size + sinks_size + performance_manager_size;
|
||||
}
|
||||
@@ -223,8 +229,8 @@ struct UpdateDataHeader {
|
||||
u32_le mixes_size{};
|
||||
u32_le sinks_size{};
|
||||
u32_le performance_manager_size{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u32_le frame_count{};
|
||||
u32_le splitter_size{};
|
||||
u32_le render_info{};
|
||||
INSERT_PADDING_WORDS(4);
|
||||
u32_le total_size{};
|
||||
};
|
||||
@@ -258,6 +264,7 @@ private:
|
||||
std::unique_ptr<AudioOut> audio_out;
|
||||
StreamPtr stream;
|
||||
Core::Memory::Memory& memory;
|
||||
std::size_t elapsed_frame_count{};
|
||||
};
|
||||
|
||||
} // namespace AudioCore
|
||||
|
||||
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
DEPENDS
|
||||
# WARNING! It was too much work to try and make a common location for this list,
|
||||
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
inline int RegToIndex(const Xbyak::Reg& reg) {
|
||||
inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
|
||||
using Kind = Xbyak::Reg::Kind;
|
||||
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
||||
"RegSet only support GPRs and XMM registers.");
|
||||
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
|
||||
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
||||
}
|
||||
|
||||
inline Xbyak::Reg64 IndexToReg64(int reg_index) {
|
||||
inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
|
||||
ASSERT(reg_index < 16);
|
||||
return Xbyak::Reg64(reg_index);
|
||||
return Xbyak::Reg64(static_cast<int>(reg_index));
|
||||
}
|
||||
|
||||
inline Xbyak::Xmm IndexToXmm(int reg_index) {
|
||||
inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
|
||||
ASSERT(reg_index >= 16 && reg_index < 32);
|
||||
return Xbyak::Xmm(reg_index - 16);
|
||||
return Xbyak::Xmm(static_cast<int>(reg_index - 16));
|
||||
}
|
||||
|
||||
inline Xbyak::Reg IndexToReg(int reg_index) {
|
||||
inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
|
||||
if (reg_index < 16) {
|
||||
return IndexToReg64(reg_index);
|
||||
} else {
|
||||
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
|
||||
|
||||
#endif
|
||||
|
||||
inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||
size_t needed_frame_size, s32* out_subtraction,
|
||||
s32* out_xmm_offset) {
|
||||
struct ABIFrameInfo {
|
||||
s32 subtraction;
|
||||
s32 xmm_offset;
|
||||
};
|
||||
|
||||
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||
size_t needed_frame_size) {
|
||||
const auto count = (regs & ABI_ALL_GPRS).count();
|
||||
rsp_alignment -= count * 8;
|
||||
size_t subtraction = 0;
|
||||
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||
rsp_alignment -= subtraction;
|
||||
subtraction += rsp_alignment & 0xF;
|
||||
|
||||
*out_subtraction = (s32)subtraction;
|
||||
*out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
|
||||
return ABIFrameInfo{static_cast<s32>(subtraction),
|
||||
static_cast<s32>(subtraction - xmm_base_subtraction)};
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.push(IndexToReg64(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
if (subtraction != 0) {
|
||||
code.sub(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
for (int i = 0; i < regs.count(); i++) {
|
||||
if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
|
||||
code.push(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_info.subtraction != 0) {
|
||||
code.sub(code.rsp, frame_info.subtraction);
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
|
||||
xmm_offset += 0x10;
|
||||
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
|
||||
frame_info.xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
|
||||
xmm_offset += 0x10;
|
||||
code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
|
||||
frame_info.xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.add(code.rsp, subtraction);
|
||||
if (frame_info.subtraction != 0) {
|
||||
code.add(code.rsp, frame_info.subtraction);
|
||||
}
|
||||
|
||||
// GPRs need to be popped in reverse order
|
||||
for (int i = 15; i >= 0; i--) {
|
||||
if (regs[i]) {
|
||||
code.pop(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment,
|
||||
size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
for (std::size_t j = 0; j < regs.size(); ++j) {
|
||||
const std::size_t i = regs.size() - j - 1;
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.push(IndexToReg64(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.sub(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
return ABI_SHADOW_SPACE;
|
||||
}
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.add(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
// GPRs need to be popped in reverse order
|
||||
for (int i = 15; i >= 0; i--) {
|
||||
if (regs[i]) {
|
||||
code.pop(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,7 +50,8 @@ public:
|
||||
}
|
||||
|
||||
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
|
||||
UNIMPLEMENTED();
|
||||
UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
|
||||
MemoryReadCode(pc));
|
||||
}
|
||||
|
||||
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
||||
@@ -89,8 +90,6 @@ public:
|
||||
|
||||
ARM_Dynarmic_32& parent;
|
||||
std::size_t num_interpreted_instructions{};
|
||||
u64 tpidrro_el0{};
|
||||
u64 tpidr_el0{};
|
||||
};
|
||||
|
||||
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
|
||||
@@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
|
||||
config.callbacks = cb.get();
|
||||
// TODO(bunnei): Implement page table for 32-bit
|
||||
// config.page_table = &page_table.pointers;
|
||||
config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
|
||||
config.coprocessors[15] = cp15;
|
||||
config.define_unpredictable_behaviour = true;
|
||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||
}
|
||||
@@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::Step() {
|
||||
cb->InterpreterFallback(jit->Regs()[15], 1);
|
||||
jit->Step();
|
||||
}
|
||||
|
||||
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
|
||||
std::size_t core_index)
|
||||
: ARM_Interface{system},
|
||||
cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
|
||||
: ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
|
||||
cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
|
||||
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
|
||||
|
||||
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
|
||||
@@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
|
||||
}
|
||||
|
||||
u64 ARM_Dynarmic_32::GetTlsAddress() const {
|
||||
return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
||||
return cp15->uro;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
|
||||
CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
|
||||
cp15->uro = static_cast<u32>(address);
|
||||
}
|
||||
|
||||
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
|
||||
return cb->tpidr_el0;
|
||||
return cp15->uprw;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
|
||||
cb->tpidr_el0 = value;
|
||||
cp15->uprw = static_cast<u32>(value);
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
|
||||
|
||||
@@ -22,6 +22,7 @@ class Memory;
|
||||
namespace Core {
|
||||
|
||||
class DynarmicCallbacks32;
|
||||
class DynarmicCP15;
|
||||
class DynarmicExclusiveMonitor;
|
||||
class System;
|
||||
|
||||
@@ -66,12 +67,14 @@ private:
|
||||
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
|
||||
|
||||
friend class DynarmicCallbacks32;
|
||||
friend class DynarmicCP15;
|
||||
|
||||
std::unique_ptr<DynarmicCallbacks32> cb;
|
||||
JitCacheType jit_cache;
|
||||
std::shared_ptr<Dynarmic::A32::Jit> jit;
|
||||
std::shared_ptr<DynarmicCP15> cp15;
|
||||
std::size_t core_index;
|
||||
DynarmicExclusiveMonitor& exclusive_monitor;
|
||||
std::array<u32, 84> CP15_regs{};
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -2,79 +2,132 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_32.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
|
||||
using Callback = Dynarmic::A32::Coprocessor::Callback;
|
||||
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
|
||||
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Dynarmic::A32::CoprocReg> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
template <typename FormatContext>
|
||||
auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
|
||||
return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
|
||||
}
|
||||
};
|
||||
|
||||
namespace Core {
|
||||
|
||||
static u32 dummy_value;
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
|
||||
CoprocReg CRd, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
|
||||
CRm, opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
// TODO(merry): Privileged CP15 registers
|
||||
|
||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
|
||||
// CP15_FLUSH_PREFETCH_BUFFER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
|
||||
return &dummy_value;
|
||||
}
|
||||
|
||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
|
||||
switch (opc2) {
|
||||
case 4:
|
||||
// CP15_DATA_SYNC_BARRIER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
|
||||
return &dummy_value;
|
||||
case 5:
|
||||
// CP15_DATA_MEMORY_BARRIER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
|
||||
default:
|
||||
return {};
|
||||
return &dummy_value;
|
||||
}
|
||||
}
|
||||
|
||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
||||
// CP15_THREAD_UPRW
|
||||
return &uprw;
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||
opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
// TODO(merry): Privileged CP15 registers
|
||||
|
||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
|
||||
switch (opc2) {
|
||||
case 2:
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
||||
// CP15_THREAD_UPRW
|
||||
return &uprw;
|
||||
case 3:
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
||||
default:
|
||||
return {};
|
||||
// CP15_THREAD_URO
|
||||
return &uro;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||
opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||
if (!two && opc == 0 && CRm == CoprocReg::C14) {
|
||||
// CNTPCT
|
||||
const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
|
||||
[](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
|
||||
ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
|
||||
return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
|
||||
});
|
||||
return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) {
|
||||
if (option) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd, *option);
|
||||
} else {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) {
|
||||
if (option) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd, *option);
|
||||
} else {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -10,128 +10,15 @@
|
||||
#include <dynarmic/A32/coprocessor.h>
|
||||
#include "common/common_types.h"
|
||||
|
||||
enum class CP15Register {
|
||||
// c0 - Information registers
|
||||
CP15_MAIN_ID,
|
||||
CP15_CACHE_TYPE,
|
||||
CP15_TCM_STATUS,
|
||||
CP15_TLB_TYPE,
|
||||
CP15_CPU_ID,
|
||||
CP15_PROCESSOR_FEATURE_0,
|
||||
CP15_PROCESSOR_FEATURE_1,
|
||||
CP15_DEBUG_FEATURE_0,
|
||||
CP15_AUXILIARY_FEATURE_0,
|
||||
CP15_MEMORY_MODEL_FEATURE_0,
|
||||
CP15_MEMORY_MODEL_FEATURE_1,
|
||||
CP15_MEMORY_MODEL_FEATURE_2,
|
||||
CP15_MEMORY_MODEL_FEATURE_3,
|
||||
CP15_ISA_FEATURE_0,
|
||||
CP15_ISA_FEATURE_1,
|
||||
CP15_ISA_FEATURE_2,
|
||||
CP15_ISA_FEATURE_3,
|
||||
CP15_ISA_FEATURE_4,
|
||||
namespace Core {
|
||||
|
||||
// c1 - Control registers
|
||||
CP15_CONTROL,
|
||||
CP15_AUXILIARY_CONTROL,
|
||||
CP15_COPROCESSOR_ACCESS_CONTROL,
|
||||
|
||||
// c2 - Translation table registers
|
||||
CP15_TRANSLATION_BASE_TABLE_0,
|
||||
CP15_TRANSLATION_BASE_TABLE_1,
|
||||
CP15_TRANSLATION_BASE_CONTROL,
|
||||
CP15_DOMAIN_ACCESS_CONTROL,
|
||||
CP15_RESERVED,
|
||||
|
||||
// c5 - Fault status registers
|
||||
CP15_FAULT_STATUS,
|
||||
CP15_INSTR_FAULT_STATUS,
|
||||
CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
|
||||
CP15_INST_FSR,
|
||||
|
||||
// c6 - Fault Address registers
|
||||
CP15_FAULT_ADDRESS,
|
||||
CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
|
||||
CP15_WFAR,
|
||||
CP15_IFAR,
|
||||
|
||||
// c7 - Cache operation registers
|
||||
CP15_WAIT_FOR_INTERRUPT,
|
||||
CP15_PHYS_ADDRESS,
|
||||
CP15_INVALIDATE_INSTR_CACHE,
|
||||
CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
|
||||
CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
|
||||
CP15_FLUSH_PREFETCH_BUFFER,
|
||||
CP15_FLUSH_BRANCH_TARGET_CACHE,
|
||||
CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
|
||||
CP15_INVALIDATE_DATA_CACHE,
|
||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
||||
CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
|
||||
CP15_CLEAN_DATA_CACHE,
|
||||
CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
|
||||
CP15_DATA_SYNC_BARRIER,
|
||||
CP15_DATA_MEMORY_BARRIER,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
||||
|
||||
// c8 - TLB operations
|
||||
CP15_INVALIDATE_ITLB,
|
||||
CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
|
||||
CP15_INVALIDATE_DTLB,
|
||||
CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
|
||||
CP15_INVALIDATE_UTLB,
|
||||
CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
|
||||
|
||||
// c9 - Data cache lockdown register
|
||||
CP15_DATA_CACHE_LOCKDOWN,
|
||||
|
||||
// c10 - TLB/Memory map registers
|
||||
CP15_TLB_LOCKDOWN,
|
||||
CP15_PRIMARY_REGION_REMAP,
|
||||
CP15_NORMAL_REGION_REMAP,
|
||||
|
||||
// c13 - Thread related registers
|
||||
CP15_PID,
|
||||
CP15_CONTEXT_ID,
|
||||
CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
|
||||
CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
|
||||
CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
|
||||
|
||||
// c15 - Performance and TLB lockdown registers
|
||||
CP15_PERFORMANCE_MONITOR_CONTROL,
|
||||
CP15_CYCLE_COUNTER,
|
||||
CP15_COUNT_0,
|
||||
CP15_COUNT_1,
|
||||
CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
|
||||
CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
|
||||
CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
|
||||
CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
|
||||
CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
|
||||
CP15_TLB_DEBUG_CONTROL,
|
||||
|
||||
// Skyeye defined
|
||||
CP15_TLB_FAULT_ADDR,
|
||||
CP15_TLB_FAULT_STATUS,
|
||||
|
||||
// Not an actual register.
|
||||
// All registers should be defined above this.
|
||||
CP15_REGISTER_COUNT,
|
||||
};
|
||||
class ARM_Dynarmic_32;
|
||||
|
||||
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
|
||||
public:
|
||||
using CoprocReg = Dynarmic::A32::CoprocReg;
|
||||
|
||||
explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
|
||||
explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
|
||||
|
||||
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
|
||||
CoprocReg CRn, CoprocReg CRm,
|
||||
@@ -147,6 +34,9 @@ public:
|
||||
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) override;
|
||||
|
||||
private:
|
||||
u32* CP15{};
|
||||
ARM_Dynarmic_32& parent;
|
||||
u32 uprw;
|
||||
u32 uro;
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryAvailable() const {
|
||||
const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
|
||||
page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};
|
||||
page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
|
||||
main_thread_stack_size};
|
||||
|
||||
if (capacity < memory_usage_capacity) {
|
||||
return capacity;
|
||||
@@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
|
||||
}
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryUsed() const {
|
||||
return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
|
||||
return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
|
||||
GetSystemResourceSize();
|
||||
}
|
||||
|
||||
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
|
||||
|
||||
@@ -38,7 +38,7 @@ void ReadableEvent::Clear() {
|
||||
|
||||
ResultCode ReadableEvent::Reset() {
|
||||
if (!is_signaled) {
|
||||
LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
|
||||
LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
|
||||
GetObjectId(), GetTypeName(), GetName());
|
||||
return ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
|
||||
const std::size_t index{ResourceTypeToIndex(resource)};
|
||||
|
||||
s64 new_value = current[index] + amount;
|
||||
while (new_value > limit[index] && available[index] + amount <= limit[index]) {
|
||||
if (new_value > limit[index] && available[index] + amount <= limit[index]) {
|
||||
// TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
|
||||
new_value = current[index] + amount;
|
||||
|
||||
if (timeout >= 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_value <= limit[index]) {
|
||||
|
||||
@@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
|
||||
{40, nullptr, "AcquireXpadIdEventHandle"},
|
||||
{41, nullptr, "ReleaseXpadIdEventHandle"},
|
||||
{51, &Hid::ActivateXpad, "ActivateXpad"},
|
||||
{55, nullptr, "GetXpadIds"},
|
||||
{55, &Hid::GetXpadIDs, "GetXpadIds"},
|
||||
{56, nullptr, "ActivateJoyXpad"},
|
||||
{58, nullptr, "GetJoyXpadLifoHandle"},
|
||||
{59, nullptr, "GetJoyXpadIds"},
|
||||
@@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
}
|
||||
|
||||
void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||
|
||||
LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push(0);
|
||||
}
|
||||
|
||||
void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||
|
||||
@@ -86,6 +86,7 @@ public:
|
||||
private:
|
||||
void CreateAppletResource(Kernel::HLERequestContext& ctx);
|
||||
void ActivateXpad(Kernel::HLERequestContext& ctx);
|
||||
void GetXpadIDs(Kernel::HLERequestContext& ctx);
|
||||
void ActivateDebugPad(Kernel::HLERequestContext& ctx);
|
||||
void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
|
||||
void ActivateMouse(Kernel::HLERequestContext& ctx);
|
||||
|
||||
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
|
||||
case IoctlCommand::IocGetCharacteristicsCommand:
|
||||
return GetCharacteristics(input, output, output2, version);
|
||||
case IoctlCommand::IocGetTPCMasksCommand:
|
||||
return GetTPCMasks(input, output);
|
||||
return GetTPCMasks(input, output, output2, version);
|
||||
case IoctlCommand::IocGetActiveSlotMaskCommand:
|
||||
return GetActiveSlotMask(input, output);
|
||||
case IoctlCommand::IocZcullGetCtxSizeCommand:
|
||||
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||
u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
|
||||
std::vector<u8>& output2, IoctlVersion version) {
|
||||
IoctlGpuGetTpcMasksArgs params{};
|
||||
std::memcpy(¶ms, input.data(), input.size());
|
||||
LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
|
||||
params.mask_buf_addr);
|
||||
// TODO(ogniK): Confirm value on hardware
|
||||
if (params.mask_buf_size)
|
||||
params.tpc_mask_size = 4 * 1; // 4 * num_gpc
|
||||
else
|
||||
params.tpc_mask_size = 0;
|
||||
std::memcpy(output.data(), ¶ms, sizeof(params));
|
||||
LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
|
||||
if (params.mask_buffer_size != 0) {
|
||||
params.tcp_mask = 3;
|
||||
}
|
||||
|
||||
if (version == IoctlVersion::Version3) {
|
||||
std::memcpy(output.data(), input.data(), output.size());
|
||||
std::memcpy(output2.data(), ¶ms.tcp_mask, output2.size());
|
||||
} else {
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -92,16 +92,11 @@ private:
|
||||
"IoctlCharacteristics is incorrect size");
|
||||
|
||||
struct IoctlGpuGetTpcMasksArgs {
|
||||
/// [in] TPC mask buffer size reserved by userspace. Should be at least
|
||||
/// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
|
||||
/// [out] full kernel buffer size
|
||||
u32_le mask_buf_size;
|
||||
u32_le reserved;
|
||||
|
||||
/// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
|
||||
/// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
|
||||
u64_le mask_buf_addr;
|
||||
u64_le tpc_mask_size; // Nintendo add this?
|
||||
u32_le mask_buffer_size{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
u64_le mask_buffer_address{};
|
||||
u32_le tcp_mask{};
|
||||
INSERT_PADDING_WORDS(1);
|
||||
};
|
||||
static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
|
||||
"IoctlGpuGetTpcMasksArgs is incorrect size");
|
||||
@@ -166,7 +161,8 @@ private:
|
||||
|
||||
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
|
||||
std::vector<u8>& output2, IoctlVersion version);
|
||||
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
|
||||
IoctlVersion version);
|
||||
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
|
||||
|
||||
@@ -437,7 +437,7 @@ struct Values {
|
||||
bool renderer_debug;
|
||||
int vulkan_device;
|
||||
|
||||
float resolution_factor;
|
||||
u16 resolution_factor{1};
|
||||
int aspect_ratio;
|
||||
int max_anisotropy;
|
||||
bool use_frame_limit;
|
||||
@@ -474,6 +474,7 @@ struct Values {
|
||||
bool reporting_services;
|
||||
bool quest_flag;
|
||||
bool disable_cpu_opt;
|
||||
bool disable_macro_jit;
|
||||
|
||||
// BCAT
|
||||
std::string bcat_backend;
|
||||
|
||||
@@ -25,6 +25,12 @@ add_library(video_core STATIC
|
||||
engines/shader_bytecode.h
|
||||
engines/shader_header.h
|
||||
engines/shader_type.h
|
||||
macro/macro.cpp
|
||||
macro/macro.h
|
||||
macro/macro_interpreter.cpp
|
||||
macro/macro_interpreter.h
|
||||
macro/macro_jit_x64.cpp
|
||||
macro/macro_jit_x64.h
|
||||
fence_manager.h
|
||||
gpu.cpp
|
||||
gpu.h
|
||||
@@ -36,8 +42,6 @@ add_library(video_core STATIC
|
||||
gpu_thread.h
|
||||
guest_driver.cpp
|
||||
guest_driver.h
|
||||
macro_interpreter.cpp
|
||||
macro_interpreter.h
|
||||
memory_manager.cpp
|
||||
memory_manager.h
|
||||
morton.cpp
|
||||
@@ -45,11 +49,11 @@ add_library(video_core STATIC
|
||||
query_cache.h
|
||||
rasterizer_accelerated.cpp
|
||||
rasterizer_accelerated.h
|
||||
rasterizer_cache.cpp
|
||||
rasterizer_cache.h
|
||||
rasterizer_interface.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
renderer_opengl/gl_arb_decompiler.cpp
|
||||
renderer_opengl/gl_arb_decompiler.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_device.cpp
|
||||
@@ -89,6 +93,7 @@ add_library(video_core STATIC
|
||||
renderer_opengl/utils.h
|
||||
sampler_cache.cpp
|
||||
sampler_cache.h
|
||||
shader_cache.h
|
||||
shader/decode/arithmetic.cpp
|
||||
shader/decode/arithmetic_immediate.cpp
|
||||
shader/decode/bfe.cpp
|
||||
|
||||
@@ -15,48 +15,47 @@ namespace VideoCommon {
|
||||
|
||||
class BufferBlock {
|
||||
public:
|
||||
bool Overlaps(const VAddr start, const VAddr end) const {
|
||||
bool Overlaps(VAddr start, VAddr end) const {
|
||||
return (cpu_addr < end) && (cpu_addr_end > start);
|
||||
}
|
||||
|
||||
bool IsInside(const VAddr other_start, const VAddr other_end) const {
|
||||
bool IsInside(VAddr other_start, VAddr other_end) const {
|
||||
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
||||
}
|
||||
|
||||
std::size_t GetOffset(const VAddr in_addr) {
|
||||
std::size_t Offset(VAddr in_addr) const {
|
||||
return static_cast<std::size_t>(in_addr - cpu_addr);
|
||||
}
|
||||
|
||||
VAddr GetCpuAddr() const {
|
||||
VAddr CpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
VAddr GetCpuAddrEnd() const {
|
||||
VAddr CpuAddrEnd() const {
|
||||
return cpu_addr_end;
|
||||
}
|
||||
|
||||
void SetCpuAddr(const VAddr new_addr) {
|
||||
void SetCpuAddr(VAddr new_addr) {
|
||||
cpu_addr = new_addr;
|
||||
cpu_addr_end = new_addr + size;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
std::size_t Size() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
u64 Epoch() const {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
void SetEpoch(u64 new_epoch) {
|
||||
epoch = new_epoch;
|
||||
}
|
||||
|
||||
u64 GetEpoch() {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
|
||||
SetCpuAddr(cpu_addr);
|
||||
explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
|
||||
SetCpuAddr(cpu_addr_);
|
||||
}
|
||||
~BufferBlock() = default;
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
|
||||
@@ -30,12 +30,16 @@
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
|
||||
template <typename Buffer, typename BufferType, typename StreamBuffer>
|
||||
class BufferCache {
|
||||
using IntervalSet = boost::icl::interval_set<VAddr>;
|
||||
using IntervalType = typename IntervalSet::interval_type;
|
||||
using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
|
||||
|
||||
static constexpr u64 WRITE_PAGE_BIT = 11;
|
||||
static constexpr u64 BLOCK_PAGE_BITS = 21;
|
||||
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
||||
|
||||
public:
|
||||
using BufferInfo = std::pair<BufferType, u64>;
|
||||
|
||||
@@ -56,29 +60,33 @@ public:
|
||||
if (use_fast_cbuf || size < max_stream_size) {
|
||||
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
|
||||
if (use_fast_cbuf) {
|
||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
return ConstBufferUpload(host_ptr, size);
|
||||
u8* dest;
|
||||
if (is_granular) {
|
||||
dest = memory_manager.GetPointer(gpu_addr);
|
||||
} else {
|
||||
staging_buffer.resize(size);
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||
return ConstBufferUpload(staging_buffer.data(), size);
|
||||
dest = staging_buffer.data();
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
|
||||
}
|
||||
return ConstBufferUpload(dest, size);
|
||||
}
|
||||
if (is_granular) {
|
||||
u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
|
||||
std::memcpy(dest, host_ptr, size);
|
||||
});
|
||||
} else {
|
||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
return StreamBufferUpload(host_ptr, size, alignment);
|
||||
} else {
|
||||
staging_buffer.resize(size);
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||
return StreamBufferUpload(staging_buffer.data(), size, alignment);
|
||||
}
|
||||
return StreamBufferUpload(
|
||||
size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OwnerBuffer block = GetBlock(cpu_addr, size);
|
||||
Buffer* const block = GetBlock(cpu_addr, size);
|
||||
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||
if (!map) {
|
||||
return {GetEmptyBuffer(size), 0};
|
||||
@@ -94,41 +102,49 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
|
||||
return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
|
||||
}
|
||||
|
||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
|
||||
std::size_t alignment = 4) {
|
||||
std::lock_guard lock{mutex};
|
||||
return StreamBufferUpload(raw_pointer, size, alignment);
|
||||
return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
|
||||
std::memcpy(dest, raw_pointer, size);
|
||||
});
|
||||
}
|
||||
|
||||
void Map(std::size_t max_size) {
|
||||
/// Prepares the buffer cache for data uploading
|
||||
/// @param max_size Maximum number of bytes that will be uploaded
|
||||
/// @return True when a stream buffer invalidation was required, false otherwise
|
||||
bool Map(std::size_t max_size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
bool invalidated;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
|
||||
buffer_offset = buffer_offset_base;
|
||||
|
||||
return invalidated;
|
||||
}
|
||||
|
||||
/// Finishes the upload stream, returns true on bindings invalidation.
|
||||
bool Unmap() {
|
||||
/// Finishes the upload stream
|
||||
void Unmap() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
||||
return std::exchange(invalidated, false);
|
||||
}
|
||||
|
||||
/// Function called at the end of each frame, inteded for deferred operations
|
||||
void TickFrame() {
|
||||
++epoch;
|
||||
|
||||
while (!pending_destruction.empty()) {
|
||||
// Delay at least 4 frames before destruction.
|
||||
// This is due to triple buffering happening on some drivers.
|
||||
static constexpr u64 epochs_to_destroy = 5;
|
||||
if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
|
||||
if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
|
||||
break;
|
||||
}
|
||||
pending_destruction.pop_front();
|
||||
pending_destruction.pop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,23 +259,21 @@ public:
|
||||
|
||||
protected:
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
|
||||
stream_buffer_handle{this->stream_buffer->GetHandle()} {}
|
||||
std::unique_ptr<StreamBuffer> stream_buffer_)
|
||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
|
||||
stream_buffer_handle{stream_buffer->Handle()} {}
|
||||
|
||||
~BufferCache() = default;
|
||||
|
||||
virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
|
||||
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||
|
||||
virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||
|
||||
virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) = 0;
|
||||
|
||||
virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
u8* data) = 0;
|
||||
|
||||
virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
|
||||
virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) = 0;
|
||||
|
||||
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
||||
@@ -315,7 +329,7 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
std::size_t size) {
|
||||
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
||||
if (overlaps.empty()) {
|
||||
@@ -323,11 +337,11 @@ private:
|
||||
const VAddr cpu_addr_end = cpu_addr + size;
|
||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
|
||||
UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr);
|
||||
} else {
|
||||
staging_buffer.resize(size);
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
|
||||
UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data());
|
||||
}
|
||||
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
||||
}
|
||||
@@ -370,7 +384,7 @@ private:
|
||||
return map;
|
||||
}
|
||||
|
||||
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
|
||||
void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
|
||||
const VectorMapInterval& overlaps) {
|
||||
const IntervalType base_interval{start, end};
|
||||
IntervalSet interval_set{};
|
||||
@@ -380,13 +394,13 @@ private:
|
||||
interval_set.subtract(subtract);
|
||||
}
|
||||
for (auto& interval : interval_set) {
|
||||
std::size_t size = interval.upper() - interval.lower();
|
||||
if (size > 0) {
|
||||
staging_buffer.resize(size);
|
||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||
UploadBlockData(block, block->GetOffset(interval.lower()), size,
|
||||
staging_buffer.data());
|
||||
const std::size_t size = interval.upper() - interval.lower();
|
||||
if (size == 0) {
|
||||
continue;
|
||||
}
|
||||
staging_buffer.resize(size);
|
||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||
UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -416,19 +430,23 @@ private:
|
||||
}
|
||||
|
||||
void FlushMap(MapInterval* map) {
|
||||
const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
|
||||
ASSERT_OR_EXECUTE(it != blocks.end(), return;);
|
||||
|
||||
std::shared_ptr<Buffer> block = it->second;
|
||||
|
||||
const std::size_t size = map->end - map->start;
|
||||
OwnerBuffer block = blocks[map->start >> block_page_bits];
|
||||
staging_buffer.resize(size);
|
||||
DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
|
||||
DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data());
|
||||
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
||||
map->MarkAsModified(false, 0);
|
||||
}
|
||||
|
||||
BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
|
||||
std::size_t alignment) {
|
||||
template <typename Callable>
|
||||
BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
|
||||
AlignBuffer(alignment);
|
||||
const std::size_t uploaded_offset = buffer_offset;
|
||||
std::memcpy(buffer_ptr, raw_pointer, size);
|
||||
callable(buffer_ptr);
|
||||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
@@ -442,97 +460,89 @@ private:
|
||||
buffer_offset = offset_aligned;
|
||||
}
|
||||
|
||||
OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
|
||||
const std::size_t old_size = buffer->GetSize();
|
||||
const std::size_t new_size = old_size + block_page_size;
|
||||
const VAddr cpu_addr = buffer->GetCpuAddr();
|
||||
OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
|
||||
CopyBlock(buffer, new_buffer, 0, 0, old_size);
|
||||
buffer->SetEpoch(epoch);
|
||||
pending_destruction.push_back(buffer);
|
||||
std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
|
||||
const std::size_t old_size = buffer->Size();
|
||||
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
|
||||
const VAddr cpu_addr = buffer->CpuAddr();
|
||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
|
||||
CopyBlock(*buffer, *new_buffer, 0, 0, old_size);
|
||||
QueueDestruction(std::move(buffer));
|
||||
|
||||
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
||||
u64 page_start = cpu_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
blocks[page_start] = new_buffer;
|
||||
++page_start;
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
blocks.insert_or_assign(page_start, new_buffer);
|
||||
}
|
||||
|
||||
return new_buffer;
|
||||
}
|
||||
|
||||
OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
|
||||
const std::size_t size_1 = first->GetSize();
|
||||
const std::size_t size_2 = second->GetSize();
|
||||
const VAddr first_addr = first->GetCpuAddr();
|
||||
const VAddr second_addr = second->GetCpuAddr();
|
||||
std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
|
||||
std::shared_ptr<Buffer> second) {
|
||||
const std::size_t size_1 = first->Size();
|
||||
const std::size_t size_2 = second->Size();
|
||||
const VAddr first_addr = first->CpuAddr();
|
||||
const VAddr second_addr = second->CpuAddr();
|
||||
const VAddr new_addr = std::min(first_addr, second_addr);
|
||||
const std::size_t new_size = size_1 + size_2;
|
||||
OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
|
||||
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
|
||||
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
|
||||
first->SetEpoch(epoch);
|
||||
second->SetEpoch(epoch);
|
||||
pending_destruction.push_back(first);
|
||||
pending_destruction.push_back(second);
|
||||
|
||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
|
||||
CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1);
|
||||
CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2);
|
||||
QueueDestruction(std::move(first));
|
||||
QueueDestruction(std::move(second));
|
||||
|
||||
const VAddr cpu_addr_end = new_addr + new_size - 1;
|
||||
u64 page_start = new_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
blocks[page_start] = new_buffer;
|
||||
++page_start;
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
blocks.insert_or_assign(page_start, new_buffer);
|
||||
}
|
||||
return new_buffer;
|
||||
}
|
||||
|
||||
OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
|
||||
OwnerBuffer found;
|
||||
Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
|
||||
std::shared_ptr<Buffer> found;
|
||||
|
||||
const VAddr cpu_addr_end = cpu_addr + size - 1;
|
||||
u64 page_start = cpu_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
auto it = blocks.find(page_start);
|
||||
if (it == blocks.end()) {
|
||||
if (found) {
|
||||
found = EnlargeBlock(found);
|
||||
} else {
|
||||
const VAddr start_addr = (page_start << block_page_bits);
|
||||
found = CreateBlock(start_addr, block_page_size);
|
||||
blocks[page_start] = found;
|
||||
}
|
||||
} else {
|
||||
if (found) {
|
||||
if (found == it->second) {
|
||||
++page_start;
|
||||
continue;
|
||||
}
|
||||
found = MergeBlocks(found, it->second);
|
||||
} else {
|
||||
found = it->second;
|
||||
continue;
|
||||
}
|
||||
const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
|
||||
found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
|
||||
blocks.insert_or_assign(page_start, found);
|
||||
continue;
|
||||
}
|
||||
if (!found) {
|
||||
found = it->second;
|
||||
continue;
|
||||
}
|
||||
if (found != it->second) {
|
||||
found = MergeBlocks(std::move(found), it->second);
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
return found;
|
||||
return found.get();
|
||||
}
|
||||
|
||||
void MarkRegionAsWritten(const VAddr start, const VAddr end) {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
void MarkRegionAsWritten(VAddr start, VAddr end) {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
auto it = written_pages.find(page_start);
|
||||
if (it != written_pages.end()) {
|
||||
it->second = it->second + 1;
|
||||
} else {
|
||||
written_pages[page_start] = 1;
|
||||
written_pages.insert_or_assign(page_start, 1);
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
}
|
||||
|
||||
void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
void UnmarkRegionAsWritten(VAddr start, VAddr end) {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
auto it = written_pages.find(page_start);
|
||||
if (it != written_pages.end()) {
|
||||
if (it->second > 1) {
|
||||
@@ -541,22 +551,24 @@ private:
|
||||
written_pages.erase(it);
|
||||
}
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsRegionWritten(const VAddr start, const VAddr end) const {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
bool IsRegionWritten(VAddr start, VAddr end) const {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
if (written_pages.count(page_start) > 0) {
|
||||
return true;
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void QueueDestruction(std::shared_ptr<Buffer> buffer) {
|
||||
buffer->SetEpoch(epoch);
|
||||
pending_destruction.push(std::move(buffer));
|
||||
}
|
||||
|
||||
void MarkForAsyncFlush(MapInterval* map) {
|
||||
if (!uncommitted_flushes) {
|
||||
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
|
||||
@@ -568,9 +580,7 @@ private:
|
||||
Core::System& system;
|
||||
|
||||
std::unique_ptr<StreamBuffer> stream_buffer;
|
||||
BufferType stream_buffer_handle{};
|
||||
|
||||
bool invalidated = false;
|
||||
BufferType stream_buffer_handle;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
u64 buffer_offset = 0;
|
||||
@@ -580,18 +590,15 @@ private:
|
||||
boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
|
||||
mapped_addresses;
|
||||
|
||||
static constexpr u64 write_page_bit = 11;
|
||||
std::unordered_map<u64, u32> written_pages;
|
||||
std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
|
||||
|
||||
static constexpr u64 block_page_bits = 21;
|
||||
static constexpr u64 block_page_size = 1ULL << block_page_bits;
|
||||
std::unordered_map<u64, OwnerBuffer> blocks;
|
||||
|
||||
std::list<OwnerBuffer> pending_destruction;
|
||||
std::queue<std::shared_ptr<Buffer>> pending_destruction;
|
||||
u64 epoch = 0;
|
||||
u64 modified_ticks = 0;
|
||||
|
||||
std::vector<u8> staging_buffer;
|
||||
|
||||
std::list<MapInterval*> marked_for_unregister;
|
||||
|
||||
std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
|
||||
|
||||
@@ -93,6 +93,7 @@ public:
|
||||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||
virtual u32 GetBoundBuffer() const = 0;
|
||||
|
||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||
|
||||
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
||||
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
|
||||
@@ -219,6 +219,8 @@ public:
|
||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||
|
||||
u32 GetBoundBuffer() const override {
|
||||
return regs.tex_cb_index;
|
||||
}
|
||||
|
||||
@@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
|
||||
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager)
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
|
||||
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
|
||||
macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
|
||||
dirty.flags.flip();
|
||||
|
||||
InitializeRegisterDefaults();
|
||||
}
|
||||
|
||||
@@ -106,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
regs.rasterize_enable = 1;
|
||||
regs.rt_separate_frag_data = 1;
|
||||
regs.framebuffer_srgb = 1;
|
||||
regs.line_width_aliased = 1.0f;
|
||||
regs.line_width_smooth = 1.0f;
|
||||
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
|
||||
regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
|
||||
regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
|
||||
|
||||
shadow_state = regs;
|
||||
|
||||
@@ -116,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
|
||||
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
|
||||
}
|
||||
|
||||
void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
|
||||
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
|
||||
// Reset the current macro.
|
||||
executing_macro = 0;
|
||||
|
||||
@@ -125,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
|
||||
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
|
||||
|
||||
// Execute the current macro.
|
||||
macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
|
||||
macro_engine->Execute(macro_positions[entry], parameters);
|
||||
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
|
||||
FlushMMEInlineDraw();
|
||||
}
|
||||
@@ -161,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (is_last_call) {
|
||||
CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
}
|
||||
return;
|
||||
@@ -197,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(macros.data): {
|
||||
ProcessMacroUpload(arg);
|
||||
macro_engine->AddCode(regs.macros.upload_address, arg);
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(macros.bind): {
|
||||
@@ -306,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
|
||||
// Call the macro when there are no more parameters in the command buffer
|
||||
if (amount == methods_pending) {
|
||||
CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
|
||||
CallMacroMethod(executing_macro, macro_params);
|
||||
macro_params.clear();
|
||||
}
|
||||
return;
|
||||
@@ -420,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
|
||||
"upload_address exceeded macro_memory size!");
|
||||
macro_memory[regs.macros.upload_address++] = data;
|
||||
macro_engine->AddCode(regs.macros.upload_address++, data);
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||
@@ -739,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
||||
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
||||
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/macro_interpreter.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Core {
|
||||
@@ -598,6 +598,7 @@ public:
|
||||
BitField<4, 3, u32> block_height;
|
||||
BitField<8, 3, u32> block_depth;
|
||||
BitField<12, 1, InvMemoryLayout> type;
|
||||
BitField<16, 1, u32> is_3d;
|
||||
} memory_layout;
|
||||
union {
|
||||
BitField<0, 16, u32> layers;
|
||||
@@ -1403,6 +1404,8 @@ public:
|
||||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||
|
||||
u32 GetBoundBuffer() const override {
|
||||
return regs.tex_cb_index;
|
||||
}
|
||||
@@ -1411,15 +1414,6 @@ public:
|
||||
|
||||
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
||||
|
||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||
/// we've seen used.
|
||||
using MacroMemory = std::array<u32, 0x40000>;
|
||||
|
||||
/// Gets a reference to macro memory.
|
||||
const MacroMemory& GetMacroMemory() const {
|
||||
return macro_memory;
|
||||
}
|
||||
|
||||
bool ShouldExecute() const {
|
||||
return execute_on;
|
||||
}
|
||||
@@ -1468,16 +1462,13 @@ private:
|
||||
|
||||
std::array<bool, Regs::NUM_REGS> mme_inline{};
|
||||
|
||||
/// Memory for macro code
|
||||
MacroMemory macro_memory;
|
||||
|
||||
/// Macro method that is currently being executed / being fed parameters.
|
||||
u32 executing_macro = 0;
|
||||
/// Parameters that have been submitted to the macro call so far.
|
||||
std::vector<u32> macro_params;
|
||||
|
||||
/// Interpreter for the macro codes uploaded to the GPU.
|
||||
MacroInterpreter macro_interpreter;
|
||||
std::unique_ptr<MacroEngine> macro_engine;
|
||||
|
||||
static constexpr u32 null_cb_data = 0xFFFFFFFF;
|
||||
struct {
|
||||
@@ -1506,7 +1497,7 @@ private:
|
||||
* @param num_parameters Number of arguments
|
||||
* @param parameters Arguments to the method call
|
||||
*/
|
||||
void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
|
||||
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
|
||||
|
||||
/// Handles writes to the macro uploading register.
|
||||
void ProcessMacroUpload(u32 data);
|
||||
|
||||
45
src/video_core/macro/macro.cpp
Normal file
45
src/video_core/macro/macro.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
#include "video_core/macro/macro_jit_x64.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
void MacroEngine::AddCode(u32 method, u32 data) {
|
||||
uploaded_macro_code[method].push_back(data);
|
||||
}
|
||||
|
||||
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||
auto compiled_macro = macro_cache.find(method);
|
||||
if (compiled_macro != macro_cache.end()) {
|
||||
compiled_macro->second->Execute(parameters, method);
|
||||
} else {
|
||||
// Macro not compiled, check if it's uploaded and if so, compile it
|
||||
auto macro_code = uploaded_macro_code.find(method);
|
||||
if (macro_code == uploaded_macro_code.end()) {
|
||||
UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
|
||||
return;
|
||||
}
|
||||
macro_cache[method] = Compile(macro_code->second);
|
||||
macro_cache[method]->Execute(parameters, method);
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
|
||||
if (Settings::values.disable_macro_jit) {
|
||||
return std::make_unique<MacroInterpreter>(maxwell3d);
|
||||
}
|
||||
#ifdef ARCHITECTURE_x86_64
|
||||
return std::make_unique<MacroJITx64>(maxwell3d);
|
||||
#else
|
||||
return std::make_unique<MacroInterpreter>(maxwell3d);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
128
src/video_core/macro/macro.h
Normal file
128
src/video_core/macro/macro.h
Normal file
@@ -0,0 +1,128 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
namespace Macro {
|
||||
constexpr std::size_t NUM_MACRO_REGISTERS = 8;
|
||||
enum class Operation : u32 {
|
||||
ALU = 0,
|
||||
AddImmediate = 1,
|
||||
ExtractInsert = 2,
|
||||
ExtractShiftLeftImmediate = 3,
|
||||
ExtractShiftLeftRegister = 4,
|
||||
Read = 5,
|
||||
Unused = 6, // This operation doesn't seem to be a valid encoding.
|
||||
Branch = 7,
|
||||
};
|
||||
|
||||
enum class ALUOperation : u32 {
|
||||
Add = 0,
|
||||
AddWithCarry = 1,
|
||||
Subtract = 2,
|
||||
SubtractWithBorrow = 3,
|
||||
// Operations 4-7 don't seem to be valid encodings.
|
||||
Xor = 8,
|
||||
Or = 9,
|
||||
And = 10,
|
||||
AndNot = 11,
|
||||
Nand = 12
|
||||
};
|
||||
|
||||
enum class ResultOperation : u32 {
|
||||
IgnoreAndFetch = 0,
|
||||
Move = 1,
|
||||
MoveAndSetMethod = 2,
|
||||
FetchAndSend = 3,
|
||||
MoveAndSend = 4,
|
||||
FetchAndSetMethod = 5,
|
||||
MoveAndSetMethodFetchAndSend = 6,
|
||||
MoveAndSetMethodSend = 7
|
||||
};
|
||||
|
||||
enum class BranchCondition : u32 {
|
||||
Zero = 0,
|
||||
NotZero = 1,
|
||||
};
|
||||
|
||||
union Opcode {
|
||||
u32 raw;
|
||||
BitField<0, 3, Operation> operation;
|
||||
BitField<4, 3, ResultOperation> result_operation;
|
||||
BitField<4, 1, BranchCondition> branch_condition;
|
||||
// If set on a branch, then the branch doesn't have a delay slot.
|
||||
BitField<5, 1, u32> branch_annul;
|
||||
BitField<7, 1, u32> is_exit;
|
||||
BitField<8, 3, u32> dst;
|
||||
BitField<11, 3, u32> src_a;
|
||||
BitField<14, 3, u32> src_b;
|
||||
// The signed immediate overlaps the second source operand and the alu operation.
|
||||
BitField<14, 18, s32> immediate;
|
||||
|
||||
BitField<17, 5, ALUOperation> alu_operation;
|
||||
|
||||
// Bitfield instructions data
|
||||
BitField<17, 5, u32> bf_src_bit;
|
||||
BitField<22, 5, u32> bf_size;
|
||||
BitField<27, 5, u32> bf_dst_bit;
|
||||
|
||||
u32 GetBitfieldMask() const {
|
||||
return (1 << bf_size) - 1;
|
||||
}
|
||||
|
||||
s32 GetBranchTarget() const {
|
||||
return static_cast<s32>(immediate * sizeof(u32));
|
||||
}
|
||||
};
|
||||
|
||||
union MethodAddress {
|
||||
u32 raw;
|
||||
BitField<0, 12, u32> address;
|
||||
BitField<12, 6, u32> increment;
|
||||
};
|
||||
|
||||
} // namespace Macro
|
||||
|
||||
class CachedMacro {
|
||||
public:
|
||||
virtual ~CachedMacro() = default;
|
||||
/**
|
||||
* Executes the macro code with the specified input parameters.
|
||||
* @param code The macro byte code to execute
|
||||
* @param parameters The parameters of the macro
|
||||
*/
|
||||
virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
|
||||
};
|
||||
|
||||
class MacroEngine {
|
||||
public:
|
||||
virtual ~MacroEngine() = default;
|
||||
|
||||
// Store the uploaded macro code to compile them when they're called.
|
||||
void AddCode(u32 method, u32 data);
|
||||
|
||||
// Compiles the macro if its not in the cache, and executes the compiled macro
|
||||
void Execute(u32 method, const std::vector<u32>& parameters);
|
||||
|
||||
protected:
|
||||
virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
|
||||
|
||||
private:
|
||||
std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
|
||||
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
||||
};
|
||||
|
||||
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
} // namespace Tegra
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
@@ -6,109 +6,46 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro_interpreter.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
|
||||
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
|
||||
|
||||
namespace Tegra {
|
||||
namespace {
|
||||
enum class Operation : u32 {
|
||||
ALU = 0,
|
||||
AddImmediate = 1,
|
||||
ExtractInsert = 2,
|
||||
ExtractShiftLeftImmediate = 3,
|
||||
ExtractShiftLeftRegister = 4,
|
||||
Read = 5,
|
||||
Unused = 6, // This operation doesn't seem to be a valid encoding.
|
||||
Branch = 7,
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
enum class MacroInterpreter::ALUOperation : u32 {
|
||||
Add = 0,
|
||||
AddWithCarry = 1,
|
||||
Subtract = 2,
|
||||
SubtractWithBorrow = 3,
|
||||
// Operations 4-7 don't seem to be valid encodings.
|
||||
Xor = 8,
|
||||
Or = 9,
|
||||
And = 10,
|
||||
AndNot = 11,
|
||||
Nand = 12
|
||||
};
|
||||
|
||||
enum class MacroInterpreter::ResultOperation : u32 {
|
||||
IgnoreAndFetch = 0,
|
||||
Move = 1,
|
||||
MoveAndSetMethod = 2,
|
||||
FetchAndSend = 3,
|
||||
MoveAndSend = 4,
|
||||
FetchAndSetMethod = 5,
|
||||
MoveAndSetMethodFetchAndSend = 6,
|
||||
MoveAndSetMethodSend = 7
|
||||
};
|
||||
|
||||
enum class MacroInterpreter::BranchCondition : u32 {
|
||||
Zero = 0,
|
||||
NotZero = 1,
|
||||
};
|
||||
|
||||
union MacroInterpreter::Opcode {
|
||||
u32 raw;
|
||||
BitField<0, 3, Operation> operation;
|
||||
BitField<4, 3, ResultOperation> result_operation;
|
||||
BitField<4, 1, BranchCondition> branch_condition;
|
||||
// If set on a branch, then the branch doesn't have a delay slot.
|
||||
BitField<5, 1, u32> branch_annul;
|
||||
BitField<7, 1, u32> is_exit;
|
||||
BitField<8, 3, u32> dst;
|
||||
BitField<11, 3, u32> src_a;
|
||||
BitField<14, 3, u32> src_b;
|
||||
// The signed immediate overlaps the second source operand and the alu operation.
|
||||
BitField<14, 18, s32> immediate;
|
||||
|
||||
BitField<17, 5, ALUOperation> alu_operation;
|
||||
|
||||
// Bitfield instructions data
|
||||
BitField<17, 5, u32> bf_src_bit;
|
||||
BitField<22, 5, u32> bf_size;
|
||||
BitField<27, 5, u32> bf_dst_bit;
|
||||
|
||||
u32 GetBitfieldMask() const {
|
||||
return (1 << bf_size) - 1;
|
||||
}
|
||||
|
||||
s32 GetBranchTarget() const {
|
||||
return static_cast<s32>(immediate * sizeof(u32));
|
||||
}
|
||||
};
|
||||
|
||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||
|
||||
void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) {
|
||||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
|
||||
return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
|
||||
}
|
||||
|
||||
MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
|
||||
const std::vector<u32>& code)
|
||||
: maxwell3d(maxwell3d), code(code) {}
|
||||
|
||||
void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||
MICROPROFILE_SCOPE(MacroInterp);
|
||||
Reset();
|
||||
|
||||
registers[1] = parameters[0];
|
||||
num_parameters = parameters.size();
|
||||
|
||||
if (num_parameters > parameters_capacity) {
|
||||
parameters_capacity = num_parameters;
|
||||
this->parameters = std::make_unique<u32[]>(num_parameters);
|
||||
}
|
||||
std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32));
|
||||
std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
|
||||
this->num_parameters = num_parameters;
|
||||
|
||||
// Execute the code until we hit an exit condition.
|
||||
bool keep_executing = true;
|
||||
while (keep_executing) {
|
||||
keep_executing = Step(offset, false);
|
||||
keep_executing = Step(false);
|
||||
}
|
||||
|
||||
// Assert the the macro used all the input parameters
|
||||
ASSERT(next_parameter_index == num_parameters);
|
||||
}
|
||||
|
||||
void MacroInterpreter::Reset() {
|
||||
void MacroInterpreterImpl::Reset() {
|
||||
registers = {};
|
||||
pc = 0;
|
||||
delayed_pc = {};
|
||||
@@ -120,10 +57,10 @@ void MacroInterpreter::Reset() {
|
||||
carry_flag = false;
|
||||
}
|
||||
|
||||
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
bool MacroInterpreterImpl::Step(bool is_delay_slot) {
|
||||
u32 base_address = pc;
|
||||
|
||||
Opcode opcode = GetOpcode(offset);
|
||||
Macro::Opcode opcode = GetOpcode();
|
||||
pc += 4;
|
||||
|
||||
// Update the program counter if we were delayed
|
||||
@@ -134,18 +71,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
}
|
||||
|
||||
switch (opcode.operation) {
|
||||
case Operation::ALU: {
|
||||
case Macro::Operation::ALU: {
|
||||
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
|
||||
GetRegister(opcode.src_b));
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Operation::AddImmediate: {
|
||||
case Macro::Operation::AddImmediate: {
|
||||
ProcessResult(opcode.result_operation, opcode.dst,
|
||||
GetRegister(opcode.src_a) + opcode.immediate);
|
||||
break;
|
||||
}
|
||||
case Operation::ExtractInsert: {
|
||||
case Macro::Operation::ExtractInsert: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
@@ -155,7 +92,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
ProcessResult(opcode.result_operation, opcode.dst, dst);
|
||||
break;
|
||||
}
|
||||
case Operation::ExtractShiftLeftImmediate: {
|
||||
case Macro::Operation::ExtractShiftLeftImmediate: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
@@ -164,7 +101,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Operation::ExtractShiftLeftRegister: {
|
||||
case Macro::Operation::ExtractShiftLeftRegister: {
|
||||
u32 dst = GetRegister(opcode.src_a);
|
||||
u32 src = GetRegister(opcode.src_b);
|
||||
|
||||
@@ -173,12 +110,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Operation::Read: {
|
||||
case Macro::Operation::Read: {
|
||||
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
|
||||
ProcessResult(opcode.result_operation, opcode.dst, result);
|
||||
break;
|
||||
}
|
||||
case Operation::Branch: {
|
||||
case Macro::Operation::Branch: {
|
||||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
||||
u32 value = GetRegister(opcode.src_a);
|
||||
bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
|
||||
@@ -191,7 +128,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
|
||||
delayed_pc = base_address + opcode.GetBranchTarget();
|
||||
// Execute one more instruction due to the delay slot.
|
||||
return Step(offset, true);
|
||||
return Step(true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -204,51 +141,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||
// cause an exit if it's executed inside a delay slot.
|
||||
if (opcode.is_exit && !is_delay_slot) {
|
||||
// Exit has a delay slot, execute the next instruction
|
||||
Step(offset, true);
|
||||
Step(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
|
||||
const auto& macro_memory{maxwell3d.GetMacroMemory()};
|
||||
ASSERT((pc % sizeof(u32)) == 0);
|
||||
ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
|
||||
return {macro_memory[offset + pc / sizeof(u32)]};
|
||||
}
|
||||
|
||||
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) {
|
||||
u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
|
||||
switch (operation) {
|
||||
case ALUOperation::Add: {
|
||||
case Macro::ALUOperation::Add: {
|
||||
const u64 result{static_cast<u64>(src_a) + src_b};
|
||||
carry_flag = result > 0xffffffff;
|
||||
return static_cast<u32>(result);
|
||||
}
|
||||
case ALUOperation::AddWithCarry: {
|
||||
case Macro::ALUOperation::AddWithCarry: {
|
||||
const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
|
||||
carry_flag = result > 0xffffffff;
|
||||
return static_cast<u32>(result);
|
||||
}
|
||||
case ALUOperation::Subtract: {
|
||||
case Macro::ALUOperation::Subtract: {
|
||||
const u64 result{static_cast<u64>(src_a) - src_b};
|
||||
carry_flag = result < 0x100000000;
|
||||
return static_cast<u32>(result);
|
||||
}
|
||||
case ALUOperation::SubtractWithBorrow: {
|
||||
case Macro::ALUOperation::SubtractWithBorrow: {
|
||||
const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
|
||||
carry_flag = result < 0x100000000;
|
||||
return static_cast<u32>(result);
|
||||
}
|
||||
case ALUOperation::Xor:
|
||||
case Macro::ALUOperation::Xor:
|
||||
return src_a ^ src_b;
|
||||
case ALUOperation::Or:
|
||||
case Macro::ALUOperation::Or:
|
||||
return src_a | src_b;
|
||||
case ALUOperation::And:
|
||||
case Macro::ALUOperation::And:
|
||||
return src_a & src_b;
|
||||
case ALUOperation::AndNot:
|
||||
case Macro::ALUOperation::AndNot:
|
||||
return src_a & ~src_b;
|
||||
case ALUOperation::Nand:
|
||||
case Macro::ALUOperation::Nand:
|
||||
return ~(src_a & src_b);
|
||||
|
||||
default:
|
||||
@@ -257,43 +187,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b)
|
||||
}
|
||||
}
|
||||
|
||||
void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) {
|
||||
void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
|
||||
switch (operation) {
|
||||
case ResultOperation::IgnoreAndFetch:
|
||||
case Macro::ResultOperation::IgnoreAndFetch:
|
||||
// Fetch parameter and ignore result.
|
||||
SetRegister(reg, FetchParameter());
|
||||
break;
|
||||
case ResultOperation::Move:
|
||||
case Macro::ResultOperation::Move:
|
||||
// Move result.
|
||||
SetRegister(reg, result);
|
||||
break;
|
||||
case ResultOperation::MoveAndSetMethod:
|
||||
case Macro::ResultOperation::MoveAndSetMethod:
|
||||
// Move result and use as Method Address.
|
||||
SetRegister(reg, result);
|
||||
SetMethodAddress(result);
|
||||
break;
|
||||
case ResultOperation::FetchAndSend:
|
||||
case Macro::ResultOperation::FetchAndSend:
|
||||
// Fetch parameter and send result.
|
||||
SetRegister(reg, FetchParameter());
|
||||
Send(result);
|
||||
break;
|
||||
case ResultOperation::MoveAndSend:
|
||||
case Macro::ResultOperation::MoveAndSend:
|
||||
// Move and send result.
|
||||
SetRegister(reg, result);
|
||||
Send(result);
|
||||
break;
|
||||
case ResultOperation::FetchAndSetMethod:
|
||||
case Macro::ResultOperation::FetchAndSetMethod:
|
||||
// Fetch parameter and use result as Method Address.
|
||||
SetRegister(reg, FetchParameter());
|
||||
SetMethodAddress(result);
|
||||
break;
|
||||
case ResultOperation::MoveAndSetMethodFetchAndSend:
|
||||
case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
|
||||
// Move result and use as Method Address, then fetch and send parameter.
|
||||
SetRegister(reg, result);
|
||||
SetMethodAddress(result);
|
||||
Send(FetchParameter());
|
||||
break;
|
||||
case ResultOperation::MoveAndSetMethodSend:
|
||||
case Macro::ResultOperation::MoveAndSetMethodSend:
|
||||
// Move result and use as Method Address, then send bits 12:17 of result.
|
||||
SetRegister(reg, result);
|
||||
SetMethodAddress(result);
|
||||
@@ -304,16 +234,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
|
||||
}
|
||||
}
|
||||
|
||||
u32 MacroInterpreter::FetchParameter() {
|
||||
ASSERT(next_parameter_index < num_parameters);
|
||||
return parameters[next_parameter_index++];
|
||||
bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
|
||||
switch (cond) {
|
||||
case Macro::BranchCondition::Zero:
|
||||
return value == 0;
|
||||
case Macro::BranchCondition::NotZero:
|
||||
return value != 0;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return true;
|
||||
}
|
||||
|
||||
u32 MacroInterpreter::GetRegister(u32 register_id) const {
|
||||
Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
|
||||
ASSERT((pc % sizeof(u32)) == 0);
|
||||
ASSERT(pc < code.size() * sizeof(u32));
|
||||
return {code[pc / sizeof(u32)]};
|
||||
}
|
||||
|
||||
u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
|
||||
return registers.at(register_id);
|
||||
}
|
||||
|
||||
void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
|
||||
void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
|
||||
// Register 0 is hardwired as the zero register.
|
||||
// Ensure no writes to it actually occur.
|
||||
if (register_id == 0) {
|
||||
@@ -323,30 +265,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
|
||||
registers.at(register_id) = value;
|
||||
}
|
||||
|
||||
void MacroInterpreter::SetMethodAddress(u32 address) {
|
||||
void MacroInterpreterImpl::SetMethodAddress(u32 address) {
|
||||
method_address.raw = address;
|
||||
}
|
||||
|
||||
void MacroInterpreter::Send(u32 value) {
|
||||
void MacroInterpreterImpl::Send(u32 value) {
|
||||
maxwell3d.CallMethodFromMME(method_address.address, value);
|
||||
// Increment the method address by the method increment.
|
||||
method_address.address.Assign(method_address.address.Value() +
|
||||
method_address.increment.Value());
|
||||
}
|
||||
|
||||
u32 MacroInterpreter::Read(u32 method) const {
|
||||
u32 MacroInterpreterImpl::Read(u32 method) const {
|
||||
return maxwell3d.GetRegisterValue(method);
|
||||
}
|
||||
|
||||
bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const {
|
||||
switch (cond) {
|
||||
case BranchCondition::Zero:
|
||||
return value == 0;
|
||||
case BranchCondition::NotZero:
|
||||
return value != 0;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return true;
|
||||
u32 MacroInterpreterImpl::FetchParameter() {
|
||||
ASSERT(next_parameter_index < num_parameters);
|
||||
return parameters[next_parameter_index++];
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
@@ -1,44 +1,37 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
class MacroInterpreter final {
|
||||
class MacroInterpreter final : public MacroEngine {
|
||||
public:
|
||||
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
/**
|
||||
* Executes the macro code with the specified input parameters.
|
||||
* @param offset Offset to start execution at.
|
||||
* @param parameters The parameters of the macro.
|
||||
*/
|
||||
void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
|
||||
protected:
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||
|
||||
private:
|
||||
enum class ALUOperation : u32;
|
||||
enum class BranchCondition : u32;
|
||||
enum class ResultOperation : u32;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
union Opcode;
|
||||
|
||||
union MethodAddress {
|
||||
u32 raw;
|
||||
BitField<0, 12, u32> address;
|
||||
BitField<12, 6, u32> increment;
|
||||
};
|
||||
class MacroInterpreterImpl : public CachedMacro {
|
||||
public:
|
||||
MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
|
||||
void Execute(const std::vector<u32>& parameters, u32 method) override;
|
||||
|
||||
private:
|
||||
/// Resets the execution engine state, zeroing registers, etc.
|
||||
void Reset();
|
||||
|
||||
@@ -49,20 +42,20 @@ private:
|
||||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
||||
* previous instruction.
|
||||
*/
|
||||
bool Step(u32 offset, bool is_delay_slot);
|
||||
bool Step(bool is_delay_slot);
|
||||
|
||||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
||||
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b);
|
||||
u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
|
||||
|
||||
/// Performs the result operation on the input result and stores it in the specified register
|
||||
/// (if necessary).
|
||||
void ProcessResult(ResultOperation operation, u32 reg, u32 result);
|
||||
void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
|
||||
|
||||
/// Evaluates the branch condition and returns whether the branch should be taken or not.
|
||||
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
|
||||
bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
|
||||
|
||||
/// Reads an opcode at the current program counter location.
|
||||
Opcode GetOpcode(u32 offset) const;
|
||||
Macro::Opcode GetOpcode() const;
|
||||
|
||||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
||||
u32 GetRegister(u32 register_id) const;
|
||||
@@ -89,13 +82,11 @@ private:
|
||||
/// Program counter to execute at after the delay slot is executed.
|
||||
std::optional<u32> delayed_pc;
|
||||
|
||||
static constexpr std::size_t NumMacroRegisters = 8;
|
||||
|
||||
/// General purpose macro registers.
|
||||
std::array<u32, NumMacroRegisters> registers = {};
|
||||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
|
||||
|
||||
/// Method address to use for the next Send instruction.
|
||||
MethodAddress method_address = {};
|
||||
Macro::MethodAddress method_address = {};
|
||||
|
||||
/// Input parameters of the current macro.
|
||||
std::unique_ptr<u32[]> parameters;
|
||||
@@ -105,5 +96,7 @@ private:
|
||||
u32 next_parameter_index = 0;
|
||||
|
||||
bool carry_flag = false;
|
||||
const std::vector<u32>& code;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
613
src/video_core/macro/macro_jit_x64.cpp
Normal file
613
src/video_core/macro/macro_jit_x64.cpp
Normal file
@@ -0,0 +1,613 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/x64/xbyak_util.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/macro/macro_interpreter.h"
|
||||
#include "video_core/macro/macro_jit_x64.h"
|
||||
|
||||
MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47));
|
||||
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
|
||||
|
||||
namespace Tegra {
|
||||
static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
|
||||
static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
|
||||
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
|
||||
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
|
||||
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
|
||||
|
||||
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
|
||||
STATE,
|
||||
RESULT,
|
||||
PARAMETERS,
|
||||
METHOD_ADDRESS,
|
||||
BRANCH_HOLDER,
|
||||
});
|
||||
|
||||
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||
|
||||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
|
||||
return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
|
||||
}
|
||||
|
||||
MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
|
||||
: Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) {
|
||||
Compile();
|
||||
}
|
||||
|
||||
MacroJITx64Impl::~MacroJITx64Impl() = default;
|
||||
|
||||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||
MICROPROFILE_SCOPE(MacroJitExecute);
|
||||
ASSERT_OR_EXECUTE(program != nullptr, { return; });
|
||||
JITState state{};
|
||||
state.maxwell3d = &maxwell3d;
|
||||
state.registers = {};
|
||||
program(&state, parameters.data());
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
|
||||
const bool is_a_zero = opcode.src_a == 0;
|
||||
const bool is_b_zero = opcode.src_b == 0;
|
||||
const bool valid_operation = !is_a_zero && !is_b_zero;
|
||||
const bool is_move_operation = !is_a_zero && is_b_zero;
|
||||
const bool has_zero_register = is_a_zero || is_b_zero;
|
||||
const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
|
||||
opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;
|
||||
|
||||
Xbyak::Reg32 src_a;
|
||||
Xbyak::Reg32 src_b;
|
||||
|
||||
if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
src_b = Compile_GetRegister(opcode.src_b, eax);
|
||||
} else {
|
||||
if (!is_a_zero) {
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
}
|
||||
if (!is_b_zero) {
|
||||
src_b = Compile_GetRegister(opcode.src_b, eax);
|
||||
}
|
||||
}
|
||||
Xbyak::Label skip_carry{};
|
||||
|
||||
bool has_emitted = false;
|
||||
|
||||
switch (opcode.alu_operation) {
|
||||
case Macro::ALUOperation::Add:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (valid_operation) {
|
||||
add(src_a, src_b);
|
||||
}
|
||||
} else {
|
||||
add(src_a, src_b);
|
||||
}
|
||||
|
||||
if (!optimizer.can_skip_carry) {
|
||||
setc(byte[STATE + offsetof(JITState, carry_flag)]);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::AddWithCarry:
|
||||
bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
|
||||
adc(src_a, src_b);
|
||||
setc(byte[STATE + offsetof(JITState, carry_flag)]);
|
||||
break;
|
||||
case Macro::ALUOperation::Subtract:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (valid_operation) {
|
||||
sub(src_a, src_b);
|
||||
has_emitted = true;
|
||||
}
|
||||
} else {
|
||||
sub(src_a, src_b);
|
||||
has_emitted = true;
|
||||
}
|
||||
if (!optimizer.can_skip_carry && has_emitted) {
|
||||
setc(byte[STATE + offsetof(JITState, carry_flag)]);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::SubtractWithBorrow:
|
||||
bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
|
||||
sbb(src_a, src_b);
|
||||
setc(byte[STATE + offsetof(JITState, carry_flag)]);
|
||||
break;
|
||||
case Macro::ALUOperation::Xor:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (valid_operation) {
|
||||
xor_(src_a, src_b);
|
||||
}
|
||||
} else {
|
||||
xor_(src_a, src_b);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::Or:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (valid_operation) {
|
||||
or_(src_a, src_b);
|
||||
}
|
||||
} else {
|
||||
or_(src_a, src_b);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::And:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (!has_zero_register) {
|
||||
and_(src_a, src_b);
|
||||
}
|
||||
} else {
|
||||
and_(src_a, src_b);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::AndNot:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (!is_a_zero) {
|
||||
not_(src_b);
|
||||
and_(src_a, src_b);
|
||||
}
|
||||
} else {
|
||||
not_(src_b);
|
||||
and_(src_a, src_b);
|
||||
}
|
||||
break;
|
||||
case Macro::ALUOperation::Nand:
|
||||
if (optimizer.zero_reg_skip) {
|
||||
if (!is_a_zero) {
|
||||
and_(src_a, src_b);
|
||||
not_(src_a);
|
||||
}
|
||||
} else {
|
||||
and_(src_a, src_b);
|
||||
not_(src_a);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
|
||||
static_cast<std::size_t>(opcode.alu_operation.Value()));
|
||||
break;
|
||||
}
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
|
||||
if (optimizer.skip_dummy_addimmediate) {
|
||||
// Games tend to use this as an exit instruction placeholder. It's to encode an instruction
|
||||
// without doing anything. In our case we can just not emit anything.
|
||||
if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Check for redundant moves
|
||||
if (optimizer.optimize_for_method_move &&
|
||||
opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
|
||||
if (next_opcode.has_value()) {
|
||||
const auto next = *next_opcode;
|
||||
if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
|
||||
opcode.dst == next.dst) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (optimizer.zero_reg_skip && opcode.src_a == 0) {
|
||||
if (opcode.immediate == 0) {
|
||||
xor_(RESULT, RESULT);
|
||||
} else {
|
||||
mov(RESULT, opcode.immediate);
|
||||
}
|
||||
} else {
|
||||
auto result = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
if (opcode.immediate > 2) {
|
||||
add(result, opcode.immediate);
|
||||
} else if (opcode.immediate == 1) {
|
||||
inc(result);
|
||||
} else if (opcode.immediate < 0) {
|
||||
sub(result, opcode.immediate * -1);
|
||||
}
|
||||
}
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
|
||||
auto dst = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
auto src = Compile_GetRegister(opcode.src_b, eax);
|
||||
|
||||
if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) {
|
||||
shr(src, opcode.bf_src_bit);
|
||||
} else if (opcode.bf_src_bit == 31) {
|
||||
xor_(src, src);
|
||||
}
|
||||
// Don't bother masking the whole register since we're using a 32 bit register
|
||||
if (opcode.bf_size != 31 && opcode.bf_size != 0) {
|
||||
and_(src, opcode.GetBitfieldMask());
|
||||
} else if (opcode.bf_size == 0) {
|
||||
xor_(src, src);
|
||||
}
|
||||
if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) {
|
||||
shl(src, opcode.bf_dst_bit);
|
||||
} else if (opcode.bf_dst_bit == 31) {
|
||||
xor_(src, src);
|
||||
}
|
||||
|
||||
const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
|
||||
if (mask != 0xffffffff) {
|
||||
and_(dst, mask);
|
||||
}
|
||||
or_(dst, src);
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
|
||||
auto dst = Compile_GetRegister(opcode.src_a, eax);
|
||||
auto src = Compile_GetRegister(opcode.src_b, RESULT);
|
||||
|
||||
shr(src, al);
|
||||
if (opcode.bf_size != 0 && opcode.bf_size != 31) {
|
||||
and_(src, opcode.GetBitfieldMask());
|
||||
} else if (opcode.bf_size == 0) {
|
||||
xor_(src, src);
|
||||
}
|
||||
|
||||
if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) {
|
||||
shl(src, opcode.bf_dst_bit);
|
||||
} else if (opcode.bf_dst_bit == 31) {
|
||||
xor_(src, src);
|
||||
}
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
|
||||
auto dst = Compile_GetRegister(opcode.src_a, eax);
|
||||
auto src = Compile_GetRegister(opcode.src_b, RESULT);
|
||||
|
||||
if (opcode.bf_src_bit != 0) {
|
||||
shr(src, opcode.bf_src_bit);
|
||||
}
|
||||
|
||||
if (opcode.bf_size != 31) {
|
||||
and_(src, opcode.GetBitfieldMask());
|
||||
}
|
||||
shl(src, al);
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) {
|
||||
return maxwell3d->GetRegisterValue(method);
|
||||
}
|
||||
|
||||
static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
|
||||
maxwell3d->CallMethodFromMME(method_address.address, value);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
|
||||
if (optimizer.zero_reg_skip && opcode.src_a == 0) {
|
||||
if (opcode.immediate == 0) {
|
||||
xor_(RESULT, RESULT);
|
||||
} else {
|
||||
mov(RESULT, opcode.immediate);
|
||||
}
|
||||
} else {
|
||||
auto result = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
if (opcode.immediate > 2) {
|
||||
add(result, opcode.immediate);
|
||||
} else if (opcode.immediate == 1) {
|
||||
inc(result);
|
||||
} else if (opcode.immediate < 0) {
|
||||
sub(result, opcode.immediate * -1);
|
||||
}
|
||||
}
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
mov(Common::X64::ABI_PARAM1, qword[STATE]);
|
||||
mov(Common::X64::ABI_PARAM2, RESULT);
|
||||
Common::X64::CallFarFunction(*this, &Read);
|
||||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
mov(RESULT, Common::X64::ABI_RETURN.cvt32());
|
||||
Compile_ProcessResult(opcode.result_operation, opcode.dst);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
mov(Common::X64::ABI_PARAM1, qword[STATE]);
|
||||
mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
|
||||
mov(Common::X64::ABI_PARAM3, value);
|
||||
Common::X64::CallFarFunction(*this, &Send);
|
||||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
|
||||
|
||||
Xbyak::Label dont_process{};
|
||||
// Get increment
|
||||
test(METHOD_ADDRESS, 0x3f000);
|
||||
// If zero, method address doesn't update
|
||||
je(dont_process);
|
||||
|
||||
mov(ecx, METHOD_ADDRESS);
|
||||
and_(METHOD_ADDRESS, 0xfff);
|
||||
shr(ecx, 12);
|
||||
and_(ecx, 0x3f);
|
||||
lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
|
||||
sal(ecx, 12);
|
||||
or_(eax, ecx);
|
||||
|
||||
mov(METHOD_ADDRESS, eax);
|
||||
|
||||
L(dont_process);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
|
||||
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
|
||||
const s32 jump_address =
|
||||
static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
|
||||
|
||||
Xbyak::Label end;
|
||||
auto value = Compile_GetRegister(opcode.src_a, eax);
|
||||
test(value, value);
|
||||
if (optimizer.has_delayed_pc) {
|
||||
switch (opcode.branch_condition) {
|
||||
case Macro::BranchCondition::Zero:
|
||||
jne(end, T_NEAR);
|
||||
break;
|
||||
case Macro::BranchCondition::NotZero:
|
||||
je(end, T_NEAR);
|
||||
break;
|
||||
}
|
||||
|
||||
if (opcode.branch_annul) {
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jmp(labels[jump_address], T_NEAR);
|
||||
} else {
|
||||
Xbyak::Label handle_post_exit{};
|
||||
Xbyak::Label skip{};
|
||||
jmp(skip, T_NEAR);
|
||||
if (opcode.is_exit) {
|
||||
L(handle_post_exit);
|
||||
// Execute 1 instruction
|
||||
mov(BRANCH_HOLDER, end_of_code);
|
||||
// Jump to next instruction to skip delay slot check
|
||||
jmp(labels[jump_address], T_NEAR);
|
||||
} else {
|
||||
L(handle_post_exit);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jmp(labels[jump_address], T_NEAR);
|
||||
}
|
||||
L(skip);
|
||||
mov(BRANCH_HOLDER, handle_post_exit);
|
||||
jmp(delay_skip[pc], T_NEAR);
|
||||
}
|
||||
} else {
|
||||
switch (opcode.branch_condition) {
|
||||
case Macro::BranchCondition::Zero:
|
||||
je(labels[jump_address], T_NEAR);
|
||||
break;
|
||||
case Macro::BranchCondition::NotZero:
|
||||
jne(labels[jump_address], T_NEAR);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
L(end);
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
|
||||
optimizer.can_skip_carry = true;
|
||||
optimizer.has_delayed_pc = false;
|
||||
for (auto raw_op : code) {
|
||||
Macro::Opcode op{};
|
||||
op.raw = raw_op;
|
||||
|
||||
if (op.operation == Macro::Operation::ALU) {
|
||||
// Scan for any ALU operations which actually use the carry flag, if they don't exist in
|
||||
// our current code we can skip emitting the carry flag handling operations
|
||||
if (op.alu_operation == Macro::ALUOperation::AddWithCarry ||
|
||||
op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) {
|
||||
optimizer.can_skip_carry = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (op.operation == Macro::Operation::Branch) {
|
||||
if (!op.branch_annul) {
|
||||
optimizer.has_delayed_pc = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile() {
|
||||
MICROPROFILE_SCOPE(MacroJitCompile);
|
||||
bool keep_executing = true;
|
||||
labels.fill(Xbyak::Label());
|
||||
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||
// JIT state
|
||||
mov(STATE, Common::X64::ABI_PARAM1);
|
||||
mov(PARAMETERS, Common::X64::ABI_PARAM2);
|
||||
xor_(RESULT, RESULT);
|
||||
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
|
||||
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
|
||||
|
||||
// Track get register for zero registers and mark it as no-op
|
||||
optimizer.zero_reg_skip = true;
|
||||
|
||||
// AddImmediate tends to be used as a NOP instruction, if we detect this we can
|
||||
// completely skip the entire code path and no emit anything
|
||||
optimizer.skip_dummy_addimmediate = true;
|
||||
|
||||
// SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
|
||||
// one if our register isn't "dirty"
|
||||
optimizer.optimize_for_method_move = true;
|
||||
|
||||
// Check to see if we can skip emitting certain instructions
|
||||
Optimizer_ScanFlags();
|
||||
|
||||
const u32 op_count = static_cast<u32>(code.size());
|
||||
for (u32 i = 0; i < op_count; i++) {
|
||||
if (i < op_count - 1) {
|
||||
pc = i + 1;
|
||||
next_opcode = GetOpCode();
|
||||
} else {
|
||||
next_opcode = {};
|
||||
}
|
||||
pc = i;
|
||||
Compile_NextInstruction();
|
||||
}
|
||||
|
||||
L(end_of_code);
|
||||
|
||||
Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||
ret();
|
||||
ready();
|
||||
program = getCode<ProgramType>();
|
||||
}
|
||||
|
||||
bool MacroJITx64Impl::Compile_NextInstruction() {
|
||||
const auto opcode = GetOpCode();
|
||||
if (labels[pc].getAddress()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
L(labels[pc]);
|
||||
|
||||
switch (opcode.operation) {
|
||||
case Macro::Operation::ALU:
|
||||
Compile_ALU(opcode);
|
||||
break;
|
||||
case Macro::Operation::AddImmediate:
|
||||
Compile_AddImmediate(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractInsert:
|
||||
Compile_ExtractInsert(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractShiftLeftImmediate:
|
||||
Compile_ExtractShiftLeftImmediate(opcode);
|
||||
break;
|
||||
case Macro::Operation::ExtractShiftLeftRegister:
|
||||
Compile_ExtractShiftLeftRegister(opcode);
|
||||
break;
|
||||
case Macro::Operation::Read:
|
||||
Compile_Read(opcode);
|
||||
break;
|
||||
case Macro::Operation::Branch:
|
||||
Compile_Branch(opcode);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
if (optimizer.has_delayed_pc) {
|
||||
if (opcode.is_exit) {
|
||||
mov(rax, end_of_code);
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
cmove(BRANCH_HOLDER, rax);
|
||||
// Jump to next instruction to skip delay slot check
|
||||
je(labels[pc + 1], T_NEAR);
|
||||
} else {
|
||||
// TODO(ogniK): Optimize delay slot branching
|
||||
Xbyak::Label no_delay_slot{};
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
je(no_delay_slot, T_NEAR);
|
||||
mov(rax, BRANCH_HOLDER);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jmp(rax);
|
||||
L(no_delay_slot);
|
||||
}
|
||||
L(delay_skip[pc]);
|
||||
if (opcode.is_exit) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
test(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
jne(end_of_code, T_NEAR);
|
||||
if (opcode.is_exit) {
|
||||
inc(BRANCH_HOLDER);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
|
||||
mov(eax, dword[PARAMETERS]);
|
||||
add(PARAMETERS, sizeof(u32));
|
||||
return eax;
|
||||
}
|
||||
|
||||
Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
|
||||
if (index == 0) {
|
||||
// Register 0 is always zero
|
||||
xor_(dst, dst);
|
||||
} else {
|
||||
mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
|
||||
auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
|
||||
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
|
||||
// register.
|
||||
if (reg == 0) {
|
||||
return;
|
||||
}
|
||||
mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
|
||||
};
|
||||
auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
|
||||
|
||||
switch (operation) {
|
||||
case Macro::ResultOperation::IgnoreAndFetch:
|
||||
SetRegister(reg, Compile_FetchParameter());
|
||||
break;
|
||||
case Macro::ResultOperation::Move:
|
||||
SetRegister(reg, RESULT);
|
||||
break;
|
||||
case Macro::ResultOperation::MoveAndSetMethod:
|
||||
SetRegister(reg, RESULT);
|
||||
SetMethodAddress(RESULT);
|
||||
break;
|
||||
case Macro::ResultOperation::FetchAndSend:
|
||||
// Fetch parameter and send result.
|
||||
SetRegister(reg, Compile_FetchParameter());
|
||||
Compile_Send(RESULT);
|
||||
break;
|
||||
case Macro::ResultOperation::MoveAndSend:
|
||||
// Move and send result.
|
||||
SetRegister(reg, RESULT);
|
||||
Compile_Send(RESULT);
|
||||
break;
|
||||
case Macro::ResultOperation::FetchAndSetMethod:
|
||||
// Fetch parameter and use result as Method Address.
|
||||
SetRegister(reg, Compile_FetchParameter());
|
||||
SetMethodAddress(RESULT);
|
||||
break;
|
||||
case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
|
||||
// Move result and use as Method Address, then fetch and send parameter.
|
||||
SetRegister(reg, RESULT);
|
||||
SetMethodAddress(RESULT);
|
||||
Compile_Send(Compile_FetchParameter());
|
||||
break;
|
||||
case Macro::ResultOperation::MoveAndSetMethodSend:
|
||||
// Move result and use as Method Address, then send bits 12:17 of result.
|
||||
SetRegister(reg, RESULT);
|
||||
SetMethodAddress(RESULT);
|
||||
shr(RESULT, 12);
|
||||
and_(RESULT, 0b111111);
|
||||
Compile_Send(RESULT);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
|
||||
}
|
||||
}
|
||||
|
||||
Macro::Opcode MacroJITx64Impl::GetOpCode() const {
|
||||
ASSERT(pc < code.size());
|
||||
return {code[pc]};
|
||||
}
|
||||
|
||||
std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
|
||||
return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
97
src/video_core/macro/macro_jit_x64.h
Normal file
97
src/video_core/macro/macro_jit_x64.h
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <xbyak.h>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
#include "video_core/macro/macro.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
}
|
||||
|
||||
/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
|
||||
constexpr size_t MAX_CODE_SIZE = 0x10000;
|
||||
|
||||
class MacroJITx64 final : public MacroEngine {
|
||||
public:
|
||||
explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
|
||||
|
||||
private:
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
|
||||
public:
|
||||
MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
|
||||
~MacroJITx64Impl();
|
||||
|
||||
void Execute(const std::vector<u32>& parameters, u32 method) override;
|
||||
|
||||
void Compile_ALU(Macro::Opcode opcode);
|
||||
void Compile_AddImmediate(Macro::Opcode opcode);
|
||||
void Compile_ExtractInsert(Macro::Opcode opcode);
|
||||
void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
|
||||
void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
|
||||
void Compile_Read(Macro::Opcode opcode);
|
||||
void Compile_Branch(Macro::Opcode opcode);
|
||||
|
||||
private:
|
||||
void Optimizer_ScanFlags();
|
||||
|
||||
void Compile();
|
||||
bool Compile_NextInstruction();
|
||||
|
||||
Xbyak::Reg32 Compile_FetchParameter();
|
||||
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
|
||||
|
||||
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
|
||||
void Compile_Send(Xbyak::Reg32 value);
|
||||
|
||||
Macro::Opcode GetOpCode() const;
|
||||
std::bitset<32> PersistentCallerSavedRegs() const;
|
||||
|
||||
struct JITState {
|
||||
Engines::Maxwell3D* maxwell3d{};
|
||||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
|
||||
u32 carry_flag{};
|
||||
};
|
||||
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
|
||||
using ProgramType = void (*)(JITState*, const u32*);
|
||||
|
||||
struct OptimizerState {
|
||||
bool can_skip_carry{};
|
||||
bool has_delayed_pc{};
|
||||
bool zero_reg_skip{};
|
||||
bool skip_dummy_addimmediate{};
|
||||
bool optimize_for_method_move{};
|
||||
};
|
||||
OptimizerState optimizer{};
|
||||
|
||||
std::optional<Macro::Opcode> next_opcode{};
|
||||
ProgramType program{nullptr};
|
||||
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
|
||||
Xbyak::Label end_of_code{};
|
||||
|
||||
bool is_delay_slot{};
|
||||
u32 pc{};
|
||||
std::optional<u32> delayed_pc;
|
||||
|
||||
const std::vector<u32>& code;
|
||||
Engines::Maxwell3D& maxwell3d;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
@@ -1,7 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
|
||||
RasterizerCacheObject::~RasterizerCacheObject() = default;
|
||||
@@ -1,253 +0,0 @@
|
||||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/range/iterator_range_core.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
class RasterizerCacheObject {
|
||||
public:
|
||||
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
|
||||
|
||||
virtual ~RasterizerCacheObject();
|
||||
|
||||
VAddr GetCpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
/// Gets the size of the shader in guest memory, required for cache management
|
||||
virtual std::size_t GetSizeInBytes() const = 0;
|
||||
|
||||
/// Sets whether the cached object should be considered registered
|
||||
void SetIsRegistered(bool registered) {
|
||||
is_registered = registered;
|
||||
}
|
||||
|
||||
/// Returns true if the cached object is registered
|
||||
bool IsRegistered() const {
|
||||
return is_registered;
|
||||
}
|
||||
|
||||
/// Returns true if the cached object is dirty
|
||||
bool IsDirty() const {
|
||||
return is_dirty;
|
||||
}
|
||||
|
||||
/// Returns ticks from when this cached object was last modified
|
||||
u64 GetLastModifiedTicks() const {
|
||||
return last_modified_ticks;
|
||||
}
|
||||
|
||||
/// Marks an object as recently modified, used to specify whether it is clean or dirty
|
||||
template <class T>
|
||||
void MarkAsModified(bool dirty, T& cache) {
|
||||
is_dirty = dirty;
|
||||
last_modified_ticks = cache.GetModifiedTicks();
|
||||
}
|
||||
|
||||
void SetMemoryMarked(bool is_memory_marked_) {
|
||||
is_memory_marked = is_memory_marked_;
|
||||
}
|
||||
|
||||
bool IsMemoryMarked() const {
|
||||
return is_memory_marked;
|
||||
}
|
||||
|
||||
void SetSyncPending(bool is_sync_pending_) {
|
||||
is_sync_pending = is_sync_pending_;
|
||||
}
|
||||
|
||||
bool IsSyncPending() const {
|
||||
return is_sync_pending;
|
||||
}
|
||||
|
||||
private:
|
||||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
||||
bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
|
||||
bool is_sync_pending{}; ///< Whether the object is pending deletion.
|
||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
||||
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class RasterizerCache : NonCopyable {
|
||||
friend class RasterizerCacheObject;
|
||||
|
||||
public:
|
||||
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
||||
|
||||
/// Write any cached resources overlapping the specified region back to memory
|
||||
void FlushRegion(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
FlushObject(object);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark the specified region as being invalidated
|
||||
void InvalidateRegion(VAddr addr, u64 size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
if (!object->IsRegistered()) {
|
||||
// Skip duplicates
|
||||
continue;
|
||||
}
|
||||
Unregister(object);
|
||||
}
|
||||
}
|
||||
|
||||
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
|
||||
if (object->IsRegistered()) {
|
||||
UnmarkMemory(object);
|
||||
object->SetSyncPending(true);
|
||||
marked_for_unregister.emplace_back(object);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SyncGuestHost() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
for (const auto& object : marked_for_unregister) {
|
||||
if (object->IsRegistered()) {
|
||||
object->SetSyncPending(false);
|
||||
Unregister(object);
|
||||
}
|
||||
}
|
||||
marked_for_unregister.clear();
|
||||
}
|
||||
|
||||
/// Invalidates everything in the cache
|
||||
void InvalidateAll() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
while (interval_cache.begin() != interval_cache.end()) {
|
||||
Unregister(*interval_cache.begin()->second.begin());
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Tries to get an object from the cache with the specified cache address
|
||||
T TryGet(VAddr addr) const {
|
||||
const auto iter = map_cache.find(addr);
|
||||
if (iter != map_cache.end())
|
||||
return iter->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Register an object into the cache
|
||||
virtual void Register(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
object->SetIsRegistered(true);
|
||||
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.insert({object->GetCpuAddr(), object});
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
|
||||
object->SetMemoryMarked(true);
|
||||
}
|
||||
|
||||
/// Unregisters an object from the cache
|
||||
virtual void Unregister(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
UnmarkMemory(object);
|
||||
object->SetIsRegistered(false);
|
||||
if (object->IsSyncPending()) {
|
||||
marked_for_unregister.remove(object);
|
||||
object->SetSyncPending(false);
|
||||
}
|
||||
const VAddr addr = object->GetCpuAddr();
|
||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.erase(addr);
|
||||
}
|
||||
|
||||
void UnmarkMemory(const T& object) {
|
||||
if (!object->IsMemoryMarked()) {
|
||||
return;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
||||
object->SetMemoryMarked(false);
|
||||
}
|
||||
|
||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
||||
u64 GetModifiedTicks() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
return ++modified_ticks;
|
||||
}
|
||||
|
||||
virtual void FlushObjectInner(const T& object) = 0;
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
void FlushObject(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
FlushObjectInner(object);
|
||||
object->MarkAsModified(false, *this);
|
||||
}
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
private:
|
||||
/// Returns a list of cached objects from the specified memory region, ordered by access time
|
||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<T> objects;
|
||||
const ObjectInterval interval{addr, addr + size};
|
||||
for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
|
||||
for (auto& cached_object : pair.second) {
|
||||
if (!cached_object) {
|
||||
continue;
|
||||
}
|
||||
objects.push_back(cached_object);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
|
||||
return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
|
||||
});
|
||||
|
||||
return objects;
|
||||
}
|
||||
|
||||
using ObjectSet = std::set<T>;
|
||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
||||
using ObjectInterval = typename IntervalCache::interval_type;
|
||||
|
||||
static auto GetInterval(const T& object) {
|
||||
return ObjectInterval::right_open(object->GetCpuAddr(),
|
||||
object->GetCpuAddr() + object->GetSizeInBytes());
|
||||
}
|
||||
|
||||
ObjectCache map_cache;
|
||||
IntervalCache interval_cache; ///< Cache of objects
|
||||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
std::list<T> marked_for_unregister;
|
||||
};
|
||||
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
File diff suppressed because it is too large
Load Diff
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
enum class ShaderType : u32;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
class Registry;
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -22,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||
|
||||
CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
|
||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
gl_buffer.Create();
|
||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
const Device& device, std::size_t stream_size)
|
||||
@@ -48,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
|
||||
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
}
|
||||
|
||||
Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<CachedBufferBlock>(cpu_addr, size);
|
||||
}
|
||||
|
||||
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
|
||||
return buffer->GetHandle();
|
||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(cpu_addr, size);
|
||||
}
|
||||
|
||||
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
@@ -62,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
|
||||
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) {
|
||||
glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
||||
glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
@@ -70,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
u8* data) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
||||
glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) {
|
||||
glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
|
||||
glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
|
||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||
std::size_t size) {
|
||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||
const GLuint& cbuf = cbufs[cbuf_cursor++];
|
||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||
return {cbuf, 0};
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
@@ -24,17 +23,12 @@ class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class CachedBufferBlock;
|
||||
|
||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
|
||||
class CachedBufferBlock : public VideoCommon::BufferBlock {
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
|
||||
~CachedBufferBlock();
|
||||
explicit Buffer(VAddr cpu_addr, const std::size_t size);
|
||||
~Buffer();
|
||||
|
||||
GLuint GetHandle() const {
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
@@ -42,6 +36,7 @@ private:
|
||||
OGLBuffer gl_buffer;
|
||||
};
|
||||
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
public:
|
||||
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
@@ -55,9 +50,7 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
GLuint ToHandle(const Buffer& buffer) override;
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) override;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
|
||||
GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
|
||||
constexpr std::array LimitUBOs = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSSBOs = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSamplers = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS};
|
||||
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
|
||||
|
||||
constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
|
||||
GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
|
||||
constexpr std::array LimitImages = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
return std::exchange(base, base + amount);
|
||||
}
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
return max;
|
||||
}
|
||||
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||
|
||||
@@ -112,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
u32 base_images = 0;
|
||||
|
||||
// Reserve more image bindings on fragment and vertex stages.
|
||||
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||
// fragment stage, and at least 1 for the rest of the stages.
|
||||
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
|
||||
bindings[0].image =
|
||||
Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
||||
// Reserve the other image bindings.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 2);
|
||||
for (std::size_t i = 2; i < NumStages; ++i) {
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
if (stage == 4) {
|
||||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
||||
}
|
||||
@@ -133,6 +152,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||
}
|
||||
|
||||
bool IsASTCSupported() {
|
||||
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
|
||||
static constexpr std::array formats = {
|
||||
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
|
||||
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
|
||||
@@ -149,25 +169,43 @@ bool IsASTCSupported() {
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
|
||||
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
|
||||
};
|
||||
return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
|
||||
GLint supported;
|
||||
glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
|
||||
&supported);
|
||||
return supported == GL_TRUE;
|
||||
}) == formats.end();
|
||||
static constexpr std::array required_support = {
|
||||
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
|
||||
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
|
||||
};
|
||||
|
||||
for (const GLenum target : targets) {
|
||||
for (const GLenum format : formats) {
|
||||
for (const GLenum support : required_support) {
|
||||
GLint value;
|
||||
glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value);
|
||||
if (value != GL_FULL_SUPPORT) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
Device::Device()
|
||||
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
|
||||
const std::vector extensions = GetExtensions();
|
||||
|
||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||
const bool is_amd = vendor == "ATI Technologies Inc.";
|
||||
const bool is_intel = vendor == "Intel";
|
||||
const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
|
||||
|
||||
bool disable_fast_buffer_sub_data = false;
|
||||
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
|
||||
LOG_WARNING(
|
||||
Render_OpenGL,
|
||||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
@@ -182,10 +220,11 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
has_component_indexing_bug = is_amd;
|
||||
has_precise_bug = TestPreciseBug();
|
||||
has_broken_compute = is_intel_proprietary;
|
||||
has_fast_buffer_sub_data = is_nvidia;
|
||||
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||
GLAD_GL_NV_compute_program5;
|
||||
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
||||
GLAD_GL_NV_transform_feedback2;
|
||||
|
||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
||||
@@ -197,7 +236,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
}
|
||||
|
||||
Device::Device(std::nullptr_t) {
|
||||
uniform_buffer_alignment = 0;
|
||||
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
|
||||
uniform_buffer_alignment = 4;
|
||||
shader_storage_alignment = 4;
|
||||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
has_warp_intrinsics = true;
|
||||
@@ -205,9 +246,6 @@ Device::Device(std::nullptr_t) {
|
||||
has_vertex_viewport_layer = true;
|
||||
has_image_load_formatted = true;
|
||||
has_variable_aoffi = true;
|
||||
has_component_indexing_bug = false;
|
||||
has_broken_compute = false;
|
||||
has_precise_bug = false;
|
||||
}
|
||||
|
||||
bool Device::TestVariableAoffi() {
|
||||
|
||||
@@ -24,6 +24,10 @@ public:
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||
return base_bindings[stage_index];
|
||||
}
|
||||
@@ -80,14 +84,14 @@ public:
|
||||
return has_precise_bug;
|
||||
}
|
||||
|
||||
bool HasBrokenCompute() const {
|
||||
return has_broken_compute;
|
||||
}
|
||||
|
||||
bool HasFastBufferSubData() const {
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
bool HasNvViewportArray2() const {
|
||||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
@@ -96,7 +100,8 @@ private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
@@ -109,8 +114,8 @@ private:
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
bool has_precise_bug{};
|
||||
bool has_broken_compute{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool use_assembly_shaders{};
|
||||
};
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@@ -54,15 +55,33 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||
|
||||
constexpr std::size_t NumSupportedVertexAttributes = 16;
|
||||
|
||||
template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
ShaderType shader_type, std::size_t index = 0) {
|
||||
if (entry.is_bindless) {
|
||||
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(tex_handle);
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(handle);
|
||||
}
|
||||
|
||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||
@@ -87,6 +106,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
return buffer.size;
|
||||
}
|
||||
|
||||
/// Translates hardware transform feedback indices
|
||||
/// @param location Hardware location
|
||||
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
|
||||
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
|
||||
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
|
||||
const u8 index = location / 4;
|
||||
if (index >= 8 && index <= 39) {
|
||||
return {GL_GENERIC_ATTRIB_NV, index - 8};
|
||||
}
|
||||
if (index >= 48 && index <= 55) {
|
||||
return {GL_TEXTURE_COORD_NV, index - 48};
|
||||
}
|
||||
switch (index) {
|
||||
case 7:
|
||||
return {GL_POSITION, 0};
|
||||
case 40:
|
||||
return {GL_PRIMARY_COLOR_NV, 0};
|
||||
case 41:
|
||||
return {GL_SECONDARY_COLOR_NV, 0};
|
||||
case 42:
|
||||
return {GL_BACK_PRIMARY_COLOR_NV, 0};
|
||||
case 43:
|
||||
return {GL_BACK_SECONDARY_COLOR_NV, 0};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
|
||||
return {GL_POSITION, 0};
|
||||
}
|
||||
|
||||
void oglEnable(GLenum cap, bool state) {
|
||||
(state ? glEnable : glDisable)(cap);
|
||||
}
|
||||
@@ -104,6 +151,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||
CheckExtensions();
|
||||
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
for (const GLuint cbuf : staging_cbufs) {
|
||||
@@ -273,7 +323,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
Shader* const shader = shader_cache.GetStageProgram(program);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
||||
@@ -567,7 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
buffer_cache.Map(buffer_size);
|
||||
const bool invalidated = buffer_cache.Map(buffer_size);
|
||||
|
||||
if (invalidated) {
|
||||
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
|
||||
auto& dirty = gpu.dirty.flags;
|
||||
dirty[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
dirty[index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
@@ -655,10 +714,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
if (device.HasBrokenCompute()) {
|
||||
return;
|
||||
}
|
||||
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
@@ -837,7 +892,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT = {
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
@@ -846,34 +901,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
const auto& entries = shader->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||
|
||||
u32 binding =
|
||||
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
|
||||
for (const auto& entry : shader->GetEntries().const_buffers) {
|
||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
|
||||
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const u32 index = entry.GetIndex();
|
||||
const auto& buffer = shader_stage.const_buffers[index];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||
entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
const auto& entries = kernel->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : kernel->GetEntries().const_buffers) {
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
Tegra::Engines::ConstBufferInfo buffer;
|
||||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
|
||||
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry) {
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
if (device.UseAssemblyShaders()) {
|
||||
@@ -889,23 +966,32 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
// UBO alignment requirements.
|
||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||
|
||||
const auto alignment = device.GetUniformBufferAlignment();
|
||||
auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
||||
device.HasFastBufferSubData());
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
||||
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
|
||||
|
||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||
const GPUVAddr gpu_addr = buffer.address;
|
||||
auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UNIMPLEMENTED_IF(use_unified);
|
||||
if (offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
||||
cbuf = staging_cbuf;
|
||||
offset = 0;
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
||||
return;
|
||||
}
|
||||
if (offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
||||
cbuf = staging_cbuf;
|
||||
offset = 0;
|
||||
|
||||
if (use_unified) {
|
||||
glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||
@@ -920,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||
@@ -943,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||
@@ -956,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& compute = system.GPU().KeplerCompute();
|
||||
u32 binding = 0;
|
||||
@@ -985,7 +1071,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
|
||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
u32 binding = device.GetBaseBindings(stage_index).image;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
@@ -995,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
|
||||
const auto& compute = system.GPU().KeplerCompute();
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
@@ -1024,6 +1110,26 @@ void RasterizerOpenGL::SyncViewport() {
|
||||
const auto& regs = gpu.regs;
|
||||
|
||||
const bool dirty_viewport = flags[Dirty::Viewports];
|
||||
const bool dirty_clip_control = flags[Dirty::ClipControl];
|
||||
|
||||
if (dirty_clip_control || flags[Dirty::FrontFace]) {
|
||||
flags[Dirty::FrontFace] = false;
|
||||
|
||||
GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
|
||||
if (regs.screen_y_control.triangle_rast_flip != 0 &&
|
||||
regs.viewport_transform[0].scale_y < 0.0f) {
|
||||
switch (mode) {
|
||||
case GL_CW:
|
||||
mode = GL_CCW;
|
||||
break;
|
||||
case GL_CCW:
|
||||
mode = GL_CW;
|
||||
break;
|
||||
}
|
||||
}
|
||||
glFrontFace(mode);
|
||||
}
|
||||
|
||||
if (dirty_viewport || flags[Dirty::ClipControl]) {
|
||||
flags[Dirty::ClipControl] = false;
|
||||
|
||||
@@ -1121,11 +1227,6 @@ void RasterizerOpenGL::SyncCullMode() {
|
||||
glDisable(GL_CULL_FACE);
|
||||
}
|
||||
}
|
||||
|
||||
if (flags[Dirty::FrontFace]) {
|
||||
flags[Dirty::FrontFace] = false;
|
||||
glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncPrimitiveRestart() {
|
||||
@@ -1496,12 +1597,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
||||
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncTransformFeedback() {
|
||||
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
|
||||
// when this is required.
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
|
||||
static constexpr std::size_t STRIDE = 3;
|
||||
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
|
||||
std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
|
||||
|
||||
GLint* cursor = attribs.data();
|
||||
GLint* current_stream = streams.data();
|
||||
|
||||
for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
|
||||
const auto& layout = regs.tfb_layouts[feedback];
|
||||
UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
|
||||
if (layout.varying_count == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
*current_stream = static_cast<GLint>(feedback);
|
||||
if (current_stream != streams.data()) {
|
||||
// When stepping one stream, push the expected token
|
||||
cursor[0] = GL_NEXT_BUFFER_NV;
|
||||
cursor[1] = 0;
|
||||
cursor[2] = 0;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
++current_stream;
|
||||
|
||||
const auto& locations = regs.tfb_varying_locs[feedback];
|
||||
std::optional<u8> current_index;
|
||||
for (u32 offset = 0; offset < layout.varying_count; ++offset) {
|
||||
const u8 location = locations[offset];
|
||||
const u8 index = location / 4;
|
||||
|
||||
if (current_index == index) {
|
||||
// Increase number of components of the previous attachment
|
||||
++cursor[-2];
|
||||
continue;
|
||||
}
|
||||
current_index = index;
|
||||
|
||||
std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
|
||||
cursor[1] = 1;
|
||||
cursor += STRIDE;
|
||||
}
|
||||
}
|
||||
|
||||
const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
|
||||
const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
|
||||
glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
|
||||
GL_INTERLEAVED_ATTRIBS);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
SyncTransformFeedback();
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||
@@ -1528,6 +1687,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
// We may have to call BeginTransformFeedbackNV here since they seem to call different
|
||||
// implementations on Nvidia's driver (the pointer is different) but we are using
|
||||
// ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
|
||||
// extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "video_core/engines/const_buffer_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
@@ -100,40 +99,41 @@ private:
|
||||
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(const Shader& kernel);
|
||||
void SetupComputeConstBuffers(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry);
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(const Shader& kernel);
|
||||
void SetupComputeGlobalMemory(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
std::size_t size);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(const Shader& kernel);
|
||||
void SetupComputeTextures(Shader* kernel);
|
||||
|
||||
/// Configures a texture.
|
||||
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawImages(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader& shader);
|
||||
void SetupComputeImages(Shader* shader);
|
||||
|
||||
/// Configures an image.
|
||||
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
@@ -201,6 +201,10 @@ private:
|
||||
/// Syncs the framebuffer sRGB state to match the guest state
|
||||
void SyncFramebufferSRGB();
|
||||
|
||||
/// Syncs transform feedback state to match guest state
|
||||
/// @note Only valid on assembly shaders
|
||||
void SyncTransformFeedback();
|
||||
|
||||
/// Begin a transform feedback
|
||||
void BeginTransformFeedback(GLenum primitive_mode);
|
||||
|
||||
@@ -253,6 +257,7 @@ private:
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
@@ -29,6 +30,7 @@
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@@ -147,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
|
||||
auto program = std::make_shared<ProgramHandle>();
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
const std::string arb = "Not implemented";
|
||||
const std::string arb =
|
||||
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
|
||||
|
||||
GLuint& arb_prog = program->assembly_program.handle;
|
||||
|
||||
@@ -194,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||
ShaderEntries entries, ProgramSharedPtr program_)
|
||||
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
|
||||
size_in_bytes{size_in_bytes}, program{std::move(program_)} {
|
||||
// Assign either the assembly program or source program. We can't have both.
|
||||
Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
|
||||
ProgramSharedPtr program_)
|
||||
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
|
||||
handle = program->assembly_program.handle;
|
||||
if (handle == 0) {
|
||||
handle = program->source_program.handle;
|
||||
@@ -207,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||
ASSERT(handle != 0);
|
||||
}
|
||||
|
||||
CachedShader::~CachedShader() = default;
|
||||
Shader::~Shader() = default;
|
||||
|
||||
GLuint CachedShader::GetHandle() const {
|
||||
GLuint Shader::GetHandle() const {
|
||||
DEBUG_ASSERT(registry->IsConsistent());
|
||||
return handle;
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type, ProgramCode code,
|
||||
ProgramCode code_b) {
|
||||
std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode code, ProgramCode code_b) {
|
||||
const auto shader_type = GetShaderType(program_type);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
@@ -241,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
||||
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code) {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto& engine = params.system.GPU().KeplerCompute();
|
||||
@@ -265,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute),
|
||||
std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader,
|
||||
std::size_t size_in_bytes) {
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
|
||||
precompiled_shader.entries, precompiled_shader.program));
|
||||
std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader) {
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
|
||||
}
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
Core::Frontend::EmuWindow& emu_window, const Device& device)
|
||||
: RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
|
||||
disk_cache{system} {}
|
||||
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
|
||||
emu_window{emu_window}, device{device}, disk_cache{system} {}
|
||||
|
||||
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
|
||||
|
||||
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
PrecompiledShader shader;
|
||||
shader.program = std::move(program);
|
||||
shader.registry = std::move(registry);
|
||||
shader.entries = MakeEntries(ir);
|
||||
shader.entries = MakeEntries(device, ir, entry.type);
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
if (callback) {
|
||||
@@ -434,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
||||
return program;
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
|
||||
return last_shaders[static_cast<std::size_t>(program)];
|
||||
}
|
||||
@@ -444,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
|
||||
// Look up shader in the cache based on address
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
||||
Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
|
||||
if (shader) {
|
||||
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
|
||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||
}
|
||||
|
||||
@@ -466,30 +467,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
const ShaderParameters params{system, disk_cache, device,
|
||||
*cpu_addr, host_ptr, unique_identifier};
|
||||
|
||||
std::unique_ptr<Shader> shader;
|
||||
const auto found = runtime_cache.find(unique_identifier);
|
||||
if (found == runtime_cache.end()) {
|
||||
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
|
||||
std::move(code_b));
|
||||
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
|
||||
} else {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||
shader = Shader::CreateFromCache(params, found->second);
|
||||
}
|
||||
|
||||
Shader* const result = shader.get();
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
|
||||
} else {
|
||||
null_shader = shader;
|
||||
null_shader = std::move(shader);
|
||||
}
|
||||
|
||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||
return last_shaders[static_cast<std::size_t>(program)] = result;
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
auto& memory_manager{system.GPU().MemoryManager()};
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
||||
|
||||
auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||
if (kernel) {
|
||||
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
@@ -501,20 +501,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
const ShaderParameters params{system, disk_cache, device,
|
||||
*cpu_addr, host_ptr, unique_identifier};
|
||||
|
||||
std::unique_ptr<Shader> kernel;
|
||||
const auto found = runtime_cache.find(unique_identifier);
|
||||
if (found == runtime_cache.end()) {
|
||||
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
|
||||
kernel = Shader::CreateKernelFromMemory(params, std::move(code));
|
||||
} else {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||
kernel = Shader::CreateFromCache(params, found->second);
|
||||
}
|
||||
|
||||
Shader* const result = kernel.get();
|
||||
if (cpu_addr) {
|
||||
Register(kernel);
|
||||
Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
|
||||
} else {
|
||||
null_kernel = kernel;
|
||||
null_kernel = std::move(kernel);
|
||||
}
|
||||
return kernel;
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -18,12 +18,12 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
@@ -35,12 +35,10 @@ class EmuWindow;
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class CachedShader;
|
||||
class Device;
|
||||
class RasterizerOpenGL;
|
||||
struct UnspecializedShader;
|
||||
|
||||
using Shader = std::shared_ptr<CachedShader>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ProgramHandle {
|
||||
@@ -64,62 +62,53 @@ struct ShaderParameters {
|
||||
u64 unique_identifier;
|
||||
};
|
||||
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
class Shader final {
|
||||
public:
|
||||
~CachedShader();
|
||||
~Shader();
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
GLuint GetHandle() const;
|
||||
|
||||
/// Returns the size in bytes of the shader
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size_in_bytes;
|
||||
}
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const ShaderEntries& GetEntries() const {
|
||||
return entries;
|
||||
}
|
||||
|
||||
static Shader CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b);
|
||||
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code,
|
||||
ProgramCode program_code_b);
|
||||
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code);
|
||||
|
||||
static Shader CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader,
|
||||
std::size_t size_in_bytes);
|
||||
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader);
|
||||
|
||||
private:
|
||||
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||
ShaderEntries entries, ProgramSharedPtr program);
|
||||
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||
ProgramSharedPtr program);
|
||||
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
std::size_t size_in_bytes = 0;
|
||||
ProgramSharedPtr program;
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
||||
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
Core::Frontend::EmuWindow& emu_window, const Device& device);
|
||||
~ShaderCacheOpenGL() override;
|
||||
|
||||
/// Loads disk cache for the current game
|
||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
/// Gets the current specified shader stage program
|
||||
Shader GetStageProgram(Maxwell::ShaderProgram program);
|
||||
Shader* GetStageProgram(Maxwell::ShaderProgram program);
|
||||
|
||||
/// Gets a compute kernel in the passed address
|
||||
Shader GetComputeKernel(GPUVAddr code_addr);
|
||||
|
||||
protected:
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
Shader* GetComputeKernel(GPUVAddr code_addr);
|
||||
|
||||
private:
|
||||
ProgramSharedPtr GeneratePrecompiledProgram(
|
||||
@@ -132,10 +121,10 @@ private:
|
||||
ShaderDiskCacheOpenGL disk_cache;
|
||||
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
||||
|
||||
Shader null_shader{};
|
||||
Shader null_kernel{};
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
@@ -61,8 +61,8 @@ struct TextureDerivates {};
|
||||
using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
|
||||
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
||||
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
|
||||
|
||||
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
||||
#define ftou floatBitsToUint
|
||||
@@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||
}
|
||||
|
||||
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
|
||||
// We waste one UBO for emulation
|
||||
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
|
||||
return num_ubos > num_available_ubos;
|
||||
}
|
||||
|
||||
struct GenericVaryingDescription {
|
||||
std::string name;
|
||||
u8 first_element = 0;
|
||||
@@ -412,8 +419,9 @@ class GLSLDecompiler final {
|
||||
public:
|
||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
||||
: device{device}, ir{ir}, registry{registry}, stage{stage},
|
||||
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
|
||||
: device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
|
||||
suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
|
||||
UseUnifiedUniforms(device, ir, stage)} {
|
||||
if (stage != ShaderType::Compute) {
|
||||
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||
}
|
||||
@@ -618,7 +626,9 @@ private:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
|
||||
|
||||
if (stage != ShaderType::Geometry &&
|
||||
(stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
|
||||
if (ir.UsesLayer()) {
|
||||
code.AddLine("int gl_Layer;");
|
||||
}
|
||||
@@ -647,6 +657,16 @@ private:
|
||||
--code.scope;
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
|
||||
if (stage == ShaderType::Geometry) {
|
||||
if (ir.UsesLayer()) {
|
||||
code.AddLine("out int gl_Layer;");
|
||||
}
|
||||
if (ir.UsesViewportIndex()) {
|
||||
code.AddLine("out int gl_ViewportIndex;");
|
||||
}
|
||||
}
|
||||
code.AddNewLine();
|
||||
}
|
||||
|
||||
void DeclareRegisters() {
|
||||
@@ -834,12 +854,24 @@ private:
|
||||
}
|
||||
|
||||
void DeclareConstantBuffers() {
|
||||
if (use_unified_uniforms) {
|
||||
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
|
||||
static_cast<u32>(ir.GetGlobalMemory().size());
|
||||
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
|
||||
binding);
|
||||
code.AddLine(" uint cbufs[];");
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
return;
|
||||
}
|
||||
|
||||
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
for (const auto& buffers : ir.GetConstantBuffers()) {
|
||||
const auto index = buffers.first;
|
||||
for (const auto [index, info] : ir.GetConstantBuffers()) {
|
||||
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
|
||||
const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
|
||||
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
|
||||
GetConstBufferBlock(index));
|
||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
|
||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
}
|
||||
@@ -1038,42 +1070,51 @@ private:
|
||||
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
||||
const Node offset = cbuf->GetOffset();
|
||||
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
|
||||
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
// Direct access
|
||||
const u32 offset_imm = immediate->GetValue();
|
||||
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
|
||||
Type::Uint};
|
||||
} else {
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
Type::Uint};
|
||||
}
|
||||
}
|
||||
|
||||
// Indirect access
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
|
||||
Visit(offset).AsUint()),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
if (std::holds_alternative<OperationNode>(*offset)) {
|
||||
// Indirect access
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
|
||||
if (!device.HasComponentIndexingBug()) {
|
||||
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset, final_offset),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||
const std::string pack = code.GenerateTemporary();
|
||||
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset);
|
||||
|
||||
const std::string result = code.GenerateTemporary();
|
||||
code.AddLine("uint {};", result);
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
|
||||
pack, GetSwizzle(swizzle));
|
||||
}
|
||||
return {result, Type::Uint};
|
||||
if (!device.HasComponentIndexingBug()) {
|
||||
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset, final_offset),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||
const std::string pack = code.GenerateTemporary();
|
||||
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset);
|
||||
|
||||
const std::string result = code.GenerateTemporary();
|
||||
code.AddLine("uint {};", result);
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
|
||||
GetSwizzle(swizzle));
|
||||
}
|
||||
return {result, Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
@@ -2710,6 +2751,7 @@ private:
|
||||
const std::string_view identifier;
|
||||
const std::string_view suffix;
|
||||
const Header header;
|
||||
const bool use_unified_uniforms;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
ShaderWriter code;
|
||||
@@ -2905,7 +2947,7 @@ void GLSLDecompiler::DecompileAST() {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
ShaderEntries entries;
|
||||
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
||||
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
|
||||
@@ -2926,6 +2968,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
|
||||
}
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
@@ -53,11 +53,13 @@ struct ShaderEntries {
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
u32 clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
bool use_unified_uniforms{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
|
||||
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
|
||||
|
||||
namespace {
|
||||
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
@@ -37,18 +39,26 @@ struct ConstBufferKey {
|
||||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerKey {
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerKey {
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 20;
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||
u32 texture_handler_size_value;
|
||||
u32 num_keys;
|
||||
u32 num_bound_samplers;
|
||||
u32 num_separate_samplers;
|
||||
u32 num_bindless_samplers;
|
||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_separate_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||
return false;
|
||||
}
|
||||
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
||||
}
|
||||
|
||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
||||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
||||
flat_bound_samplers.size() ||
|
||||
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
|
||||
flat_separate_samplers.size() ||
|
||||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
||||
flat_bindless_samplers.size()) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& key : flat_keys) {
|
||||
keys.insert({{key.cbuf, key.offset}, key.value});
|
||||
for (const auto& entry : flat_keys) {
|
||||
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||
}
|
||||
for (const auto& key : flat_bound_samplers) {
|
||||
bound_samplers.emplace(key.offset, key.sampler);
|
||||
for (const auto& entry : flat_bound_samplers) {
|
||||
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||
}
|
||||
for (const auto& key : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
|
||||
for (const auto& entry : flat_separate_samplers) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||
key.offsets = {entry.offset1, entry.offset2};
|
||||
separate_samplers.emplace(key, entry.sampler);
|
||||
}
|
||||
for (const auto& entry : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||
return false;
|
||||
}
|
||||
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
||||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||
}
|
||||
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers;
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||
flat_bound_samplers.reserve(bound_samplers.size());
|
||||
for (const auto& [address, sampler] : bound_samplers) {
|
||||
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
|
||||
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers;
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||
flat_separate_samplers.reserve(separate_samplers.size());
|
||||
for (const auto& [key, sampler] : separate_samplers) {
|
||||
SeparateSamplerEntry entry;
|
||||
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||
entry.sampler = sampler;
|
||||
flat_separate_samplers.push_back(entry);
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||
for (const auto& [address, sampler] : bindless_samplers) {
|
||||
flat_bindless_samplers.push_back(
|
||||
BindlessSamplerKey{address.first, address.second, sampler});
|
||||
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||
}
|
||||
|
||||
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
||||
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
||||
flat_bound_samplers.size() &&
|
||||
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
|
||||
flat_separate_samplers.size() &&
|
||||
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
||||
flat_bindless_samplers.size();
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
|
||||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
|
||||
@@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
GLuint OGLStreamBuffer::GetHandle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
GLsizeiptr OGLStreamBuffer::GetSize() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
|
||||
@@ -17,9 +17,6 @@ public:
|
||||
bool use_persistent = true);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
GLsizeiptr GetSize() const;
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
@@ -32,6 +29,14 @@ public:
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
GLsizeiptr Size() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
|
||||
@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
|
||||
target = GetTextureTarget(params.target);
|
||||
texture = CreateTexture(params, target, internal_format, texture_buffer);
|
||||
DecorateSurfaceName();
|
||||
main_view = CreateViewInner(
|
||||
ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
|
||||
true);
|
||||
|
||||
u32 num_layers = 1;
|
||||
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
|
||||
num_layers = params.depth;
|
||||
}
|
||||
|
||||
main_view =
|
||||
CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
|
||||
}
|
||||
|
||||
CachedSurface::~CachedSurface() = default;
|
||||
@@ -404,8 +409,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
|
||||
|
||||
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
|
||||
bool is_proxy)
|
||||
: VideoCommon::ViewBase(params), surface{surface},
|
||||
format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
|
||||
: VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
|
||||
target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
|
||||
if (!is_proxy) {
|
||||
main_view = CreateTextureView();
|
||||
@@ -414,37 +418,40 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
||||
void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
|
||||
ASSERT(params.num_levels == 1);
|
||||
|
||||
if (params.num_layers > 1) {
|
||||
// Layered framebuffer attachments
|
||||
UNIMPLEMENTED_IF(params.base_layer != 0);
|
||||
|
||||
switch (params.target) {
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
glFramebufferTexture(target, attachment, GetTexture(), 0);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED();
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
if (params.num_layers > 1) {
|
||||
ASSERT(params.base_layer == 0);
|
||||
glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
|
||||
} else {
|
||||
glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
|
||||
params.base_level, params.base_layer);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (params.num_layers > 1) {
|
||||
UNIMPLEMENTED_IF(params.base_layer != 0);
|
||||
glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const GLenum view_target = surface.GetTarget();
|
||||
const GLuint texture = surface.GetTexture();
|
||||
switch (surface.GetSurfaceParams().target) {
|
||||
case SurfaceTarget::Texture1D:
|
||||
glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
|
||||
glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture2D:
|
||||
glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
|
||||
glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
|
||||
break;
|
||||
case SurfaceTarget::Texture1DArray:
|
||||
case SurfaceTarget::Texture2DArray:
|
||||
case SurfaceTarget::TextureCubemap:
|
||||
case SurfaceTarget::TextureCubeArray:
|
||||
glFramebufferTextureLayer(target, attachment, texture, params.base_level,
|
||||
glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
|
||||
params.base_layer);
|
||||
break;
|
||||
default:
|
||||
@@ -501,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||
OGLTextureView texture_view;
|
||||
texture_view.Create();
|
||||
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
|
||||
params.num_levels, params.base_layer, params.num_layers);
|
||||
if (target == GL_TEXTURE_3D) {
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||
params.base_level, params.num_levels, 0, 1);
|
||||
} else {
|
||||
glTextureView(texture_view.handle, target, surface.texture.handle, format,
|
||||
params.base_level, params.num_levels, params.base_layer, params.num_layers);
|
||||
}
|
||||
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
|
||||
|
||||
return texture_view;
|
||||
@@ -545,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
const auto& src_params{src_view->GetSurfaceParams()};
|
||||
const auto& dst_params{dst_view->GetSurfaceParams()};
|
||||
UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
|
||||
UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
|
||||
UNIMPLEMENTED_IF(src_params.depth != 1);
|
||||
UNIMPLEMENTED_IF(dst_params.depth != 1);
|
||||
|
||||
state_tracker.NotifyScissor0();
|
||||
state_tracker.NotifyFramebuffer();
|
||||
|
||||
@@ -80,8 +80,10 @@ public:
|
||||
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
|
||||
~CachedSurfaceView();
|
||||
|
||||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
|
||||
void Attach(GLenum attachment, GLenum target) const;
|
||||
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
|
||||
/// @param attachment Attachment to bind textures to
|
||||
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
|
||||
void Attach(GLenum attachment, GLenum fb_target) const;
|
||||
|
||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
|
||||
@@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_UNSIGNED_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::SignedInt:
|
||||
case Maxwell::VertexAttribute::Type::SignedNorm:
|
||||
switch (attrib.size) {
|
||||
@@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::Float:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16:
|
||||
@@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
||||
return GL_FLOAT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
@@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||
return GL_UNSIGNED_SHORT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::SignedScaled:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
@@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||
return GL_SHORT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
|
||||
return {};
|
||||
break;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
|
||||
attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
|
||||
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
@@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
case Maxwell::IndexFormat::UnsignedInt:
|
||||
return GL_UNSIGNED_INT;
|
||||
}
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
|
||||
UNREACHABLE();
|
||||
UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -180,10 +169,20 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
}
|
||||
|
||||
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||
Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
|
||||
Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
|
||||
switch (filter_mode) {
|
||||
case Tegra::Texture::TextureFilter::Linear: {
|
||||
switch (mip_filter_mode) {
|
||||
case Tegra::Texture::TextureFilter::Nearest:
|
||||
switch (mipmap_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return GL_NEAREST_MIPMAP_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return GL_NEAREST_MIPMAP_LINEAR;
|
||||
}
|
||||
break;
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
switch (mipmap_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_LINEAR;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
@@ -193,20 +192,9 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Tegra::Texture::TextureFilter::Nearest: {
|
||||
switch (mip_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return GL_NEAREST_MIPMAP_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return GL_NEAREST_MIPMAP_LINEAR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
|
||||
return GL_LINEAR;
|
||||
UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
|
||||
static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
|
||||
return GL_NEAREST;
|
||||
}
|
||||
|
||||
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
@@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
} else {
|
||||
return GL_MIRROR_CLAMP_TO_EDGE;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
|
||||
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||
@@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||
case Tegra::Texture::DepthCompareFunc::Always:
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
|
||||
static_cast<u32>(func));
|
||||
UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
|
||||
return GL_GREATER;
|
||||
}
|
||||
|
||||
@@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
|
||||
case Maxwell::Blend::Equation::MaxGL:
|
||||
return GL_MAX;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||
return GL_FUNC_ADD;
|
||||
}
|
||||
|
||||
@@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
|
||||
return GL_ONE_MINUS_CONSTANT_ALPHA;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||
return GL_ZERO;
|
||||
}
|
||||
|
||||
@@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
|
||||
case Tegra::Texture::SwizzleSource::OneFloat:
|
||||
return GL_ONE;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
|
||||
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
|
||||
return GL_ZERO;
|
||||
}
|
||||
|
||||
@@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||
case Maxwell::ComparisonOp::AlwaysOld:
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
|
||||
@@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
|
||||
case Maxwell::StencilOp::DecrWrapOGL:
|
||||
return GL_DECR_WRAP;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
|
||||
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
|
||||
return GL_KEEP;
|
||||
}
|
||||
|
||||
@@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
|
||||
case Maxwell::FrontFace::CounterClockWise:
|
||||
return GL_CCW;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
|
||||
UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
|
||||
return GL_CCW;
|
||||
}
|
||||
|
||||
@@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
|
||||
case Maxwell::CullFace::FrontAndBack:
|
||||
return GL_FRONT_AND_BACK;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||
return GL_BACK;
|
||||
}
|
||||
|
||||
@@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
|
||||
case Maxwell::LogicOperation::Set:
|
||||
return GL_SET;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
|
||||
UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
|
||||
return GL_COPY;
|
||||
}
|
||||
|
||||
|
||||
@@ -751,8 +751,9 @@ void RendererOpenGL::RenderScreenshot() {
|
||||
}
|
||||
|
||||
bool RendererOpenGL::Init() {
|
||||
if (GLAD_GL_KHR_debug) {
|
||||
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
|
||||
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
|
||||
|
||||
u32 packed_front_face = PackFrontFace(regs.front_face);
|
||||
if (regs.screen_y_control.triangle_rast_flip != 0 &&
|
||||
regs.viewport_transform[0].scale_y > 0.0f) {
|
||||
if (regs.screen_y_control.triangle_rast_flip != 0) {
|
||||
// Flip front face
|
||||
packed_front_face = 1 - packed_front_face;
|
||||
}
|
||||
|
||||
@@ -21,29 +21,29 @@ namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
|
||||
switch (filter) {
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
return VK_FILTER_LINEAR;
|
||||
case Tegra::Texture::TextureFilter::Nearest:
|
||||
return VK_FILTER_NEAREST;
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
return VK_FILTER_LINEAR;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
|
||||
UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
|
||||
switch (mipmap_filter) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
// TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
|
||||
// (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
|
||||
// use an image view with a single mipmap level to emulate this.
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
// There are no Vulkan filter modes that directly correspond to OpenGL minification filters
|
||||
// of GL_LINEAR or GL_NEAREST, but they can be emulated using
|
||||
// VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
|
||||
// VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
|
||||
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||
UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
|
||||
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
||||
UNIMPLEMENTED();
|
||||
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
||||
@@ -149,7 +148,7 @@ struct FormatTuple {
|
||||
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
|
||||
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
|
||||
{VK_FORMAT_UNDEFINED}, // R16S
|
||||
{VK_FORMAT_UNDEFINED}, // R16UI
|
||||
{VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
|
||||
{VK_FORMAT_UNDEFINED}, // R16I
|
||||
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
|
||||
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
|
||||
@@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
||||
|
||||
@@ -37,8 +37,8 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VAddr cpu_addr, std::size_t size)
|
||||
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
||||
std::size_t size)
|
||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
VkBufferCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
@@ -54,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
|
||||
buffer.commit = memory_manager.Commit(buffer.handle, false);
|
||||
}
|
||||
|
||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
@@ -67,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
|
||||
}
|
||||
|
||||
VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
|
||||
return buffer->GetHandle();
|
||||
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
|
||||
}
|
||||
|
||||
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||
@@ -91,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||
std::memcpy(staging.commit->Map(size), data, size);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
|
||||
|
||||
@@ -114,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
u8* data) {
|
||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
@@ -141,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) {
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
|
||||
dst_offset, size](vk::CommandBuffer cmdbuf) {
|
||||
scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
||||
|
||||
std::array<VkBufferMemoryBarrier, 2> barriers;
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
@@ -24,13 +23,13 @@ class VKDevice;
|
||||
class VKMemoryManager;
|
||||
class VKScheduler;
|
||||
|
||||
class CachedBufferBlock final : public VideoCommon::BufferBlock {
|
||||
class Buffer final : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VAddr cpu_addr, std::size_t size);
|
||||
~CachedBufferBlock();
|
||||
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
||||
std::size_t size);
|
||||
~Buffer();
|
||||
|
||||
VkBuffer GetHandle() const {
|
||||
VkBuffer Handle() const {
|
||||
return *buffer.handle;
|
||||
}
|
||||
|
||||
@@ -38,8 +37,6 @@ private:
|
||||
VKBuffer buffer;
|
||||
};
|
||||
|
||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
||||
|
||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||
public:
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
@@ -50,9 +47,7 @@ public:
|
||||
VkBuffer GetEmptyBuffer(std::size_t size) override;
|
||||
|
||||
protected:
|
||||
VkBuffer ToHandle(const Buffer& buffer) override;
|
||||
|
||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) override;
|
||||
|
||||
@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
|
||||
};
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
|
||||
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo ci;
|
||||
|
||||
@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
|
||||
|
||||
VkDescriptorPoolCreateInfo ci;
|
||||
|
||||
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
|
||||
|
||||
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
||||
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
|
||||
static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_UINT_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
||||
VK_FORMAT_B5G6R5_UNORM_PACK16,
|
||||
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
VK_FORMAT_R32G32B32A32_UINT,
|
||||
VK_FORMAT_R32G32_SFLOAT,
|
||||
VK_FORMAT_R32G32_UINT,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16_UNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16_SFLOAT,
|
||||
VK_FORMAT_R16_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_SRGB,
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
VK_FORMAT_R8G8_SNORM,
|
||||
VK_FORMAT_R8G8_UINT,
|
||||
VK_FORMAT_R8_UNORM,
|
||||
VK_FORMAT_R8_UINT,
|
||||
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
|
||||
VK_FORMAT_R32_SFLOAT,
|
||||
VK_FORMAT_R32_UINT,
|
||||
VK_FORMAT_R32_SINT,
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
|
||||
VK_FORMAT_BC2_UNORM_BLOCK,
|
||||
VK_FORMAT_BC3_UNORM_BLOCK,
|
||||
VK_FORMAT_BC4_UNORM_BLOCK,
|
||||
VK_FORMAT_BC5_UNORM_BLOCK,
|
||||
VK_FORMAT_BC5_SNORM_BLOCK,
|
||||
VK_FORMAT_BC7_UNORM_BLOCK,
|
||||
VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
||||
VK_FORMAT_BC6H_SFLOAT_BLOCK,
|
||||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
|
||||
VK_FORMAT_BC2_SRGB_BLOCK,
|
||||
VK_FORMAT_BC3_SRGB_BLOCK,
|
||||
VK_FORMAT_BC7_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
|
||||
static constexpr std::array formats{
|
||||
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_UINT_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
|
||||
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
|
||||
VK_FORMAT_B5G6R5_UNORM_PACK16,
|
||||
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
VK_FORMAT_R32G32B32A32_UINT,
|
||||
VK_FORMAT_R32G32_SFLOAT,
|
||||
VK_FORMAT_R32G32_UINT,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16_UNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16_SFLOAT,
|
||||
VK_FORMAT_R16_UNORM,
|
||||
VK_FORMAT_R16_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SRGB,
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
VK_FORMAT_R8G8_SNORM,
|
||||
VK_FORMAT_R8G8_UINT,
|
||||
VK_FORMAT_R8_UNORM,
|
||||
VK_FORMAT_R8_UINT,
|
||||
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
|
||||
VK_FORMAT_R32_SFLOAT,
|
||||
VK_FORMAT_R32_UINT,
|
||||
VK_FORMAT_R32_SINT,
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
VK_FORMAT_D16_UNORM_S8_UINT,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
|
||||
VK_FORMAT_BC2_UNORM_BLOCK,
|
||||
VK_FORMAT_BC3_UNORM_BLOCK,
|
||||
VK_FORMAT_BC4_UNORM_BLOCK,
|
||||
VK_FORMAT_BC5_UNORM_BLOCK,
|
||||
VK_FORMAT_BC5_SNORM_BLOCK,
|
||||
VK_FORMAT_BC7_UNORM_BLOCK,
|
||||
VK_FORMAT_BC6H_UFLOAT_BLOCK,
|
||||
VK_FORMAT_BC6H_SFLOAT_BLOCK,
|
||||
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
|
||||
VK_FORMAT_BC2_SRGB_BLOCK,
|
||||
VK_FORMAT_BC3_SRGB_BLOCK,
|
||||
VK_FORMAT_BC7_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
|
||||
};
|
||||
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
|
||||
for (const auto format : formats) {
|
||||
format_properties.emplace(format, physical.GetFormatProperties(format));
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/shader/compiler_settings.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
|
||||
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
|
||||
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
|
||||
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
|
||||
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
|
||||
@@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
|
||||
u32 binding = base_binding;
|
||||
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
|
||||
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
|
||||
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
|
||||
AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
|
||||
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
|
||||
AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
|
||||
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
|
||||
return binding;
|
||||
}
|
||||
@@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
|
||||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
||||
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
||||
u32 main_offset)
|
||||
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
||||
Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
|
||||
: gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
||||
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
|
||||
compiler_settings, registry},
|
||||
entries{GenerateShaderEntries(shader_ir)} {}
|
||||
|
||||
CachedShader::~CachedShader() = default;
|
||||
Shader::~Shader() = default;
|
||||
|
||||
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
|
||||
Core::System& system, Tegra::Engines::ShaderType stage) {
|
||||
if (stage == Tegra::Engines::ShaderType::Compute) {
|
||||
Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
|
||||
Tegra::Engines::ShaderType stage) {
|
||||
if (stage == ShaderType::Compute) {
|
||||
return system.GPU().KeplerCompute();
|
||||
} else {
|
||||
return system.GPU().Maxwell3D();
|
||||
@@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
|
||||
VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
VKRenderPassCache& renderpass_cache)
|
||||
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
|
||||
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
|
||||
renderpass_cache{renderpass_cache} {}
|
||||
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
|
||||
scheduler{scheduler}, descriptor_pool{descriptor_pool},
|
||||
update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
|
||||
|
||||
VKPipelineCache::~VKPipelineCache() = default;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
||||
@@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
||||
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
||||
if (!shader) {
|
||||
|
||||
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||
if (!result) {
|
||||
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
||||
|
||||
// No shader found - create a new one
|
||||
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
|
||||
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
|
||||
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
|
||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
|
||||
stage_offset);
|
||||
result = shader.get();
|
||||
|
||||
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
|
||||
std::move(code), stage_offset);
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader), *cpu_addr, size_in_bytes);
|
||||
} else {
|
||||
null_shader = shader;
|
||||
null_shader = std::move(shader);
|
||||
}
|
||||
}
|
||||
shaders[index] = std::move(shader);
|
||||
shaders[index] = result;
|
||||
}
|
||||
return last_shaders = shaders;
|
||||
}
|
||||
@@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
|
||||
if (!shader) {
|
||||
// No shader found - create a new one
|
||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||
|
||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
|
||||
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
||||
program_addr, *cpu_addr, std::move(code),
|
||||
KERNEL_MAIN_OFFSET);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
|
||||
std::move(code), KERNEL_MAIN_OFFSET);
|
||||
shader = shader_info.get();
|
||||
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader_info), *cpu_addr, size_in_bytes);
|
||||
} else {
|
||||
null_kernel = shader;
|
||||
null_kernel = std::move(shader_info);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||
return *entry;
|
||||
}
|
||||
|
||||
void VKPipelineCache::Unregister(const Shader& shader) {
|
||||
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
|
||||
bool finished = false;
|
||||
const auto Finish = [&] {
|
||||
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
|
||||
@@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
|
||||
Finish();
|
||||
it = compute_cache.erase(it);
|
||||
}
|
||||
|
||||
RasterizerCache::Unregister(shader);
|
||||
}
|
||||
|
||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
|
||||
@@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||
ASSERT(point_size != 0.0f);
|
||||
}
|
||||
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
||||
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
|
||||
const auto& attribute = fixed_state.vertex_input.attributes[i];
|
||||
specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
|
||||
specialization.attribute_types[i] = attribute.Type();
|
||||
}
|
||||
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
||||
|
||||
@@ -328,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||
}
|
||||
|
||||
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
||||
ASSERT(shader);
|
||||
const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
||||
const auto program_type = GetShaderType(program_enum);
|
||||
const ShaderType program_type = GetShaderType(program_enum);
|
||||
const auto& entries = shader->GetEntries();
|
||||
program[stage] = {
|
||||
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
|
||||
@@ -375,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
|
||||
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
|
||||
// crash.
|
||||
if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
|
||||
descriptor_type == STORAGE_TEXEL_BUFFER) {
|
||||
// Nvidia has a bug where updating multiple texels at once causes the driver to crash.
|
||||
// Note: Fixed in driver Windows 443.24, Linux 440.66.15
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
|
||||
entry.dstBinding = binding + i;
|
||||
entry.dstArrayElement = 0;
|
||||
entry.descriptorCount = 1;
|
||||
entry.descriptorType = descriptor_type;
|
||||
entry.offset = offset + i * entry_size;
|
||||
entry.offset = static_cast<std::size_t>(offset + i * entry_size);
|
||||
entry.stride = entry_size;
|
||||
}
|
||||
} else if (count > 0) {
|
||||
@@ -405,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries(
|
||||
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
|
||||
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
|
||||
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
|
||||
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
|
||||
AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
|
||||
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
|
||||
AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
|
||||
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
@@ -26,6 +25,7 @@
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
@@ -41,8 +41,6 @@ class VKFence;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
class CachedShader;
|
||||
using Shader = std::shared_ptr<CachedShader>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
class Shader {
|
||||
public:
|
||||
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
|
||||
u32 main_offset);
|
||||
~CachedShader();
|
||||
explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
|
||||
~Shader();
|
||||
|
||||
GPUVAddr GetGpuAddr() const {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return program_code.size() * sizeof(u64);
|
||||
}
|
||||
|
||||
VideoCommon::Shader::ShaderIR& GetIR() {
|
||||
return shader_ir;
|
||||
}
|
||||
@@ -144,25 +137,23 @@ private:
|
||||
ShaderEntries entries;
|
||||
};
|
||||
|
||||
class VKPipelineCache final : public RasterizerCache<Shader> {
|
||||
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
|
||||
const VKDevice& device, VKScheduler& scheduler,
|
||||
VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
VKRenderPassCache& renderpass_cache);
|
||||
~VKPipelineCache();
|
||||
~VKPipelineCache() override;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
|
||||
|
||||
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
|
||||
|
||||
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
|
||||
|
||||
protected:
|
||||
void Unregister(const Shader& shader) override;
|
||||
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
void OnShaderRemoval(Shader* shader) final;
|
||||
|
||||
private:
|
||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
|
||||
@@ -175,10 +166,10 @@ private:
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
VKRenderPassCache& renderpass_cache;
|
||||
|
||||
Shader null_shader{};
|
||||
Shader null_kernel{};
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||
|
||||
GraphicsPipelineCacheKey last_graphics_key;
|
||||
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
|
||||
}
|
||||
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
|
||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
|
||||
for (std::size_t i = 0; i < std::size(addresses); ++i) {
|
||||
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
|
||||
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
std::size_t stage, std::size_t index = 0) {
|
||||
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(tex_handle);
|
||||
@@ -468,8 +480,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||
const auto& entries = pipeline.GetEntries();
|
||||
SetupComputeConstBuffers(entries);
|
||||
SetupComputeGlobalBuffers(entries);
|
||||
SetupComputeTexelBuffers(entries);
|
||||
SetupComputeUniformTexels(entries);
|
||||
SetupComputeTextures(entries);
|
||||
SetupComputeStorageTexels(entries);
|
||||
SetupComputeImages(entries);
|
||||
|
||||
buffer_cache.Unmap();
|
||||
@@ -715,7 +728,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
|
||||
if (!view) {
|
||||
return false;
|
||||
}
|
||||
key.views.push_back(view->GetHandle());
|
||||
key.views.push_back(view->GetAttachment());
|
||||
key.width = std::min(key.width, view->GetWidth());
|
||||
key.height = std::min(key.height, view->GetHeight());
|
||||
key.layers = std::min(key.layers, view->GetNumLayers());
|
||||
@@ -775,20 +788,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupShaderDescriptors(
|
||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||
texture_cache.GuardSamplers(true);
|
||||
|
||||
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
// Skip VertexA stage
|
||||
const auto& shader = shaders[stage + 1];
|
||||
Shader* const shader = shaders[stage + 1];
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
const auto& entries = shader->GetEntries();
|
||||
SetupGraphicsConstBuffers(entries, stage);
|
||||
SetupGraphicsGlobalBuffers(entries, stage);
|
||||
SetupGraphicsTexelBuffers(entries, stage);
|
||||
SetupGraphicsUniformTexels(entries, stage);
|
||||
SetupGraphicsTextures(entries, stage);
|
||||
SetupGraphicsStorageTexels(entries, stage);
|
||||
SetupGraphicsImages(entries, stage);
|
||||
}
|
||||
texture_cache.GuardSamplers(false);
|
||||
@@ -838,6 +852,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||
@@ -866,6 +884,9 @@ void RasterizerVulkan::EndTransformFeedback() {
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
if (!device.IsExtTransformFeedbackSupported()) {
|
||||
return;
|
||||
}
|
||||
|
||||
scheduler.Record(
|
||||
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
|
||||
@@ -877,14 +898,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||
const auto& attrib = regs.vertex_attrib_format[index];
|
||||
if (!attrib.IsValid()) {
|
||||
if (attrib.IsConstant()) {
|
||||
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
|
||||
continue;
|
||||
}
|
||||
|
||||
[[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
|
||||
ASSERT(buffer.IsEnabled());
|
||||
|
||||
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
|
||||
attrib.size.Value());
|
||||
}
|
||||
@@ -980,12 +997,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
|
||||
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
for (const auto& entry : entries.texel_buffers) {
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||
SetupTexelBuffer(image, entry);
|
||||
SetupUniformTexels(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1000,6 +1017,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, stage).tic;
|
||||
SetupStorageTexel(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
@@ -1032,12 +1058,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
|
||||
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
for (const auto& entry : entries.texel_buffers) {
|
||||
for (const auto& entry : entries.uniform_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||
SetupTexelBuffer(image, entry);
|
||||
SetupUniformTexels(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1052,6 +1078,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Textures);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
for (const auto& entry : entries.storage_texels) {
|
||||
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
|
||||
SetupStorageTexel(image, entry);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Images);
|
||||
const auto& gpu = system.GPU().KeplerCompute();
|
||||
@@ -1101,8 +1136,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
|
||||
update_descriptor_queue.AddBuffer(buffer, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
|
||||
const TexelBufferEntry& entry) {
|
||||
void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
|
||||
const UniformTexelEntry& entry) {
|
||||
const auto view = texture_cache.GetTextureSurface(tic, entry);
|
||||
ASSERT(view->IsBufferView());
|
||||
|
||||
@@ -1114,8 +1149,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
|
||||
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
||||
ASSERT(!view->IsBufferView());
|
||||
|
||||
const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
const auto sampler = sampler_cache.GetSampler(texture.tsc);
|
||||
update_descriptor_queue.AddSampledImage(sampler, image_view);
|
||||
|
||||
@@ -1124,6 +1159,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
|
||||
sampled_views.push_back(ImageView{std::move(view), image_layout});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
|
||||
const StorageTexelEntry& entry) {
|
||||
const auto view = texture_cache.GetImageSurface(tic, entry);
|
||||
ASSERT(view->IsBufferView());
|
||||
|
||||
update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
|
||||
auto view = texture_cache.GetImageSurface(tic, entry);
|
||||
|
||||
@@ -1133,7 +1176,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
|
||||
|
||||
UNIMPLEMENTED_IF(tic.IsBuffer());
|
||||
|
||||
const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
const VkImageView image_view =
|
||||
view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
update_descriptor_queue.AddImage(image_view);
|
||||
|
||||
const auto image_layout = update_descriptor_queue.GetLastImageLayout();
|
||||
|
||||
@@ -168,7 +168,7 @@ private:
|
||||
bool is_indexed, bool is_instanced);
|
||||
|
||||
/// Setup descriptors in the graphics pipeline.
|
||||
void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||
|
||||
void SetupImageTransitions(Texceptions texceptions,
|
||||
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
|
||||
@@ -193,12 +193,15 @@ private:
|
||||
/// Setup global buffers in the graphics pipeline.
|
||||
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup texel buffers in the graphics pipeline.
|
||||
void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
|
||||
/// Setup uniform texels in the graphics pipeline.
|
||||
void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup textures in the graphics pipeline.
|
||||
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup storage texels in the graphics pipeline.
|
||||
void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
/// Setup images in the graphics pipeline.
|
||||
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
|
||||
|
||||
@@ -209,11 +212,14 @@ private:
|
||||
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
|
||||
|
||||
/// Setup texel buffers in the compute pipeline.
|
||||
void SetupComputeTexelBuffers(const ShaderEntries& entries);
|
||||
void SetupComputeUniformTexels(const ShaderEntries& entries);
|
||||
|
||||
/// Setup textures in the compute pipeline.
|
||||
void SetupComputeTextures(const ShaderEntries& entries);
|
||||
|
||||
/// Setup storage texels in the compute pipeline.
|
||||
void SetupComputeStorageTexels(const ShaderEntries& entries);
|
||||
|
||||
/// Setup images in the compute pipeline.
|
||||
void SetupComputeImages(const ShaderEntries& entries);
|
||||
|
||||
@@ -222,10 +228,12 @@ private:
|
||||
|
||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||
|
||||
void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
|
||||
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
|
||||
|
||||
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
|
||||
|
||||
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
|
||||
|
||||
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
using Tegra::Texture::TextureMipmapFilter;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
|
||||
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
|
||||
ci.compareEnable = tsc.depth_compare_enabled;
|
||||
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
|
||||
ci.minLod = tsc.GetMinLod();
|
||||
ci.maxLod = tsc.GetMaxLod();
|
||||
ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
|
||||
ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
|
||||
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
|
||||
ci.unnormalizedCoordinates = VK_FALSE;
|
||||
return device.GetLogical().CreateSampler(ci);
|
||||
|
||||
@@ -400,8 +400,9 @@ private:
|
||||
u32 binding = specialization.base_binding;
|
||||
binding = DeclareConstantBuffers(binding);
|
||||
binding = DeclareGlobalBuffers(binding);
|
||||
binding = DeclareTexelBuffers(binding);
|
||||
binding = DeclareUniformTexels(binding);
|
||||
binding = DeclareSamplers(binding);
|
||||
binding = DeclareStorageTexels(binding);
|
||||
binding = DeclareImages(binding);
|
||||
|
||||
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
|
||||
@@ -741,8 +742,10 @@ private:
|
||||
if (!IsGenericAttribute(index)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 location = GetGenericAttributeLocation(index);
|
||||
if (!IsAttributeEnabled(location)) {
|
||||
continue;
|
||||
}
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
Id type;
|
||||
if (IsInputAttributeArray()) {
|
||||
@@ -887,7 +890,7 @@ private:
|
||||
return binding;
|
||||
}
|
||||
|
||||
u32 DeclareTexelBuffers(u32 binding) {
|
||||
u32 DeclareUniformTexels(u32 binding) {
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
if (!sampler.is_buffer) {
|
||||
continue;
|
||||
@@ -908,7 +911,7 @@ private:
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
|
||||
texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
|
||||
uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
|
||||
}
|
||||
return binding;
|
||||
}
|
||||
@@ -943,31 +946,48 @@ private:
|
||||
return binding;
|
||||
}
|
||||
|
||||
u32 DeclareImages(u32 binding) {
|
||||
u32 DeclareStorageTexels(u32 binding) {
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
const auto [dim, arrayed] = GetImageDim(image);
|
||||
constexpr int depth = 0;
|
||||
constexpr bool ms = false;
|
||||
constexpr int sampled = 2; // This won't be accessed with a sampler
|
||||
constexpr auto format = spv::ImageFormat::Unknown;
|
||||
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
||||
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
||||
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
||||
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
if (image.is_read && !image.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
} else if (image.is_written && !image.is_read) {
|
||||
Decorate(id, spv::Decoration::NonReadable);
|
||||
if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
images.emplace(image.index, StorageImage{image_type, id});
|
||||
DeclareImage(image, binding);
|
||||
}
|
||||
return binding;
|
||||
}
|
||||
|
||||
u32 DeclareImages(u32 binding) {
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
|
||||
continue;
|
||||
}
|
||||
DeclareImage(image, binding);
|
||||
}
|
||||
return binding;
|
||||
}
|
||||
|
||||
void DeclareImage(const Image& image, u32& binding) {
|
||||
const auto [dim, arrayed] = GetImageDim(image);
|
||||
constexpr int depth = 0;
|
||||
constexpr bool ms = false;
|
||||
constexpr int sampled = 2; // This won't be accessed with a sampler
|
||||
const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
|
||||
const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
|
||||
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
|
||||
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
|
||||
AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
|
||||
|
||||
Decorate(id, spv::Decoration::Binding, binding++);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
|
||||
if (image.is_read && !image.is_written) {
|
||||
Decorate(id, spv::Decoration::NonWritable);
|
||||
} else if (image.is_written && !image.is_read) {
|
||||
Decorate(id, spv::Decoration::NonReadable);
|
||||
}
|
||||
|
||||
images.emplace(image.index, StorageImage{image_type, id});
|
||||
}
|
||||
|
||||
bool IsRenderTargetEnabled(u32 rt) const {
|
||||
for (u32 component = 0; component < 4; ++component) {
|
||||
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
|
||||
@@ -986,6 +1006,10 @@ private:
|
||||
return stage == ShaderType::TesselationControl;
|
||||
}
|
||||
|
||||
bool IsAttributeEnabled(u32 location) const {
|
||||
return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
|
||||
}
|
||||
|
||||
u32 GetNumInputVertices() const {
|
||||
switch (stage) {
|
||||
case ShaderType::Geometry:
|
||||
@@ -1201,16 +1225,20 @@ private:
|
||||
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
|
||||
return {v_float_zero, Type::Float};
|
||||
default:
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
const u32 location = GetGenericAttributeLocation(attribute);
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
const Type type = type_descriptor.type;
|
||||
const Id attribute_id = input_attributes.at(attribute);
|
||||
const std::vector elements = {element};
|
||||
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
||||
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
||||
if (!IsGenericAttribute(attribute)) {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
const u32 location = GetGenericAttributeLocation(attribute);
|
||||
if (!IsAttributeEnabled(location)) {
|
||||
// Disabled attributes (also known as constant attributes) always return zero.
|
||||
return {v_float_zero, Type::Float};
|
||||
}
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
const Type type = type_descriptor.type;
|
||||
const Id attribute_id = input_attributes.at(attribute);
|
||||
const std::vector elements = {element};
|
||||
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
||||
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
|
||||
return {v_float_zero, Type::Float};
|
||||
@@ -1246,7 +1274,7 @@ private:
|
||||
} else {
|
||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||
}
|
||||
pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
|
||||
pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
|
||||
buffer_element);
|
||||
}
|
||||
return {OpLoad(t_float, pointer), Type::Float};
|
||||
@@ -1601,7 +1629,7 @@ private:
|
||||
|
||||
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
|
||||
const Id carry = OpCompositeExtract(t_uint, result, 1);
|
||||
return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
|
||||
return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
|
||||
}
|
||||
|
||||
Expression LogicalAssign(Operation operation) {
|
||||
@@ -1664,7 +1692,7 @@ private:
|
||||
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
|
||||
const u32 index = meta.sampler.index;
|
||||
if (meta.sampler.is_buffer) {
|
||||
const auto& entry = texel_buffers.at(index);
|
||||
const auto& entry = uniform_texels.at(index);
|
||||
return OpLoad(entry.image_type, entry.image);
|
||||
} else {
|
||||
const auto& entry = sampled_images.at(index);
|
||||
@@ -1941,39 +1969,20 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageAdd(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||
Expression AtomicImage(Operation operation) {
|
||||
const auto& meta{std::get<MetaImage>(operation.GetMeta())};
|
||||
ASSERT(meta.values.size() == 1);
|
||||
|
||||
Expression AtomicImageMin(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
const Id coordinate = GetCoordinates(operation, Type::Int);
|
||||
const Id image = images.at(meta.image.index).image;
|
||||
const Id sample = v_uint_zero;
|
||||
const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
|
||||
|
||||
Expression AtomicImageMax(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageAnd(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageOr(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageXor(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression AtomicImageExchange(Operation operation) {
|
||||
UNIMPLEMENTED();
|
||||
return {};
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = v_uint_zero;
|
||||
const Id value = AsUint(Visit(meta.values[0]));
|
||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||
}
|
||||
|
||||
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
|
||||
@@ -1988,7 +1997,7 @@ private:
|
||||
return {v_float_zero, Type::Float};
|
||||
}
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = Constant(t_uint, 0);
|
||||
const Id semantics = v_uint_zero;
|
||||
const Id value = AsUint(Visit(operation[1]));
|
||||
|
||||
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
|
||||
@@ -2612,11 +2621,11 @@ private:
|
||||
|
||||
&SPIRVDecompiler::ImageLoad,
|
||||
&SPIRVDecompiler::ImageStore,
|
||||
&SPIRVDecompiler::AtomicImageAdd,
|
||||
&SPIRVDecompiler::AtomicImageAnd,
|
||||
&SPIRVDecompiler::AtomicImageOr,
|
||||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
|
||||
&SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
|
||||
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
|
||||
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
|
||||
@@ -2758,8 +2767,11 @@ private:
|
||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||
|
||||
const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
|
||||
|
||||
const Id v_float_zero = Constant(t_float, 0.0f);
|
||||
const Id v_float_one = Constant(t_float, 1.0f);
|
||||
const Id v_uint_zero = Constant(t_uint, 0);
|
||||
|
||||
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
|
||||
const Id v_varying_default =
|
||||
@@ -2784,15 +2796,16 @@ private:
|
||||
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
|
||||
std::map<u32, Id> constant_buffers;
|
||||
std::map<GlobalMemoryBase, Id> global_buffers;
|
||||
std::map<u32, TexelBuffer> texel_buffers;
|
||||
std::map<u32, TexelBuffer> uniform_texels;
|
||||
std::map<u32, SampledImage> sampled_images;
|
||||
std::map<u32, TexelBuffer> storage_texels;
|
||||
std::map<u32, StorageImage> images;
|
||||
|
||||
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
||||
Id instance_index{};
|
||||
Id vertex_index{};
|
||||
Id base_instance{};
|
||||
Id base_vertex{};
|
||||
std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
|
||||
Id frag_depth{};
|
||||
Id frag_coord{};
|
||||
Id front_facing{};
|
||||
@@ -3048,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
}
|
||||
for (const auto& sampler : ir.GetSamplers()) {
|
||||
if (sampler.is_buffer) {
|
||||
entries.texel_buffers.emplace_back(sampler);
|
||||
entries.uniform_texels.emplace_back(sampler);
|
||||
} else {
|
||||
entries.samplers.emplace_back(sampler);
|
||||
}
|
||||
}
|
||||
for (const auto& image : ir.GetImages()) {
|
||||
entries.images.emplace_back(image);
|
||||
if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
|
||||
entries.storage_texels.emplace_back(image);
|
||||
} else {
|
||||
entries.images.emplace_back(image);
|
||||
}
|
||||
}
|
||||
for (const auto& attribute : ir.GetInputAttributes()) {
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
|
||||
@@ -21,8 +21,9 @@ class VKDevice;
|
||||
namespace Vulkan {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using TexelBufferEntry = VideoCommon::Shader::Sampler;
|
||||
using UniformTexelEntry = VideoCommon::Shader::Sampler;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using StorageTexelEntry = VideoCommon::Shader::Image;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
|
||||
constexpr u32 DESCRIPTOR_SET = 0;
|
||||
@@ -66,13 +67,15 @@ private:
|
||||
struct ShaderEntries {
|
||||
u32 NumBindings() const {
|
||||
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
|
||||
texel_buffers.size() + samplers.size() + images.size());
|
||||
uniform_texels.size() + samplers.size() + storage_texels.size() +
|
||||
images.size());
|
||||
}
|
||||
|
||||
std::vector<ConstBufferEntry> const_buffers;
|
||||
std::vector<GlobalBufferEntry> global_buffers;
|
||||
std::vector<TexelBufferEntry> texel_buffers;
|
||||
std::vector<UniformTexelEntry> uniform_texels;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<StorageTexelEntry> storage_texels;
|
||||
std::vector<ImageEntry> images;
|
||||
std::set<u32> attributes;
|
||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||
@@ -88,7 +91,8 @@ struct Specialization final {
|
||||
u32 shared_memory_size{};
|
||||
|
||||
// Graphics specific
|
||||
std::optional<float> point_size{};
|
||||
std::optional<float> point_size;
|
||||
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
|
||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||
bool ndc_minus_one_to_one{};
|
||||
};
|
||||
|
||||
@@ -35,7 +35,7 @@ public:
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
void Unmap(u64 size);
|
||||
|
||||
VkBuffer GetHandle() const {
|
||||
VkBuffer Handle() const {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
|
||||
@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
|
||||
ci.pNext = nullptr;
|
||||
ci.flags = 0;
|
||||
ci.size = static_cast<VkDeviceSize>(host_memory_size);
|
||||
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
ci.queueFamilyIndexCount = 0;
|
||||
ci.pQueueFamilyIndices = nullptr;
|
||||
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
|
||||
ci.extent = {params.width, params.height, 1};
|
||||
break;
|
||||
case SurfaceTarget::Texture3D:
|
||||
ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||
ci.extent = {params.width, params.height, params.depth};
|
||||
break;
|
||||
case SurfaceTarget::TextureBuffer:
|
||||
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
|
||||
return ci;
|
||||
}
|
||||
|
||||
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
|
||||
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
|
||||
}
|
||||
|
||||
// TODO(Rodrigo): Move this to a virtual function.
|
||||
main_view = CreateViewInner(
|
||||
ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
|
||||
true);
|
||||
u32 num_layers = 1;
|
||||
if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
|
||||
num_layers = params.depth;
|
||||
}
|
||||
main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
|
||||
}
|
||||
|
||||
CachedSurface::~CachedSurface() = default;
|
||||
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
|
||||
}
|
||||
|
||||
View CachedSurface::CreateView(const ViewParams& params) {
|
||||
return CreateViewInner(params, false);
|
||||
}
|
||||
|
||||
View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
|
||||
// TODO(Rodrigo): Add name decorations
|
||||
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
|
||||
return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
|
||||
}
|
||||
|
||||
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
|
||||
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
|
||||
}
|
||||
|
||||
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
|
||||
const ViewParams& params, bool is_proxy)
|
||||
const ViewParams& params)
|
||||
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
|
||||
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
|
||||
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
|
||||
base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
|
||||
num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
|
||||
: VK_IMAGE_VIEW_TYPE_1D} {}
|
||||
base_level{params.base_level}, num_levels{params.num_levels},
|
||||
image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
|
||||
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
base_layer = 0;
|
||||
num_layers = 1;
|
||||
base_slice = params.base_layer;
|
||||
num_slices = params.num_layers;
|
||||
} else {
|
||||
base_layer = params.base_layer;
|
||||
num_layers = params.num_layers;
|
||||
}
|
||||
}
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (last_image_view && last_swizzle == new_swizzle) {
|
||||
return last_image_view;
|
||||
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
});
|
||||
}
|
||||
|
||||
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
ASSERT(base_slice == 0);
|
||||
ASSERT(num_slices == params.depth);
|
||||
}
|
||||
|
||||
VkImageViewCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
ci.pNext = nullptr;
|
||||
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
return last_image_view = *image_view;
|
||||
}
|
||||
|
||||
VkImageView CachedSurfaceView::GetAttachment() {
|
||||
if (render_target) {
|
||||
return *render_target;
|
||||
}
|
||||
|
||||
VkImageViewCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
ci.pNext = nullptr;
|
||||
ci.flags = 0;
|
||||
ci.image = surface.GetImageHandle();
|
||||
ci.format = surface.GetImage().GetFormat();
|
||||
ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
|
||||
ci.subresourceRange.aspectMask = aspect_mask;
|
||||
ci.subresourceRange.baseMipLevel = base_level;
|
||||
ci.subresourceRange.levelCount = num_levels;
|
||||
if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
|
||||
ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
|
||||
ci.subresourceRange.baseArrayLayer = base_slice;
|
||||
ci.subresourceRange.layerCount = num_slices;
|
||||
} else {
|
||||
ci.viewType = image_view_type;
|
||||
ci.subresourceRange.baseArrayLayer = base_layer;
|
||||
ci.subresourceRange.layerCount = num_layers;
|
||||
}
|
||||
render_target = device.GetLogical().CreateImageView(ci);
|
||||
return *render_target;
|
||||
}
|
||||
|
||||
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
const VKDevice& device, VKResourceManager& resource_manager,
|
||||
VKMemoryManager& memory_manager, VKScheduler& scheduler,
|
||||
|
||||
@@ -91,7 +91,6 @@ protected:
|
||||
void DecorateSurfaceName();
|
||||
|
||||
View CreateView(const ViewParams& params) override;
|
||||
View CreateViewInner(const ViewParams& params, bool is_proxy);
|
||||
|
||||
private:
|
||||
void UploadBuffer(const std::vector<u8>& staging_buffer);
|
||||
@@ -120,23 +119,20 @@ private:
|
||||
class CachedSurfaceView final : public VideoCommon::ViewBase {
|
||||
public:
|
||||
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
|
||||
const ViewParams& params, bool is_proxy);
|
||||
const ViewParams& params);
|
||||
~CachedSurfaceView();
|
||||
|
||||
VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
|
||||
VkImageView GetAttachment();
|
||||
|
||||
bool IsSameSurface(const CachedSurfaceView& rhs) const {
|
||||
return &surface == &rhs.surface;
|
||||
}
|
||||
|
||||
VkImageView GetHandle() {
|
||||
return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
|
||||
Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
|
||||
}
|
||||
|
||||
u32 GetWidth() const {
|
||||
return params.GetMipWidth(base_level);
|
||||
}
|
||||
@@ -180,14 +176,6 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source) {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
// Store a copy of these values to avoid double dereference when reading them
|
||||
const SurfaceParams params;
|
||||
const VkImage image;
|
||||
@@ -196,15 +184,18 @@ private:
|
||||
|
||||
const VKDevice& device;
|
||||
CachedSurface& surface;
|
||||
const u32 base_layer;
|
||||
const u32 num_layers;
|
||||
const u32 base_level;
|
||||
const u32 num_levels;
|
||||
const VkImageViewType image_view_type;
|
||||
u32 base_layer = 0;
|
||||
u32 num_layers = 0;
|
||||
u32 base_slice = 0;
|
||||
u32 num_slices = 0;
|
||||
|
||||
VkImageView last_image_view = nullptr;
|
||||
u32 last_swizzle = 0;
|
||||
|
||||
vk::ImageView render_target;
|
||||
std::unordered_map<u32, vk::ImageView> view_cache;
|
||||
};
|
||||
|
||||
|
||||
@@ -725,8 +725,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
|
||||
return supported == VK_TRUE;
|
||||
}
|
||||
|
||||
VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
|
||||
noexcept {
|
||||
VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
|
||||
VkSurfaceCapabilitiesKHR capabilities;
|
||||
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
|
||||
return capabilities;
|
||||
|
||||
@@ -779,7 +779,7 @@ public:
|
||||
|
||||
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
|
||||
|
||||
VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
|
||||
VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
|
||||
|
||||
std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
return Operation(OperationCode::YNegate);
|
||||
case SystemVariable::InvocationInfo:
|
||||
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
|
||||
return Immediate(0U);
|
||||
return Immediate(0x00ff'0000U);
|
||||
case SystemVariable::WscaleFactorXY:
|
||||
UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
|
||||
return Immediate(0U);
|
||||
|
||||
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||
std::optional<u32> buffer) {
|
||||
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
|
||||
SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
|
||||
if (info.IsComplete()) {
|
||||
return info;
|
||||
}
|
||||
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
|
||||
: registry.ObtainBoundSampler(offset);
|
||||
if (!sampler) {
|
||||
LOG_WARNING(HW_GPU, "Unknown sampler info");
|
||||
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
|
||||
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||
|
||||
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
|
||||
SamplerInfo sampler_info) {
|
||||
const auto offset = static_cast<u32>(sampler.index.Value());
|
||||
const auto info = GetSamplerInfo(sampler_info, offset);
|
||||
const u32 offset = static_cast<u32>(sampler.index.Value());
|
||||
const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
||||
const Node sampler_register = GetRegister(reg);
|
||||
const auto [base_node, tracked_sampler_info] =
|
||||
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
||||
ASSERT(base_node != nullptr);
|
||||
if (base_node == nullptr) {
|
||||
if (!base_node) {
|
||||
UNREACHABLE();
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (const auto bindless_sampler_info =
|
||||
std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 buffer = bindless_sampler_info->GetIndex();
|
||||
const u32 offset = bindless_sampler_info->GetOffset();
|
||||
info = GetSamplerInfo(info, offset, buffer);
|
||||
if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 buffer = sampler_info->index;
|
||||
const u32 offset = sampler_info->offset;
|
||||
info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[buffer = buffer, offset = offset](const Sampler& entry) {
|
||||
[buffer, offset](const Sampler& entry) {
|
||||
return entry.buffer == buffer && entry.offset == offset;
|
||||
});
|
||||
if (it != used_samplers.end()) {
|
||||
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
||||
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
|
||||
*info.is_shadow, *info.is_buffer, false);
|
||||
}
|
||||
if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
|
||||
index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
|
||||
info = GetSamplerInfo(info, base_offset);
|
||||
if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
|
||||
const std::pair indices = sampler_info->indices;
|
||||
const std::pair offsets = sampler_info->offsets;
|
||||
info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
|
||||
|
||||
// Try to use an already created sampler if it exists
|
||||
const auto it = std::find_if(
|
||||
used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
|
||||
return offsets == std::pair{entry.offset, entry.secondary_offset} &&
|
||||
indices == std::pair{entry.buffer, entry.secondary_buffer};
|
||||
});
|
||||
if (it != used_samplers.end()) {
|
||||
ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
|
||||
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
|
||||
return *it;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const u32 next_index = static_cast<u32>(used_samplers.size());
|
||||
return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
|
||||
*info.is_shadow, *info.is_buffer);
|
||||
}
|
||||
if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 base_offset = sampler_info->base_offset / 4;
|
||||
index_var = GetCustomVariable(sampler_info->bindless_var);
|
||||
info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(
|
||||
|
||||
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
|
||||
using Node4 = std::array<Node, 4>;
|
||||
using NodeBlock = std::vector<Node>;
|
||||
|
||||
class BindlessSamplerNode;
|
||||
class ArraySamplerNode;
|
||||
struct ArraySamplerNode;
|
||||
struct BindlessSamplerNode;
|
||||
struct SeparateSamplerNode;
|
||||
|
||||
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
|
||||
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
|
||||
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
||||
|
||||
struct Sampler {
|
||||
@@ -288,63 +289,51 @@ struct Sampler {
|
||||
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
||||
is_buffer{is_buffer}, is_indexed{is_indexed} {}
|
||||
|
||||
/// Separate sampler constructor
|
||||
constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
|
||||
Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
|
||||
bool is_buffer)
|
||||
: index{index}, offset{offsets.first}, secondary_offset{offsets.second},
|
||||
buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
|
||||
is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
|
||||
|
||||
/// Bindless samplers constructor
|
||||
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
||||
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
||||
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
|
||||
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
|
||||
|
||||
u32 index = 0; ///< Emulated index given for the this sampler.
|
||||
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
||||
u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
|
||||
u32 size = 1; ///< Size of the sampler.
|
||||
u32 index = 0; ///< Emulated index given for the this sampler.
|
||||
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
||||
u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
|
||||
u32 buffer = 0; ///< Buffer where the bindless sampler is read.
|
||||
u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
|
||||
u32 size = 1; ///< Size of the sampler.
|
||||
|
||||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
||||
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
|
||||
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
|
||||
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
||||
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
||||
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
||||
bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
|
||||
bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
|
||||
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
||||
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
||||
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
||||
bool is_separated = false; ///< Whether the image and sampler is separated or not.
|
||||
};
|
||||
|
||||
/// Represents a tracked bindless sampler into a direct const buffer
|
||||
class ArraySamplerNode final {
|
||||
public:
|
||||
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
|
||||
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
|
||||
|
||||
constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
constexpr u32 GetBaseOffset() const {
|
||||
return base_offset;
|
||||
}
|
||||
|
||||
constexpr u32 GetIndexVar() const {
|
||||
return bindless_var;
|
||||
}
|
||||
|
||||
private:
|
||||
struct ArraySamplerNode {
|
||||
u32 index;
|
||||
u32 base_offset;
|
||||
u32 bindless_var;
|
||||
};
|
||||
|
||||
/// Represents a tracked separate sampler image pair that was folded statically
|
||||
struct SeparateSamplerNode {
|
||||
std::pair<u32, u32> indices;
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
/// Represents a tracked bindless sampler into a direct const buffer
|
||||
class BindlessSamplerNode final {
|
||||
public:
|
||||
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
|
||||
|
||||
constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
constexpr u32 GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindlessSamplerNode {
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
|
||||
template <typename T, typename... Args>
|
||||
TrackSampler MakeTrackSampler(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
||||
return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
|
||||
return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
|
||||
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
|
||||
return value;
|
||||
}
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = buffers;
|
||||
key.offsets = offsets;
|
||||
const auto iter = separate_samplers.find(key);
|
||||
if (iter != separate_samplers.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
if (!engine) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
|
||||
const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
|
||||
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
|
||||
separate_samplers.emplace(key, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
|
||||
u32 offset) {
|
||||
const std::pair key = {buffer, offset};
|
||||
|
||||
@@ -19,8 +19,39 @@
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
struct SeparateSamplerKey {
|
||||
std::pair<u32, u32> buffers;
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
|
||||
return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
|
||||
key.offsets.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
|
||||
const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
|
||||
return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
|
||||
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
|
||||
using SeparateSamplerMap =
|
||||
std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
|
||||
using BindlessSamplerMap =
|
||||
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
||||
|
||||
@@ -73,6 +104,9 @@ public:
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||
|
||||
/// Inserts a key.
|
||||
@@ -128,6 +162,7 @@ private:
|
||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||
KeyMap keys;
|
||||
BoundSamplerMap bound_samplers;
|
||||
SeparateSamplerMap separate_samplers;
|
||||
BindlessSamplerMap bindless_samplers;
|
||||
u32 bound_buffer;
|
||||
GraphicsInfo graphics_info;
|
||||
|
||||
@@ -330,8 +330,8 @@ private:
|
||||
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
|
||||
|
||||
/// Queries the missing sampler info from the execution context.
|
||||
SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||
std::optional<u32> buffer = std::nullopt);
|
||||
SamplerInfo GetSamplerInfo(SamplerInfo info,
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> sampler);
|
||||
|
||||
/// Accesses a texture sampler.
|
||||
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
|
||||
@@ -409,8 +409,14 @@ private:
|
||||
|
||||
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
||||
std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
|
||||
const OperationNode& operation,
|
||||
Node gpr, Node base_offset,
|
||||
Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
|
||||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
OperationCode operation_code) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
|
||||
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||
operation->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
} else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
conditional->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
}
|
||||
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor) {
|
||||
std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor) {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
||||
const u32 cbuf_index = cbuf->GetIndex();
|
||||
|
||||
// Constant buffer found, test if it's an immediate
|
||||
const auto& offset = cbuf->GetOffset();
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
auto track =
|
||||
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
|
||||
auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
|
||||
return {tracked, track};
|
||||
}
|
||||
if (const auto operation = std::get_if<OperationNode>(&*offset)) {
|
||||
const u32 bound_buffer = registry.GetBoundBuffer();
|
||||
if (bound_buffer != cbuf->GetIndex()) {
|
||||
if (bound_buffer != cbuf_index) {
|
||||
return {};
|
||||
}
|
||||
const auto pair = DecoupleIndirectRead(*operation);
|
||||
if (!pair) {
|
||||
return {};
|
||||
if (const std::optional pair = DecoupleIndirectRead(*operation)) {
|
||||
auto [gpr, base_offset] = *pair;
|
||||
return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
|
||||
code, cursor);
|
||||
}
|
||||
auto [gpr, base_offset] = *pair;
|
||||
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
|
||||
const auto& gpu_driver = registry.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
Node op =
|
||||
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
|
||||
|
||||
const Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
|
||||
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
|
||||
AmendNodeCv(amend_index, code[cursor]);
|
||||
// TODO Implement Bindless Index custom variable
|
||||
auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
|
||||
offset_inm->GetValue(), bindless_cv);
|
||||
return {tracked, track};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
||||
return TrackBindlessSampler(source, code, new_cursor);
|
||||
}
|
||||
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
|
||||
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
|
||||
if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
|
||||
std::get<0>(found)) {
|
||||
// Cbuf found in operand.
|
||||
const OperationNode& op = *operation;
|
||||
|
||||
const OperationCode opcode = operation->GetCode();
|
||||
if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
|
||||
ASSERT(op.GetOperandsCount() == 2);
|
||||
auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
|
||||
auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
|
||||
if (node_a && node_b) {
|
||||
auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
|
||||
std::pair{offset_a, offset_b});
|
||||
return {tracked, std::move(track)};
|
||||
}
|
||||
}
|
||||
std::size_t i = op.GetOperandsCount();
|
||||
while (i--) {
|
||||
if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
|
||||
// Constant buffer found in operand.
|
||||
return found;
|
||||
}
|
||||
}
|
||||
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
||||
return {};
|
||||
}
|
||||
|
||||
std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
|
||||
const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
|
||||
const NodeBlock& code, s64 cursor) {
|
||||
const auto offset_imm = std::get<ImmediateNode>(*base_offset);
|
||||
const auto& gpu_driver = registry.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
|
||||
Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
|
||||
|
||||
Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
|
||||
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
|
||||
AmendNodeCv(amend_index, code[cursor]);
|
||||
|
||||
// TODO: Implement bindless index custom variable
|
||||
auto track =
|
||||
MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
|
||||
return {tracked, track};
|
||||
}
|
||||
|
||||
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
|
||||
s64 cursor) const {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
||||
|
||||
228
src/video_core/shader_cache.h
Normal file
228
src/video_core/shader_cache.h
Normal file
@@ -0,0 +1,228 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <class T>
|
||||
class ShaderCache {
|
||||
static constexpr u64 PAGE_SHIFT = 14;
|
||||
|
||||
struct Entry {
|
||||
VAddr addr_start;
|
||||
VAddr addr_end;
|
||||
T* data;
|
||||
|
||||
bool is_memory_marked = true;
|
||||
|
||||
constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
|
||||
return start < addr_end && addr_start < end;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
virtual ~ShaderCache() = default;
|
||||
|
||||
/// @brief Removes shaders inside a given region
|
||||
/// @note Checks for ranges
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param size Number of bytes of the invalidation
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||
/// @param addr Start address of the CPU write operation
|
||||
/// @param size Number of bytes of the CPU write operation
|
||||
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
}
|
||||
|
||||
/// @brief Flushes delayed removal operations
|
||||
void SyncGuestHost() {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
/// @brief Tries to obtain a cached shader starting in a given address
|
||||
/// @note Doesn't check for ranges, the given address has to be the start of the shader
|
||||
/// @param addr Start address of the shader, this doesn't cache for region
|
||||
/// @return Pointer to a valid shader, nullptr when nothing is found
|
||||
T* TryGet(VAddr addr) const {
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
const auto it = lookup_cache.find(addr);
|
||||
if (it == lookup_cache.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return it->second->data;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
|
||||
|
||||
/// @brief Register in the cache a given entry
|
||||
/// @param data Shader to store in the cache
|
||||
/// @param addr Start address of the shader that will be registered
|
||||
/// @param size Size in bytes of the shader
|
||||
void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
|
||||
|
||||
const VAddr addr_end = addr + size;
|
||||
Entry* const entry = NewEntry(addr, addr_end, data.get());
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
invalidation_cache[page].push_back(entry);
|
||||
}
|
||||
|
||||
storage.push_back(std::move(data));
|
||||
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, 1);
|
||||
}
|
||||
|
||||
/// @brief Called when a shader is going to be removed
|
||||
/// @param shader Shader that will be removed
|
||||
/// @pre invalidation_cache is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
|
||||
|
||||
private:
|
||||
/// @brief Invalidate pages in a given region
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
|
||||
const VAddr addr_end = addr + size;
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto it = invalidation_cache.find(page);
|
||||
if (it == invalidation_cache.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<Entry*>& entries = it->second;
|
||||
InvalidatePageEntries(entries, addr, addr_end);
|
||||
|
||||
// If there's nothing else in this page, remove it to avoid overpopulating the hash map.
|
||||
if (entries.empty()) {
|
||||
invalidation_cache.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Remove shaders marked for deletion
|
||||
/// @pre invalidation_mutex is locked
|
||||
void RemovePendingShaders() {
|
||||
if (marked_for_removal.empty()) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
std::vector<T*> removed_shaders;
|
||||
removed_shaders.reserve(marked_for_removal.size());
|
||||
|
||||
for (Entry* const entry : marked_for_removal) {
|
||||
if (lookup_cache.erase(entry->addr_start) > 0) {
|
||||
removed_shaders.push_back(entry->data);
|
||||
}
|
||||
}
|
||||
marked_for_removal.clear();
|
||||
|
||||
if (!removed_shaders.empty()) {
|
||||
RemoveShadersFromStorage(std::move(removed_shaders));
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Invalidates entries in a given range for the passed page
|
||||
/// @param entries Vector of entries in the page, it will be modified on overlaps
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param addr_end Non-inclusive end address of the invalidation
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
|
||||
auto it = entries.begin();
|
||||
while (it != entries.end()) {
|
||||
Entry* const entry = *it;
|
||||
if (!entry->Overlaps(addr, addr_end)) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
UnmarkMemory(entry);
|
||||
marked_for_removal.push_back(entry);
|
||||
|
||||
it = entries.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Unmarks an entry from the rasterizer cache
|
||||
/// @param entry Entry to unmark from memory
|
||||
void UnmarkMemory(Entry* entry) {
|
||||
if (!entry->is_memory_marked) {
|
||||
return;
|
||||
}
|
||||
entry->is_memory_marked = false;
|
||||
|
||||
const VAddr addr = entry->addr_start;
|
||||
const std::size_t size = entry->addr_end - addr;
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, -1);
|
||||
}
|
||||
|
||||
/// @brief Removes a vector of shaders from a list
|
||||
/// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
|
||||
/// @pre invalidation_mutex is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
|
||||
// Remove duplicates
|
||||
std::sort(removed_shaders.begin(), removed_shaders.end());
|
||||
removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
|
||||
removed_shaders.end());
|
||||
|
||||
// Now that there are no duplicates, we can notify removals
|
||||
for (T* const shader : removed_shaders) {
|
||||
OnShaderRemoval(shader);
|
||||
}
|
||||
|
||||
// Remove them from the cache
|
||||
const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
|
||||
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
|
||||
removed_shaders.end();
|
||||
};
|
||||
storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
|
||||
}
|
||||
|
||||
/// @brief Creates a new entry in the lookup cache and returns its pointer
|
||||
/// @pre lookup_mutex is locked
|
||||
Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
|
||||
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
|
||||
Entry* const entry_pointer = entry.get();
|
||||
|
||||
lookup_cache.emplace(addr, std::move(entry));
|
||||
return entry_pointer;
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
mutable std::mutex lookup_mutex;
|
||||
std::mutex invalidation_mutex;
|
||||
|
||||
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
|
||||
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
|
||||
std::vector<std::unique_ptr<T>> storage;
|
||||
std::vector<Entry*> marked_for_removal;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
@@ -41,7 +41,7 @@ struct Table {
|
||||
ComponentType alpha_component;
|
||||
bool is_srgb;
|
||||
};
|
||||
constexpr std::array<Table, 77> DefinitionTable = {{
|
||||
constexpr std::array<Table, 78> DefinitionTable = {{
|
||||
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
|
||||
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
|
||||
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
|
||||
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
|
||||
{TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
|
||||
{TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
|
||||
{TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
|
||||
{TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
|
||||
{TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
|
||||
|
||||
{TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
|
||||
|
||||
@@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
|
||||
|
||||
// Use an extra temporal buffer
|
||||
auto& tmp_buffer = staging_cache.GetBuffer(1);
|
||||
// Special case for 3D Texture Segments
|
||||
const bool must_read_current_data =
|
||||
params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
|
||||
tmp_buffer.resize(guest_memory_size);
|
||||
host_ptr = tmp_buffer.data();
|
||||
if (must_read_current_data) {
|
||||
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
// Special case for 3D texture segments
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
|
||||
}
|
||||
|
||||
|
||||
@@ -217,8 +217,8 @@ public:
|
||||
}
|
||||
|
||||
bool IsProtected() const {
|
||||
// Only 3D Slices are to be protected
|
||||
return is_target && params.block_depth > 0;
|
||||
// Only 3D slices are to be protected
|
||||
return is_target && params.target == SurfaceTarget::Texture3D;
|
||||
}
|
||||
|
||||
bool IsRenderTarget() const {
|
||||
@@ -250,6 +250,11 @@ public:
|
||||
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
|
||||
}
|
||||
|
||||
TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
|
||||
return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
|
||||
base_level, num_levels));
|
||||
}
|
||||
|
||||
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
|
||||
const GPUVAddr view_addr,
|
||||
const std::size_t candidate_size, const u32 mipmap,
|
||||
@@ -272,8 +277,8 @@ public:
|
||||
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
|
||||
const std::size_t candidate_size) {
|
||||
if (params.target == SurfaceTarget::Texture3D ||
|
||||
(params.num_levels == 1 && !params.is_layered) ||
|
||||
view_params.target == SurfaceTarget::Texture3D) {
|
||||
view_params.target == SurfaceTarget::Texture3D ||
|
||||
(params.num_levels == 1 && !params.is_layered)) {
|
||||
return {};
|
||||
}
|
||||
const auto layer_mipmap{GetLayerMipmap(view_addr)};
|
||||
|
||||
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
|
||||
params.num_levels = 1;
|
||||
params.emulated_levels = 1;
|
||||
|
||||
const bool is_layered = config.layers > 1 && params.block_depth == 0;
|
||||
params.is_layered = is_layered;
|
||||
params.depth = is_layered ? config.layers.Value() : 1;
|
||||
params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
|
||||
if (config.memory_layout.is_3d != 0) {
|
||||
params.depth = config.layers.Value();
|
||||
params.is_layered = false;
|
||||
params.target = SurfaceTarget::Texture3D;
|
||||
} else if (config.layers > 1) {
|
||||
params.depth = config.layers.Value();
|
||||
params.is_layered = true;
|
||||
params.target = SurfaceTarget::Texture2DArray;
|
||||
} else {
|
||||
params.depth = 1;
|
||||
params.is_layered = false;
|
||||
params.target = SurfaceTarget::Texture2D;
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
|
||||
params.width = config.width;
|
||||
params.height = config.height;
|
||||
params.pitch = config.pitch;
|
||||
// TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
|
||||
// TODO(Rodrigo): Try to guess texture arrays from parameters
|
||||
params.target = SurfaceTarget::Texture2D;
|
||||
params.depth = 1;
|
||||
params.num_levels = 1;
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
|
||||
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
|
||||
|
||||
template <typename TSurface, typename TView>
|
||||
class TextureCache {
|
||||
using VectorSurface = boost::container::small_vector<TSurface, 1>;
|
||||
|
||||
public:
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
@@ -296,30 +298,30 @@ public:
|
||||
const GPUVAddr src_gpu_addr = src_config.Address();
|
||||
const GPUVAddr dst_gpu_addr = dst_config.Address();
|
||||
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
|
||||
const std::optional<VAddr> dst_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
|
||||
const std::optional<VAddr> src_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
|
||||
std::pair<TSurface, TView> dst_surface =
|
||||
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
|
||||
std::pair<TSurface, TView> src_surface =
|
||||
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
|
||||
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
||||
|
||||
const auto& memory_manager = system.GPU().MemoryManager();
|
||||
const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
|
||||
const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
|
||||
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
|
||||
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
|
||||
ImageBlit(src_surface, dst_surface.second, copy_config);
|
||||
dst_surface.first->MarkAsModified(true, Tick());
|
||||
}
|
||||
|
||||
TSurface TryFindFramebufferSurface(VAddr addr) {
|
||||
TSurface TryFindFramebufferSurface(VAddr addr) const {
|
||||
if (!addr) {
|
||||
return nullptr;
|
||||
}
|
||||
const VAddr page = addr >> registry_page_bits;
|
||||
std::vector<TSurface>& list = registry[page];
|
||||
for (auto& surface : list) {
|
||||
if (surface->GetCpuAddr() == addr) {
|
||||
return surface;
|
||||
}
|
||||
const auto it = registry.find(page);
|
||||
if (it == registry.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return nullptr;
|
||||
const auto& list = it->second;
|
||||
const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
|
||||
return surface->GetCpuAddr() == addr;
|
||||
});
|
||||
return found != list.end() ? *found : nullptr;
|
||||
}
|
||||
|
||||
u64 Tick() {
|
||||
@@ -498,18 +500,18 @@ private:
|
||||
* @param untopological Indicates to the recycler that the texture has no way
|
||||
* to match the overlaps due to topological reasons.
|
||||
**/
|
||||
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
|
||||
RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
// 3D Textures decision
|
||||
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
for (const auto& s : overlaps) {
|
||||
const auto& s_params = s->GetSurfaceParams();
|
||||
if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
|
||||
if (s_params.target == SurfaceTarget::Texture3D) {
|
||||
return RecycleStrategy::Flush;
|
||||
}
|
||||
}
|
||||
@@ -538,9 +540,8 @@ private:
|
||||
* @param untopological Indicates to the recycler that the texture has no way to match the
|
||||
* overlaps due to topological reasons.
|
||||
**/
|
||||
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
|
||||
const SurfaceParams& params, const GPUVAddr gpu_addr,
|
||||
const bool preserve_contents,
|
||||
std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr, const bool preserve_contents,
|
||||
const MatchTopologyResult untopological) {
|
||||
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
|
||||
for (auto& surface : overlaps) {
|
||||
@@ -650,47 +651,65 @@ private:
|
||||
* @param params The parameters on the new surface.
|
||||
* @param gpu_addr The starting address of the new surface.
|
||||
**/
|
||||
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
|
||||
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr) {
|
||||
GPUVAddr gpu_addr) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
return {};
|
||||
return std::nullopt;
|
||||
}
|
||||
bool modified = false;
|
||||
const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
|
||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
u32 passed_tests = 0;
|
||||
|
||||
if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
|
||||
LoadSurface(new_surface);
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
Register(new_surface);
|
||||
return {{new_surface, new_surface->GetMainView()}};
|
||||
}
|
||||
|
||||
std::size_t passed_tests = 0;
|
||||
for (auto& surface : overlaps) {
|
||||
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||
if (src_params.is_layered || src_params.num_levels > 1) {
|
||||
// We send this cases to recycle as they are more complex to handle
|
||||
return {};
|
||||
}
|
||||
const std::size_t candidate_size = surface->GetSizeInBytes();
|
||||
auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
|
||||
const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
|
||||
if (!mipmap_layer) {
|
||||
continue;
|
||||
}
|
||||
const auto [layer, mipmap] = *mipmap_layer;
|
||||
if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
|
||||
const auto [base_layer, base_mipmap] = *mipmap_layer;
|
||||
if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
|
||||
continue;
|
||||
}
|
||||
modified |= surface->IsModified();
|
||||
// Now we got all the data set up
|
||||
const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
|
||||
const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
|
||||
passed_tests++;
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
++passed_tests;
|
||||
|
||||
// Copy all mipmaps and layers
|
||||
const u32 block_width = params.GetDefaultBlockWidth();
|
||||
const u32 block_height = params.GetDefaultBlockHeight();
|
||||
for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
|
||||
const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
|
||||
const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
|
||||
if (width < block_width || height < block_height) {
|
||||
// Current APIs forbid copying small compressed textures, avoid errors
|
||||
break;
|
||||
}
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
|
||||
src_params.depth);
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
}
|
||||
}
|
||||
if (passed_tests == 0) {
|
||||
return {};
|
||||
// In Accurate GPU all tests should pass, else we recycle
|
||||
} else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
|
||||
return {};
|
||||
return std::nullopt;
|
||||
}
|
||||
if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
|
||||
// In Accurate GPU all tests should pass, else we recycle
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
|
||||
new_surface->MarkAsModified(modified, Tick());
|
||||
Register(new_surface);
|
||||
return {{new_surface, new_surface->GetMainView()}};
|
||||
@@ -708,53 +727,11 @@ private:
|
||||
* @param preserve_contents Indicates that the new surface should be loaded from memory or
|
||||
* left blank.
|
||||
*/
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr,
|
||||
const VAddr cpu_addr,
|
||||
GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
bool preserve_contents) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
bool failed = false;
|
||||
if (params.num_levels > 1) {
|
||||
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
|
||||
return std::nullopt;
|
||||
}
|
||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
bool modified = false;
|
||||
for (auto& surface : overlaps) {
|
||||
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||
if (src_params.target != SurfaceTarget::Texture2D) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
if (src_params.height != params.height) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
if (src_params.block_depth != params.block_depth ||
|
||||
src_params.block_height != params.block_height) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
|
||||
const auto offsets = params.GetBlockOffsetXYZ(offset);
|
||||
const auto z = std::get<2>(offsets);
|
||||
modified |= surface->IsModified();
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
|
||||
1);
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
}
|
||||
if (failed) {
|
||||
return std::nullopt;
|
||||
}
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
new_surface->MarkAsModified(modified, Tick());
|
||||
Register(new_surface);
|
||||
auto view = new_surface->GetMainView();
|
||||
return {{std::move(new_surface), view}};
|
||||
} else {
|
||||
if (params.target != SurfaceTarget::Texture3D) {
|
||||
for (const auto& surface : overlaps) {
|
||||
if (!surface->MatchTarget(params.target)) {
|
||||
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
|
||||
@@ -770,11 +747,60 @@ private:
|
||||
continue;
|
||||
}
|
||||
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
|
||||
return {{surface, surface->GetMainView()}};
|
||||
return std::make_pair(surface, surface->GetMainView());
|
||||
}
|
||||
}
|
||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||
}
|
||||
|
||||
if (params.num_levels > 1) {
|
||||
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (overlaps.size() == 1) {
|
||||
const auto& surface = overlaps[0];
|
||||
const SurfaceParams& overlap_params = surface->GetSurfaceParams();
|
||||
// Don't attempt to render to textures with more than one level for now
|
||||
// The texture has to be to the right or the sample address if we want to render to it
|
||||
if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
|
||||
const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
|
||||
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
|
||||
if (slice < overlap_params.depth) {
|
||||
auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
|
||||
return std::make_pair(std::move(surface), std::move(view));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
bool modified = false;
|
||||
|
||||
for (auto& surface : overlaps) {
|
||||
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||
if (src_params.target != SurfaceTarget::Texture2D ||
|
||||
src_params.height != params.height ||
|
||||
src_params.block_depth != params.block_depth ||
|
||||
src_params.block_height != params.block_height) {
|
||||
return std::nullopt;
|
||||
}
|
||||
modified |= surface->IsModified();
|
||||
|
||||
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
|
||||
const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
|
||||
const u32 width = params.width;
|
||||
const u32 height = params.height;
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
|
||||
ImageCopy(surface, new_surface, copy_params);
|
||||
}
|
||||
for (const auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
new_surface->MarkAsModified(modified, Tick());
|
||||
Register(new_surface);
|
||||
|
||||
TView view = new_surface->GetMainView();
|
||||
return std::make_pair(std::move(new_surface), std::move(view));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -810,7 +836,7 @@ private:
|
||||
TSurface& current_surface = iter->second;
|
||||
const auto topological_result = current_surface->MatchesTopology(params);
|
||||
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||
std::vector<TSurface> overlaps{current_surface};
|
||||
VectorSurface overlaps{current_surface};
|
||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||
topological_result);
|
||||
}
|
||||
@@ -852,7 +878,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Check if it's a 3D texture
|
||||
// Manage 3D textures
|
||||
if (params.block_depth > 0) {
|
||||
auto surface =
|
||||
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
|
||||
@@ -868,12 +894,9 @@ private:
|
||||
// two things either the candidate surface is a supertexture of the overlap
|
||||
// or they don't match in any known way.
|
||||
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
|
||||
if (current_surface->GetGpuAddr() == gpu_addr) {
|
||||
std::optional<std::pair<TSurface, TView>> view =
|
||||
TryReconstructSurface(overlaps, params, gpu_addr);
|
||||
if (view) {
|
||||
return *view;
|
||||
}
|
||||
const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
|
||||
if (view) {
|
||||
return *view;
|
||||
}
|
||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||
MatchTopologyResult::FullMatch);
|
||||
@@ -1126,23 +1149,25 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
|
||||
VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
const VAddr cpu_addr_end = cpu_addr + size;
|
||||
VAddr start = cpu_addr >> registry_page_bits;
|
||||
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
|
||||
std::vector<TSurface> surfaces;
|
||||
while (start <= end) {
|
||||
std::vector<TSurface>& list = registry[start];
|
||||
for (auto& surface : list) {
|
||||
if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
|
||||
surface->MarkAsPicked(true);
|
||||
surfaces.push_back(surface);
|
||||
}
|
||||
VectorSurface surfaces;
|
||||
for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
|
||||
const auto it = registry.find(start);
|
||||
if (it == registry.end()) {
|
||||
continue;
|
||||
}
|
||||
for (auto& surface : it->second) {
|
||||
if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
|
||||
continue;
|
||||
}
|
||||
surface->MarkAsPicked(true);
|
||||
surfaces.push_back(surface);
|
||||
}
|
||||
start++;
|
||||
}
|
||||
for (auto& surface : surfaces) {
|
||||
surface->MarkAsPicked(false);
|
||||
|
||||
@@ -106,6 +106,9 @@ public:
|
||||
format.setVersion(4, 3);
|
||||
format.setProfile(QSurfaceFormat::CompatibilityProfile);
|
||||
format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
|
||||
if (Settings::values.renderer_debug) {
|
||||
format.setOption(QSurfaceFormat::FormatOption::DebugContext);
|
||||
}
|
||||
// TODO: expose a setting for buffer value (ie default/single/double/triple)
|
||||
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
|
||||
format.setSwapInterval(0);
|
||||
|
||||
@@ -533,6 +533,8 @@ void Config::ReadDebuggingValues() {
|
||||
Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
|
||||
Settings::values.disable_cpu_opt =
|
||||
ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool();
|
||||
Settings::values.disable_macro_jit =
|
||||
ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool();
|
||||
|
||||
qt_config->endGroup();
|
||||
}
|
||||
@@ -629,13 +631,11 @@ void Config::ReadRendererValues() {
|
||||
static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
|
||||
Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
|
||||
Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
|
||||
Settings::values.resolution_factor =
|
||||
ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
|
||||
Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
|
||||
Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
|
||||
Settings::values.use_frame_limit =
|
||||
ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
|
||||
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
|
||||
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
|
||||
Settings::values.use_disk_shader_cache =
|
||||
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
|
||||
const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
|
||||
@@ -720,8 +720,6 @@ void Config::ReadUIValues() {
|
||||
.toString();
|
||||
UISettings::values.enable_discord_presence =
|
||||
ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
|
||||
UISettings::values.screenshot_resolution_factor =
|
||||
static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
|
||||
UISettings::values.select_user_on_boot =
|
||||
ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
|
||||
|
||||
@@ -1011,6 +1009,7 @@ void Config::SaveDebuggingValues() {
|
||||
WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
|
||||
WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
|
||||
WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false);
|
||||
WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false);
|
||||
|
||||
qt_config->endGroup();
|
||||
}
|
||||
@@ -1079,8 +1078,6 @@ void Config::SaveRendererValues() {
|
||||
WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
|
||||
WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
|
||||
WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
|
||||
WriteSetting(QStringLiteral("resolution_factor"),
|
||||
static_cast<double>(Settings::values.resolution_factor), 1.0);
|
||||
WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
|
||||
WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
|
||||
WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
|
||||
@@ -1156,8 +1153,6 @@ void Config::SaveUIValues() {
|
||||
QString::fromUtf8(UISettings::themes[0].second));
|
||||
WriteSetting(QStringLiteral("enable_discord_presence"),
|
||||
UISettings::values.enable_discord_presence, true);
|
||||
WriteSetting(QStringLiteral("screenshot_resolution_factor"),
|
||||
UISettings::values.screenshot_resolution_factor, 0);
|
||||
WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
|
||||
false);
|
||||
|
||||
|
||||
@@ -39,6 +39,8 @@ void ConfigureDebug::SetConfiguration() {
|
||||
ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt);
|
||||
ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
|
||||
ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit);
|
||||
}
|
||||
|
||||
void ConfigureDebug::ApplyConfiguration() {
|
||||
@@ -51,6 +53,7 @@ void ConfigureDebug::ApplyConfiguration() {
|
||||
Settings::values.quest_flag = ui->quest_flag->isChecked();
|
||||
Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked();
|
||||
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
|
||||
Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
|
||||
Debugger::ToggleConsole();
|
||||
Log::Filter filter;
|
||||
filter.ParseFilterString(Settings::values.log_filter);
|
||||
|
||||
@@ -148,6 +148,19 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="disable_macro_jit">
|
||||
<property name="enabled">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="whatsThis">
|
||||
<string>When checked, it disables the macro Just In Time compiler. Enabled this makes games run slower</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Disable Macro JIT</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
||||
@@ -19,47 +19,6 @@
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
enum class Resolution : int {
|
||||
Auto,
|
||||
Scale1x,
|
||||
Scale2x,
|
||||
Scale3x,
|
||||
Scale4x,
|
||||
};
|
||||
|
||||
float ToResolutionFactor(Resolution option) {
|
||||
switch (option) {
|
||||
case Resolution::Auto:
|
||||
return 0.f;
|
||||
case Resolution::Scale1x:
|
||||
return 1.f;
|
||||
case Resolution::Scale2x:
|
||||
return 2.f;
|
||||
case Resolution::Scale3x:
|
||||
return 3.f;
|
||||
case Resolution::Scale4x:
|
||||
return 4.f;
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
Resolution FromResolutionFactor(float factor) {
|
||||
if (factor == 0.f) {
|
||||
return Resolution::Auto;
|
||||
} else if (factor == 1.f) {
|
||||
return Resolution::Scale1x;
|
||||
} else if (factor == 2.f) {
|
||||
return Resolution::Scale2x;
|
||||
} else if (factor == 3.f) {
|
||||
return Resolution::Scale3x;
|
||||
} else if (factor == 4.f) {
|
||||
return Resolution::Scale4x;
|
||||
}
|
||||
return Resolution::Auto;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ConfigureGraphics::ConfigureGraphics(QWidget* parent)
|
||||
: QWidget(parent), ui(new Ui::ConfigureGraphics) {
|
||||
vulkan_device = Settings::values.vulkan_device;
|
||||
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
|
||||
ui->api->setEnabled(runtime_lock);
|
||||
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
|
||||
ui->resolution_factor_combobox->setCurrentIndex(
|
||||
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
|
||||
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
|
||||
ui->use_disk_shader_cache->setEnabled(runtime_lock);
|
||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
|
||||
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
void ConfigureGraphics::ApplyConfiguration() {
|
||||
Settings::values.renderer_backend = GetCurrentGraphicsBackend();
|
||||
Settings::values.vulkan_device = vulkan_device;
|
||||
Settings::values.resolution_factor =
|
||||
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
|
||||
Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
|
||||
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
|
||||
@@ -84,46 +84,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||
<item>
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>Internal Resolution:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QComboBox" name="resolution_factor_combobox">
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Auto (Window Size)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Native (1280x720)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>2x Native (2560x1440)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>3x Native (3840x2160)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>4x Native (5120x2880)</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_6">
|
||||
<item>
|
||||
|
||||
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
|
||||
|
||||
ui->setupUi(this);
|
||||
|
||||
// TODO: Remove this after assembly shaders are fully integrated
|
||||
ui->use_assembly_shaders->setVisible(false);
|
||||
|
||||
SetConfiguration();
|
||||
}
|
||||
|
||||
|
||||
@@ -480,7 +480,9 @@ void ConfigureInputPlayer::RestoreDefaults() {
|
||||
SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]);
|
||||
}
|
||||
}
|
||||
|
||||
UpdateButtonLabels();
|
||||
ApplyConfiguration();
|
||||
}
|
||||
|
||||
void ConfigureInputPlayer::ClearAll() {
|
||||
@@ -505,6 +507,7 @@ void ConfigureInputPlayer::ClearAll() {
|
||||
}
|
||||
|
||||
UpdateButtonLabels();
|
||||
ApplyConfiguration();
|
||||
}
|
||||
|
||||
void ConfigureInputPlayer::UpdateButtonLabels() {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user