Compare commits
37 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cf137ea40b | ||
|
|
de644d506f | ||
|
|
bfa6193eb9 | ||
|
|
778043a44c | ||
|
|
2f420618ea | ||
|
|
be660e7749 | ||
|
|
8868fb745f | ||
|
|
edb2114bac | ||
|
|
1394a581f2 | ||
|
|
52bcfac116 | ||
|
|
109df7705f | ||
|
|
32a127faaa | ||
|
|
a8ac99b619 | ||
|
|
b660ef6c8a | ||
|
|
0f57bbfa3f | ||
|
|
2a3d4cad63 | ||
|
|
798ec003ce | ||
|
|
f22d02083c | ||
|
|
e2f5d16540 | ||
|
|
ed2cd9d8f3 | ||
|
|
0bd9bc7201 | ||
|
|
256cb2979b | ||
|
|
cf0aad7d6a | ||
|
|
1799f4e774 | ||
|
|
c09a9e5cc7 | ||
|
|
79aa7b3ace | ||
|
|
35db6e1c68 | ||
|
|
389549b80d | ||
|
|
a6a43a5ae0 | ||
|
|
5c9dee2c94 | ||
|
|
6e5d8aac4d | ||
|
|
87011a97f9 | ||
|
|
a63a0daa5e | ||
|
|
d89888389d | ||
|
|
6508cdd003 | ||
|
|
03fad5ebe8 | ||
|
|
ea14af2164 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -13,6 +13,9 @@
|
||||
[submodule "soundtouch"]
|
||||
path = externals/soundtouch
|
||||
url = https://github.com/citra-emu/ext-soundtouch.git
|
||||
[submodule "libressl"]
|
||||
path = externals/libressl
|
||||
url = https://github.com/citra-emu/ext-libressl-portable.git
|
||||
[submodule "discord-rpc"]
|
||||
path = externals/discord-rpc
|
||||
url = https://github.com/discordapp/discord-rpc.git
|
||||
|
||||
@@ -152,7 +152,6 @@ macro(yuzu_find_packages)
|
||||
"Boost 1.71 boost/1.72.0"
|
||||
"Catch2 2.11 catch2/2.11.0"
|
||||
"fmt 6.2 fmt/6.2.0"
|
||||
"OpenSSL 1.1 openssl/1.1.1f"
|
||||
# can't use until https://github.com/bincrafters/community/issues/1173
|
||||
#"libzip 1.5 libzip/1.5.2@bincrafters/stable"
|
||||
"lz4 1.8 lz4/1.9.2"
|
||||
@@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
|
||||
add_library(boost ALIAS Boost::boost)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET OpenSSL::SSL)
|
||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
|
||||
endif()
|
||||
if (NOT TARGET OpenSSL::Crypto)
|
||||
set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
|
||||
endif()
|
||||
|
||||
if (TARGET sdl2::sdl2)
|
||||
# imported from the conan generated sdl2Config.cmake
|
||||
set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
|
||||
@@ -51,6 +51,8 @@ endif()
|
||||
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
|
||||
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
|
||||
set(HASH_FILES
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
4
dist/qt_themes/qdarkstyle/style.qss
vendored
4
dist/qt_themes/qdarkstyle/style.qss
vendored
@@ -673,10 +673,6 @@ QTabWidget::pane {
|
||||
border-bottom-left-radius: 2px;
|
||||
}
|
||||
|
||||
QTabWidget::tab-bar {
|
||||
overflow: visible;
|
||||
}
|
||||
|
||||
QTabBar {
|
||||
qproperty-drawBase: 0;
|
||||
border-radius: 3px;
|
||||
|
||||
26
externals/CMakeLists.txt
vendored
26
externals/CMakeLists.txt
vendored
@@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
|
||||
include(DownloadExternals)
|
||||
|
||||
# xbyak
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
|
||||
# Catch
|
||||
add_library(catch-single-include INTERFACE)
|
||||
target_include_directories(catch-single-include INTERFACE catch/single_include)
|
||||
@@ -66,6 +73,15 @@ if (NOT LIBZIP_FOUND)
|
||||
endif()
|
||||
|
||||
if (ENABLE_WEB_SERVICE)
|
||||
# LibreSSL
|
||||
set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
|
||||
add_subdirectory(libressl EXCLUDE_FROM_ALL)
|
||||
target_include_directories(ssl INTERFACE ./libressl/include)
|
||||
target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
|
||||
get_directory_property(OPENSSL_LIBRARIES
|
||||
DIRECTORY libressl
|
||||
DEFINITION OPENSSL_LIBS)
|
||||
|
||||
# lurlparser
|
||||
add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
|
||||
|
||||
@@ -73,13 +89,5 @@ if (ENABLE_WEB_SERVICE)
|
||||
add_library(httplib INTERFACE)
|
||||
target_include_directories(httplib INTERFACE ./httplib)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET xbyak)
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
|
||||
endif()
|
||||
|
||||
1
externals/libressl
vendored
Submodule
1
externals/libressl
vendored
Submodule
Submodule externals/libressl added at 7d01cb01cb
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
||||
DEPENDS
|
||||
# WARNING! It was too much work to try and make a common location for this list,
|
||||
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
|
||||
|
||||
@@ -50,7 +50,8 @@ public:
|
||||
}
|
||||
|
||||
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
|
||||
UNIMPLEMENTED();
|
||||
UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
|
||||
MemoryReadCode(pc));
|
||||
}
|
||||
|
||||
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
||||
@@ -89,8 +90,6 @@ public:
|
||||
|
||||
ARM_Dynarmic_32& parent;
|
||||
std::size_t num_interpreted_instructions{};
|
||||
u64 tpidrro_el0{};
|
||||
u64 tpidr_el0{};
|
||||
};
|
||||
|
||||
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
|
||||
@@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
|
||||
config.callbacks = cb.get();
|
||||
// TODO(bunnei): Implement page table for 32-bit
|
||||
// config.page_table = &page_table.pointers;
|
||||
config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
|
||||
config.coprocessors[15] = cp15;
|
||||
config.define_unpredictable_behaviour = true;
|
||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||
}
|
||||
@@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::Step() {
|
||||
cb->InterpreterFallback(jit->Regs()[15], 1);
|
||||
jit->Step();
|
||||
}
|
||||
|
||||
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
|
||||
std::size_t core_index)
|
||||
: ARM_Interface{system},
|
||||
cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
|
||||
: ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
|
||||
cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
|
||||
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
|
||||
|
||||
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
|
||||
@@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
|
||||
}
|
||||
|
||||
u64 ARM_Dynarmic_32::GetTlsAddress() const {
|
||||
return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
||||
return cp15->uro;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
|
||||
CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
|
||||
cp15->uro = static_cast<u32>(address);
|
||||
}
|
||||
|
||||
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
|
||||
return cb->tpidr_el0;
|
||||
return cp15->uprw;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
|
||||
cb->tpidr_el0 = value;
|
||||
cp15->uprw = static_cast<u32>(value);
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
|
||||
|
||||
@@ -22,6 +22,7 @@ class Memory;
|
||||
namespace Core {
|
||||
|
||||
class DynarmicCallbacks32;
|
||||
class DynarmicCP15;
|
||||
class DynarmicExclusiveMonitor;
|
||||
class System;
|
||||
|
||||
@@ -66,12 +67,14 @@ private:
|
||||
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
|
||||
|
||||
friend class DynarmicCallbacks32;
|
||||
friend class DynarmicCP15;
|
||||
|
||||
std::unique_ptr<DynarmicCallbacks32> cb;
|
||||
JitCacheType jit_cache;
|
||||
std::shared_ptr<Dynarmic::A32::Jit> jit;
|
||||
std::shared_ptr<DynarmicCP15> cp15;
|
||||
std::size_t core_index;
|
||||
DynarmicExclusiveMonitor& exclusive_monitor;
|
||||
std::array<u32, 84> CP15_regs{};
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -2,79 +2,132 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/logging/log.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_32.h"
|
||||
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
|
||||
using Callback = Dynarmic::A32::Coprocessor::Callback;
|
||||
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
|
||||
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Dynarmic::A32::CoprocReg> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
template <typename FormatContext>
|
||||
auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
|
||||
return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
|
||||
}
|
||||
};
|
||||
|
||||
namespace Core {
|
||||
|
||||
static u32 dummy_value;
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
|
||||
CoprocReg CRd, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
|
||||
CRm, opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
// TODO(merry): Privileged CP15 registers
|
||||
|
||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
|
||||
// CP15_FLUSH_PREFETCH_BUFFER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
|
||||
return &dummy_value;
|
||||
}
|
||||
|
||||
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
|
||||
switch (opc2) {
|
||||
case 4:
|
||||
// CP15_DATA_SYNC_BARRIER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
|
||||
return &dummy_value;
|
||||
case 5:
|
||||
// CP15_DATA_MEMORY_BARRIER
|
||||
// This is a dummy write, we ignore the value written here.
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
|
||||
default:
|
||||
return {};
|
||||
return &dummy_value;
|
||||
}
|
||||
}
|
||||
|
||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
||||
// CP15_THREAD_UPRW
|
||||
return &uprw;
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||
opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
|
||||
CoprocReg CRm, unsigned opc2) {
|
||||
// TODO(merry): Privileged CP15 registers
|
||||
|
||||
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
|
||||
switch (opc2) {
|
||||
case 2:
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
|
||||
// CP15_THREAD_UPRW
|
||||
return &uprw;
|
||||
case 3:
|
||||
return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
|
||||
default:
|
||||
return {};
|
||||
// CP15_THREAD_URO
|
||||
return &uro;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
|
||||
opc2);
|
||||
return {};
|
||||
}
|
||||
|
||||
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
|
||||
if (!two && opc == 0 && CRm == CoprocReg::C14) {
|
||||
// CNTPCT
|
||||
const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
|
||||
[](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
|
||||
ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
|
||||
return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
|
||||
});
|
||||
return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) {
|
||||
if (option) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd, *option);
|
||||
} else {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) {
|
||||
if (option) {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd, *option);
|
||||
} else {
|
||||
LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
|
||||
long_transfer ? "l" : "", CRd);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -10,128 +10,15 @@
|
||||
#include <dynarmic/A32/coprocessor.h>
|
||||
#include "common/common_types.h"
|
||||
|
||||
enum class CP15Register {
|
||||
// c0 - Information registers
|
||||
CP15_MAIN_ID,
|
||||
CP15_CACHE_TYPE,
|
||||
CP15_TCM_STATUS,
|
||||
CP15_TLB_TYPE,
|
||||
CP15_CPU_ID,
|
||||
CP15_PROCESSOR_FEATURE_0,
|
||||
CP15_PROCESSOR_FEATURE_1,
|
||||
CP15_DEBUG_FEATURE_0,
|
||||
CP15_AUXILIARY_FEATURE_0,
|
||||
CP15_MEMORY_MODEL_FEATURE_0,
|
||||
CP15_MEMORY_MODEL_FEATURE_1,
|
||||
CP15_MEMORY_MODEL_FEATURE_2,
|
||||
CP15_MEMORY_MODEL_FEATURE_3,
|
||||
CP15_ISA_FEATURE_0,
|
||||
CP15_ISA_FEATURE_1,
|
||||
CP15_ISA_FEATURE_2,
|
||||
CP15_ISA_FEATURE_3,
|
||||
CP15_ISA_FEATURE_4,
|
||||
namespace Core {
|
||||
|
||||
// c1 - Control registers
|
||||
CP15_CONTROL,
|
||||
CP15_AUXILIARY_CONTROL,
|
||||
CP15_COPROCESSOR_ACCESS_CONTROL,
|
||||
|
||||
// c2 - Translation table registers
|
||||
CP15_TRANSLATION_BASE_TABLE_0,
|
||||
CP15_TRANSLATION_BASE_TABLE_1,
|
||||
CP15_TRANSLATION_BASE_CONTROL,
|
||||
CP15_DOMAIN_ACCESS_CONTROL,
|
||||
CP15_RESERVED,
|
||||
|
||||
// c5 - Fault status registers
|
||||
CP15_FAULT_STATUS,
|
||||
CP15_INSTR_FAULT_STATUS,
|
||||
CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
|
||||
CP15_INST_FSR,
|
||||
|
||||
// c6 - Fault Address registers
|
||||
CP15_FAULT_ADDRESS,
|
||||
CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
|
||||
CP15_WFAR,
|
||||
CP15_IFAR,
|
||||
|
||||
// c7 - Cache operation registers
|
||||
CP15_WAIT_FOR_INTERRUPT,
|
||||
CP15_PHYS_ADDRESS,
|
||||
CP15_INVALIDATE_INSTR_CACHE,
|
||||
CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
|
||||
CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
|
||||
CP15_FLUSH_PREFETCH_BUFFER,
|
||||
CP15_FLUSH_BRANCH_TARGET_CACHE,
|
||||
CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
|
||||
CP15_INVALIDATE_DATA_CACHE,
|
||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
||||
CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
|
||||
CP15_CLEAN_DATA_CACHE,
|
||||
CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
|
||||
CP15_DATA_SYNC_BARRIER,
|
||||
CP15_DATA_MEMORY_BARRIER,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
|
||||
CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
|
||||
|
||||
// c8 - TLB operations
|
||||
CP15_INVALIDATE_ITLB,
|
||||
CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
|
||||
CP15_INVALIDATE_DTLB,
|
||||
CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
|
||||
CP15_INVALIDATE_UTLB,
|
||||
CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
|
||||
CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
|
||||
CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
|
||||
|
||||
// c9 - Data cache lockdown register
|
||||
CP15_DATA_CACHE_LOCKDOWN,
|
||||
|
||||
// c10 - TLB/Memory map registers
|
||||
CP15_TLB_LOCKDOWN,
|
||||
CP15_PRIMARY_REGION_REMAP,
|
||||
CP15_NORMAL_REGION_REMAP,
|
||||
|
||||
// c13 - Thread related registers
|
||||
CP15_PID,
|
||||
CP15_CONTEXT_ID,
|
||||
CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
|
||||
CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
|
||||
CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
|
||||
|
||||
// c15 - Performance and TLB lockdown registers
|
||||
CP15_PERFORMANCE_MONITOR_CONTROL,
|
||||
CP15_CYCLE_COUNTER,
|
||||
CP15_COUNT_0,
|
||||
CP15_COUNT_1,
|
||||
CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
|
||||
CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
|
||||
CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
|
||||
CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
|
||||
CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
|
||||
CP15_TLB_DEBUG_CONTROL,
|
||||
|
||||
// Skyeye defined
|
||||
CP15_TLB_FAULT_ADDR,
|
||||
CP15_TLB_FAULT_STATUS,
|
||||
|
||||
// Not an actual register.
|
||||
// All registers should be defined above this.
|
||||
CP15_REGISTER_COUNT,
|
||||
};
|
||||
class ARM_Dynarmic_32;
|
||||
|
||||
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
|
||||
public:
|
||||
using CoprocReg = Dynarmic::A32::CoprocReg;
|
||||
|
||||
explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
|
||||
explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
|
||||
|
||||
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
|
||||
CoprocReg CRn, CoprocReg CRm,
|
||||
@@ -147,6 +34,9 @@ public:
|
||||
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
|
||||
std::optional<u8> option) override;
|
||||
|
||||
private:
|
||||
u32* CP15{};
|
||||
ARM_Dynarmic_32& parent;
|
||||
u32 uprw;
|
||||
u32 uro;
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -437,7 +437,7 @@ struct Values {
|
||||
bool renderer_debug;
|
||||
int vulkan_device;
|
||||
|
||||
float resolution_factor;
|
||||
u16 resolution_factor{1};
|
||||
int aspect_ratio;
|
||||
int max_anisotropy;
|
||||
bool use_frame_limit;
|
||||
|
||||
@@ -52,6 +52,8 @@ add_library(video_core STATIC
|
||||
rasterizer_interface.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
renderer_opengl/gl_arb_decompiler.cpp
|
||||
renderer_opengl/gl_arb_decompiler.h
|
||||
renderer_opengl/gl_buffer_cache.cpp
|
||||
renderer_opengl/gl_buffer_cache.h
|
||||
renderer_opengl/gl_device.cpp
|
||||
|
||||
@@ -15,48 +15,47 @@ namespace VideoCommon {
|
||||
|
||||
class BufferBlock {
|
||||
public:
|
||||
bool Overlaps(const VAddr start, const VAddr end) const {
|
||||
bool Overlaps(VAddr start, VAddr end) const {
|
||||
return (cpu_addr < end) && (cpu_addr_end > start);
|
||||
}
|
||||
|
||||
bool IsInside(const VAddr other_start, const VAddr other_end) const {
|
||||
bool IsInside(VAddr other_start, VAddr other_end) const {
|
||||
return cpu_addr <= other_start && other_end <= cpu_addr_end;
|
||||
}
|
||||
|
||||
std::size_t GetOffset(const VAddr in_addr) {
|
||||
std::size_t Offset(VAddr in_addr) const {
|
||||
return static_cast<std::size_t>(in_addr - cpu_addr);
|
||||
}
|
||||
|
||||
VAddr GetCpuAddr() const {
|
||||
VAddr CpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
VAddr GetCpuAddrEnd() const {
|
||||
VAddr CpuAddrEnd() const {
|
||||
return cpu_addr_end;
|
||||
}
|
||||
|
||||
void SetCpuAddr(const VAddr new_addr) {
|
||||
void SetCpuAddr(VAddr new_addr) {
|
||||
cpu_addr = new_addr;
|
||||
cpu_addr_end = new_addr + size;
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
std::size_t Size() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
u64 Epoch() const {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
void SetEpoch(u64 new_epoch) {
|
||||
epoch = new_epoch;
|
||||
}
|
||||
|
||||
u64 GetEpoch() {
|
||||
return epoch;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
|
||||
SetCpuAddr(cpu_addr);
|
||||
explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
|
||||
SetCpuAddr(cpu_addr_);
|
||||
}
|
||||
~BufferBlock() = default;
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
|
||||
@@ -30,12 +30,16 @@
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
|
||||
template <typename Buffer, typename BufferType, typename StreamBuffer>
|
||||
class BufferCache {
|
||||
using IntervalSet = boost::icl::interval_set<VAddr>;
|
||||
using IntervalType = typename IntervalSet::interval_type;
|
||||
using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
|
||||
|
||||
static constexpr u64 WRITE_PAGE_BIT = 11;
|
||||
static constexpr u64 BLOCK_PAGE_BITS = 21;
|
||||
static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
|
||||
|
||||
public:
|
||||
using BufferInfo = std::pair<BufferType, u64>;
|
||||
|
||||
@@ -82,7 +86,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
OwnerBuffer block = GetBlock(cpu_addr, size);
|
||||
Buffer* const block = GetBlock(cpu_addr, size);
|
||||
MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||
if (!map) {
|
||||
return {GetEmptyBuffer(size), 0};
|
||||
@@ -98,7 +102,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
|
||||
return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
|
||||
}
|
||||
|
||||
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
|
||||
@@ -129,16 +133,18 @@ public:
|
||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
||||
}
|
||||
|
||||
/// Function called at the end of each frame, intended for deferred operations
|
||||
void TickFrame() {
|
||||
++epoch;
|
||||
|
||||
while (!pending_destruction.empty()) {
|
||||
// Delay at least 4 frames before destruction.
|
||||
// This is due to triple buffering happening on some drivers.
|
||||
static constexpr u64 epochs_to_destroy = 5;
|
||||
if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
|
||||
if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
|
||||
break;
|
||||
}
|
||||
pending_destruction.pop_front();
|
||||
pending_destruction.pop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -253,23 +259,21 @@ public:
|
||||
|
||||
protected:
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
std::unique_ptr<StreamBuffer> stream_buffer)
|
||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
|
||||
stream_buffer_handle{this->stream_buffer->GetHandle()} {}
|
||||
std::unique_ptr<StreamBuffer> stream_buffer_)
|
||||
: rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
|
||||
stream_buffer_handle{stream_buffer->Handle()} {}
|
||||
|
||||
~BufferCache() = default;
|
||||
|
||||
virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
|
||||
virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||
|
||||
virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
|
||||
|
||||
virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) = 0;
|
||||
|
||||
virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
|
||||
virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
u8* data) = 0;
|
||||
|
||||
virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
|
||||
virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) = 0;
|
||||
|
||||
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
|
||||
@@ -325,7 +329,7 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||
std::size_t size) {
|
||||
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
|
||||
if (overlaps.empty()) {
|
||||
@@ -333,11 +337,11 @@ private:
|
||||
const VAddr cpu_addr_end = cpu_addr + size;
|
||||
if (memory_manager.IsGranularRange(gpu_addr, size)) {
|
||||
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
|
||||
UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr);
|
||||
} else {
|
||||
staging_buffer.resize(size);
|
||||
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
|
||||
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
|
||||
UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data());
|
||||
}
|
||||
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
|
||||
}
|
||||
@@ -380,7 +384,7 @@ private:
|
||||
return map;
|
||||
}
|
||||
|
||||
void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
|
||||
void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
|
||||
const VectorMapInterval& overlaps) {
|
||||
const IntervalType base_interval{start, end};
|
||||
IntervalSet interval_set{};
|
||||
@@ -390,13 +394,13 @@ private:
|
||||
interval_set.subtract(subtract);
|
||||
}
|
||||
for (auto& interval : interval_set) {
|
||||
std::size_t size = interval.upper() - interval.lower();
|
||||
if (size > 0) {
|
||||
staging_buffer.resize(size);
|
||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||
UploadBlockData(block, block->GetOffset(interval.lower()), size,
|
||||
staging_buffer.data());
|
||||
const std::size_t size = interval.upper() - interval.lower();
|
||||
if (size == 0) {
|
||||
continue;
|
||||
}
|
||||
staging_buffer.resize(size);
|
||||
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
|
||||
UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -426,10 +430,14 @@ private:
|
||||
}
|
||||
|
||||
void FlushMap(MapInterval* map) {
|
||||
const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
|
||||
ASSERT_OR_EXECUTE(it != blocks.end(), return;);
|
||||
|
||||
std::shared_ptr<Buffer> block = it->second;
|
||||
|
||||
const std::size_t size = map->end - map->start;
|
||||
OwnerBuffer block = blocks[map->start >> block_page_bits];
|
||||
staging_buffer.resize(size);
|
||||
DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
|
||||
DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data());
|
||||
system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
|
||||
map->MarkAsModified(false, 0);
|
||||
}
|
||||
@@ -452,97 +460,89 @@ private:
|
||||
buffer_offset = offset_aligned;
|
||||
}
|
||||
|
||||
OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
|
||||
const std::size_t old_size = buffer->GetSize();
|
||||
const std::size_t new_size = old_size + block_page_size;
|
||||
const VAddr cpu_addr = buffer->GetCpuAddr();
|
||||
OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
|
||||
CopyBlock(buffer, new_buffer, 0, 0, old_size);
|
||||
buffer->SetEpoch(epoch);
|
||||
pending_destruction.push_back(buffer);
|
||||
std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
|
||||
const std::size_t old_size = buffer->Size();
|
||||
const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
|
||||
const VAddr cpu_addr = buffer->CpuAddr();
|
||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
|
||||
CopyBlock(*buffer, *new_buffer, 0, 0, old_size);
|
||||
QueueDestruction(std::move(buffer));
|
||||
|
||||
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
|
||||
u64 page_start = cpu_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
blocks[page_start] = new_buffer;
|
||||
++page_start;
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
blocks.insert_or_assign(page_start, new_buffer);
|
||||
}
|
||||
|
||||
return new_buffer;
|
||||
}
|
||||
|
||||
OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
|
||||
const std::size_t size_1 = first->GetSize();
|
||||
const std::size_t size_2 = second->GetSize();
|
||||
const VAddr first_addr = first->GetCpuAddr();
|
||||
const VAddr second_addr = second->GetCpuAddr();
|
||||
std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
|
||||
std::shared_ptr<Buffer> second) {
|
||||
const std::size_t size_1 = first->Size();
|
||||
const std::size_t size_2 = second->Size();
|
||||
const VAddr first_addr = first->CpuAddr();
|
||||
const VAddr second_addr = second->CpuAddr();
|
||||
const VAddr new_addr = std::min(first_addr, second_addr);
|
||||
const std::size_t new_size = size_1 + size_2;
|
||||
OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
|
||||
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
|
||||
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
|
||||
first->SetEpoch(epoch);
|
||||
second->SetEpoch(epoch);
|
||||
pending_destruction.push_back(first);
|
||||
pending_destruction.push_back(second);
|
||||
|
||||
std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
|
||||
CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1);
|
||||
CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2);
|
||||
QueueDestruction(std::move(first));
|
||||
QueueDestruction(std::move(second));
|
||||
|
||||
const VAddr cpu_addr_end = new_addr + new_size - 1;
|
||||
u64 page_start = new_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
blocks[page_start] = new_buffer;
|
||||
++page_start;
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
blocks.insert_or_assign(page_start, new_buffer);
|
||||
}
|
||||
return new_buffer;
|
||||
}
|
||||
|
||||
OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
|
||||
OwnerBuffer found;
|
||||
Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
|
||||
std::shared_ptr<Buffer> found;
|
||||
|
||||
const VAddr cpu_addr_end = cpu_addr + size - 1;
|
||||
u64 page_start = cpu_addr >> block_page_bits;
|
||||
const u64 page_end = cpu_addr_end >> block_page_bits;
|
||||
while (page_start <= page_end) {
|
||||
const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
|
||||
for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
|
||||
auto it = blocks.find(page_start);
|
||||
if (it == blocks.end()) {
|
||||
if (found) {
|
||||
found = EnlargeBlock(found);
|
||||
} else {
|
||||
const VAddr start_addr = (page_start << block_page_bits);
|
||||
found = CreateBlock(start_addr, block_page_size);
|
||||
blocks[page_start] = found;
|
||||
}
|
||||
} else {
|
||||
if (found) {
|
||||
if (found == it->second) {
|
||||
++page_start;
|
||||
continue;
|
||||
}
|
||||
found = MergeBlocks(found, it->second);
|
||||
} else {
|
||||
found = it->second;
|
||||
continue;
|
||||
}
|
||||
const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
|
||||
found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
|
||||
blocks.insert_or_assign(page_start, found);
|
||||
continue;
|
||||
}
|
||||
if (!found) {
|
||||
found = it->second;
|
||||
continue;
|
||||
}
|
||||
if (found != it->second) {
|
||||
found = MergeBlocks(std::move(found), it->second);
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
return found;
|
||||
return found.get();
|
||||
}
|
||||
|
||||
void MarkRegionAsWritten(const VAddr start, const VAddr end) {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
void MarkRegionAsWritten(VAddr start, VAddr end) {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
auto it = written_pages.find(page_start);
|
||||
if (it != written_pages.end()) {
|
||||
it->second = it->second + 1;
|
||||
} else {
|
||||
written_pages[page_start] = 1;
|
||||
written_pages.insert_or_assign(page_start, 1);
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
}
|
||||
|
||||
void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
void UnmarkRegionAsWritten(VAddr start, VAddr end) {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
auto it = written_pages.find(page_start);
|
||||
if (it != written_pages.end()) {
|
||||
if (it->second > 1) {
|
||||
@@ -551,22 +551,24 @@ private:
|
||||
written_pages.erase(it);
|
||||
}
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsRegionWritten(const VAddr start, const VAddr end) const {
|
||||
u64 page_start = start >> write_page_bit;
|
||||
const u64 page_end = end >> write_page_bit;
|
||||
while (page_start <= page_end) {
|
||||
bool IsRegionWritten(VAddr start, VAddr end) const {
|
||||
const u64 page_end = end >> WRITE_PAGE_BIT;
|
||||
for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
|
||||
if (written_pages.count(page_start) > 0) {
|
||||
return true;
|
||||
}
|
||||
++page_start;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void QueueDestruction(std::shared_ptr<Buffer> buffer) {
|
||||
buffer->SetEpoch(epoch);
|
||||
pending_destruction.push(std::move(buffer));
|
||||
}
|
||||
|
||||
void MarkForAsyncFlush(MapInterval* map) {
|
||||
if (!uncommitted_flushes) {
|
||||
uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
|
||||
@@ -578,7 +580,7 @@ private:
|
||||
Core::System& system;
|
||||
|
||||
std::unique_ptr<StreamBuffer> stream_buffer;
|
||||
BufferType stream_buffer_handle{};
|
||||
BufferType stream_buffer_handle;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
u64 buffer_offset = 0;
|
||||
@@ -588,18 +590,15 @@ private:
|
||||
boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
|
||||
mapped_addresses;
|
||||
|
||||
static constexpr u64 write_page_bit = 11;
|
||||
std::unordered_map<u64, u32> written_pages;
|
||||
std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
|
||||
|
||||
static constexpr u64 block_page_bits = 21;
|
||||
static constexpr u64 block_page_size = 1ULL << block_page_bits;
|
||||
std::unordered_map<u64, OwnerBuffer> blocks;
|
||||
|
||||
std::list<OwnerBuffer> pending_destruction;
|
||||
std::queue<std::shared_ptr<Buffer>> pending_destruction;
|
||||
u64 epoch = 0;
|
||||
u64 modified_ticks = 0;
|
||||
|
||||
std::vector<u8> staging_buffer;
|
||||
|
||||
std::list<MapInterval*> marked_for_unregister;
|
||||
|
||||
std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
|
||||
|
||||
@@ -14,22 +14,16 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
|
||||
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
|
||||
|
||||
namespace Tegra {
|
||||
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
|
||||
static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
|
||||
static const Xbyak::Reg64 STATE = Xbyak::util::r11;
|
||||
static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
|
||||
static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
|
||||
static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
|
||||
static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
|
||||
static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
|
||||
static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
|
||||
static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
|
||||
static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
|
||||
static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
|
||||
|
||||
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
|
||||
PARAMETERS,
|
||||
REGISTERS,
|
||||
STATE,
|
||||
NEXT_PARAMETER,
|
||||
RESULT,
|
||||
PARAMETERS,
|
||||
METHOD_ADDRESS,
|
||||
BRANCH_HOLDER,
|
||||
});
|
||||
@@ -53,8 +47,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
|
||||
JITState state{};
|
||||
state.maxwell3d = &maxwell3d;
|
||||
state.registers = {};
|
||||
state.parameters = parameters.data();
|
||||
program(&state);
|
||||
program(&state, parameters.data());
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
|
||||
@@ -64,18 +57,18 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
|
||||
const bool is_move_operation = !is_a_zero && is_b_zero;
|
||||
const bool has_zero_register = is_a_zero || is_b_zero;
|
||||
|
||||
Xbyak::Reg64 src_a;
|
||||
Xbyak::Reg32 src_a;
|
||||
Xbyak::Reg32 src_b;
|
||||
|
||||
if (!optimizer.zero_reg_skip) {
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
|
||||
src_b = Compile_GetRegister(opcode.src_b, ebx);
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
src_b = Compile_GetRegister(opcode.src_b, eax);
|
||||
} else {
|
||||
if (!is_a_zero) {
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
|
||||
src_a = Compile_GetRegister(opcode.src_a, RESULT);
|
||||
}
|
||||
if (!is_b_zero) {
|
||||
src_b = Compile_GetRegister(opcode.src_b, ebx);
|
||||
src_b = Compile_GetRegister(opcode.src_b, eax);
|
||||
}
|
||||
}
|
||||
Xbyak::Label skip_carry{};
|
||||
@@ -329,7 +322,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
|
||||
and_(METHOD_ADDRESS, 0xfff);
|
||||
shr(ecx, 12);
|
||||
and_(ecx, 0x3f);
|
||||
lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
|
||||
lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
|
||||
sal(ecx, 12);
|
||||
or_(eax, ecx);
|
||||
|
||||
@@ -424,16 +417,12 @@ void MacroJITx64Impl::Compile() {
|
||||
Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
|
||||
// JIT state
|
||||
mov(STATE, Common::X64::ABI_PARAM1);
|
||||
mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
|
||||
static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
|
||||
mov(REGISTERS, Common::X64::ABI_PARAM1);
|
||||
add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
|
||||
mov(PARAMETERS, Common::X64::ABI_PARAM2);
|
||||
xor_(RESULT, RESULT);
|
||||
xor_(METHOD_ADDRESS, METHOD_ADDRESS);
|
||||
xor_(NEXT_PARAMETER, NEXT_PARAMETER);
|
||||
xor_(BRANCH_HOLDER, BRANCH_HOLDER);
|
||||
|
||||
mov(dword[REGISTERS + 4], Compile_FetchParameter());
|
||||
mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
|
||||
|
||||
// Track get register for zero registers and mark it as no-op
|
||||
optimizer.zero_reg_skip = true;
|
||||
@@ -537,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
|
||||
}
|
||||
|
||||
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
|
||||
mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
|
||||
inc(NEXT_PARAMETER);
|
||||
mov(eax, dword[PARAMETERS]);
|
||||
add(PARAMETERS, sizeof(u32));
|
||||
return eax;
|
||||
}
|
||||
|
||||
@@ -547,31 +536,12 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
|
||||
// Register 0 is always zero
|
||||
xor_(dst, dst);
|
||||
} else {
|
||||
mov(dst, dword[REGISTERS + index * sizeof(u32)]);
|
||||
mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
|
||||
if (index == 0) {
|
||||
// Register 0 is always zero
|
||||
xor_(dst, dst);
|
||||
} else {
|
||||
mov(dst, dword[REGISTERS + index * sizeof(u32)]);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
|
||||
Xbyak::Label zero{}, end{};
|
||||
xor_(ecx, ecx);
|
||||
shr(dst, 32);
|
||||
setne(cl);
|
||||
mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
|
||||
}
|
||||
|
||||
void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
|
||||
auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
|
||||
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
|
||||
@@ -579,7 +549,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
|
||||
if (reg == 0) {
|
||||
return;
|
||||
}
|
||||
mov(dword[REGISTERS + reg * sizeof(u32)], result);
|
||||
mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
|
||||
};
|
||||
auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
|
||||
|
||||
|
||||
@@ -55,8 +55,6 @@ private:
|
||||
|
||||
Xbyak::Reg32 Compile_FetchParameter();
|
||||
Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
|
||||
Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
|
||||
void Compile_WriteCarry(Xbyak::Reg64 dst);
|
||||
|
||||
void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
|
||||
void Compile_Send(Xbyak::Reg32 value);
|
||||
@@ -67,11 +65,10 @@ private:
|
||||
struct JITState {
|
||||
Engines::Maxwell3D* maxwell3d{};
|
||||
std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
|
||||
const u32* parameters{};
|
||||
u32 carry_flag{};
|
||||
};
|
||||
static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
|
||||
using ProgramType = void (*)(JITState*);
|
||||
using ProgramType = void (*)(JITState*, const u32*);
|
||||
|
||||
struct OptimizerState {
|
||||
bool can_skip_carry{};
|
||||
@@ -85,8 +82,8 @@ private:
|
||||
std::optional<Macro::Opcode> next_opcode{};
|
||||
ProgramType program{nullptr};
|
||||
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> labels{};
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{};
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
|
||||
std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
|
||||
Xbyak::Label end_of_code{};
|
||||
|
||||
bool is_delay_slot{};
|
||||
|
||||
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
2074
src/video_core/renderer_opengl/gl_arb_decompiler.cpp
Normal file
File diff suppressed because it is too large
Load Diff
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
29
src/video_core/renderer_opengl/gl_arb_decompiler.h
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
enum class ShaderType : u32;
|
||||
}
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
class ShaderIR;
|
||||
class Registry;
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
|
||||
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
Tegra::Engines::ShaderType stage, std::string_view identifier);
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -22,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
|
||||
|
||||
CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
|
||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
gl_buffer.Create();
|
||||
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
const Device& device, std::size_t stream_size)
|
||||
@@ -48,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
|
||||
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
|
||||
}
|
||||
|
||||
Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<CachedBufferBlock>(cpu_addr, size);
|
||||
}
|
||||
|
||||
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
|
||||
return buffer->GetHandle();
|
||||
std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(cpu_addr, size);
|
||||
}
|
||||
|
||||
GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
@@ -62,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
|
||||
|
||||
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) {
|
||||
glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
||||
glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
@@ -70,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
u8* data) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
||||
glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
|
||||
static_cast<GLsizeiptr>(size), data);
|
||||
}
|
||||
|
||||
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) {
|
||||
glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
|
||||
glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
|
||||
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
|
||||
std::size_t size) {
|
||||
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
|
||||
const GLuint& cbuf = cbufs[cbuf_cursor++];
|
||||
const GLuint cbuf = cbufs[cbuf_cursor++];
|
||||
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
|
||||
return {cbuf, 0};
|
||||
}
|
||||
|
||||
@@ -23,17 +23,12 @@ class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class CachedBufferBlock;
|
||||
|
||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
|
||||
class CachedBufferBlock : public VideoCommon::BufferBlock {
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
|
||||
~CachedBufferBlock();
|
||||
explicit Buffer(VAddr cpu_addr, const std::size_t size);
|
||||
~Buffer();
|
||||
|
||||
GLuint GetHandle() const {
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
@@ -41,6 +36,7 @@ private:
|
||||
OGLBuffer gl_buffer;
|
||||
};
|
||||
|
||||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
public:
|
||||
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
@@ -54,9 +50,7 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
GLuint ToHandle(const Buffer& buffer) override;
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) override;
|
||||
|
||||
@@ -123,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
u32 base_images = 0;
|
||||
|
||||
// Reserve more image bindings on fragment and vertex stages.
|
||||
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
|
||||
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
|
||||
// fragment stage, and at least 1 for the rest of the stages.
|
||||
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
|
||||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
|
||||
bindings[0].image =
|
||||
Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
||||
// Reserve the other image bindings.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 2);
|
||||
for (std::size_t i = 2; i < NumStages; ++i) {
|
||||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
if (stage == 4) {
|
||||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
||||
}
|
||||
@@ -213,6 +221,7 @@ Device::Device()
|
||||
has_component_indexing_bug = is_amd;
|
||||
has_precise_bug = TestPreciseBug();
|
||||
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
|
||||
GLAD_GL_NV_transform_feedback2;
|
||||
|
||||
@@ -88,6 +88,10 @@ public:
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
|
||||
bool HasNvViewportArray2() const {
|
||||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
@@ -111,6 +115,7 @@ private:
|
||||
bool has_component_indexing_bug{};
|
||||
bool has_precise_bug{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool use_assembly_shaders{};
|
||||
};
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
@@ -148,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
|
||||
auto program = std::make_shared<ProgramHandle>();
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
const std::string arb = "Not implemented";
|
||||
const std::string arb =
|
||||
DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
|
||||
|
||||
GLuint& arb_prog = program->assembly_program.handle;
|
||||
|
||||
|
||||
@@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
GLuint OGLStreamBuffer::GetHandle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
GLsizeiptr OGLStreamBuffer::GetSize() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
|
||||
@@ -17,9 +17,6 @@ public:
|
||||
bool use_persistent = true);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
GLsizeiptr GetSize() const;
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
@@ -32,6 +29,14 @@ public:
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
GLuint Handle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
GLsizeiptr Size() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
|
||||
@@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_UNSIGNED_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::SignedInt:
|
||||
case Maxwell::VertexAttribute::Type::SignedNorm:
|
||||
switch (attrib.size) {
|
||||
@@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::Float:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16:
|
||||
@@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
||||
return GL_FLOAT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::UnsignedScaled:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
@@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||
return GL_UNSIGNED_SHORT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
break;
|
||||
case Maxwell::VertexAttribute::Type::SignedScaled:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
@@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16:
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
|
||||
return GL_SHORT;
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
|
||||
return {};
|
||||
break;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
|
||||
attrib.SizeString());
|
||||
return {};
|
||||
}
|
||||
|
||||
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
@@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
case Maxwell::IndexFormat::UnsignedInt:
|
||||
return GL_UNSIGNED_INT;
|
||||
}
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
|
||||
UNREACHABLE();
|
||||
UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -180,10 +169,20 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
}
|
||||
|
||||
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||
Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
|
||||
Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
|
||||
switch (filter_mode) {
|
||||
case Tegra::Texture::TextureFilter::Linear: {
|
||||
switch (mip_filter_mode) {
|
||||
case Tegra::Texture::TextureFilter::Nearest:
|
||||
switch (mipmap_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return GL_NEAREST_MIPMAP_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return GL_NEAREST_MIPMAP_LINEAR;
|
||||
}
|
||||
break;
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
switch (mipmap_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_LINEAR;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
@@ -193,20 +192,9 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Tegra::Texture::TextureFilter::Nearest: {
|
||||
switch (mip_filter_mode) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
return GL_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return GL_NEAREST_MIPMAP_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return GL_NEAREST_MIPMAP_LINEAR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
|
||||
return GL_LINEAR;
|
||||
UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
|
||||
static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
|
||||
return GL_NEAREST;
|
||||
}
|
||||
|
||||
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
@@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
} else {
|
||||
return GL_MIRROR_CLAMP_TO_EDGE;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
|
||||
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||
@@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||
case Tegra::Texture::DepthCompareFunc::Always:
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
|
||||
static_cast<u32>(func));
|
||||
UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
|
||||
return GL_GREATER;
|
||||
}
|
||||
|
||||
@@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
|
||||
case Maxwell::Blend::Equation::MaxGL:
|
||||
return GL_MAX;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
|
||||
return GL_FUNC_ADD;
|
||||
}
|
||||
|
||||
@@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
|
||||
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
|
||||
return GL_ONE_MINUS_CONSTANT_ALPHA;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||
UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
|
||||
return GL_ZERO;
|
||||
}
|
||||
|
||||
@@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
|
||||
case Tegra::Texture::SwizzleSource::OneFloat:
|
||||
return GL_ONE;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
|
||||
UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
|
||||
return GL_ZERO;
|
||||
}
|
||||
|
||||
@@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
|
||||
case Maxwell::ComparisonOp::AlwaysOld:
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||
UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
|
||||
return GL_ALWAYS;
|
||||
}
|
||||
|
||||
@@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
|
||||
case Maxwell::StencilOp::DecrWrapOGL:
|
||||
return GL_DECR_WRAP;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
|
||||
UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
|
||||
return GL_KEEP;
|
||||
}
|
||||
|
||||
@@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
|
||||
case Maxwell::FrontFace::CounterClockWise:
|
||||
return GL_CCW;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
|
||||
UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
|
||||
return GL_CCW;
|
||||
}
|
||||
|
||||
@@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
|
||||
case Maxwell::CullFace::FrontAndBack:
|
||||
return GL_FRONT_AND_BACK;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||
UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
|
||||
return GL_BACK;
|
||||
}
|
||||
|
||||
@@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
|
||||
case Maxwell::LogicOperation::Set:
|
||||
return GL_SET;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
|
||||
UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
|
||||
return GL_COPY;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,29 +21,29 @@ namespace Sampler {
|
||||
|
||||
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
|
||||
switch (filter) {
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
return VK_FILTER_LINEAR;
|
||||
case Tegra::Texture::TextureFilter::Nearest:
|
||||
return VK_FILTER_NEAREST;
|
||||
case Tegra::Texture::TextureFilter::Linear:
|
||||
return VK_FILTER_LINEAR;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
|
||||
UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
|
||||
switch (mipmap_filter) {
|
||||
case Tegra::Texture::TextureMipmapFilter::None:
|
||||
// TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
|
||||
// (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
|
||||
// use an image view with a single mipmap level to emulate this.
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
// There are no Vulkan filter modes that directly correspond to OpenGL minification filters
|
||||
// of GL_LINEAR or GL_NEAREST, but they can be emulated using
|
||||
// VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
|
||||
// VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
|
||||
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Nearest:
|
||||
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
|
||||
case Tegra::Texture::TextureMipmapFilter::Linear:
|
||||
return VK_SAMPLER_MIPMAP_MODE_LINEAR;
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||
UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
|
||||
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
||||
UNIMPLEMENTED();
|
||||
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
||||
@@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
|
||||
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
|
||||
case Maxwell::PrimitiveTopology::Patches:
|
||||
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
|
||||
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
||||
|
||||
@@ -37,8 +37,8 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VAddr cpu_addr, std::size_t size)
|
||||
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
||||
std::size_t size)
|
||||
: VideoCommon::BufferBlock{cpu_addr, size} {
|
||||
VkBufferCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
@@ -54,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
|
||||
buffer.commit = memory_manager.Commit(buffer.handle, false);
|
||||
}
|
||||
|
||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||
Buffer::~Buffer() = default;
|
||||
|
||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
@@ -67,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
|
||||
}
|
||||
|
||||
VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
|
||||
return buffer->GetHandle();
|
||||
std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||
return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
|
||||
}
|
||||
|
||||
VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
|
||||
@@ -91,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
|
||||
std::memcpy(staging.commit->Map(size), data, size);
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
|
||||
|
||||
@@ -114,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
u8* data) {
|
||||
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
|
||||
scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
@@ -141,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
|
||||
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
|
||||
std::size_t dst_offset, std::size_t size) {
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
|
||||
dst_offset, size](vk::CommandBuffer cmdbuf) {
|
||||
scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset,
|
||||
size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
|
||||
|
||||
std::array<VkBufferMemoryBarrier, 2> barriers;
|
||||
|
||||
@@ -23,13 +23,13 @@ class VKDevice;
|
||||
class VKMemoryManager;
|
||||
class VKScheduler;
|
||||
|
||||
class CachedBufferBlock final : public VideoCommon::BufferBlock {
|
||||
class Buffer final : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VAddr cpu_addr, std::size_t size);
|
||||
~CachedBufferBlock();
|
||||
explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr,
|
||||
std::size_t size);
|
||||
~Buffer();
|
||||
|
||||
VkBuffer GetHandle() const {
|
||||
VkBuffer Handle() const {
|
||||
return *buffer.handle;
|
||||
}
|
||||
|
||||
@@ -37,8 +37,6 @@ private:
|
||||
VKBuffer buffer;
|
||||
};
|
||||
|
||||
using Buffer = std::shared_ptr<CachedBufferBlock>;
|
||||
|
||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||
public:
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
@@ -49,9 +47,7 @@ public:
|
||||
VkBuffer GetEmptyBuffer(std::size_t size) override;
|
||||
|
||||
protected:
|
||||
VkBuffer ToHandle(const Buffer& buffer) override;
|
||||
|
||||
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
|
||||
|
||||
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||
const u8* data) override;
|
||||
|
||||
@@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Determine if an attachment to be updated has to preserve contents
|
||||
/// @param is_clear True when a clear is being executed
|
||||
/// @param regs 3D registers
|
||||
/// @return True when the contents have to be preserved
|
||||
bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
|
||||
if (!is_clear) {
|
||||
return true;
|
||||
}
|
||||
// First we have to make sure all clear masks are enabled.
|
||||
if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
|
||||
!regs.clear_buffers.A) {
|
||||
return true;
|
||||
}
|
||||
// If scissors are disabled, the whole screen is cleared
|
||||
if (!regs.clear_flags.scissor) {
|
||||
return false;
|
||||
}
|
||||
// Then we have to confirm scissor testing clears the whole image
|
||||
const std::size_t index = regs.clear_buffers.RT;
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
|
||||
scissor.max_y < regs.rt[index].height;
|
||||
}
|
||||
|
||||
/// @brief Determine if an attachment to be updated has to preserve contents
|
||||
/// @param is_clear True when a clear is being executed
|
||||
/// @param regs 3D registers
|
||||
/// @return True when the contents have to be preserved
|
||||
bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
|
||||
// If we are not clearing, the contents have to be preserved
|
||||
if (!is_clear) {
|
||||
return true;
|
||||
}
|
||||
// For depth stencil clears we only have to confirm scissor test covers the whole image
|
||||
if (!regs.clear_flags.scissor) {
|
||||
return false;
|
||||
}
|
||||
// Make sure the clear cover the whole image
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
|
||||
scissor.max_y < regs.zeta_height;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
class BufferBindings final {
|
||||
@@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
|
||||
buffer_cache.Unmap();
|
||||
|
||||
const Texceptions texceptions = UpdateAttachments();
|
||||
const Texceptions texceptions = UpdateAttachments(false);
|
||||
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
|
||||
|
||||
key.renderpass_params = GetRenderPassParams(texceptions);
|
||||
@@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() {
|
||||
return;
|
||||
}
|
||||
|
||||
[[maybe_unused]] const auto texceptions = UpdateAttachments();
|
||||
[[maybe_unused]] const auto texceptions = UpdateAttachments(true);
|
||||
DEBUG_ASSERT(texceptions.none());
|
||||
SetupImageTransitions(0, color_attachments, zeta_attachment);
|
||||
|
||||
@@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
|
||||
draw_counter = 0;
|
||||
}
|
||||
|
||||
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
|
||||
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
|
||||
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
|
||||
auto& dirty = system.GPU().Maxwell3D().dirty.flags;
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
auto& regs = maxwell3d.regs;
|
||||
|
||||
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
|
||||
dirty[VideoCommon::Dirty::RenderTargets] = false;
|
||||
|
||||
@@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
|
||||
Texceptions texceptions;
|
||||
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
|
||||
if (update_rendertargets) {
|
||||
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
|
||||
const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
|
||||
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
|
||||
}
|
||||
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
|
||||
texceptions[rt] = true;
|
||||
@@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
|
||||
}
|
||||
|
||||
if (update_rendertargets) {
|
||||
zeta_attachment = texture_cache.GetDepthBufferSurface(true);
|
||||
const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
|
||||
zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
|
||||
}
|
||||
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
|
||||
texceptions[ZETA_TEXCEPTION_INDEX] = true;
|
||||
|
||||
@@ -159,7 +159,10 @@ private:
|
||||
|
||||
void FlushWork();
|
||||
|
||||
Texceptions UpdateAttachments();
|
||||
/// @brief Updates the currently bound attachments
|
||||
/// @param is_clear True when the framebuffer is updated as a clear
|
||||
/// @return Bitfield of attachments being used as sampled textures
|
||||
Texceptions UpdateAttachments(bool is_clear);
|
||||
|
||||
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
using Tegra::Texture::TextureMipmapFilter;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
|
||||
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
|
||||
ci.compareEnable = tsc.depth_compare_enabled;
|
||||
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
|
||||
ci.minLod = tsc.GetMinLod();
|
||||
ci.maxLod = tsc.GetMaxLod();
|
||||
ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
|
||||
ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
|
||||
ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
|
||||
ci.unnormalizedCoordinates = VK_FALSE;
|
||||
return device.GetLogical().CreateSampler(ci);
|
||||
|
||||
@@ -35,7 +35,7 @@ public:
|
||||
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
|
||||
void Unmap(u64 size);
|
||||
|
||||
VkBuffer GetHandle() const {
|
||||
VkBuffer Handle() const {
|
||||
return *buffer;
|
||||
}
|
||||
|
||||
|
||||
@@ -631,13 +631,11 @@ void Config::ReadRendererValues() {
|
||||
static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
|
||||
Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
|
||||
Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
|
||||
Settings::values.resolution_factor =
|
||||
ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
|
||||
Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
|
||||
Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
|
||||
Settings::values.use_frame_limit =
|
||||
ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
|
||||
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
|
||||
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
|
||||
Settings::values.use_disk_shader_cache =
|
||||
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
|
||||
const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
|
||||
@@ -722,8 +720,6 @@ void Config::ReadUIValues() {
|
||||
.toString();
|
||||
UISettings::values.enable_discord_presence =
|
||||
ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
|
||||
UISettings::values.screenshot_resolution_factor =
|
||||
static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
|
||||
UISettings::values.select_user_on_boot =
|
||||
ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
|
||||
|
||||
@@ -1082,8 +1078,6 @@ void Config::SaveRendererValues() {
|
||||
WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
|
||||
WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
|
||||
WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
|
||||
WriteSetting(QStringLiteral("resolution_factor"),
|
||||
static_cast<double>(Settings::values.resolution_factor), 1.0);
|
||||
WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
|
||||
WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
|
||||
WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
|
||||
@@ -1159,8 +1153,6 @@ void Config::SaveUIValues() {
|
||||
QString::fromUtf8(UISettings::themes[0].second));
|
||||
WriteSetting(QStringLiteral("enable_discord_presence"),
|
||||
UISettings::values.enable_discord_presence, true);
|
||||
WriteSetting(QStringLiteral("screenshot_resolution_factor"),
|
||||
UISettings::values.screenshot_resolution_factor, 0);
|
||||
WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
|
||||
false);
|
||||
|
||||
|
||||
@@ -19,47 +19,6 @@
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
enum class Resolution : int {
|
||||
Auto,
|
||||
Scale1x,
|
||||
Scale2x,
|
||||
Scale3x,
|
||||
Scale4x,
|
||||
};
|
||||
|
||||
float ToResolutionFactor(Resolution option) {
|
||||
switch (option) {
|
||||
case Resolution::Auto:
|
||||
return 0.f;
|
||||
case Resolution::Scale1x:
|
||||
return 1.f;
|
||||
case Resolution::Scale2x:
|
||||
return 2.f;
|
||||
case Resolution::Scale3x:
|
||||
return 3.f;
|
||||
case Resolution::Scale4x:
|
||||
return 4.f;
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
Resolution FromResolutionFactor(float factor) {
|
||||
if (factor == 0.f) {
|
||||
return Resolution::Auto;
|
||||
} else if (factor == 1.f) {
|
||||
return Resolution::Scale1x;
|
||||
} else if (factor == 2.f) {
|
||||
return Resolution::Scale2x;
|
||||
} else if (factor == 3.f) {
|
||||
return Resolution::Scale3x;
|
||||
} else if (factor == 4.f) {
|
||||
return Resolution::Scale4x;
|
||||
}
|
||||
return Resolution::Auto;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ConfigureGraphics::ConfigureGraphics(QWidget* parent)
|
||||
: QWidget(parent), ui(new Ui::ConfigureGraphics) {
|
||||
vulkan_device = Settings::values.vulkan_device;
|
||||
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
|
||||
ui->api->setEnabled(runtime_lock);
|
||||
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
|
||||
ui->resolution_factor_combobox->setCurrentIndex(
|
||||
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
|
||||
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
|
||||
ui->use_disk_shader_cache->setEnabled(runtime_lock);
|
||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
|
||||
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
|
||||
void ConfigureGraphics::ApplyConfiguration() {
|
||||
Settings::values.renderer_backend = GetCurrentGraphicsBackend();
|
||||
Settings::values.vulkan_device = vulkan_device;
|
||||
Settings::values.resolution_factor =
|
||||
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
|
||||
Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
|
||||
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
|
||||
Settings::values.use_asynchronous_gpu_emulation =
|
||||
|
||||
@@ -84,46 +84,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||
<item>
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>Internal Resolution:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QComboBox" name="resolution_factor_combobox">
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Auto (Window Size)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>Native (1280x720)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>2x Native (2560x1440)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>3x Native (3840x2160)</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string>4x Native (5120x2880)</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_6">
|
||||
<item>
|
||||
|
||||
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
|
||||
|
||||
ui->setupUi(this);
|
||||
|
||||
// TODO: Remove this after assembly shaders are fully integrated
|
||||
ui->use_assembly_shaders->setVisible(false);
|
||||
|
||||
SetConfiguration();
|
||||
}
|
||||
|
||||
|
||||
@@ -689,10 +689,7 @@ void GMainWindow::InitializeHotkeys() {
|
||||
Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
|
||||
UpdateStatusBar();
|
||||
});
|
||||
// TODO: Remove this comment/static whenever the next major release of
|
||||
// MSVC occurs and we make it a requirement (see:
|
||||
// https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
|
||||
static constexpr u16 SPEED_LIMIT_STEP = 5;
|
||||
constexpr u16 SPEED_LIMIT_STEP = 5;
|
||||
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
|
||||
&QShortcut::activated, this, [&] {
|
||||
if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
|
||||
|
||||
@@ -380,8 +380,6 @@ void Config::ReadValues() {
|
||||
Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
|
||||
Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
|
||||
|
||||
Settings::values.resolution_factor =
|
||||
static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
|
||||
Settings::values.aspect_ratio =
|
||||
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
|
||||
Settings::values.max_anisotropy =
|
||||
|
||||
@@ -117,11 +117,6 @@ use_hw_renderer =
|
||||
# 0: Interpreter (slow), 1 (default): JIT (fast)
|
||||
use_shader_jit =
|
||||
|
||||
# Resolution scale factor
|
||||
# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
|
||||
# factor for the Switch resolution
|
||||
resolution_factor =
|
||||
|
||||
# Aspect ratio
|
||||
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
|
||||
aspect_ratio =
|
||||
|
||||
@@ -116,8 +116,6 @@ void Config::ReadValues() {
|
||||
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
|
||||
|
||||
// Renderer
|
||||
Settings::values.resolution_factor =
|
||||
static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
|
||||
Settings::values.aspect_ratio =
|
||||
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
|
||||
Settings::values.max_anisotropy =
|
||||
|
||||
@@ -21,11 +21,6 @@ use_hw_renderer =
|
||||
# 0: Interpreter (slow), 1 (default): JIT (fast)
|
||||
use_shader_jit =
|
||||
|
||||
# Resolution scale factor
|
||||
# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
|
||||
# factor for the Switch resolution
|
||||
resolution_factor =
|
||||
|
||||
# Aspect ratio
|
||||
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
|
||||
aspect_ratio =
|
||||
|
||||
Reference in New Issue
Block a user