Compare commits
41 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee21e4ecd3 | ||
|
|
e68ee43a1a | ||
|
|
104b334e40 | ||
|
|
0ac8848eae | ||
|
|
edbf3144d2 | ||
|
|
f7debcaa04 | ||
|
|
a280822c82 | ||
|
|
bb8ef38152 | ||
|
|
058ec22787 | ||
|
|
9d9ffe0f94 | ||
|
|
d0bdd26c26 | ||
|
|
87b272699f | ||
|
|
1bb3122c1f | ||
|
|
5242b21524 | ||
|
|
9b06e823ee | ||
|
|
f57cbd9f24 | ||
|
|
326403518d | ||
|
|
099ac9c2a8 | ||
|
|
136c563f76 | ||
|
|
640f0d1cec | ||
|
|
b8b6f94ba9 | ||
|
|
630fc12d4e | ||
|
|
d2b2557542 | ||
|
|
b2af304918 | ||
|
|
32e6727dae | ||
|
|
b2c4521a91 | ||
|
|
b17fe82973 | ||
|
|
8bba84a401 | ||
|
|
508242c267 | ||
|
|
623d9c47a2 | ||
|
|
c13e2f1b75 | ||
|
|
86345c126a | ||
|
|
5d0986a53b | ||
|
|
103809a0ca | ||
|
|
ed4e324991 | ||
|
|
434856c636 | ||
|
|
d0fc12684a | ||
|
|
4cff5dd194 | ||
|
|
9a36d8600c | ||
|
|
91dddca26e | ||
|
|
cf6a40fc12 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -28,3 +28,6 @@
|
||||
[submodule "libzip"]
|
||||
path = externals/libzip/libzip
|
||||
url = https://github.com/nih-at/libzip.git
|
||||
[submodule "xbyak"]
|
||||
path = externals/xbyak
|
||||
url = https://github.com/herumi/xbyak.git
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.11)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
|
||||
@@ -13,7 +13,7 @@ project(yuzu)
|
||||
option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
|
||||
|
||||
option(ENABLE_QT "Enable the Qt frontend" ON)
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF)
|
||||
CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
|
||||
|
||||
option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
|
||||
|
||||
|
||||
8
externals/CMakeLists.txt
vendored
8
externals/CMakeLists.txt
vendored
@@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE)
|
||||
target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET xbyak)
|
||||
if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
|
||||
add_library(xbyak INTERFACE)
|
||||
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
|
||||
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
2
externals/sirit
vendored
2
externals/sirit
vendored
Submodule externals/sirit updated: 414fc4dbd2...a62c5bbc10
1
externals/xbyak
vendored
Submodule
1
externals/xbyak
vendored
Submodule
Submodule externals/xbyak added at 82b70e6659
@@ -123,6 +123,8 @@ add_library(common STATIC
|
||||
lz4_compression.cpp
|
||||
lz4_compression.h
|
||||
math_util.h
|
||||
memory_detect.cpp
|
||||
memory_detect.h
|
||||
memory_hook.cpp
|
||||
memory_hook.h
|
||||
microprofile.cpp
|
||||
@@ -169,10 +171,12 @@ if(ARCHITECTURE_x86_64)
|
||||
PRIVATE
|
||||
x64/cpu_detect.cpp
|
||||
x64/cpu_detect.h
|
||||
x64/xbyak_abi.h
|
||||
x64/xbyak_util.h
|
||||
)
|
||||
endif()
|
||||
|
||||
create_target_directory_groups(common)
|
||||
|
||||
target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
|
||||
target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd)
|
||||
target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
|
||||
|
||||
60
src/common/memory_detect.cpp
Normal file
60
src/common/memory_detect.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#ifdef _WIN32
|
||||
// clang-format off
|
||||
#include <windows.h>
|
||||
#include <sysinfoapi.h>
|
||||
// clang-format on
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#ifdef __APPLE__
|
||||
#include <sys/sysctl.h>
|
||||
#else
|
||||
#include <sys/sysinfo.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "common/memory_detect.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
// Detects the RAM and Swapfile sizes
|
||||
static MemoryInfo Detect() {
|
||||
MemoryInfo mem_info{};
|
||||
|
||||
#ifdef _WIN32
|
||||
MEMORYSTATUSEX memorystatus;
|
||||
memorystatus.dwLength = sizeof(memorystatus);
|
||||
GlobalMemoryStatusEx(&memorystatus);
|
||||
mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
|
||||
mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
|
||||
#elif defined(__APPLE__)
|
||||
u64 ramsize;
|
||||
struct xsw_usage vmusage;
|
||||
std::size_t sizeof_ramsize = sizeof(ramsize);
|
||||
std::size_t sizeof_vmusage = sizeof(vmusage);
|
||||
// hw and vm are defined in sysctl.h
|
||||
// https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
|
||||
// sysctlbyname(const char *, void *, size_t *, void *, size_t);
|
||||
sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0);
|
||||
sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0);
|
||||
mem_info.TotalPhysicalMemory = ramsize;
|
||||
mem_info.TotalSwapMemory = vmusage.xsu_total;
|
||||
#else
|
||||
struct sysinfo meminfo;
|
||||
sysinfo(&meminfo);
|
||||
mem_info.TotalPhysicalMemory = meminfo.totalram;
|
||||
mem_info.TotalSwapMemory = meminfo.totalswap;
|
||||
#endif
|
||||
|
||||
return mem_info;
|
||||
}
|
||||
|
||||
const MemoryInfo& GetMemInfo() {
|
||||
static MemoryInfo mem_info = Detect();
|
||||
return mem_info;
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
22
src/common/memory_detect.h
Normal file
22
src/common/memory_detect.h
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
struct MemoryInfo {
|
||||
u64 TotalPhysicalMemory{};
|
||||
u64 TotalSwapMemory{};
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets the memory info of the host system
|
||||
* @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
|
||||
*/
|
||||
const MemoryInfo& GetMemInfo();
|
||||
|
||||
} // namespace Common
|
||||
266
src/common/x64/xbyak_abi.h
Normal file
266
src/common/x64/xbyak_abi.h
Normal file
@@ -0,0 +1,266 @@
|
||||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <initializer_list>
|
||||
#include <xbyak.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
inline int RegToIndex(const Xbyak::Reg& reg) {
|
||||
using Kind = Xbyak::Reg::Kind;
|
||||
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
|
||||
"RegSet only support GPRs and XMM registers.");
|
||||
ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
|
||||
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
|
||||
}
|
||||
|
||||
inline Xbyak::Reg64 IndexToReg64(int reg_index) {
|
||||
ASSERT(reg_index < 16);
|
||||
return Xbyak::Reg64(reg_index);
|
||||
}
|
||||
|
||||
inline Xbyak::Xmm IndexToXmm(int reg_index) {
|
||||
ASSERT(reg_index >= 16 && reg_index < 32);
|
||||
return Xbyak::Xmm(reg_index - 16);
|
||||
}
|
||||
|
||||
inline Xbyak::Reg IndexToReg(int reg_index) {
|
||||
if (reg_index < 16) {
|
||||
return IndexToReg64(reg_index);
|
||||
} else {
|
||||
return IndexToXmm(reg_index);
|
||||
}
|
||||
}
|
||||
|
||||
inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
|
||||
std::bitset<32> bits;
|
||||
for (const Xbyak::Reg& reg : regs) {
|
||||
bits[RegToIndex(reg)] = true;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
|
||||
const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
// Microsoft x64 ABI
|
||||
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
|
||||
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
|
||||
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
|
||||
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rcx,
|
||||
Xbyak::util::rdx,
|
||||
Xbyak::util::r8,
|
||||
Xbyak::util::r9,
|
||||
Xbyak::util::r10,
|
||||
Xbyak::util::r11,
|
||||
// XMMs
|
||||
Xbyak::util::xmm0,
|
||||
Xbyak::util::xmm1,
|
||||
Xbyak::util::xmm2,
|
||||
Xbyak::util::xmm3,
|
||||
Xbyak::util::xmm4,
|
||||
Xbyak::util::xmm5,
|
||||
});
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rbx,
|
||||
Xbyak::util::rsi,
|
||||
Xbyak::util::rdi,
|
||||
Xbyak::util::rbp,
|
||||
Xbyak::util::r12,
|
||||
Xbyak::util::r13,
|
||||
Xbyak::util::r14,
|
||||
Xbyak::util::r15,
|
||||
// XMMs
|
||||
Xbyak::util::xmm6,
|
||||
Xbyak::util::xmm7,
|
||||
Xbyak::util::xmm8,
|
||||
Xbyak::util::xmm9,
|
||||
Xbyak::util::xmm10,
|
||||
Xbyak::util::xmm11,
|
||||
Xbyak::util::xmm12,
|
||||
Xbyak::util::xmm13,
|
||||
Xbyak::util::xmm14,
|
||||
Xbyak::util::xmm15,
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0x20;
|
||||
|
||||
#else
|
||||
|
||||
// System V x86-64 ABI
|
||||
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
|
||||
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
|
||||
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
|
||||
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
|
||||
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rcx,
|
||||
Xbyak::util::rdx,
|
||||
Xbyak::util::rdi,
|
||||
Xbyak::util::rsi,
|
||||
Xbyak::util::r8,
|
||||
Xbyak::util::r9,
|
||||
Xbyak::util::r10,
|
||||
Xbyak::util::r11,
|
||||
// XMMs
|
||||
Xbyak::util::xmm0,
|
||||
Xbyak::util::xmm1,
|
||||
Xbyak::util::xmm2,
|
||||
Xbyak::util::xmm3,
|
||||
Xbyak::util::xmm4,
|
||||
Xbyak::util::xmm5,
|
||||
Xbyak::util::xmm6,
|
||||
Xbyak::util::xmm7,
|
||||
Xbyak::util::xmm8,
|
||||
Xbyak::util::xmm9,
|
||||
Xbyak::util::xmm10,
|
||||
Xbyak::util::xmm11,
|
||||
Xbyak::util::xmm12,
|
||||
Xbyak::util::xmm13,
|
||||
Xbyak::util::xmm14,
|
||||
Xbyak::util::xmm15,
|
||||
});
|
||||
|
||||
const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
|
||||
// GPRs
|
||||
Xbyak::util::rbx,
|
||||
Xbyak::util::rbp,
|
||||
Xbyak::util::r12,
|
||||
Xbyak::util::r13,
|
||||
Xbyak::util::r14,
|
||||
Xbyak::util::r15,
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0;
|
||||
|
||||
#endif
|
||||
|
||||
inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
|
||||
size_t needed_frame_size, s32* out_subtraction,
|
||||
s32* out_xmm_offset) {
|
||||
const auto count = (regs & ABI_ALL_GPRS).count();
|
||||
rsp_alignment -= count * 8;
|
||||
size_t subtraction = 0;
|
||||
const auto xmm_count = (regs & ABI_ALL_XMMS).count();
|
||||
if (xmm_count) {
|
||||
// If we have any XMMs to save, we must align the stack here.
|
||||
subtraction = rsp_alignment & 0xF;
|
||||
}
|
||||
subtraction += 0x10 * xmm_count;
|
||||
size_t xmm_base_subtraction = subtraction;
|
||||
subtraction += needed_frame_size;
|
||||
subtraction += ABI_SHADOW_SPACE;
|
||||
// Final alignment.
|
||||
rsp_alignment -= subtraction;
|
||||
subtraction += rsp_alignment & 0xF;
|
||||
|
||||
*out_subtraction = (s32)subtraction;
|
||||
*out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.push(IndexToReg64(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
if (subtraction != 0) {
|
||||
code.sub(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
for (int i = 0; i < regs.count(); i++) {
|
||||
if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
|
||||
code.push(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
|
||||
xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
return ABI_SHADOW_SPACE;
|
||||
}
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_XMMS[i]) {
|
||||
code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
|
||||
xmm_offset += 0x10;
|
||||
}
|
||||
}
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.add(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
// GPRs need to be popped in reverse order
|
||||
for (int i = 15; i >= 0; i--) {
|
||||
if (regs[i]) {
|
||||
code.pop(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment,
|
||||
size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
|
||||
for (std::size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && ABI_ALL_GPRS[i]) {
|
||||
code.push(IndexToReg64(static_cast<int>(i)));
|
||||
}
|
||||
}
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.sub(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
return ABI_SHADOW_SPACE;
|
||||
}
|
||||
|
||||
inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
|
||||
size_t rsp_alignment, size_t needed_frame_size = 0) {
|
||||
s32 subtraction, xmm_offset;
|
||||
ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
|
||||
|
||||
if (subtraction != 0) {
|
||||
code.add(code.rsp, subtraction);
|
||||
}
|
||||
|
||||
// GPRs need to be popped in reverse order
|
||||
for (int i = 15; i >= 0; i--) {
|
||||
if (regs[i]) {
|
||||
code.pop(IndexToReg64(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
47
src/common/x64/xbyak_util.h
Normal file
47
src/common/x64/xbyak_util.h
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <xbyak.h>
|
||||
#include "common/x64/xbyak_abi.h"
|
||||
|
||||
namespace Common::X64 {
|
||||
|
||||
// Constants for use with cmpps/cmpss
|
||||
enum {
|
||||
CMP_EQ = 0,
|
||||
CMP_LT = 1,
|
||||
CMP_LE = 2,
|
||||
CMP_UNORD = 3,
|
||||
CMP_NEQ = 4,
|
||||
CMP_NLT = 5,
|
||||
CMP_NLE = 6,
|
||||
CMP_ORD = 7,
|
||||
};
|
||||
|
||||
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
|
||||
const u64 distance = target - (ref + 5);
|
||||
return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
|
||||
}
|
||||
|
||||
inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
|
||||
return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
|
||||
static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
|
||||
size_t addr = reinterpret_cast<size_t>(f);
|
||||
if (IsWithin2G(code, addr)) {
|
||||
code.call(f);
|
||||
} else {
|
||||
// ABI_RETURN is a safe temp register to use before a call
|
||||
code.mov(ABI_RETURN, addr);
|
||||
code.call(ABI_RETURN);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Common::X64
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "common/file_util.h"
|
||||
#include "common/hex_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/string_util.h"
|
||||
#include "core/core.h"
|
||||
#include "core/file_sys/content_archive.h"
|
||||
#include "core/file_sys/control_metadata.h"
|
||||
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
|
||||
return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
|
||||
}
|
||||
|
||||
std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
|
||||
std::string_view name) {
|
||||
#ifdef _WIN32
|
||||
return dir->GetSubdirectory(name);
|
||||
#else
|
||||
const auto subdirs = dir->GetSubdirectories();
|
||||
for (const auto& subdir : subdirs) {
|
||||
std::string dir_name = Common::ToLower(subdir->GetName());
|
||||
if (dir_name == name) {
|
||||
return subdir;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
|
||||
|
||||
PatchManager::~PatchManager() = default;
|
||||
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
|
||||
if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
|
||||
continue;
|
||||
|
||||
auto exefs_dir = subdir->GetSubdirectory("exefs");
|
||||
auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
|
||||
if (exefs_dir != nullptr)
|
||||
layers.push_back(std::move(exefs_dir));
|
||||
}
|
||||
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
|
||||
if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
|
||||
continue;
|
||||
|
||||
auto exefs_dir = subdir->GetSubdirectory("exefs");
|
||||
auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
|
||||
if (exefs_dir != nullptr) {
|
||||
for (const auto& file : exefs_dir->GetFiles()) {
|
||||
if (file->GetExtension() == "ips") {
|
||||
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cheats_dir = subdir->GetSubdirectory("cheats");
|
||||
auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
|
||||
if (cheats_dir != nullptr) {
|
||||
auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
|
||||
if (res.has_value()) {
|
||||
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
|
||||
continue;
|
||||
}
|
||||
|
||||
auto romfs_dir = subdir->GetSubdirectory("romfs");
|
||||
auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
|
||||
if (romfs_dir != nullptr)
|
||||
layers.push_back(std::move(romfs_dir));
|
||||
|
||||
auto ext_dir = subdir->GetSubdirectory("romfs_ext");
|
||||
auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
|
||||
if (ext_dir != nullptr)
|
||||
layers_ext.push_back(std::move(ext_dir));
|
||||
}
|
||||
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
|
||||
for (const auto& mod : mod_dir->GetSubdirectories()) {
|
||||
std::string types;
|
||||
|
||||
const auto exefs_dir = mod->GetSubdirectory("exefs");
|
||||
const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
|
||||
if (IsDirValidAndNonEmpty(exefs_dir)) {
|
||||
bool ips = false;
|
||||
bool ipswitch = false;
|
||||
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
|
||||
if (layeredfs)
|
||||
AppendCommaIfNotEmpty(types, "LayeredExeFS");
|
||||
}
|
||||
if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
|
||||
if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
|
||||
AppendCommaIfNotEmpty(types, "LayeredFS");
|
||||
if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
|
||||
if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
|
||||
AppendCommaIfNotEmpty(types, "Cheats");
|
||||
|
||||
if (types.empty())
|
||||
|
||||
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
|
||||
std::string FormatTitleVersion(u32 version,
|
||||
TitleVersionFormat format = TitleVersionFormat::ThreeElements);
|
||||
|
||||
// Returns a directory with name matching name case-insensitive. Returns nullptr if directory
|
||||
// doesn't have a directory with name.
|
||||
std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
|
||||
std::string_view name);
|
||||
|
||||
// A centralized class to manage patches to games.
|
||||
class PatchManager {
|
||||
public:
|
||||
|
||||
@@ -14,15 +14,15 @@ namespace SystemVersionData {
|
||||
|
||||
constexpr u8 VERSION_MAJOR = 10;
|
||||
constexpr u8 VERSION_MINOR = 0;
|
||||
constexpr u8 VERSION_MICRO = 3;
|
||||
constexpr u8 VERSION_MICRO = 2;
|
||||
|
||||
constexpr u8 REVISION_MAJOR = 1;
|
||||
constexpr u8 REVISION_MINOR = 0;
|
||||
|
||||
constexpr char PLATFORM_STRING[] = "NX";
|
||||
constexpr char VERSION_HASH[] = "254bdca8851c49b707c91f407aa2ab06e6be39f8";
|
||||
constexpr char DISPLAY_VERSION[] = "10.0.3";
|
||||
constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.3-1.0";
|
||||
constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc";
|
||||
constexpr char DISPLAY_VERSION[] = "10.0.2";
|
||||
constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0";
|
||||
|
||||
} // namespace SystemVersionData
|
||||
|
||||
|
||||
@@ -229,7 +229,7 @@ endif()
|
||||
create_target_directory_groups(video_core)
|
||||
|
||||
target_link_libraries(video_core PUBLIC common core)
|
||||
target_link_libraries(video_core PRIVATE glad)
|
||||
target_link_libraries(video_core PRIVATE glad xbyak)
|
||||
|
||||
if (ENABLE_VULKAN)
|
||||
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
|
||||
|
||||
@@ -457,8 +457,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
|
||||
"Units other than CROP are unimplemented");
|
||||
if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
|
||||
LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
|
||||
}
|
||||
|
||||
switch (regs.query.query_get.operation) {
|
||||
case Regs::QueryOperation::Release:
|
||||
@@ -534,8 +535,8 @@ void Maxwell3D::ProcessCounterReset() {
|
||||
rasterizer.ResetCounter(QueryType::SamplesPassed);
|
||||
break;
|
||||
default:
|
||||
LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
|
||||
static_cast<int>(regs.counter_reset));
|
||||
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
|
||||
static_cast<int>(regs.counter_reset));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -592,8 +593,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||
system.GPU().GetTicks());
|
||||
return {};
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
||||
static_cast<u32>(regs.query.query_get.select.Value()));
|
||||
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
|
||||
static_cast<u32>(regs.query.query_get.select.Value()));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
|
||||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
|
||||
GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
|
||||
constexpr std::array LimitUBOs = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSSBOs = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSamplers = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS};
|
||||
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
|
||||
|
||||
constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
|
||||
GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
|
||||
constexpr std::array LimitImages = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
return std::exchange(base, base + amount);
|
||||
}
|
||||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
return max;
|
||||
}
|
||||
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
|
||||
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
|
||||
|
||||
@@ -159,15 +170,14 @@ bool IsASTCSupported() {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
Device::Device()
|
||||
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
|
||||
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
|
||||
const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||
const std::vector extensions = GetExtensions();
|
||||
|
||||
const bool is_nvidia = vendor == "NVIDIA Corporation";
|
||||
const bool is_amd = vendor == "ATI Technologies Inc.";
|
||||
const bool is_intel = vendor == "Intel";
|
||||
const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
@@ -182,7 +192,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
has_component_indexing_bug = is_amd;
|
||||
has_precise_bug = TestPreciseBug();
|
||||
has_broken_compute = is_intel_proprietary;
|
||||
has_fast_buffer_sub_data = is_nvidia;
|
||||
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||
GLAD_GL_NV_compute_program5;
|
||||
@@ -197,7 +206,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||
}
|
||||
|
||||
Device::Device(std::nullptr_t) {
|
||||
uniform_buffer_alignment = 0;
|
||||
max_uniform_buffers.fill(std::numeric_limits<u32>::max());
|
||||
uniform_buffer_alignment = 4;
|
||||
shader_storage_alignment = 4;
|
||||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
has_warp_intrinsics = true;
|
||||
@@ -205,9 +216,6 @@ Device::Device(std::nullptr_t) {
|
||||
has_vertex_viewport_layer = true;
|
||||
has_image_load_formatted = true;
|
||||
has_variable_aoffi = true;
|
||||
has_component_indexing_bug = false;
|
||||
has_broken_compute = false;
|
||||
has_precise_bug = false;
|
||||
}
|
||||
|
||||
bool Device::TestVariableAoffi() {
|
||||
|
||||
@@ -24,6 +24,10 @@ public:
|
||||
explicit Device();
|
||||
explicit Device(std::nullptr_t);
|
||||
|
||||
u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
|
||||
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
|
||||
}
|
||||
|
||||
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
|
||||
return base_bindings[stage_index];
|
||||
}
|
||||
@@ -80,10 +84,6 @@ public:
|
||||
return has_precise_bug;
|
||||
}
|
||||
|
||||
bool HasBrokenCompute() const {
|
||||
return has_broken_compute;
|
||||
}
|
||||
|
||||
bool HasFastBufferSubData() const {
|
||||
return has_fast_buffer_sub_data;
|
||||
}
|
||||
@@ -96,7 +96,8 @@ private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
@@ -109,7 +110,6 @@ private:
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
bool has_precise_bug{};
|
||||
bool has_broken_compute{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool use_assembly_shaders{};
|
||||
};
|
||||
|
||||
@@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||
|
||||
constexpr std::size_t NumSupportedVertexAttributes = 16;
|
||||
|
||||
template <typename Engine, typename Entry>
|
||||
@@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||
CheckExtensions();
|
||||
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||
for (const GLuint cbuf : staging_cbufs) {
|
||||
@@ -655,10 +664,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||
if (device.HasBrokenCompute()) {
|
||||
return;
|
||||
}
|
||||
|
||||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
@@ -846,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||
const auto& shader_stage = stages[stage_index];
|
||||
const auto& entries = shader->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
|
||||
|
||||
u32 binding =
|
||||
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
|
||||
for (const auto& entry : shader->GetEntries().const_buffers) {
|
||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
|
||||
const auto base_bindings = device.GetBaseBindings(stage_index);
|
||||
u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const u32 index = entry.GetIndex();
|
||||
const auto& buffer = shader_stage.const_buffers[index];
|
||||
SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
|
||||
base_unified_offset + index * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
|
||||
entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
|
||||
base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
const auto& entries = kernel->GetEntries();
|
||||
const bool use_unified = entries.use_unified_uniforms;
|
||||
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : kernel->GetEntries().const_buffers) {
|
||||
for (const auto& entry : entries.const_buffers) {
|
||||
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
|
||||
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
|
||||
Tegra::Engines::ConstBufferInfo buffer;
|
||||
buffer.address = config.Address();
|
||||
buffer.size = config.size;
|
||||
buffer.enabled = mask[entry.GetIndex()];
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
|
||||
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
|
||||
use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
|
||||
++binding;
|
||||
}
|
||||
if (use_unified) {
|
||||
const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry) {
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset) {
|
||||
if (!buffer.enabled) {
|
||||
// Set values to zero to unbind buffers
|
||||
if (device.UseAssemblyShaders()) {
|
||||
@@ -889,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||
// UBO alignment requirements.
|
||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||
|
||||
const auto alignment = device.GetUniformBufferAlignment();
|
||||
auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
||||
device.HasFastBufferSubData());
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
||||
const bool fast_upload = !use_unified && device.HasFastBufferSubData();
|
||||
|
||||
const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
|
||||
const GPUVAddr gpu_addr = buffer.address;
|
||||
auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
UNIMPLEMENTED_IF(use_unified);
|
||||
if (offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
||||
cbuf = staging_cbuf;
|
||||
offset = 0;
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
||||
return;
|
||||
}
|
||||
if (offset != 0) {
|
||||
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
||||
cbuf = staging_cbuf;
|
||||
offset = 0;
|
||||
|
||||
if (use_unified) {
|
||||
glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
|
||||
} else {
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
||||
}
|
||||
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
||||
@@ -977,16 +1013,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
|
||||
glBindTextureUnit(binding, 0);
|
||||
return;
|
||||
}
|
||||
glBindTextureUnit(binding, view->GetTexture());
|
||||
|
||||
if (view->GetSurfaceParams().IsBuffer()) {
|
||||
return;
|
||||
const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
glBindTextureUnit(binding, handle);
|
||||
if (!view->GetSurfaceParams().IsBuffer()) {
|
||||
glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
|
||||
}
|
||||
// Apply swizzle to textures that are not buffers.
|
||||
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
|
||||
texture.tic.w_source);
|
||||
|
||||
glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
|
||||
@@ -1015,14 +1047,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
|
||||
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
|
||||
return;
|
||||
}
|
||||
if (!tic.IsBuffer()) {
|
||||
view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
}
|
||||
if (entry.is_written) {
|
||||
view->MarkAsModified(texture_cache.Tick());
|
||||
}
|
||||
glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
|
||||
view->GetFormat());
|
||||
const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport() {
|
||||
|
||||
@@ -107,7 +107,8 @@ private:
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
const ConstBufferEntry& entry);
|
||||
const ConstBufferEntry& entry, bool use_unified,
|
||||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
|
||||
@@ -253,6 +254,7 @@ private:
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
||||
@@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
|
||||
MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
||||
@@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
|
||||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(new CachedShader(
|
||||
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
|
||||
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
PrecompiledShader shader;
|
||||
shader.program = std::move(program);
|
||||
shader.registry = std::move(registry);
|
||||
shader.entries = MakeEntries(ir);
|
||||
shader.entries = MakeEntries(device, ir, entry.type);
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
if (callback) {
|
||||
|
||||
@@ -61,8 +61,8 @@ struct TextureDerivates {};
|
||||
using TextureArgument = std::pair<Type, Node>;
|
||||
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
|
||||
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
|
||||
constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
|
||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
|
||||
|
||||
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
|
||||
#define ftou floatBitsToUint
|
||||
@@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
||||
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
|
||||
}
|
||||
|
||||
bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
|
||||
// We waste one UBO for emulation
|
||||
const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
|
||||
return num_ubos > num_available_ubos;
|
||||
}
|
||||
|
||||
struct GenericVaryingDescription {
|
||||
std::string name;
|
||||
u8 first_element = 0;
|
||||
@@ -412,8 +419,9 @@ class GLSLDecompiler final {
|
||||
public:
|
||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
||||
: device{device}, ir{ir}, registry{registry}, stage{stage},
|
||||
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
|
||||
: device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
|
||||
suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
|
||||
UseUnifiedUniforms(device, ir, stage)} {
|
||||
if (stage != ShaderType::Compute) {
|
||||
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||
}
|
||||
@@ -834,12 +842,24 @@ private:
|
||||
}
|
||||
|
||||
void DeclareConstantBuffers() {
|
||||
if (use_unified_uniforms) {
|
||||
const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
|
||||
static_cast<u32>(ir.GetGlobalMemory().size());
|
||||
code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
|
||||
binding);
|
||||
code.AddLine(" uint cbufs[];");
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
return;
|
||||
}
|
||||
|
||||
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
|
||||
for (const auto& buffers : ir.GetConstantBuffers()) {
|
||||
const auto index = buffers.first;
|
||||
for (const auto [index, info] : ir.GetConstantBuffers()) {
|
||||
const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
|
||||
const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
|
||||
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
|
||||
GetConstBufferBlock(index));
|
||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
|
||||
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
|
||||
code.AddLine("}};");
|
||||
code.AddNewLine();
|
||||
}
|
||||
@@ -1038,42 +1058,51 @@ private:
|
||||
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
|
||||
const Node offset = cbuf->GetOffset();
|
||||
const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
|
||||
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
// Direct access
|
||||
const u32 offset_imm = immediate->GetValue();
|
||||
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
|
||||
Type::Uint};
|
||||
} else {
|
||||
return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
|
||||
offset_imm / (4 * 4), (offset_imm / 4) % 4),
|
||||
Type::Uint};
|
||||
}
|
||||
}
|
||||
|
||||
// Indirect access
|
||||
if (use_unified_uniforms) {
|
||||
return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
|
||||
Visit(offset).AsUint()),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
if (std::holds_alternative<OperationNode>(*offset)) {
|
||||
// Indirect access
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
|
||||
|
||||
if (!device.HasComponentIndexingBug()) {
|
||||
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset, final_offset),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||
const std::string pack = code.GenerateTemporary();
|
||||
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset);
|
||||
|
||||
const std::string result = code.GenerateTemporary();
|
||||
code.AddLine("uint {};", result);
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
|
||||
pack, GetSwizzle(swizzle));
|
||||
}
|
||||
return {result, Type::Uint};
|
||||
if (!device.HasComponentIndexingBug()) {
|
||||
return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset, final_offset),
|
||||
Type::Uint};
|
||||
}
|
||||
|
||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||
const std::string pack = code.GenerateTemporary();
|
||||
code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset);
|
||||
|
||||
const std::string result = code.GenerateTemporary();
|
||||
code.AddLine("uint {};", result);
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
|
||||
GetSwizzle(swizzle));
|
||||
}
|
||||
return {result, Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
@@ -1538,7 +1567,9 @@ private:
|
||||
Expression target;
|
||||
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
|
||||
if (gpr->GetIndex() == Register::ZeroIndex) {
|
||||
// Writing to Register::ZeroIndex is a no op
|
||||
// Writing to Register::ZeroIndex is a no op but we still have to visit the source
|
||||
// as it might have side effects.
|
||||
code.AddLine("{};", Visit(src).GetCode());
|
||||
return {};
|
||||
}
|
||||
target = {GetRegister(gpr->GetIndex()), Type::Float};
|
||||
@@ -2333,7 +2364,21 @@ private:
|
||||
return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
|
||||
}
|
||||
|
||||
Expression MemoryBarrierGL(Operation) {
|
||||
Expression Barrier(Operation) {
|
||||
if (!ir.IsDecompiled()) {
|
||||
LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
|
||||
return {};
|
||||
}
|
||||
code.AddLine("barrier();");
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression MemoryBarrierGroup(Operation) {
|
||||
code.AddLine("groupMemoryBarrier();");
|
||||
return {};
|
||||
}
|
||||
|
||||
Expression MemoryBarrierGlobal(Operation) {
|
||||
code.AddLine("memoryBarrier();");
|
||||
return {};
|
||||
}
|
||||
@@ -2579,7 +2624,9 @@ private:
|
||||
&GLSLDecompiler::ThreadMask<Func::Lt>,
|
||||
&GLSLDecompiler::ShuffleIndexed,
|
||||
|
||||
&GLSLDecompiler::MemoryBarrierGL,
|
||||
&GLSLDecompiler::Barrier,
|
||||
&GLSLDecompiler::MemoryBarrierGroup,
|
||||
&GLSLDecompiler::MemoryBarrierGlobal,
|
||||
};
|
||||
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
||||
|
||||
@@ -2692,6 +2739,7 @@ private:
|
||||
const std::string_view identifier;
|
||||
const std::string_view suffix;
|
||||
const Header header;
|
||||
const bool use_unified_uniforms;
|
||||
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||
|
||||
ShaderWriter code;
|
||||
@@ -2887,7 +2935,7 @@ void GLSLDecompiler::DecompileAST() {
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
|
||||
ShaderEntries entries;
|
||||
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
||||
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
|
||||
@@ -2908,6 +2956,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
||||
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
|
||||
}
|
||||
entries.shader_length = ir.GetLength();
|
||||
entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
|
||||
return entries;
|
||||
}
|
||||
|
||||
|
||||
@@ -53,11 +53,13 @@ struct ShaderEntries {
|
||||
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||
std::vector<SamplerEntry> samplers;
|
||||
std::vector<ImageEntry> images;
|
||||
u32 clip_distances{};
|
||||
std::size_t shader_length{};
|
||||
u32 clip_distances{};
|
||||
bool use_unified_uniforms{};
|
||||
};
|
||||
|
||||
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||
ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||
const VideoCommon::Shader::Registry& registry,
|
||||
|
||||
@@ -47,6 +47,10 @@ void ProgramManager::BindHostPipeline(GLuint pipeline) {
|
||||
old_state.geometry = 0;
|
||||
glDisable(GL_GEOMETRY_PROGRAM_NV);
|
||||
}
|
||||
} else {
|
||||
if (!is_graphics_bound) {
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
glBindProgramPipeline(pipeline);
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
|
||||
namespace {
|
||||
|
||||
struct FormatTuple {
|
||||
GLint internal_format;
|
||||
GLenum internal_format;
|
||||
GLenum format = GL_NONE;
|
||||
GLenum type = GL_NONE;
|
||||
};
|
||||
@@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
|
||||
return texture;
|
||||
}
|
||||
|
||||
constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
|
||||
SwizzleSource w_source) {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
|
||||
@@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() {
|
||||
}
|
||||
|
||||
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
|
||||
LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
|
||||
LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
|
||||
}
|
||||
|
||||
View CachedSurface::CreateView(const ViewParams& view_key) {
|
||||
@@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
|
||||
}
|
||||
|
||||
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
|
||||
const bool is_proxy)
|
||||
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
|
||||
target = GetTextureTarget(params.target);
|
||||
format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
|
||||
bool is_proxy)
|
||||
: VideoCommon::ViewBase(params), surface{surface},
|
||||
format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
|
||||
target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
|
||||
if (!is_proxy) {
|
||||
texture_view = CreateTextureView();
|
||||
main_view = CreateTextureView();
|
||||
}
|
||||
swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
|
||||
}
|
||||
|
||||
CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
@@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
|
||||
GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (new_swizzle == swizzle)
|
||||
return;
|
||||
swizzle = new_swizzle;
|
||||
const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
|
||||
GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
|
||||
const GLuint handle = GetTexture();
|
||||
const PixelFormat format = surface.GetSurfaceParams().pixel_format;
|
||||
switch (format) {
|
||||
if (GetSurfaceParams().IsBuffer()) {
|
||||
return GetTexture();
|
||||
}
|
||||
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (current_swizzle == new_swizzle) {
|
||||
return current_view;
|
||||
}
|
||||
current_swizzle = new_swizzle;
|
||||
|
||||
const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
|
||||
OGLTextureView& view = entry->second;
|
||||
if (!is_cache_miss) {
|
||||
current_view = view.handle;
|
||||
return view.handle;
|
||||
}
|
||||
view = CreateTextureView();
|
||||
current_view = view.handle;
|
||||
|
||||
std::array swizzle{x_source, y_source, z_source, w_source};
|
||||
|
||||
switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
|
||||
case PixelFormat::Z24S8:
|
||||
case PixelFormat::Z32FS8:
|
||||
case PixelFormat::S8Z24:
|
||||
glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
|
||||
UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
|
||||
glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
|
||||
GetComponent(format, x_source == SwizzleSource::R));
|
||||
break;
|
||||
default:
|
||||
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
|
||||
|
||||
// Make sure we sample the first component
|
||||
std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
|
||||
return value == SwizzleSource::G ? SwizzleSource::R : value;
|
||||
});
|
||||
[[fallthrough]];
|
||||
default: {
|
||||
const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
|
||||
GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
|
||||
glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return view.handle;
|
||||
}
|
||||
|
||||
OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||
|
||||
@@ -83,7 +83,7 @@ public:
|
||||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
|
||||
void Attach(GLenum attachment, GLenum target) const;
|
||||
|
||||
void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
@@ -98,7 +98,7 @@ public:
|
||||
if (is_proxy) {
|
||||
return surface.GetTexture();
|
||||
}
|
||||
return texture_view.handle;
|
||||
return main_view.handle;
|
||||
}
|
||||
|
||||
GLenum GetFormat() const {
|
||||
@@ -110,23 +110,19 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source) const {
|
||||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
|
||||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
|
||||
}
|
||||
|
||||
OGLTextureView CreateTextureView() const;
|
||||
|
||||
CachedSurface& surface;
|
||||
GLenum target{};
|
||||
GLenum format{};
|
||||
const GLenum format;
|
||||
const GLenum target;
|
||||
const bool is_proxy;
|
||||
|
||||
OGLTextureView texture_view;
|
||||
u32 swizzle{};
|
||||
bool is_proxy{};
|
||||
std::unordered_map<u32, OGLTextureView> view_cache;
|
||||
OGLTextureView main_view;
|
||||
|
||||
// Use an invalid default so it always fails the comparison test
|
||||
u32 current_swizzle = 0xffffffff;
|
||||
GLuint current_view = 0;
|
||||
};
|
||||
|
||||
class TextureCacheOpenGL final : public TextureCacheBase {
|
||||
|
||||
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
|
||||
bool RendererOpenGL::Init() {
|
||||
if (GLAD_GL_KHR_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
if (Settings::values.renderer_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
|
||||
}
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ struct FormatTuple {
|
||||
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
|
||||
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
|
||||
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
|
||||
{VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8
|
||||
{VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
|
||||
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
|
||||
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
|
||||
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
|
||||
@@ -168,7 +168,7 @@ struct FormatTuple {
|
||||
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
|
||||
{VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
|
||||
{VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
|
||||
{VK_FORMAT_UNDEFINED}, // BGRA8_SRGB
|
||||
{VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
|
||||
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
|
||||
{VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
|
||||
{VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
|
||||
|
||||
@@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_FORMAT_B8G8R8A8_SRGB,
|
||||
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
|
||||
VK_FORMAT_D32_SFLOAT,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
|
||||
@@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||
ASSERT(point_size != 0.0f);
|
||||
}
|
||||
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
|
||||
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
|
||||
const auto& attribute = fixed_state.vertex_input.attributes[i];
|
||||
specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
|
||||
specialization.attribute_types[i] = attribute.Type();
|
||||
}
|
||||
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
||||
|
||||
|
||||
@@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
|
||||
const auto& attrib = regs.vertex_attrib_format[index];
|
||||
if (!attrib.IsValid()) {
|
||||
if (attrib.IsConstant()) {
|
||||
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
|
||||
continue;
|
||||
}
|
||||
|
||||
[[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
|
||||
ASSERT(buffer.IsEnabled());
|
||||
|
||||
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
|
||||
attrib.size.Value());
|
||||
}
|
||||
|
||||
@@ -741,8 +741,10 @@ private:
|
||||
if (!IsGenericAttribute(index)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 location = GetGenericAttributeLocation(index);
|
||||
if (!IsAttributeEnabled(location)) {
|
||||
continue;
|
||||
}
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
Id type;
|
||||
if (IsInputAttributeArray()) {
|
||||
@@ -986,6 +988,10 @@ private:
|
||||
return stage == ShaderType::TesselationControl;
|
||||
}
|
||||
|
||||
bool IsAttributeEnabled(u32 location) const {
|
||||
return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
|
||||
}
|
||||
|
||||
u32 GetNumInputVertices() const {
|
||||
switch (stage) {
|
||||
case ShaderType::Geometry:
|
||||
@@ -1081,8 +1087,7 @@ private:
|
||||
|
||||
void VisitBasicBlock(const NodeBlock& bb) {
|
||||
for (const auto& node : bb) {
|
||||
[[maybe_unused]] const Type type = Visit(node).type;
|
||||
ASSERT(type == Type::Void);
|
||||
Visit(node);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1202,16 +1207,20 @@ private:
|
||||
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
|
||||
return {v_float_zero, Type::Float};
|
||||
default:
|
||||
if (IsGenericAttribute(attribute)) {
|
||||
const u32 location = GetGenericAttributeLocation(attribute);
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
const Type type = type_descriptor.type;
|
||||
const Id attribute_id = input_attributes.at(attribute);
|
||||
const std::vector elements = {element};
|
||||
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
||||
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
||||
if (!IsGenericAttribute(attribute)) {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
const u32 location = GetGenericAttributeLocation(attribute);
|
||||
if (!IsAttributeEnabled(location)) {
|
||||
// Disabled attributes (also known as constant attributes) always return zero.
|
||||
return {v_float_zero, Type::Float};
|
||||
}
|
||||
const auto type_descriptor = GetAttributeType(location);
|
||||
const Type type = type_descriptor.type;
|
||||
const Id attribute_id = input_attributes.at(attribute);
|
||||
const std::vector elements = {element};
|
||||
const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
|
||||
return {OpLoad(GetTypeDefinition(type), pointer), type};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
|
||||
return {v_float_zero, Type::Float};
|
||||
@@ -1372,7 +1381,9 @@ private:
|
||||
Expression target{};
|
||||
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
|
||||
if (gpr->GetIndex() == Register::ZeroIndex) {
|
||||
// Writing to Register::ZeroIndex is a no op
|
||||
// Writing to Register::ZeroIndex is a no op but we still have to visit its source
|
||||
// because it might have side effects.
|
||||
Visit(src);
|
||||
return {};
|
||||
}
|
||||
target = {registers.at(gpr->GetIndex()), Type::Float};
|
||||
@@ -2198,8 +2209,24 @@ private:
|
||||
return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
|
||||
}
|
||||
|
||||
Expression MemoryBarrierGL(Operation) {
|
||||
const auto scope = spv::Scope::Device;
|
||||
Expression Barrier(Operation) {
|
||||
if (!ir.IsDecompiled()) {
|
||||
LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto scope = spv::Scope::Workgroup;
|
||||
const auto memory = spv::Scope::Workgroup;
|
||||
const auto semantics =
|
||||
spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
|
||||
OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
|
||||
Constant(t_uint, static_cast<u32>(memory)),
|
||||
Constant(t_uint, static_cast<u32>(semantics)));
|
||||
return {};
|
||||
}
|
||||
|
||||
template <spv::Scope scope>
|
||||
Expression MemoryBarrier(Operation) {
|
||||
const auto semantics =
|
||||
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
|
||||
spv::MemorySemanticsMask::WorkgroupMemory |
|
||||
@@ -2663,7 +2690,9 @@ private:
|
||||
&SPIRVDecompiler::ThreadMask<4>, // Lt
|
||||
&SPIRVDecompiler::ShuffleIndexed,
|
||||
|
||||
&SPIRVDecompiler::MemoryBarrierGL,
|
||||
&SPIRVDecompiler::Barrier,
|
||||
&SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
|
||||
&SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
|
||||
};
|
||||
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
||||
|
||||
|
||||
@@ -88,7 +88,8 @@ struct Specialization final {
|
||||
u32 shared_memory_size{};
|
||||
|
||||
// Graphics specific
|
||||
std::optional<float> point_size{};
|
||||
std::optional<float> point_size;
|
||||
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
|
||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||
bool ndc_minus_one_to_one{};
|
||||
};
|
||||
|
||||
@@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default;
|
||||
|
||||
VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
|
||||
SwizzleSource z_source, SwizzleSource w_source) {
|
||||
const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (last_image_view && last_swizzle == swizzle) {
|
||||
const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
|
||||
if (last_image_view && last_swizzle == new_swizzle) {
|
||||
return last_image_view;
|
||||
}
|
||||
last_swizzle = swizzle;
|
||||
last_swizzle = new_swizzle;
|
||||
|
||||
const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
|
||||
const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
|
||||
auto& image_view = entry->second;
|
||||
if (!is_cache_miss) {
|
||||
return last_image_view = *image_view;
|
||||
}
|
||||
|
||||
auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
|
||||
auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
|
||||
auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
|
||||
auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
|
||||
|
||||
std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
|
||||
MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
|
||||
if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
|
||||
// A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
|
||||
std::swap(swizzle_x, swizzle_z);
|
||||
std::swap(swizzle[0], swizzle[2]);
|
||||
}
|
||||
|
||||
// Games can sample depth or stencil values on textures. This is decided by the swizzle value on
|
||||
@@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
// Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
|
||||
swizzle_x = VK_COMPONENT_SWIZZLE_R;
|
||||
swizzle_y = VK_COMPONENT_SWIZZLE_G;
|
||||
swizzle_z = VK_COMPONENT_SWIZZLE_B;
|
||||
swizzle_w = VK_COMPONENT_SWIZZLE_A;
|
||||
// Make sure we sample the first component
|
||||
std::transform(
|
||||
swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
|
||||
return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
|
||||
});
|
||||
}
|
||||
|
||||
VkImageViewCreateInfo ci;
|
||||
@@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
|
||||
ci.image = surface.GetImageHandle();
|
||||
ci.viewType = image_view_type;
|
||||
ci.format = surface.GetImage().GetFormat();
|
||||
ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
|
||||
ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
|
||||
ci.subresourceRange.aspectMask = aspect;
|
||||
ci.subresourceRange.baseMipLevel = base_level;
|
||||
ci.subresourceRange.levelCount = num_levels;
|
||||
|
||||
@@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
case OpCode::Id::RED: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
|
||||
UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
|
||||
const auto [real_address, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, true, true);
|
||||
if (!real_address || !base_address) {
|
||||
@@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||
}
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
Node value = GetRegister(instr.gpr0);
|
||||
bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
|
||||
bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOM: {
|
||||
|
||||
@@ -293,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::BAR: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
|
||||
bb.push_back(Operation(OperationCode::Barrier));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::MEMBAR: {
|
||||
UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
|
||||
UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
|
||||
bb.push_back(Operation(OperationCode::MemoryBarrierGL));
|
||||
const OperationCode type = [instr] {
|
||||
switch (instr.membar.type) {
|
||||
case Tegra::Shader::MembarType::CTA:
|
||||
return OperationCode::MemoryBarrierGroup;
|
||||
case Tegra::Shader::MembarType::GL:
|
||||
return OperationCode::MemoryBarrierGlobal;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
|
||||
return OperationCode::MemoryBarrierGlobal;
|
||||
}
|
||||
}();
|
||||
bb.push_back(Operation(type));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::DEPBAR: {
|
||||
|
||||
@@ -233,7 +233,9 @@ enum class OperationCode {
|
||||
ThreadLtMask, /// () -> uint
|
||||
ShuffleIndexed, /// (uint value, uint index) -> uint
|
||||
|
||||
MemoryBarrierGL, /// () -> void
|
||||
Barrier, /// () -> void
|
||||
MemoryBarrierGroup, /// () -> void
|
||||
MemoryBarrierGlobal, /// () -> void
|
||||
|
||||
Amount,
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
|
||||
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
|
||||
|
||||
template <typename TSurface, typename TView>
|
||||
class TextureCache {
|
||||
using VectorSurface = boost::container::small_vector<TSurface, 1>;
|
||||
|
||||
public:
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
@@ -308,18 +310,20 @@ public:
|
||||
dst_surface.first->MarkAsModified(true, Tick());
|
||||
}
|
||||
|
||||
TSurface TryFindFramebufferSurface(VAddr addr) {
|
||||
TSurface TryFindFramebufferSurface(VAddr addr) const {
|
||||
if (!addr) {
|
||||
return nullptr;
|
||||
}
|
||||
const VAddr page = addr >> registry_page_bits;
|
||||
std::vector<TSurface>& list = registry[page];
|
||||
for (auto& surface : list) {
|
||||
if (surface->GetCpuAddr() == addr) {
|
||||
return surface;
|
||||
}
|
||||
const auto it = registry.find(page);
|
||||
if (it == registry.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return nullptr;
|
||||
const auto& list = it->second;
|
||||
const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
|
||||
return surface->GetCpuAddr() == addr;
|
||||
});
|
||||
return found != list.end() ? *found : nullptr;
|
||||
}
|
||||
|
||||
u64 Tick() {
|
||||
@@ -498,7 +502,7 @@ private:
|
||||
* @param untopological Indicates to the recycler that the texture has no way
|
||||
* to match the overlaps due to topological reasons.
|
||||
**/
|
||||
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
|
||||
RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
|
||||
if (Settings::IsGPULevelExtreme()) {
|
||||
return RecycleStrategy::Flush;
|
||||
@@ -538,9 +542,8 @@ private:
|
||||
* @param untopological Indicates to the recycler that the texture has no way to match the
|
||||
* overlaps due to topological reasons.
|
||||
**/
|
||||
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
|
||||
const SurfaceParams& params, const GPUVAddr gpu_addr,
|
||||
const bool preserve_contents,
|
||||
std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr, const bool preserve_contents,
|
||||
const MatchTopologyResult untopological) {
|
||||
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
|
||||
for (auto& surface : overlaps) {
|
||||
@@ -650,7 +653,7 @@ private:
|
||||
* @param params The parameters on the new surface.
|
||||
* @param gpu_addr The starting address of the new surface.
|
||||
**/
|
||||
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
|
||||
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
@@ -708,7 +711,7 @@ private:
|
||||
* @param preserve_contents Indicates that the new surface should be loaded from memory or
|
||||
* left blank.
|
||||
*/
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr,
|
||||
const VAddr cpu_addr,
|
||||
@@ -810,7 +813,7 @@ private:
|
||||
TSurface& current_surface = iter->second;
|
||||
const auto topological_result = current_surface->MatchesTopology(params);
|
||||
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||
std::vector<TSurface> overlaps{current_surface};
|
||||
VectorSurface overlaps{current_surface};
|
||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||
topological_result);
|
||||
}
|
||||
@@ -991,7 +994,9 @@ private:
|
||||
params.target = target;
|
||||
params.is_tiled = false;
|
||||
params.srgb_conversion = false;
|
||||
params.is_layered = false;
|
||||
params.is_layered =
|
||||
target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
|
||||
target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
|
||||
params.block_width = 0;
|
||||
params.block_height = 0;
|
||||
params.block_depth = 0;
|
||||
@@ -1124,23 +1129,25 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
|
||||
VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
const VAddr cpu_addr_end = cpu_addr + size;
|
||||
VAddr start = cpu_addr >> registry_page_bits;
|
||||
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
|
||||
std::vector<TSurface> surfaces;
|
||||
while (start <= end) {
|
||||
std::vector<TSurface>& list = registry[start];
|
||||
for (auto& surface : list) {
|
||||
if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
|
||||
surface->MarkAsPicked(true);
|
||||
surfaces.push_back(surface);
|
||||
}
|
||||
VectorSurface surfaces;
|
||||
for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
|
||||
const auto it = registry.find(start);
|
||||
if (it == registry.end()) {
|
||||
continue;
|
||||
}
|
||||
for (auto& surface : it->second) {
|
||||
if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
|
||||
continue;
|
||||
}
|
||||
surface->MarkAsPicked(true);
|
||||
surfaces.push_back(surface);
|
||||
}
|
||||
start++;
|
||||
}
|
||||
for (auto& surface : surfaces) {
|
||||
surface->MarkAsPicked(false);
|
||||
|
||||
@@ -106,6 +106,9 @@ public:
|
||||
format.setVersion(4, 3);
|
||||
format.setProfile(QSurfaceFormat::CompatibilityProfile);
|
||||
format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
|
||||
if (Settings::values.renderer_debug) {
|
||||
format.setOption(QSurfaceFormat::FormatOption::DebugContext);
|
||||
}
|
||||
// TODO: expose a setting for buffer value (ie default/single/double/triple)
|
||||
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
|
||||
format.setSwapInterval(0);
|
||||
|
||||
@@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
|
||||
#include "common/logging/backend.h"
|
||||
#include "common/logging/filter.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/memory_detect.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/scm_rev.h"
|
||||
#include "common/scope_exit.h"
|
||||
@@ -219,6 +220,10 @@ GMainWindow::GMainWindow()
|
||||
LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
|
||||
#endif
|
||||
LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
|
||||
LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
|
||||
Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024);
|
||||
LOG_INFO(Frontend, "Host Swap: {:.2f} GB",
|
||||
Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024);
|
||||
UpdateWindowTitle();
|
||||
|
||||
show();
|
||||
|
||||
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
|
||||
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
|
||||
SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
|
||||
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
|
||||
if (Settings::values.renderer_debug) {
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
|
||||
}
|
||||
SDL_GL_SetSwapInterval(0);
|
||||
|
||||
std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
|
||||
|
||||
Reference in New Issue
Block a user