Compare commits

...

77 Commits

Author SHA1 Message Date
ReinUsesLisp
94b0e2e5da texture_cache: Remove preserve_contents
preserve_contents was always true. We can't assume we don't have to
preserve clears because scissored and color masked clears exist.

This removes preserve_contents and assumes it as true at all times.
2020-04-11 01:51:02 -03:00
bunnei
51c6688e21 Merge pull request #3594 from ReinUsesLisp/vk-instance
yuzu: Drop SDL2 and Qt frontend Vulkan requirements
2020-04-10 20:06:55 -04:00
bunnei
8adf66f9fd Merge pull request #3607 from FearlessTobi/input-kms
yuzu/configuration: Fix input profiles and a wrong assert
2020-04-10 00:39:48 -04:00
Fernando Sahmkow
f570b129a2 Merge pull request #3623 from ReinUsesLisp/renderdoc-bind-spam
qt/bootmanager: Remove unnecessary glBindFramebuffer
2020-04-09 18:02:17 -04:00
Fernando Sahmkow
7182ef31c9 Merge pull request #3622 from ReinUsesLisp/srgb-texture-border
video_core/texture: Use a LUT to convert sRGB texture borders
2020-04-09 18:01:48 -04:00
Rodrigo Locatti
36f607217f Merge pull request #3610 from FernandoS27/gpu-caches
Refactor all the GPU Caches to use VAddr for cache addressing
2020-04-09 17:59:21 -03:00
bunnei
b96fd0bd0e Merge pull request #3601 from ReinUsesLisp/some-shader-encodings
video_core/shader: Add some instruction and S2R encodings
2020-04-09 00:17:39 -04:00
Fernando Sahmkow
7cd6daf115 VkRasterizer: Eliminate Legacy code. 2020-04-08 18:59:09 -04:00
Fernando Sahmkow
1c18dc6577 Memory: Correct GCC errors. 2020-04-08 18:09:16 -04:00
Fernando Sahmkow
913f42a3a7 Memory: Address Feedback. 2020-04-08 13:40:46 -04:00
Fernando Sahmkow
e00d992848 GPUMemoryManager: Improve safety of memory reads. 2020-04-08 12:08:06 -04:00
bunnei
449255675d Merge pull request #3624 from Kewlan/fix-sl-sr-position
configuration: Fix placement of SL and SR
2020-04-08 11:22:56 -04:00
Kewlan
848d619aec Place SL and SR in the right most column. 2020-04-08 11:34:16 +02:00
bunnei
b128beb3a9 Merge pull request #3572 from FearlessTobi/port-5127
Port citra-emu/citra#5127: "common: Port some changes from dolphin"
2020-04-07 23:48:16 -04:00
ReinUsesLisp
c6ea0d010b qt/bootmanager: Remove unnecessary glBindFramebuffer
Presentation context always has GL_DRAW_FRAMEBUFFER_BINDING as zero.
There is no need to bind the default framebuffer constantly.

According to Nsight this was using ~0.7ms per frame and it broke
renderdoc captures.
2020-04-07 20:51:56 -03:00
ReinUsesLisp
a209d464f9 video_core/textures: Move GetMaxAnisotropy to cpp file 2020-04-07 20:47:31 -03:00
ReinUsesLisp
d7db088180 video_core/texture: Use a LUT to convert sRGB texture borders
This is a reversed look up table extracted from
https://gist.github.com/rygorous/2203834#file-gistfile1-cpp-L41-L62

that is used in
04d4e9e587/source/maxwell/tsc_generate.cpp (L38)

Games usually bind 0xFD expecting a float texture border of 1.0f.
The conversion previous to this commit was multiplying the uint8 sRGB
texture border color by 255. This is close to 1.0f but when that
difference matters, some graphical glitches appear.

This look up table is manually changed in the edges, clamping towards
0.0f and 1.0f.

While we are at it, move this logic to its own translation unit.
2020-04-07 20:38:14 -03:00
Zach Hilman
26ed65495d Merge pull request #3621 from SilverBeamx/fullnamefix
Log version and about section version fix
2020-04-07 17:16:23 -04:00
SilverBeamx
863f7385dc Addressed feedback: switched to snake case and fixed clang-format errors 2020-04-07 22:59:09 +02:00
bunnei
f316911248 Merge pull request #3599 from ReinUsesLisp/revert-3499
Revert "Merge pull request #3499 from ReinUsesLisp/depth-2d-array"
2020-04-07 16:51:41 -04:00
SilverBeamx
5a66ca4697 Removed leftover test code 2020-04-07 22:45:30 +02:00
SilverBeamx
6b512d78c9 Addressed feedback: removed CMake hack in favor of building the necessary strings via the supplied title format 2020-04-07 22:41:45 +02:00
ReinUsesLisp
bf1d66b7c0 yuzu: Drop SDL2 and Qt frontend Vulkan requirements
Create Vulkan instances and surfaces from the Vulkan backend.
2020-04-07 16:32:19 -03:00
Rodrigo Locatti
487f9ba525 Merge pull request #3489 from namkazt/patch-2
shader: implement SULD.D bits32/64
2020-04-07 16:21:09 -03:00
SilverBeamx
22b5d5211e Hack BUILD_FULLNAME into GenerateSCMRev.cmake 2020-04-07 15:54:19 +02:00
Nguyen Dac Nam
935648ffa9 address nit. 2020-04-07 18:29:30 +07:00
ReinUsesLisp
bc1b4b85b0 renderer_vulkan: Query device names from the backend 2020-04-07 02:23:23 -03:00
ReinUsesLisp
7104e01bb3 common/dynamic_library: Import and adapt helper from Dolphin 2020-04-07 02:23:23 -03:00
Nguyen Dac Nam
bf1174c114 Apply suggestions from code review
Co-Authored-By: Rodrigo Locatti <reinuseslisp@airmail.cc>
2020-04-07 07:55:49 +07:00
enler
27f122c48c file_sys: fix LayeredFS error when loading some games made with… (#3602)
* fix LayeredFS error when loading some games made with the Unity
2020-04-07 02:03:32 +02:00
Fernando Sahmkow
f9d5718c4b Clang Format. 2020-04-06 09:23:08 -04:00
Fernando Sahmkow
ea535d9470 Shader/Pipeline Cache: Use VAddr instead of physical memory for addressing. 2020-04-06 09:23:07 -04:00
Fernando Sahmkow
3dd5c07454 Query Cache: Use VAddr instead of physical memory for adressing. 2020-04-06 09:23:07 -04:00
Fernando Sahmkow
7fcd0fee6d Buffer Cache: Use vAddr instead of physical memory. 2020-04-06 09:23:06 -04:00
Fernando Sahmkow
6ee316cb8f Texture Cache: Use vAddr instead of physical memory for caching. 2020-04-06 09:23:05 -04:00
Fernando Sahmkow
9c0f40a1f5 GPU: Setup Flush/Invalidate to use VAddr instead of CacheAddr 2020-04-06 09:21:46 -04:00
Fernando Sahmkow
588a20be3f Merge pull request #3513 from ReinUsesLisp/native-astc
video_core: Use native ASTC when available
2020-04-06 09:21:11 -04:00
namkazy
2c98e14d13 shader_decode: SULD.D using std::pair instead of out parameter 2020-04-06 13:46:55 +07:00
namkazy
9efa51311f shader_decode: SULD.D avoid duplicate code block. 2020-04-06 13:34:06 +07:00
namkazy
7f5696513f shader_decode: SULD.D fix conversion error. 2020-04-06 13:26:58 +07:00
namkazy
2906372ba1 shader_decode: SULD.D implement bits64 and reverse shader ir init method to removed shader stage. 2020-04-06 13:09:19 +07:00
Fernando Sahmkow
69277de29d Merge pull request #3592 from ReinUsesLisp/ipa
shader_decompiler: Remove FragCoord.w hack and change IPA implementation
2020-04-05 19:29:40 -04:00
Fernando Sahmkow
1633fbf99a Merge pull request #3589 from ReinUsesLisp/fix-clears
gl_rasterizer: Mark cleared textures as dirty
2020-04-05 19:29:26 -04:00
namkazy
730f9b55b3 silent warning (conversion error) 2020-04-05 16:02:07 +07:00
namkazy
9f6ebccf06 shader_decode: SULD.D -> SINT actually same as UNORM. 2020-04-05 15:18:42 +07:00
namkazy
6f2b7087c2 shader_decode: SULD.D fix decode SNORM component 2020-04-05 14:46:43 +07:00
namkazy
69657ff19c clang-format 2020-04-05 12:57:50 +07:00
namkazy
24cc64c5b3 shader_decode: get sampler descriptor from registry. 2020-04-05 12:54:48 +07:00
FearlessTobi
aa6214feb7 yuzu/configuration: Only assert that all buttons exist when we are handling the click for a button device
This fixes failed assertions that were present in yuzu master code for 18 months.
2020-04-05 07:16:09 +02:00
FearlessTobi
fb8afee077 yuzu/configure_input_simple: Fix "Docked Joycons" controller profile
This was incorrectly using PlayerIndex 1 when calling the ConfigureDialog.
2020-04-05 07:14:35 +02:00
namkazy
acd3f0ab37 tweaking. 2020-04-05 10:31:32 +07:00
Nguyen Dac Nam
8370188b3c clang-format 2020-04-05 10:31:31 +07:00
namkazy
3e3afa9be6 cleanup unuse params 2020-04-05 10:31:31 +07:00
namkazy
5cd5857000 cleanup debug code. 2020-04-05 10:31:30 +07:00
namkazy
658112783d reimplement get component type, uncomment mistaken code 2020-04-05 10:31:30 +07:00
namkazy
3ad06e9b2b remove disable optimize 2020-04-05 10:31:30 +07:00
namkazy
f24c2e1103 [wip] reimplement SULD.D 2020-04-05 10:31:29 +07:00
namkazy
58bcb86af5 add shader stage when init shader ir 2020-04-05 10:31:29 +07:00
Nguyen Dac Nam
2cefdd92bd clang-fix 2020-04-05 10:31:28 +07:00
Nguyen Dac Nam
1f3d142875 shader: image - import PredCondition 2020-04-05 10:31:27 +07:00
Nguyen Dac Nam
08db60392d shader: SULD.D bits32 implement more complexer method. 2020-04-05 10:31:27 +07:00
Nguyen Dac Nam
ed1d8beb13 shader: SULD.D import StoreType 2020-04-05 10:31:26 +07:00
Nguyen Dac Nam
6d235b8631 shader: implement SULD.D bits32 2020-04-05 10:31:26 +07:00
Zach Hilman
59e75f4372 ci: Update to Windows Server 2019 and Visual Studio 2019
This updates to the latest available toolchain for MSVC builds.
2020-04-04 16:13:57 -04:00
ReinUsesLisp
60106531b4 shader/other: Add error message for some S2R registers 2020-04-04 03:46:07 -03:00
ReinUsesLisp
8b719e9e1d shader_bytecode: Rename MOV_SYS to S2R 2020-04-04 03:37:51 -03:00
ReinUsesLisp
9d15feb892 shader_bytecode: Add encoding for BAR 2020-04-04 03:36:21 -03:00
ReinUsesLisp
16ae98dbb3 shader_ir: Add error message for EXIT.FCSM_TR 2020-04-04 03:34:08 -03:00
ReinUsesLisp
c02a2dc24a shader_bytecode: Add encoding for VOTE.VTG 2020-04-04 03:28:11 -03:00
ReinUsesLisp
80c4fee4ec Revert "Merge pull request #3499 from ReinUsesLisp/depth-2d-array"
This reverts commit 41905ee467, reversing
changes made to 35145bd529.

It causes regressions in several games.
2020-04-04 00:02:26 -03:00
ReinUsesLisp
2339fe199f shader_decompiler: Remove FragCoord.w hack and change IPA implementation
Credits go to gdkchan and Ryujinx. The pull request used for this can
be found here: https://github.com/Ryujinx/Ryujinx/pull/1082

yuzu was already using the header for interpolation, but it was missing
the FragCoord.w multiplication described in the linked pull request.
This commit finally removes the FragCoord.w == 1.0f hack from the shader
decompiler.

While we are at it, this commit renames some enumerations to match
Nvidia's documentation (linked below) and fixes component declaration
order in the shader program header (z and w were swapped).

https://github.com/NVIDIA/open-gpu-doc/blob/master/Shader-Program-Header/Shader-Program-Header.html
2020-04-01 21:48:55 -03:00
ReinUsesLisp
dd1232755b gl_texture_cache: Fix software ASTC fallback 2020-04-01 01:44:15 -03:00
ReinUsesLisp
2f0da10dc3 vk_device: Add missing ASTC queries 2020-04-01 01:14:04 -03:00
ReinUsesLisp
b6571ca9f0 video_core: Use native ASTC when available 2020-04-01 01:14:04 -03:00
ReinUsesLisp
16270dcfe4 gl_device: Detect if ASTC is reported and expose it 2020-04-01 01:14:04 -03:00
Vitor K
bd0c56c6e7 common: Port some changes from dolphin (#5127)
* IOFile: Make the move constructor and move assignment operator noexcept

Certain parts of the standard library try to determine whether or not a
transfer operation should either be a copy or a move. The prevalent notion
of move constructors/assignment operators is that they should not throw,
they simply move an already existing resource somewhere else.

This is typically done with 'std::move_if_noexcept'. Like the name says,
if a type's move constructor is noexcept, then the functions retrieves an
r-value reference (for move semantics), or an l-value (for copy semantics)
if it is not noexcept.

As IOFile deletes the copy constructor and copy assignment operators,
using IOFile with certain parts of the standard library can fail in
unexcepted ways (especially when used with various container
implementations). This prevents that.

* fix various instances of -1 being assigned to unsigned types

* do not assign in conditional statements

* File/IOFile: Check _tfopen_s properly

* common/file_util.cpp: address review comments

Co-authored-by: Lioncash <mathew1800@gmail.com>
Co-authored-by: Shawn Hoffman <godisgovernment@gmail.com>
Co-authored-by: Sepalani <sepalani@hotmail.fr>
2020-04-01 02:58:42 +02:00
ReinUsesLisp
1c5e2b60a7 gl_rasterizer: Mark cleared textures as dirty
Fixes a potential edge case where cleared textures read from the CPU
were not flushed.
2020-03-31 05:51:56 -03:00
82 changed files with 2064 additions and 1272 deletions

View File

@@ -4,7 +4,7 @@ parameters:
version: ''
steps:
- script: mkdir build && cd build && cmake -G "Visual Studio 15 2017 Win64" --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
- script: mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
displayName: 'Configure CMake'
- task: MSBuild@1
displayName: 'Build'

View File

@@ -45,7 +45,7 @@ stages:
- job: build
displayName: 'msvc'
pool:
vmImage: vs2017-win2016
vmImage: windows-2019
steps:
- template: ./templates/sync-source.yml
parameters:

View File

@@ -22,7 +22,7 @@ stages:
- job: build
displayName: 'windows-msvc'
pool:
vmImage: vs2017-win2016
vmImage: windows-2019
steps:
- template: ./templates/sync-source.yml
parameters:

View File

@@ -106,6 +106,8 @@ add_library(common STATIC
common_funcs.h
common_paths.h
common_types.h
dynamic_library.cpp
dynamic_library.h
file_util.cpp
file_util.h
hash.h

View File

@@ -0,0 +1,106 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <cstring>
#include <string>
#include <utility>
#include <fmt/format.h>
#include "common/dynamic_library.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif
namespace Common {
DynamicLibrary::DynamicLibrary() = default;
DynamicLibrary::DynamicLibrary(const char* filename) {
Open(filename);
}
DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept
: handle{std::exchange(rhs.handle, nullptr)} {}
DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
Close();
handle = std::exchange(rhs.handle, nullptr);
return *this;
}
DynamicLibrary::~DynamicLibrary() {
Close();
}
std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) {
#if defined(_WIN32)
return std::string(filename) + ".dll";
#elif defined(__APPLE__)
return std::string(filename) + ".dylib";
#else
return std::string(filename) + ".so";
#endif
}
std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) {
#if defined(_WIN32)
if (major >= 0 && minor >= 0)
return fmt::format("{}-{}-{}.dll", libname, major, minor);
else if (major >= 0)
return fmt::format("{}-{}.dll", libname, major);
else
return fmt::format("{}.dll", libname);
#elif defined(__APPLE__)
const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
if (major >= 0 && minor >= 0)
return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
else if (major >= 0)
return fmt::format("{}{}.{}.dylib", prefix, libname, major);
else
return fmt::format("{}{}.dylib", prefix, libname);
#else
const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
if (major >= 0 && minor >= 0)
return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
else if (major >= 0)
return fmt::format("{}{}.so.{}", prefix, libname, major);
else
return fmt::format("{}{}.so", prefix, libname);
#endif
}
bool DynamicLibrary::Open(const char* filename) {
#ifdef _WIN32
handle = reinterpret_cast<void*>(LoadLibraryA(filename));
#else
handle = dlopen(filename, RTLD_NOW);
#endif
return handle != nullptr;
}
void DynamicLibrary::Close() {
if (!IsOpen())
return;
#ifdef _WIN32
FreeLibrary(reinterpret_cast<HMODULE>(handle));
#else
dlclose(handle);
#endif
handle = nullptr;
}
void* DynamicLibrary::GetSymbolAddress(const char* name) const {
#ifdef _WIN32
return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
#else
return reinterpret_cast<void*>(dlsym(handle, name));
#endif
}
} // namespace Common

View File

@@ -0,0 +1,75 @@
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
namespace Common {
/**
* Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
* The interface maintains an internal reference count to allow one handle to be shared between
* multiple users.
*/
class DynamicLibrary final {
public:
/// Default constructor, does not load a library.
explicit DynamicLibrary();
/// Automatically loads the specified library. Call IsOpen() to check validity before use.
explicit DynamicLibrary(const char* filename);
/// Moves the library.
DynamicLibrary(DynamicLibrary&&) noexcept;
DynamicLibrary& operator=(DynamicLibrary&&) noexcept;
/// Delete copies, we can't copy a dynamic library.
DynamicLibrary(const DynamicLibrary&) = delete;
DynamicLibrary& operator=(const DynamicLibrary&) = delete;
/// Closes the library.
~DynamicLibrary();
/// Returns the specified library name with the platform-specific suffix added.
static std::string GetUnprefixedFilename(const char* filename);
/// Returns the specified library name in platform-specific format.
/// Major/minor versions will not be included if set to -1.
/// If libname already contains the "lib" prefix, it will not be added again.
/// Windows: LIBNAME-MAJOR-MINOR.dll
/// Linux: libLIBNAME.so.MAJOR.MINOR
/// Mac: libLIBNAME.MAJOR.MINOR.dylib
static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1);
/// Returns true if a module is loaded, otherwise false.
bool IsOpen() const {
return handle != nullptr;
}
/// Loads (or replaces) the handle with the specified library file name.
/// Returns true if the library was loaded and can be used.
bool Open(const char* filename);
/// Unloads the library, any function pointers from this library are no longer valid.
void Close();
/// Returns the address of the specified symbol (function or variable) as an untyped pointer.
/// If the specified symbol does not exist in this library, nullptr is returned.
void* GetSymbolAddress(const char* name) const;
/// Obtains the address of the specified symbol, automatically casting to the correct type.
/// Returns true if the symbol was found and assigned, otherwise false.
template <typename T>
bool GetSymbol(const char* name, T* ptr) const {
*ptr = reinterpret_cast<T>(GetSymbolAddress(name));
return *ptr != nullptr;
}
private:
/// Platform-dependent data type representing a dynamic library handle.
void* handle = nullptr;
};
} // namespace Common

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <array>
#include <limits>
#include <memory>
#include <sstream>
#include <unordered_map>
@@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
std::optional<std::string> GetCurrentDir() {
// Get the current working directory (getcwd uses malloc)
#ifdef _WIN32
wchar_t* dir;
if (!(dir = _wgetcwd(nullptr, 0))) {
wchar_t* dir = _wgetcwd(nullptr, 0);
if (!dir) {
#else
char* dir;
if (!(dir = getcwd(nullptr, 0))) {
char* dir = getcwd(nullptr, 0);
if (!dir) {
#endif
LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
return {};
@@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept {
bool IOFile::Open(const std::string& filename, const char openmode[], int flags) {
Close();
bool m_good;
#ifdef _WIN32
if (flags != 0) {
m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(),
Common::UTF8ToUTF16W(openmode).c_str(), flags);
m_good = m_file != nullptr;
} else {
_wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
Common::UTF8ToUTF16W(openmode).c_str());
m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
Common::UTF8ToUTF16W(openmode).c_str()) == 0;
}
#else
m_file = fopen(filename.c_str(), openmode);
m_file = std::fopen(filename.c_str(), openmode);
m_good = m_file != nullptr;
#endif
return IsOpen();
return m_good;
}
bool IOFile::Close() {
@@ -956,7 +960,7 @@ u64 IOFile::Tell() const {
if (IsOpen())
return ftello(m_file);
return -1;
return std::numeric_limits<u64>::max();
}
bool IOFile::Flush() {

View File

@@ -28,11 +28,8 @@ namespace Common {
#ifdef _MSC_VER
// Sets the debugger-visible name of the current thread.
// Uses undocumented (actually, it is now documented) trick.
// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
// This is implemented much nicer in upcoming msvc++, see:
// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
// Uses trick documented in:
// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
void SetCurrentThreadName(const char* name) {
static const DWORD MS_VC_EXCEPTION = 0x406D1388;
@@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) {
info.dwType = 0x1000;
info.szName = name;
info.dwThreadID = -1; // dwThreadID;
info.dwThreadID = std::numeric_limits<DWORD>::max();
info.dwFlags = 0;
__try {

View File

@@ -5,6 +5,7 @@
#include <memory>
#include "common/common_types.h"
#include "common/string_util.h"
#include "common/swap.h"
#include "core/file_sys/fsmitm_romfsbuild.h"
#include "core/file_sys/romfs.h"
@@ -126,7 +127,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
return out->GetSubdirectories().front();
while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
if (out->GetSubdirectories().front()->GetName() == "data" &&
if (Common::ToLower(out->GetSubdirectories().front()->GetName()) == "data" &&
type == RomFSExtractionType::Truncated)
break;
out = out->GetSubdirectories().front();

View File

@@ -12,6 +12,15 @@
namespace Core::Frontend {
/// Information for the Graphics Backends signifying what type of screen pointer is in
/// WindowInformation
enum class WindowSystemType {
Headless,
Windows,
X11,
Wayland,
};
/**
* Represents a drawing context that supports graphics operations.
*/
@@ -76,6 +85,23 @@ public:
std::pair<unsigned, unsigned> min_client_area_size;
};
/// Data describing host window system information
struct WindowSystemInfo {
// Window system type. Determines which GL context or Vulkan WSI is used.
WindowSystemType type = WindowSystemType::Headless;
// Connection to a display server. This is used on X11 and Wayland platforms.
void* display_connection = nullptr;
// Render surface. This is a pointer to the native window handle, which depends
// on the platform. e.g. HWND for Windows, Window for X11. If the surface is
// set to nullptr, the video backend will run in headless mode.
void* render_surface = nullptr;
// Scale of the render surface. For hidpi systems, this will be >1.
float render_surface_scale = 1.0f;
};
/// Polls window events
virtual void PollEvents() = 0;
@@ -87,10 +113,6 @@ public:
/// Returns if window is shown (not minimized)
virtual bool IsShown() const = 0;
/// Retrieves Vulkan specific handlers from the window
virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const = 0;
/**
* Signal that a touch pressed event has occurred (e.g. mouse click pressed)
* @param framebuffer_x Framebuffer x-coordinate that was pressed
@@ -127,6 +149,13 @@ public:
config = val;
}
/**
* Returns system information about the drawing area.
*/
const WindowSystemInfo& GetWindowInfo() const {
return window_info;
}
/**
* Gets the framebuffer layout (width, height, and screen regions)
* @note This method is thread-safe
@@ -142,7 +171,7 @@ public:
void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
protected:
EmuWindow();
explicit EmuWindow();
virtual ~EmuWindow();
/**
@@ -179,6 +208,8 @@ protected:
client_area_height = size.second;
}
WindowSystemInfo window_info;
private:
/**
* Handler called when the minimal client area was requested to be changed via SetConfig.

View File

@@ -242,7 +242,52 @@ struct Memory::Impl {
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
system.GPU().FlushRegion(current_vaddr, copy_amount);
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
@@ -261,6 +306,10 @@ struct Memory::Impl {
ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
@@ -309,6 +401,10 @@ struct Memory::Impl {
WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
const auto& page_table = process.VMManager().page_table;
std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memset(host_ptr, 0, copy_amount);
break;
}
@@ -384,7 +480,7 @@ struct Memory::Impl {
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
system.GPU().FlushRegion(current_vaddr, copy_amount);
WriteBlock(process, dest_addr, host_ptr, copy_amount);
break;
}
@@ -545,7 +641,7 @@ struct Memory::Impl {
break;
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr = GetPointerFromVMA(vaddr);
system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
system.GPU().FlushRegion(vaddr, sizeof(T));
T value;
std::memcpy(&value, host_ptr, sizeof(T));
return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
break;
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromVMA(vaddr)};
system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
system.GPU().InvalidateRegion(vaddr, sizeof(T));
std::memcpy(host_ptr, &data, sizeof(T));
break;
}
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
impl->ReadBlock(src_addr, dest_buffer, size);
}
void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
}
void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
}
void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size) {
impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
impl->WriteBlock(dest_addr, src_buffer, size);
}
void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
const void* src_buffer, std::size_t size) {
impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
}
void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
}
void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(process, dest_addr, size);
}

View File

@@ -294,6 +294,27 @@ public:
void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from a specified process' address space.
* This unsafe version does not trigger GPU flushing.
*
* @param process The process to read the data from.
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* process' address space.
*/
void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
*
@@ -312,6 +333,25 @@ public:
*/
void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
* This unsafe version does not trigger GPU flushing.
*
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* current process' address space.
*/
void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
@@ -335,6 +375,26 @@ public:
void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory.
*
* @param process The process to write data into the address space of.
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
*/
void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
@@ -356,6 +416,24 @@ public:
*/
void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory.
*
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the current process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the current process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
*/
void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Fills the specified address range within a process' address space with zeroes.
*

View File

@@ -148,6 +148,7 @@ add_library(video_core STATIC
textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.cpp
textures/texture.h
video_core.cpp
video_core.h

View File

@@ -15,37 +15,29 @@ namespace VideoCommon {
class BufferBlock {
public:
bool Overlaps(const CacheAddr start, const CacheAddr end) const {
return (cache_addr < end) && (cache_addr_end > start);
bool Overlaps(const VAddr start, const VAddr end) const {
return (cpu_addr < end) && (cpu_addr_end > start);
}
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
return cache_addr <= other_start && other_end <= cache_addr_end;
bool IsInside(const VAddr other_start, const VAddr other_end) const {
return cpu_addr <= other_start && other_end <= cpu_addr_end;
}
u8* GetWritableHostPtr() const {
return FromCacheAddr(cache_addr);
std::size_t GetOffset(const VAddr in_addr) {
return static_cast<std::size_t>(in_addr - cpu_addr);
}
u8* GetWritableHostPtr(std::size_t offset) const {
return FromCacheAddr(cache_addr + offset);
VAddr GetCpuAddr() const {
return cpu_addr;
}
std::size_t GetOffset(const CacheAddr in_addr) {
return static_cast<std::size_t>(in_addr - cache_addr);
VAddr GetCpuAddrEnd() const {
return cpu_addr_end;
}
CacheAddr GetCacheAddr() const {
return cache_addr;
}
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + size;
void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr;
cpu_addr_end = new_addr + size;
}
std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
}
protected:
explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
SetCacheAddr(cache_addr);
explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
SetCpuAddr(cpu_addr);
}
~BufferBlock() = default;
private:
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
VAddr cpu_addr{};
VAddr cpu_addr_end{};
std::size_t size{};
u64 epoch{};
};

View File

@@ -19,6 +19,7 @@
#include "common/alignment.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_block.h"
#include "video_core/buffer_cache/map_interval.h"
#include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
const std::optional<VAddr> cpu_addr_opt =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr_opt) {
return {GetEmptyBuffer(size), 0};
}
const auto cache_addr = ToCacheAddr(host_ptr);
VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
auto& memory_manager = system.GPU().MemoryManager();
if (use_fast_cbuf) {
return ConstBufferUpload(host_ptr, size);
if (memory_manager.IsGranularRange(gpu_addr, size)) {
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
return ConstBufferUpload(host_ptr, size);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
return ConstBufferUpload(staging_buffer.data(), size);
}
} else {
return StreamBufferUpload(host_ptr, size, alignment);
if (memory_manager.IsGranularRange(gpu_addr, size)) {
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
return StreamBufferUpload(host_ptr, size, alignment);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
return StreamBufferUpload(staging_buffer.data(), size, alignment);
}
}
}
}
auto block = GetBlock(cache_addr, size);
auto map = MapAddress(block, gpu_addr, cache_addr, size);
auto block = GetBlock(cpu_addr, size);
auto map = MapAddress(block, gpu_addr, cpu_addr, size);
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
}
}
const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
return {ToHandle(block), offset};
}
@@ -112,7 +130,7 @@ public:
}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
}
/// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) {
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
virtual void WriteBarrier() = 0;
virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
/// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart();
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
if (!cache_ptr || !cpu_addr) {
const VAddr cpu_addr = new_map->GetStart();
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
new_map->GetGpuAddress());
return;
}
const std::size_t size = new_map->GetEnd() - new_map->GetStart();
new_map->SetCpuAddress(*cpu_addr);
new_map->MarkAsRegistered(true);
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
mapped_addresses.insert({interval, new_map});
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
if (inherit_written) {
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
/// Unregisters an object from the cache
void Unregister(MapInterval& map) {
const std::size_t size = map->GetEnd() - map->GetStart();
rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
map->MarkAsRegistered(false);
if (map->IsWritten()) {
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
}
private:
MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
}
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
const CacheAddr cache_addr, const std::size_t size) {
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
const std::size_t size) {
std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
const CacheAddr cache_addr_end = cache_addr + size;
MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
u8* host_ptr = FromCacheAddr(cache_addr);
UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
if (memory_manager.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
}
Register(new_map);
return new_map;
}
const CacheAddr cache_addr_end = cache_addr + size;
const VAddr cpu_addr_end = cpu_addr + size;
if (overlaps.size() == 1) {
MapInterval& current_map = overlaps[0];
if (current_map->IsInside(cache_addr, cache_addr_end)) {
if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
return current_map;
}
}
CacheAddr new_start = cache_addr;
CacheAddr new_end = cache_addr_end;
VAddr new_start = cpu_addr;
VAddr new_end = cpu_addr_end;
bool write_inheritance = false;
bool modified_inheritance = false;
// Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
write_inheritance |= overlap->IsWritten();
modified_inheritance |= overlap->IsModified();
}
GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
for (auto& overlap : overlaps) {
Unregister(overlap);
}
@@ -250,7 +272,7 @@ private:
return new_map;
}
void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
std::vector<MapInterval>& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
for (auto& interval : interval_set) {
std::size_t size = interval.upper() - interval.lower();
if (size > 0) {
u8* host_ptr = FromCacheAddr(interval.lower());
UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
staging_buffer.resize(size);
system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
UploadBlockData(block, block->GetOffset(interval.lower()), size,
staging_buffer.data());
}
}
}
std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
if (size == 0) {
return {};
}
@@ -290,8 +314,9 @@ private:
void FlushMap(MapInterval map) {
std::size_t size = map->GetEnd() - map->GetStart();
TBuffer block = blocks[map->GetStart() >> block_page_bits];
u8* host_ptr = FromCacheAddr(map->GetStart());
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
staging_buffer.resize(size);
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
@@ -316,14 +341,14 @@ private:
TBuffer EnlargeBlock(TBuffer buffer) {
const std::size_t old_size = buffer->GetSize();
const std::size_t new_size = old_size + block_page_size;
const CacheAddr cache_addr = buffer->GetCacheAddr();
TBuffer new_buffer = CreateBlock(cache_addr, new_size);
const VAddr cpu_addr = buffer->GetCpuAddr();
TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(buffer, new_buffer, 0, 0, old_size);
buffer->SetEpoch(epoch);
pending_destruction.push_back(buffer);
const CacheAddr cache_addr_end = cache_addr + new_size - 1;
u64 page_start = cache_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits;
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) {
blocks[page_start] = new_buffer;
++page_start;
@@ -334,9 +359,9 @@ private:
TBuffer MergeBlocks(TBuffer first, TBuffer second) {
const std::size_t size_1 = first->GetSize();
const std::size_t size_2 = second->GetSize();
const CacheAddr first_addr = first->GetCacheAddr();
const CacheAddr second_addr = second->GetCacheAddr();
const CacheAddr new_addr = std::min(first_addr, second_addr);
const VAddr first_addr = first->GetCpuAddr();
const VAddr second_addr = second->GetCpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
TBuffer new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
second->SetEpoch(epoch);
pending_destruction.push_back(first);
pending_destruction.push_back(second);
const CacheAddr cache_addr_end = new_addr + new_size - 1;
const VAddr cpu_addr_end = new_addr + new_size - 1;
u64 page_start = new_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) {
blocks[page_start] = new_buffer;
++page_start;
@@ -355,18 +380,18 @@ private:
return new_buffer;
}
TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
TBuffer found{};
const CacheAddr cache_addr_end = cache_addr + size - 1;
u64 page_start = cache_addr >> block_page_bits;
const u64 page_end = cache_addr_end >> block_page_bits;
const VAddr cpu_addr_end = cpu_addr + size - 1;
u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits;
while (page_start <= page_end) {
auto it = blocks.find(page_start);
if (it == blocks.end()) {
if (found) {
found = EnlargeBlock(found);
} else {
const CacheAddr start_addr = (page_start << block_page_bits);
const VAddr start_addr = (page_start << block_page_bits);
found = CreateBlock(start_addr, block_page_size);
blocks[page_start] = found;
}
@@ -386,7 +411,7 @@ private:
return found;
}
void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
void MarkRegionAsWritten(const VAddr start, const VAddr end) {
u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
}
}
void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
}
}
bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
bool IsRegionWritten(const VAddr start, const VAddr end) const {
u64 page_start = start >> write_page_bit;
const u64 page_end = end >> write_page_bit;
while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
using IntervalSet = boost::icl::interval_set<CacheAddr>;
using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
using IntervalSet = boost::icl::interval_set<VAddr>;
using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
using IntervalType = typename IntervalCache::interval_type;
IntervalCache mapped_addresses;
@@ -456,6 +481,8 @@ private:
u64 epoch = 0;
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
std::recursive_mutex mutex;
};

View File

@@ -11,7 +11,7 @@ namespace VideoCommon {
class MapIntervalBase {
public:
MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
: start{start}, end{end}, gpu_addr{gpu_addr} {}
void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
return gpu_addr;
}
bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
bool IsInside(const VAddr other_start, const VAddr other_end) const {
return (start <= other_start && other_end <= end);
}
@@ -46,11 +46,11 @@ public:
return is_registered;
}
CacheAddr GetStart() const {
VAddr GetStart() const {
return start;
}
CacheAddr GetEnd() const {
VAddr GetEnd() const {
return end;
}
@@ -76,8 +76,8 @@ public:
}
private:
CacheAddr start;
CacheAddr end;
VAddr start;
VAddr end;
GPUVAddr gpu_addr;
VAddr cpu_addr{};
bool is_written{};

View File

@@ -1712,6 +1712,7 @@ public:
BRK,
DEPBAR,
VOTE,
VOTE_VTG,
SHFL,
FSWZADD,
BFE_C,
@@ -1758,6 +1759,7 @@ public:
IPA,
OUT_R, // Emit vertex/primitive
ISBERD,
BAR,
MEMBAR,
VMAD,
VSETP,
@@ -1842,7 +1844,7 @@ public:
MOV_C,
MOV_R,
MOV_IMM,
MOV_SYS,
S2R,
MOV32_IMM,
SHL_C,
SHL_R,
@@ -2026,6 +2028,7 @@ private:
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2063,6 +2066,7 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
@@ -2134,7 +2138,7 @@ private:
INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),

View File

@@ -4,6 +4,9 @@
#pragma once
#include <array>
#include <optional>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -16,7 +19,7 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
enum class PixelImap : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
@@ -24,7 +27,7 @@ enum class AttributeUse : u8 {
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
struct Header {
union {
BitField<0, 5, u32> sph_type;
@@ -59,8 +62,8 @@ struct Header {
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<24, 4, u32> reserved;
BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
} common4{};
union {
@@ -93,17 +96,20 @@ struct Header {
struct {
INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
BitField<0, 2, PixelImap> x;
BitField<2, 2, PixelImap> y;
BitField<4, 2, PixelImap> z;
BitField<6, 2, PixelImap> w;
u8 raw;
} imap_generic_vector[32];
INSERT_UNION_PADDING_BYTES(2); // ImapColor
INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
INSERT_UNION_PADDING_BYTES(2); // ImapReserved
struct {
u32 target;
union {
@@ -112,31 +118,30 @@ struct Header {
BitField<2, 30, u32> reserved;
};
} omap;
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
PixelImap GetPixelImap(u32 attribute) const {
const auto get_index = [this, attribute](u32 index) {
return static_cast<PixelImap>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 3);
};
std::optional<PixelImap> result;
for (u32 component = 0; component < 4; ++component) {
const PixelImap index = get_index(component);
if (index == PixelImap::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
if (result && result != index) {
LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
}
result = index;
}
return result;
return result.value_or(PixelImap::Unused);
}
} ps;

View File

@@ -270,13 +270,13 @@ public:
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
protected:
virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;

View File

@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}

View File

@@ -27,9 +27,9 @@ public:
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;
protected:

View File

@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
renderer->SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
}

View File

@@ -26,9 +26,9 @@ public:
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {}
protected:

View File

@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size));
}
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().InvalidateRegion(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size);
}

View File

@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
CacheAddr addr;
VAddr addr;
u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
CacheAddr addr;
VAddr addr;
u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {}
CacheAddr addr;
VAddr addr;
u64 size;
};
@@ -111,13 +111,13 @@ public:
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size);
void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(CacheAddr addr, u64 size);
void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
void FlushAndInvalidateRegion(VAddr addr, u64 size);
// Wait until the gpu thread is idle.
void WaitIdle() const;

View File

@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & page_mask) == 0);
const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
return {};
}
const u8* page_pointer{page_table.pointers[addr >> page_bits]};
const u8* page_pointer{GetPointer(addr)};
if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
std::memcpy(&value, page_pointer, sizeof(T));
return value;
}
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
return;
}
u8* page_pointer{page_table.pointers[addr >> page_bits]};
u8* page_pointer{GetPointer(addr)};
if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
std::memcpy(page_pointer, &data, sizeof(T));
return;
}
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}
u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
auto& memory = system.Memory();
const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
}
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {};
}
const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
const auto& memory = system.Memory();
const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
}
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
rasterizer.FlushRegion(src_addr, copy_amount);
memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
break;
}
default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
const u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
const u8* src_ptr{page_pointer + page_offset};
std::memcpy(dest_buffer, src_ptr, copy_amount);
const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
} else {
std::memset(dest_buffer, 0, copy_amount);
}
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
// Invalidate must happen on the rasterizer interface, such that memory is always
// synchronous when it is written (even when in asynchronous GPU mode).
rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
rasterizer.InvalidateRegion(dest_addr, copy_amount);
memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
break;
}
default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};
auto& memory = system.Memory();
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
u8* dest_ptr{page_pointer + page_offset};
std::memcpy(dest_ptr, src_buffer, copy_amount);
const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}
page_index++;
page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
}
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is copied (even when in asynchronous GPU mode).
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
std::vector<u8> tmp_buffer(size);
ReadBlock(src_addr, tmp_buffer.data(), size);
WriteBlock(dest_addr, tmp_buffer.data(), size);
}
void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
}
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
const std::size_t page = (addr & Memory::PAGE_MASK) + size;
return page <= Memory::PAGE_SIZE;
}
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr) {
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,

View File

@@ -97,6 +97,11 @@ public:
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer
*/
bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
using VMAHandle = VMAMap::const_iterator;

View File

@@ -98,12 +98,12 @@ public:
static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(CacheAddr addr, std::size_t size) {
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}
@@ -117,14 +117,16 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr_opt);
VAddr cpu_addr = *cpu_addr_opt;
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
CachedQuery* query = TryGet(cpu_addr);
if (!query) {
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT_OR_EXECUTE(cpu_addr, return;);
ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
const u64 addr_begin = static_cast<u64>(addr);
const u64 addr_end = addr_begin + static_cast<u64>(size);
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
const u64 cache_begin = query.GetCacheAddr();
const u64 cache_begin = query.GetCpuAddr();
const u64 cache_end = cache_begin + query.SizeInBytes();
return cache_begin < addr_end && addr_begin < cache_end;
};
@@ -193,7 +195,7 @@ private:
if (!in_range(query)) {
continue;
}
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush();
}
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(CacheAddr addr) {
CachedQuery* TryGet(VAddr addr) {
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
}
auto& contents = it->second;
const auto found =
std::find_if(std::begin(contents), std::end(contents),
[addr](auto& query) { return query.GetCacheAddr() == addr; });
const auto found = std::find_if(std::begin(contents), std::end(contents),
[addr](auto& query) { return query.GetCpuAddr() == addr; });
return found != std::end(contents) ? &*found : nullptr;
}
@@ -323,14 +324,10 @@ public:
timestamp = timestamp_;
}
VAddr CpuAddr() const noexcept {
VAddr GetCpuAddr() const noexcept {
return cpu_addr;
}
CacheAddr GetCacheAddr() const noexcept {
return ToCacheAddr(host_ptr);
}
u64 SizeInBytes() const noexcept {
return SizeInBytes(timestamp.has_value());
}

View File

@@ -18,22 +18,14 @@
class RasterizerCacheObject {
public:
explicit RasterizerCacheObject(const u8* host_ptr)
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
virtual ~RasterizerCacheObject();
CacheAddr GetCacheAddr() const {
return cache_addr;
VAddr GetCpuAddr() const {
return cpu_addr;
}
const u8* GetHostPtr() const {
return host_ptr;
}
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
@@ -68,8 +60,7 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
};
template <class T>
@@ -80,7 +71,7 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
}
/// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) {
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
protected:
/// Tries to get an object from the cache with the specified cache address
T TryGet(CacheAddr addr) const {
T TryGet(VAddr addr) const {
const auto iter = map_cache.find(addr);
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
T TryGet(const void* addr) const {
const auto iter = map_cache.find(ToCacheAddr(addr));
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
/// Register an object into the cache
virtual void Register(const T& object) {
std::lock_guard lock{mutex};
object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCacheAddr(), object});
map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
}
@@ -144,7 +128,7 @@ protected:
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr();
const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
}
@@ -173,7 +157,7 @@ protected:
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) {
return {};
}
@@ -197,13 +181,13 @@ private:
}
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<CacheAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetCacheAddr(),
object->GetCacheAddr() + object->GetSizeInBytes());
return ObjectInterval::right_open(object->GetCpuAddr(),
object->GetCpuAddr() + object->GetSizeInBytes());
}
ObjectCache map_cache;

View File

@@ -53,14 +53,14 @@ public:
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;

View File

@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
}
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cpu_addr, size);
}
void OGLBufferCache::WriteBarrier() {

View File

@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock();
const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
}
protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
void WriteBarrier() override;

View File

@@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
return bindings;
}
bool IsASTCSupported() {
static constexpr std::array formats = {
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
GLint supported;
glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
&supported);
return supported == GL_TRUE;
}) == formats.end();
}
} // Anonymous namespace
Device::Device() : base_bindings{BuildBaseBindings()} {
@@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();

View File

@@ -64,6 +64,10 @@ public:
return has_image_load_formatted;
}
bool HasASTC() const {
return has_astc;
}
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -97,6 +101,7 @@ private:
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};

View File

@@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
View depth_surface = texture_cache.GetDepthBufferSurface();
const auto& regs = gpu.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
FramebufferCacheKey key;
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < colors_count; ++index) {
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
View color_surface{texture_cache.GetColorBufferSurface(index)};
if (!color_surface) {
continue;
}
@@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
texture_cache.GuardRenderTargets(true);
View color_surface;
if (using_color_fb) {
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
const std::size_t index = regs.clear_buffers.RT;
color_surface = texture_cache.GetColorBufferSurface(index);
texture_cache.MarkColorBufferInUse(index);
}
View depth_surface;
if (using_depth_fb || using_stencil_fb) {
depth_surface = texture_cache.GetDepthBufferSurface(false);
depth_surface = texture_cache.GetDepthBufferSurface();
texture_cache.MarkDepthBufferInUse();
}
texture_cache.GuardRenderTargets(false);
@@ -653,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.FlushRegion(addr, size);
@@ -663,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.InvalidateRegion(addr, size);
@@ -674,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.use_accurate_gpu_emulation) {
FlushRegion(addr, size);
}
@@ -713,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto surface{
texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
if (!surface) {
return {};
}

View File

@@ -65,9 +65,9 @@ public:
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,

View File

@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
size_in_bytes{size_in_bytes}, program{std::move(program)} {}
CachedShader::~CachedShader() = default;
@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
return std::shared_ptr<CachedShader>(
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(address)};
Shader shader{TryGet(host_ptr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
const auto host_ptr{memory_manager.GetPointer(address)};
// No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b;
@@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
@@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)};
auto kernel = TryGet(host_ptr);
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (kernel) {
return kernel;
}
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {

View File

@@ -65,11 +65,6 @@ public:
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
return size_in_bytes;
@@ -90,13 +85,12 @@ public:
std::size_t size_in_bytes);
private:
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program;
};

View File

@@ -31,11 +31,11 @@ namespace {
using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
@@ -702,20 +702,19 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(AttributeUse attribute) {
const char* GetInputFlags(PixelImap attribute) {
switch (attribute) {
case AttributeUse::Perspective:
// Default, Smooth
return {};
case AttributeUse::Constant:
return "flat ";
case AttributeUse::ScreenLinear:
return "noperspective ";
default:
case AttributeUse::Unused:
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
return {};
case PixelImap::Perspective:
return "smooth";
case PixelImap::Constant:
return "flat";
case PixelImap::ScreenLinear:
return "noperspective";
case PixelImap::Unused:
break;
}
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
return {};
}
void DeclareInputAttributes() {
@@ -749,8 +748,8 @@ private:
std::string suffix;
if (stage == ShaderType::Fragment) {
const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
const auto input_mode{header.ps.GetPixelImap(location)};
if (input_mode == PixelImap::Unused) {
return;
}
suffix = GetInputFlags(input_mode);
@@ -927,7 +926,7 @@ private:
const u32 address{generic_base + index * generic_stride + element * element_stride};
const bool declared = stage != ShaderType::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused;
header.ps.GetPixelImap(index) != PixelImap::Unused;
const std::string value =
declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
code.AddLine("case 0x{:X}U: return {};", address, value);
@@ -1142,8 +1141,7 @@ private:
GetSwizzle(element)),
Type::Float};
case ShaderType::Fragment:
return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
Type::Float};
return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
default:
UNREACHABLE();
}

View File

@@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -37,102 +36,100 @@ namespace {
struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
bool compressed;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT, false}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F
{GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U
{GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI
{GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
{GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
{GL_COMPRESSED_RED_RGTC1}, // DXN1
{GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
// Compressed sRGB formats
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
}};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
return format;
return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
}
GLenum GetTextureTarget(const SurfaceTarget& target) {
@@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
} // Anonymous namespace
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
is_compressed = tuple.compressed;
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool is_astc_supported)
: VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
if (is_converted) {
internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
format = GL_RGBA;
type = GL_UNSIGNED_BYTE;
} else {
const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
is_compressed = params.IsCompressed();
}
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
@@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
if (params.IsBuffer()) {
glGetNamedBufferSubData(texture_buffer.handle, 0,
static_cast<GLsizeiptr>(params.GetHostSizeInBytes()),
static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
staging_buffer.data());
return;
}
@@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.emulated_levels; ++level) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
u8* const mip_data = staging_buffer.data() + mip_offset;
const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
@@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
}
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
auto compression_type = params.GetCompressionType();
const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level);
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
const u8* buffer{staging_buffer.data() + mip_offset};
if (is_compressed) {
const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
@@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
const Device& device, StateTracker& state_tracker)
: TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
: TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
@@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
TextureCacheOpenGL::~TextureCacheOpenGL() = default;
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(gpu_addr, params);
return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
}
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
if (source_format.compressed) {
if (src_surface->IsCompressed()) {
glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
nullptr);
} else {
@@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
const GLsizei width = static_cast<GLsizei>(dst_params.width);
const GLsizei height = static_cast<GLsizei>(dst_params.height);
const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
if (dest_format.compressed) {
if (dst_surface->IsCompressed()) {
LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
UNREACHABLE();
} else {

View File

@@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
~CachedSurface();
void UploadTexture(const std::vector<u8>& staging_buffer) override;
@@ -51,6 +51,10 @@ public:
return texture.handle;
}
bool IsCompressed() const {
return is_compressed;
}
protected:
void DecorateSurfaceName() override;

View File

@@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
using UniqueImage = UniqueHandle<vk::Image>;
using UniqueImageView = UniqueHandle<vk::ImageView>;
using UniqueInstance = UniqueHandle<vk::Instance>;
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
using UniquePipeline = UniqueHandle<vk::Pipeline>;
@@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>;
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;

View File

@@ -2,13 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/dynamic_library.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -30,15 +35,30 @@
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size
#ifdef _WIN32
#include <windows.h>
// ensure include order
#include <vulkan/vulkan_win32.h>
#endif
#ifdef __linux__
#include <X11/Xlib.h>
#include <vulkan/vulkan_wayland.h>
#include <vulkan/vulkan_xlib.h>
#endif
namespace Vulkan {
namespace {
using Core::Frontend::WindowSystemType;
VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* data,
[[maybe_unused]] void* user_data) {
const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)};
const char* message{data->pMessage};
if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
@@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
return VK_FALSE;
}
Common::DynamicLibrary OpenVulkanLibrary() {
Common::DynamicLibrary library;
#ifdef __APPLE__
// Check if a path to a specific Vulkan library has been specified.
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
library.Open(filename.c_str());
}
#endif
return library;
}
UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld,
WindowSystemType window_type = WindowSystemType::Headless,
bool enable_layers = false) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
return UniqueInstance{};
}
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) {
LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
return UniqueInstance{};
}
dld.init(vkGetInstanceProcAddr);
std::vector<const char*> extensions;
extensions.reserve(4);
switch (window_type) {
case Core::Frontend::WindowSystemType::Headless:
break;
#ifdef _WIN32
case Core::Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#ifdef __linux__
case Core::Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Core::Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_layers) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
u32 num_properties;
if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties");
return UniqueInstance{};
}
std::vector<vk::ExtensionProperties> properties(num_properties);
if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(),
dld) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return UniqueInstance{};
}
for (const char* extension : extensions) {
const auto it =
std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) {
return !std::strcmp(extension, prop.extensionName);
});
if (it == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return UniqueInstance{};
}
}
const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
"yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
VK_API_VERSION_1_1);
const std::array layers = {"VK_LAYER_LUNARG_standard_validation"};
const vk::InstanceCreateInfo instance_ci(
{}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(),
static_cast<u32>(extensions.size()), extensions.data());
vk::Instance unsafe_instance;
if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
return UniqueInstance{};
}
dld.init(unsafe_instance);
return UniqueInstance(unsafe_instance, {nullptr, dld});
}
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
@@ -147,27 +271,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
}
bool RendererVulkan::Init() {
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
std::optional<vk::DebugUtilsMessengerEXT> callback;
if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
callback = CreateDebugCallback(dldi);
if (!callback) {
return false;
}
}
if (!PickDevices(dldi)) {
if (callback) {
instance.destroy(*callback, nullptr, dldi);
}
library = OpenVulkanLibrary();
instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
Settings::values.renderer_debug);
if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
return false;
}
debug_callback = UniqueDebugUtilsMessengerEXT(
*callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
instance, nullptr, device->GetDispatchLoader()));
Report();
@@ -176,7 +285,7 @@ bool RendererVulkan::Init() {
resource_manager = std::make_unique<VKResourceManager>(*device);
const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(surface, *device);
swapchain = std::make_unique<VKSwapchain>(*surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false);
state_tracker = std::make_unique<StateTracker>(system);
@@ -213,8 +322,10 @@ void RendererVulkan::ShutDown() {
device.reset();
}
std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi) {
bool RendererVulkan::CreateDebugCallback() {
if (!Settings::values.renderer_debug) {
return true;
}
const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
{},
vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
@@ -225,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
&DebugCallback, nullptr);
vk::DebugUtilsMessengerEXT callback;
if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
vk::DebugUtilsMessengerEXT unsafe_callback;
if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
return {};
return false;
}
return callback;
debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld});
return true;
}
bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
const auto devices = instance.enumeratePhysicalDevices(dldi);
bool RendererVulkan::CreateSurface() {
[[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
VkSurfaceKHR unsafe_surface = nullptr;
#ifdef _WIN32
if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
nullptr, 0, nullptr, hWnd};
const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr,
&unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
return false;
}
}
#endif
#ifdef __linux__
if (window_info.type == Core::Frontend::WindowSystemType::X11) {
const VkXlibSurfaceCreateInfoKHR xlib_ci{
VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr,
&unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
return false;
}
}
if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
const VkWaylandSurfaceCreateInfoKHR wayland_ci{
VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
if (!vkCreateWaylandSurfaceKHR ||
vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) !=
VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
return false;
}
}
#endif
if (!unsafe_surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
return false;
}
surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld});
return true;
}
bool RendererVulkan::PickDevices() {
const auto devices = instance->enumeratePhysicalDevices(dld);
// TODO(Rodrigo): Choose device from config file
const s32 device_index = Settings::values.vulkan_device;
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
}
const vk::PhysicalDevice physical_device = devices[device_index];
const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)];
if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
if (!VKDevice::IsSuitable(physical_device, *surface, dld)) {
return false;
}
device = std::make_unique<VKDevice>(dldi, physical_device, surface);
return device->Create(dldi, instance);
device = std::make_unique<VKDevice>(dld, physical_device, *surface);
return device->Create(*instance);
}
void RendererVulkan::Report() const {
@@ -276,4 +443,33 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
std::vector<std::string> RendererVulkan::EnumerateDevices() {
// Avoid putting DispatchLoaderDynamic, it's too large
auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>();
auto& dld = *dld_memory;
Common::DynamicLibrary library = OpenVulkanLibrary();
UniqueInstance instance = CreateInstance(library, dld);
if (!instance) {
return {};
}
u32 num_devices;
if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) {
return {};
}
std::vector<vk::PhysicalDevice> devices(num_devices);
if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) !=
vk::Result::eSuccess) {
return {};
}
std::vector<std::string> names;
names.reserve(num_devices);
for (auto& device : devices) {
names.push_back(device.getProperties(dld).deviceName);
}
return names;
}
} // namespace Vulkan

View File

@@ -6,8 +6,11 @@
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -44,18 +47,24 @@ public:
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
bool TryPresent(int timeout_ms) override;
private:
std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi);
static std::vector<std::string> EnumerateDevices();
bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
private:
bool CreateDebugCallback();
bool CreateSurface();
bool PickDevices();
void Report() const;
Core::System& system;
vk::Instance instance;
vk::SurfaceKHR surface;
Common::DynamicLibrary library;
vk::DispatchLoaderDynamic dld;
UniqueInstance instance;
UniqueSurfaceKHR surface;
VKScreenInfo screen_info;

View File

@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
} // Anonymous namespace
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
VKBufferCache::~VKBufferCache() = default;
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
}
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {

View File

@@ -30,7 +30,7 @@ class VKScheduler;
class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size);
VAddr cpu_addr, std::size_t size);
~CachedBufferBlock();
const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
protected:
void WriteBarrier() override {}
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
const vk::Buffer* ToHandle(const Buffer& buffer) override;

View File

@@ -10,6 +10,7 @@
#include <string_view>
#include <thread>
#include <vector>
#include "common/assert.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) {
}
template <typename T>
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
vk::PhysicalDeviceFeatures2 features;
T extension_features;
features.pNext = &extension_features;
physical.getFeatures2(&features, dldi);
physical.getFeatures2(&features, dld);
return extension_features;
}
template <typename T>
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
vk::PhysicalDeviceProperties2 properties;
T extension_properties;
properties.pNext = &extension_properties;
physical.getProperties2(&properties, dldi);
physical.getProperties2(&properties, dld);
return extension_properties;
}
@@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format
} // Anonymous namespace
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, properties{physical.getProperties(dldi)},
format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupFeatures(dldi);
: dld{dld}, physical{physical}, properties{physical.getProperties(dld)},
format_properties{GetFormatProperties(dld, physical)} {
SetupFamilies(surface);
SetupFeatures();
}
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
bool VKDevice::Create(vk::Instance instance) {
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions(dldi);
const std::vector extensions = LoadExtensions();
vk::PhysicalDeviceFeatures2 features2;
void** next = &features2.pNext;
@@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
nullptr);
device_ci.pNext = &features2;
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
vk::Device unsafe_logical;
if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
return false;
}
dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical);
logical = UniqueDevice(unsafe_logical, {nullptr, dld});
CollectTelemetryParameters();
@@ -235,20 +234,23 @@ void VKDevice::ReportLoss() const {
// *(VKGraphicsPipeline*)data[0]
}
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const {
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const {
// Disable for now to avoid converting ASTC twice.
return false;
static constexpr std::array astc_formats = {
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock,
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock,
vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock};
if (!features.textureCompressionASTC_LDR) {
return false;
}
@@ -257,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
for (const auto format : astc_formats) {
const auto format_properties{physical.getFormatProperties(format, dldi)};
const auto format_properties{physical.getFormatProperties(format, dld)};
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
return false;
}
@@ -276,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
bool is_suitable = true;
constexpr std::array required_extensions = {
bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
const vk::DispatchLoaderDynamic& dld) {
static constexpr std::array required_extensions = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
@@ -290,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
bool is_suitable = true;
std::bitset<required_extensions.size()> available_extensions{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
for (std::size_t i = 0; i < required_extensions.size(); ++i) {
if (available_extensions[i]) {
continue;
@@ -312,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
if (family.queueCount == 0) {
@@ -320,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0;
}
if (!has_graphics || !has_present) {
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
@@ -328,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}
// TODO(Rodrigo): Check if the device matches all requeriments.
const auto properties{physical.getProperties(dldi)};
const auto properties{physical.getProperties(dld)};
const auto& limits{properties.limits};
constexpr u32 required_ubo_size = 65536;
@@ -345,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
is_suitable = false;
}
const auto features{physical.getFeatures(dldi)};
const auto features{physical.getFeatures(dld)};
const std::array feature_report = {
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.independentBlend, "independentBlend"),
@@ -377,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> VKDevice::LoadExtensions() {
std::vector<const char*> extensions;
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
@@ -408,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -430,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld);
const auto properties =
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld);
is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
@@ -453,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
if (has_ext_transform_feedback) {
const auto features =
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld);
const auto properties =
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld);
if (features.transformFeedback && features.geometryStreams &&
properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
@@ -468,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
return extensions;
}
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
void VKDevice::SetupFamilies(vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
if (graphics_family_ && present_family_)
break;
@@ -480,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
if (queue_family.queueCount == 0)
continue;
if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) {
graphics_family_ = i;
if (physical.getSurfaceSupportKHR(i, surface, dldi))
}
if (physical.getSurfaceSupportKHR(i, surface, dld)) {
present_family_ = i;
}
}
ASSERT(graphics_family_ && present_family_);
@@ -491,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
present_family = *present_family_;
}
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
void VKDevice::SetupFeatures() {
const auto supported_features{physical.getFeatures(dld)};
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
}
void VKDevice::CollectTelemetryParameters() {
@@ -522,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
}
std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) {
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8UintPack32,
vk::Format::eA8B8G8R8SnormPack32,
@@ -572,28 +575,38 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eBc2SrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4UnormBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc5x4UnormBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock,
vk::Format::eAstc6x5SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc8x5UnormBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc8x8UnormBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc10x5UnormBlock,
vk::Format::eAstc10x5SrgbBlock,
vk::Format::eAstc10x6UnormBlock,
vk::Format::eAstc10x6SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x10UnormBlock,
vk::Format::eAstc12x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eE5B9G9R9UfloatPack32};
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
format_properties.emplace(format, physical.getFormatProperties(format, dld));
}
return format_properties;
}

View File

@@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
class VKDevice final {
public:
explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
~VKDevice();
/// Initializes the device. Returns true on success.
bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
bool Create(vk::Instance instance);
/**
* Returns a format supported by the device for the passed requeriments.
@@ -188,18 +188,18 @@ public:
}
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
const vk::DispatchLoaderDynamic& dld);
private:
/// Loads extensions into a vector and stores available ones in this object.
std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
std::vector<const char*> LoadExtensions();
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
void SetupFamilies(vk::SurfaceKHR surface);
/// Sets up device features.
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
void SetupFeatures();
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
@@ -208,8 +208,7 @@ private:
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const;
bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const;
/// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
@@ -217,10 +216,10 @@ private:
/// Returns the device properties for Vulkan formats.
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.

View File

@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, registry},
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
u32 main_offset)
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
program_addr, *cpu_addr, std::move(code),
kernel_main_offset);
Register(shader);
}
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
const auto shader = TryGet(*cpu_addr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5

View File

@@ -113,17 +113,13 @@ namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
@@ -149,7 +145,6 @@ private:
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;

View File

@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
query_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return false;
}
const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
if (!surface) {
return false;
}
@@ -594,7 +599,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -602,7 +607,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
zeta_attachment = texture_cache.GetDepthBufferSurface(true);
zeta_attachment = texture_cache.GetDepthBufferSurface();
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;

View File

@@ -118,9 +118,9 @@ public:
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,

View File

@@ -35,7 +35,7 @@ namespace {
using Sirit::Id;
using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using namespace VideoCommon::Shader;
@@ -752,16 +752,16 @@ private:
if (stage != ShaderType::Fragment) {
continue;
}
switch (header.ps.GetAttributeUse(location)) {
case AttributeUse::Constant:
switch (header.ps.GetPixelImap(location)) {
case PixelImap::Constant:
Decorate(id, spv::Decoration::Flat);
break;
case AttributeUse::ScreenLinear:
Decorate(id, spv::Decoration::NoPerspective);
break;
case AttributeUse::Perspective:
case PixelImap::Perspective:
// Default
break;
case PixelImap::ScreenLinear:
Decorate(id, spv::Decoration::NoPerspective);
break;
default:
UNREACHABLE_MSG("Unused attribute being fetched");
}
@@ -1145,9 +1145,6 @@ private:
switch (attribute) {
case Attribute::Index::Position: {
if (stage == ShaderType::Fragment) {
if (element == 3) {
return {Constant(t_float, 1.0f), Type::Float};
}
return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
Type::Float};
}

View File

@@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode;
using Tegra::Texture::SwizzleSource;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
namespace {
@@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) {
return {};
}
UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
std::size_t host_memory_size) {
// TODO(Rodrigo): Move texture buffer creation to the buffer cache
const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
const vk::BufferCreateInfo buffer_ci({}, host_memory_size,
vk::BufferUsageFlagBits::eUniformTexelBuffer |
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
@@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
const SurfaceParams& params,
vk::Buffer buffer) {
vk::Buffer buffer,
std::size_t host_memory_size) {
ASSERT(params.IsBuffer());
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size);
}
vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
GPUVAddr gpu_addr, const SurfaceParams& params)
: SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
staging_pool{staging_pool} {
: SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
device{device}, resource_manager{resource_manager},
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
if (params.IsBuffer()) {
buffer = CreateBuffer(device, params);
buffer = CreateBuffer(device, params, host_memory_size);
commit = memory_manager.Commit(*buffer, false);
const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
const auto buffer_view_ci =
GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
format = buffer_view_ci.format;
const auto dev = device.GetLogical();
@@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
size = host_memory_size](auto cmdbuf, auto& dld) {
const vk::BufferCopy copy(0, 0, size);
cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
@@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
const auto compression_type = params.GetCompressionType();
const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level);
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
return vk::BufferImageCopy(
mip_offset, 0, 0,
@@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,
VKStagingBufferPool& staging_pool)
: TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
: TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
staging_pool{staging_pool} {}
VKTextureCache::~VKTextureCache() = default;

View File

@@ -13,13 +13,247 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/textures/texture.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::PredCondition;
using Tegra::Shader::StoreType;
using Tegra::Texture::ComponentType;
using Tegra::Texture::TextureFormat;
using Tegra::Texture::TICEntry;
namespace {
ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
std::size_t component) {
const TextureFormat format{descriptor.format};
switch (format) {
case TextureFormat::R16_G16_B16_A16:
case TextureFormat::R32_G32_B32_A32:
case TextureFormat::R32_G32_B32:
case TextureFormat::R32_G32:
case TextureFormat::R16_G16:
case TextureFormat::R32:
case TextureFormat::R16:
case TextureFormat::R8:
case TextureFormat::R1:
if (component == 0) {
return descriptor.r_type;
}
if (component == 1) {
return descriptor.g_type;
}
if (component == 2) {
return descriptor.b_type;
}
if (component == 3) {
return descriptor.a_type;
}
break;
case TextureFormat::A8R8G8B8:
if (component == 0) {
return descriptor.a_type;
}
if (component == 1) {
return descriptor.r_type;
}
if (component == 2) {
return descriptor.g_type;
}
if (component == 3) {
return descriptor.b_type;
}
break;
case TextureFormat::A2B10G10R10:
case TextureFormat::A4B4G4R4:
case TextureFormat::A5B5G5R1:
case TextureFormat::A1B5G5R5:
if (component == 0) {
return descriptor.a_type;
}
if (component == 1) {
return descriptor.b_type;
}
if (component == 2) {
return descriptor.g_type;
}
if (component == 3) {
return descriptor.r_type;
}
break;
case TextureFormat::R32_B24G8:
if (component == 0) {
return descriptor.r_type;
}
if (component == 1) {
return descriptor.b_type;
}
if (component == 2) {
return descriptor.g_type;
}
break;
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
if (component == 0) {
return descriptor.b_type;
}
if (component == 1) {
return descriptor.g_type;
}
if (component == 2) {
return descriptor.r_type;
}
break;
case TextureFormat::G8R24:
case TextureFormat::G24R8:
case TextureFormat::G8R8:
case TextureFormat::G4R4:
if (component == 0) {
return descriptor.g_type;
}
if (component == 1) {
return descriptor.r_type;
}
break;
}
UNIMPLEMENTED_MSG("texture format not implement={}", format);
return ComponentType::FLOAT;
}
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
constexpr std::array<u8, 16> mask = {
0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
(A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
return std::bitset<4>{mask.at(component_mask)}.test(component);
}
u32 GetComponentSize(TextureFormat format, std::size_t component) {
switch (format) {
case TextureFormat::R32_G32_B32_A32:
return 32;
case TextureFormat::R16_G16_B16_A16:
return 16;
case TextureFormat::R32_G32_B32:
return component <= 2 ? 32 : 0;
case TextureFormat::R32_G32:
return component <= 1 ? 32 : 0;
case TextureFormat::R16_G16:
return component <= 1 ? 16 : 0;
case TextureFormat::R32:
return component == 0 ? 32 : 0;
case TextureFormat::R16:
return component == 0 ? 16 : 0;
case TextureFormat::R8:
return component == 0 ? 8 : 0;
case TextureFormat::R1:
return component == 0 ? 1 : 0;
case TextureFormat::A8R8G8B8:
return 8;
case TextureFormat::A2B10G10R10:
return (component == 3 || component == 2 || component == 1) ? 10 : 2;
case TextureFormat::A4B4G4R4:
return 4;
case TextureFormat::A5B5G5R1:
return (component == 0 || component == 1 || component == 2) ? 5 : 1;
case TextureFormat::A1B5G5R5:
return (component == 1 || component == 2 || component == 3) ? 5 : 1;
case TextureFormat::R32_B24G8:
if (component == 0) {
return 32;
}
if (component == 1) {
return 24;
}
if (component == 2) {
return 8;
}
return 0;
case TextureFormat::B5G6R5:
if (component == 0 || component == 2) {
return 5;
}
if (component == 1) {
return 6;
}
return 0;
case TextureFormat::B6G5R5:
if (component == 1 || component == 2) {
return 5;
}
if (component == 0) {
return 6;
}
return 0;
case TextureFormat::G8R24:
if (component == 0) {
return 8;
}
if (component == 1) {
return 24;
}
return 0;
case TextureFormat::G24R8:
if (component == 0) {
return 8;
}
if (component == 1) {
return 24;
}
return 0;
case TextureFormat::G8R8:
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
default:
UNIMPLEMENTED_MSG("texture format not implement={}", format);
return 0;
}
}
std::size_t GetImageComponentMask(TextureFormat format) {
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
switch (format) {
case TextureFormat::R32_G32_B32_A32:
case TextureFormat::R16_G16_B16_A16:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A4B4G4R4:
case TextureFormat::A5B5G5R1:
case TextureFormat::A1B5G5R5:
return std::size_t{R | G | B | A};
case TextureFormat::R32_G32_B32:
case TextureFormat::R32_B24G8:
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
return std::size_t{R | G | B};
case TextureFormat::R32_G32:
case TextureFormat::R16_G16:
case TextureFormat::G8R24:
case TextureFormat::G24R8:
case TextureFormat::G8R8:
case TextureFormat::G4R4:
return std::size_t{R | G};
case TextureFormat::R32:
case TextureFormat::R16:
case TextureFormat::R8:
case TextureFormat::R1:
return std::size_t{R};
default:
UNIMPLEMENTED_MSG("texture format not implement={}", format);
return std::size_t{R | G | B | A};
}
}
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
switch (image_type) {
case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
}
} // Anonymous namespace
std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
Node original_value) {
switch (component_type) {
case ComponentType::SNORM: {
// range [-1.0, 1.0]
auto cnv_value = Operation(OperationCode::FMul, original_value,
Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
}
case ComponentType::SINT:
case ComponentType::UNORM: {
bool is_signed = component_type == ComponentType::SINT;
// range [0.0, 1.0]
auto cnv_value = Operation(OperationCode::FMul, original_value,
Immediate(static_cast<float>(1 << component_size) - 1.f));
return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
is_signed};
}
case ComponentType::UINT: // range [0, (1 << component_size) - 1]
return {std::move(original_value), false};
case ComponentType::FLOAT:
if (component_size == 16) {
return {Operation(OperationCode::HCastFloat, original_value), true};
} else {
return {std::move(original_value), true};
}
default:
UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
return {std::move(original_value), true};
}
}
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::SULD: {
UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
Tegra::Shader::OutOfBoundsStore::Ignore);
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
: GetBindlessImage(instr.gpr39, type)};
image.MarkRead();
u32 indexer = 0;
for (u32 element = 0; element < 4; ++element) {
if (!instr.suldst.IsComponentEnabled(element)) {
continue;
if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
u32 indexer = 0;
for (u32 element = 0; element < 4; ++element) {
if (!instr.suldst.IsComponentEnabled(element)) {
continue;
}
MetaImage meta{image, {}, element};
Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
SetTemporary(bb, indexer++, std::move(value));
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
} else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
auto descriptor = [this, instr] {
std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
if (instr.suldst.is_immediate) {
descriptor =
registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
} else {
const Node image_register = GetRegister(instr.gpr39);
const auto [base_image, buffer, offset] = TrackCbuf(
image_register, global_code, static_cast<s64>(global_code.size()));
descriptor = registry.ObtainBindlessSampler(buffer, offset);
}
if (!descriptor) {
UNREACHABLE_MSG("Failed to obtain image descriptor");
}
return *descriptor;
}();
const auto comp_mask = GetImageComponentMask(descriptor.format);
switch (instr.suldst.GetStoreDataLayout()) {
case StoreType::Bits32:
case StoreType::Bits64: {
u32 indexer = 0;
u32 shifted_counter = 0;
Node value = Immediate(0);
for (u32 element = 0; element < 4; ++element) {
if (!IsComponentEnabled(comp_mask, element)) {
continue;
}
const auto component_type = GetComponentType(descriptor, element);
const auto component_size = GetComponentSize(descriptor.format, element);
MetaImage meta{image, {}, element};
auto [converted_value, is_signed] = GetComponentValue(
component_type, component_size,
Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
// shift element to correct position
const auto shifted = shifted_counter;
if (shifted > 0) {
converted_value =
SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
std::move(converted_value), Immediate(shifted));
}
shifted_counter += component_size;
// add value into result
value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
// if we shifted enough for 1 byte -> we save it into temp
if (shifted_counter >= 32) {
SetTemporary(bb, indexer++, std::move(value));
// reset counter and value to prepare pack next byte
value = Immediate(0);
shifted_counter = 0;
}
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}
default:
UNREACHABLE();
break;
}
MetaImage meta{image, {}, element};
Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
SetTemporary(bb, indexer++, std::move(value));
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
}
break;
}

View File

@@ -11,12 +11,17 @@
namespace VideoCommon::Shader {
using std::move;
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::OpCode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::SystemVariable;
using Index = Tegra::Shader::Attribute::Index;
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
bb.push_back(Operation(OperationCode::Discard));
break;
}
case OpCode::Id::MOV_SYS: {
case OpCode::Id::S2R: {
const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::LaneId:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
return Immediate(0U);
case SystemVariable::InvocationId:
return Operation(OperationCode::InvocationId);
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
return Immediate(0U);
case SystemVariable::WscaleFactorXY:
UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
return Immediate(0U);
case SystemVariable::WscaleFactorZ:
UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
return Immediate(0U);
case SystemVariable::Tid: {
Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::IPA: {
const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
const auto attribute = instr.attribute.fmt28;
const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
instr.ipa.sample_mode.Value()};
const Index index = attribute.index;
Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
: GetInputAttribute(attribute.index, attribute.element);
const Tegra::Shader::Attribute::Index index = attribute.index.Value();
const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
index <= Tegra::Shader::Attribute::Index::Attribute_31;
if (is_generic || is_physical) {
// TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
// In theory by setting them as perspective, OpenGL does the perspective correction.
// A way must figured to reverse the last step of it.
if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
: GetInputAttribute(index, attribute.element);
// Code taken from Ryujinx.
if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
Node position_w = GetInputAttribute(Index::Position, 3);
value = Operation(OperationCode::FMul, move(value), move(position_w));
}
}
value = GetSaturatedFloat(value, instr.ipa.saturate);
SetRegister(bb, instr.gpr0, value);
if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
}
value = GetSaturatedFloat(move(value), instr.ipa.saturate);
SetRegister(bb, instr.gpr0, move(value));
break;
}
case OpCode::Id::OUT_R: {

View File

@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
switch (cc) {
case Tegra::Shader::ConditionCode::NEU:
return GetInternalFlag(InternalFlag::Zero, true);
case Tegra::Shader::ConditionCode::FCSM_TR:
UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
return MakeNode<PredicateNode>(Pred::NeverExecute, false);
default:
UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
return MakeNode<PredicateNode>(Pred::NeverExecute, false);

View File

@@ -312,6 +312,10 @@ private:
/// Conditionally saturates a half float pair
Node GetSaturatedHalfFloat(Node value, bool saturate = true);
/// Get image component value by type and size
std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
u32 component_size, Node original_value);
/// Returns a predicate comparing two floats
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
/// Returns a predicate comparing two integers

View File

@@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
return GetFormatBpp(pixel_format) / CHAR_BIT;
}
enum class SurfaceCompression {
None, // Not compressed
Compressed, // Texture is compressed
Converted, // Texture is converted before upload or after download
Rearranged, // Texture is swizzled before upload or after download
};
constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
SurfaceCompression::None, // ABGR8U
SurfaceCompression::None, // ABGR8S
SurfaceCompression::None, // ABGR8UI
SurfaceCompression::None, // B5G6R5U
SurfaceCompression::None, // A2B10G10R10U
SurfaceCompression::None, // A1B5G5R5U
SurfaceCompression::None, // R8U
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
SurfaceCompression::None, // RGBA16S
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI
SurfaceCompression::Compressed, // DXT1
SurfaceCompression::Compressed, // DXT23
SurfaceCompression::Compressed, // DXT45
SurfaceCompression::Compressed, // DXN1
SurfaceCompression::Compressed, // DXN2UNORM
SurfaceCompression::Compressed, // DXN2SNORM
SurfaceCompression::Compressed, // BC7U
SurfaceCompression::Compressed, // BC6H_UF16
SurfaceCompression::Compressed, // BC6H_SF16
SurfaceCompression::Converted, // ASTC_2D_4X4
SurfaceCompression::None, // BGRA8
SurfaceCompression::None, // RGBA32F
SurfaceCompression::None, // RG32F
SurfaceCompression::None, // R32F
SurfaceCompression::None, // R16F
SurfaceCompression::None, // R16U
SurfaceCompression::None, // R16S
SurfaceCompression::None, // R16UI
SurfaceCompression::None, // R16I
SurfaceCompression::None, // RG16
SurfaceCompression::None, // RG16F
SurfaceCompression::None, // RG16UI
SurfaceCompression::None, // RG16I
SurfaceCompression::None, // RG16S
SurfaceCompression::None, // RGB32F
SurfaceCompression::None, // RGBA8_SRGB
SurfaceCompression::None, // RG8U
SurfaceCompression::None, // RG8S
SurfaceCompression::None, // RG32UI
SurfaceCompression::None, // RGBX16F
SurfaceCompression::None, // R32UI
SurfaceCompression::None, // R32I
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5
SurfaceCompression::Converted, // ASTC_2D_5X4
SurfaceCompression::None, // BGRA8_SRGB
SurfaceCompression::Compressed, // DXT1_SRGB
SurfaceCompression::Compressed, // DXT23_SRGB
SurfaceCompression::Compressed, // DXT45_SRGB
SurfaceCompression::Compressed, // BC7U_SRGB
SurfaceCompression::None, // R4G4B4A4U
SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB
SurfaceCompression::Converted, // ASTC_2D_5X5
SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
SurfaceCompression::Converted, // ASTC_2D_10X8
SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
SurfaceCompression::Converted, // ASTC_2D_6X6
SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
SurfaceCompression::Converted, // ASTC_2D_10X10
SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
SurfaceCompression::Converted, // ASTC_2D_12X12
SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
SurfaceCompression::Converted, // ASTC_2D_8X6
SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
SurfaceCompression::Converted, // ASTC_2D_6X5
SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
SurfaceCompression::None, // E5B9G9R9F
SurfaceCompression::None, // Z32F
SurfaceCompression::None, // Z16
SurfaceCompression::None, // Z24S8
SurfaceCompression::Rearranged, // S8Z24
SurfaceCompression::None, // Z32FS8
}};
constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
if (format == PixelFormat::Invalid) {
return SurfaceCompression::None;
}
DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
return compression_type_table[static_cast<std::size_t>(format)];
}
SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
bool SurfaceTargetIsLayered(SurfaceTarget target);

View File

@@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192,
using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::PixelFormat;
StagingCache::StagingCache() = default;
StagingCache::~StagingCache() = default;
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
: params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
bool is_astc_supported)
: params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
mipmap_offsets(params.num_levels) {
is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
host_memory_size = params.GetHostSizeInBytes(is_converted);
std::size_t offset = 0;
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
@@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf
std::size_t guest_offset{mipmap_offsets[level]};
if (params.is_layered) {
std::size_t host_offset{0};
std::size_t host_offset = 0;
const std::size_t guest_stride = layer_size;
const std::size_t host_stride = params.GetHostLayerSize(level);
for (u32 layer = 0; layer < params.depth; ++layer) {
@@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
MICROPROFILE_SCOPE(GPU_Load_Texture);
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
params.block_width, static_cast<u32>(params.target));
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
@@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
const u32 height{(params.height + block_height - 1) / block_height};
const u32 copy_size{width * bpp};
if (params.pitch == copy_size) {
std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
} else {
const u8* start{host_ptr};
u8* write_to{staging_buffer.data()};
@@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
}
}
auto compression_type = params.GetCompressionType();
if (compression_type == SurfaceCompression::None ||
compression_type == SurfaceCompression::Compressed)
if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
return;
}
for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
const u32 level = level_up - 1;
const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
? in_host_offset
: params.GetConvertedMipmapOffset(level);
u8* in_buffer = staging_buffer.data() + in_host_offset;
u8* out_buffer = staging_buffer.data() + out_host_offset;
for (u32 level = params.num_levels; level--;) {
const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
u8* const in_buffer = staging_buffer.data() + in_host_offset;
u8* const out_buffer = staging_buffer.data() + out_host_offset;
ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
params.GetMipWidth(level), params.GetMipHeight(level),
params.GetMipDepth(level), true, true);
@@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
auto& staging_buffer = staging_cache.GetBuffer(0);
u8* host_ptr;
// Handle continuouty
if (is_continuous) {
// Use physical memory directly
host_ptr = memory_manager.GetPointer(gpu_addr);
if (!host_ptr) {
return;
}
} else {
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
}
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
@@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
}
}
}
if (!is_continuous) {
memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
} // namespace VideoCommon

View File

@@ -68,8 +68,8 @@ public:
return gpu_addr;
}
bool Overlaps(const CacheAddr start, const CacheAddr end) const {
return (cache_addr < end) && (cache_addr_end > start);
bool Overlaps(const VAddr start, const VAddr end) const {
return (cpu_addr < end) && (cpu_addr_end > start);
}
bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
return cpu_addr;
}
VAddr GetCpuAddrEnd() const {
return cpu_addr_end;
}
void SetCpuAddr(const VAddr new_addr) {
cpu_addr = new_addr;
}
CacheAddr GetCacheAddr() const {
return cache_addr;
}
CacheAddr GetCacheAddrEnd() const {
return cache_addr_end;
}
void SetCacheAddr(const CacheAddr new_addr) {
cache_addr = new_addr;
cache_addr_end = new_addr + guest_memory_size;
cpu_addr_end = new_addr + guest_memory_size;
}
const SurfaceParams& GetSurfaceParams() const {
@@ -119,18 +111,14 @@ public:
return mipmap_sizes[level];
}
void MarkAsContinuous(const bool is_continuous) {
this->is_continuous = is_continuous;
}
bool IsContinuous() const {
return is_continuous;
}
bool IsLinear() const {
return !params.is_tiled;
}
bool IsConverted() const {
return is_converted;
}
bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
return params.pixel_format == pixel_format;
}
@@ -160,7 +148,8 @@ public:
}
protected:
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
bool is_astc_supported);
~SurfaceBaseImpl() = default;
virtual void DecorateSurfaceName() = 0;
@@ -168,12 +157,11 @@ protected:
const SurfaceParams params;
std::size_t layer_size;
std::size_t guest_memory_size;
const std::size_t host_memory_size;
std::size_t host_memory_size;
GPUVAddr gpu_addr{};
CacheAddr cache_addr{};
CacheAddr cache_addr_end{};
VAddr cpu_addr{};
bool is_continuous{};
VAddr cpu_addr_end{};
bool is_converted{};
std::vector<std::size_t> mipmap_sizes;
std::vector<std::size_t> mipmap_offsets;
@@ -288,8 +276,9 @@ public:
}
protected:
explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
: SurfaceBaseImpl(gpu_addr, params) {}
explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool is_astc_supported)
: SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
~SurfaceBase() = default;

View File

@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
params.height = tic.Height();
params.depth = tic.Depth();
params.pitch = params.is_tiled ? 0 : tic.Pitch();
if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
params.depth = 1;
} else if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = tic.max_mip_level + 1;
@@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
return offset;
}
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
}
return offset;
}
std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
std::size_t offset = 0;
for (u32 i = 0; i < level; i++) {
offset += GetConvertedMipmapSize(i);
if (is_converted) {
for (u32 i = 0; i < level; ++i) {
offset += GetConvertedMipmapSize(i) * GetNumLayers();
}
} else {
for (u32 i = 0; i < level; ++i) {
offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
}
}
return offset;
}
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
constexpr std::size_t rgba8_bpp = 4ULL;
const std::size_t width_t = GetMipWidth(level);
const std::size_t height_t = GetMipHeight(level);
const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
return width_t * height_t * depth_t * rgba8_bpp;
const std::size_t mip_width = GetMipWidth(level);
const std::size_t mip_height = GetMipHeight(level);
const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
return mip_width * mip_height * mip_depth * rgba8_bpp;
}
std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {

View File

@@ -20,8 +20,6 @@ namespace VideoCommon {
class FormatLookupTable;
using VideoCore::Surface::SurfaceCompression;
class SurfaceParams {
public:
/// Creates SurfaceCachedParams from a texture configuration.
@@ -67,16 +65,14 @@ public:
return GetInnerMemorySize(false, false, false);
}
std::size_t GetHostSizeInBytes() const {
std::size_t host_size_in_bytes;
if (GetCompressionType() == SurfaceCompression::Converted) {
// ASTC is uncompressed in software, in emulated as RGBA8
host_size_in_bytes = 0;
for (u32 level = 0; level < num_levels; ++level) {
host_size_in_bytes += GetConvertedMipmapSize(level);
}
} else {
host_size_in_bytes = GetInnerMemorySize(true, false, false);
std::size_t GetHostSizeInBytes(bool is_converted) const {
if (!is_converted) {
return GetInnerMemorySize(true, false, false);
}
// ASTC is uncompressed in software, in emulated as RGBA8
std::size_t host_size_in_bytes = 0;
for (u32 level = 0; level < num_levels; ++level) {
host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
}
return host_size_in_bytes;
}
@@ -107,9 +103,8 @@ public:
u32 GetMipBlockDepth(u32 level) const;
/// Returns the best possible row/pitch alignment for the surface.
u32 GetRowAlignment(u32 level) const {
const u32 bpp =
GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
u32 GetRowAlignment(u32 level, bool is_converted) const {
const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
}
@@ -117,11 +112,7 @@ public:
std::size_t GetGuestMipmapLevelOffset(u32 level) const;
/// Returns the offset in bytes in host memory (linear) of a given mipmap level.
std::size_t GetHostMipmapLevelOffset(u32 level) const;
/// Returns the offset in bytes in host memory (linear) of a given mipmap level
/// for a texture that is converted in host gpu.
std::size_t GetConvertedMipmapOffset(u32 level) const;
std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
/// Returns the size in bytes in guest memory of a given mipmap level.
std::size_t GetGuestMipmapSize(u32 level) const {
@@ -196,11 +187,6 @@ public:
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
}
/// Returns how the compression should be handled for this texture.
SurfaceCompression GetCompressionType() const {
return VideoCore::Surface::GetFormatCompressionType(pixel_format);
}
/// Returns is the surface is a TextureBuffer type of surface.
bool IsBuffer() const {
return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;

View File

@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
using IntervalType = typename IntervalMap::interval_type;
public:
void InvalidateRegion(CacheAddr addr, std::size_t size) {
void InvalidateRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
guard_samplers = new_guard;
}
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -110,7 +108,7 @@ public:
}
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
@@ -124,13 +122,13 @@ public:
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
if (guard_samplers) {
sampled_textures.push_back(surface);
}
@@ -145,7 +143,7 @@ public:
return any_rt;
}
TView GetDepthBufferSurface(bool preserve_contents) {
TView GetDepthBufferSurface() {
std::lock_guard lock{mutex};
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -159,14 +157,14 @@ public:
SetEmptyDepthBuffer();
return {};
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyDepthBuffer();
return {};
}
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
depth_buffer.target = surface_view.first;
@@ -176,7 +174,7 @@ public:
return surface_view.second;
}
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
TView GetColorBufferSurface(std::size_t index) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
auto& maxwell3d = system.GPU().Maxwell3D();
@@ -199,16 +197,15 @@ public:
return {};
}
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
if (!cache_addr) {
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view =
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents, true);
auto surface_view = GetSurface(gpu_addr, *cpu_addr,
SurfaceParams::CreateForFramebuffer(system, index), true);
if (render_targets[index].target)
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
render_targets[index].target = surface_view.first;
@@ -257,27 +254,26 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
const std::optional<VAddr> dst_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
const std::optional<VAddr> src_cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
std::pair<TSurface, TView> dst_surface =
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
std::pair<TSurface, TView> src_surface =
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
ImageBlit(src_surface.second, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr);
if (!cache_addr) {
TSurface TryFindFramebufferSurface(VAddr addr) {
if (!addr) {
return nullptr;
}
const CacheAddr page = cache_addr >> registry_page_bits;
const VAddr page = addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page];
for (auto& surface : list) {
if (surface->GetCacheAddr() == cache_addr) {
if (surface->GetCpuAddr() == addr) {
return surface;
}
}
@@ -289,8 +285,9 @@ public:
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
bool is_astc_supported)
: system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
@@ -337,18 +334,14 @@ protected:
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
const std::size_t size = surface->GetSizeInBytes();
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_ptr || !cpu_addr) {
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
return;
}
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
surface->MarkAsContinuous(continuous);
surface->SetCacheAddr(cache_ptr);
surface->SetCpuAddr(*cpu_addr);
RegisterInnerCache(surface);
surface->MarkAsRegistered(true);
@@ -381,6 +374,7 @@ protected:
}
Core::System& system;
const bool is_astc_supported;
private:
enum class RecycleStrategy : u32 {
@@ -456,22 +450,18 @@ private:
* @param overlaps The overlapping surfaces registered in the cache.
* @param params The parameters for the new surface.
* @param gpu_addr The starting address of the new surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or left
* blank.
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr,
const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
for (auto& surface : overlaps) {
Unregister(surface);
}
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
case RecycleStrategy::Ignore: {
return InitializeSurface(gpu_addr, params, do_load);
return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
}
case RecycleStrategy::Flush: {
std::sort(overlaps.begin(), overlaps.end(),
@@ -481,7 +471,7 @@ private:
for (auto& surface : overlaps) {
FlushSurface(surface);
}
return InitializeSurface(gpu_addr, params, preserve_contents);
return InitializeSurface(gpu_addr, params);
}
case RecycleStrategy::BufferCopy: {
auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -490,7 +480,7 @@ private:
}
default: {
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
return InitializeSurface(gpu_addr, params, do_load);
return InitializeSurface(gpu_addr, params);
}
}
}
@@ -626,14 +616,11 @@ private:
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
* @param cache_addr The starting address of the new surface on physical memory.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
const SurfaceParams& params,
const GPUVAddr gpu_addr,
const CacheAddr cache_addr,
bool preserve_contents) {
const VAddr cpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
bool failed = false;
if (params.num_levels > 1) {
@@ -657,7 +644,7 @@ private:
failed = true;
break;
}
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
modified |= surface->IsModified();
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -677,23 +664,23 @@ private:
} else {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
if (Settings::values.use_accurate_gpu_emulation) {
return std::nullopt;
}
Unregister(surface);
return InitializeSurface(gpu_addr, params, preserve_contents);
return InitializeSurface(gpu_addr, params);
}
return std::nullopt;
}
if (surface->GetCacheAddr() != cache_addr) {
if (surface->GetCpuAddr() != cpu_addr) {
continue;
}
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
return {{surface, surface->GetMainView()}};
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
return InitializeSurface(gpu_addr, params);
}
}
@@ -716,23 +703,19 @@ private:
*
* @param gpu_addr The starting address of the candidate surface.
* @param params The parameters on the candidate surface.
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
* @param is_render Whether or not the surface is a render target.
**/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
const SurfaceParams& params, bool preserve_contents,
bool is_render) {
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
const SurfaceParams& params, bool is_render) {
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
return RecycleSurface(overlaps, params, gpu_addr, topological_result);
}
const auto struct_result = current_surface->MatchesStructure(params);
@@ -753,11 +736,11 @@ private:
// Step 2
// Obtain all possible overlaps in the memory region
const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
// If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params, preserve_contents);
return InitializeSurface(gpu_addr, params);
}
// Step 3
@@ -767,15 +750,13 @@ private:
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
return RecycleSurface(overlaps, params, gpu_addr, topological_result);
}
}
// Check if it's a 3D texture
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
if (surface) {
return *surface;
}
@@ -795,8 +776,7 @@ private:
return *view;
}
}
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
}
// Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view =
@@ -820,7 +800,7 @@ private:
pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view)
return {pair.first, *mirage_view};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
return RecycleSurface(overlaps, params, gpu_addr,
MatchTopologyResult::FullMatch);
}
return {current_surface, *view};
@@ -836,8 +816,7 @@ private:
}
}
// We failed all the tests, recycle the overlaps into a new texture.
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
}
/**
@@ -850,16 +829,16 @@ private:
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
if (!cache_addr) {
if (!cpu_addr) {
Deduction result{};
result.type = DeductionType::DeductionFailed;
return result;
}
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
@@ -878,7 +857,7 @@ private:
}
const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
if (overlaps.empty()) {
Deduction result{};
@@ -995,10 +974,10 @@ private:
}
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents) {
bool do_load = true) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface);
if (preserve_contents) {
if (do_load) {
LoadSurface(new_surface);
}
return {new_surface, new_surface->GetMainView()};
@@ -1022,10 +1001,10 @@ private:
}
void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface;
const VAddr cpu_addr = surface->GetCpuAddr();
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache[cpu_addr] = surface;
while (start <= end) {
registry[start].push_back(surface);
start++;
@@ -1033,10 +1012,10 @@ private:
}
void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr);
const VAddr cpu_addr = surface->GetCpuAddr();
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cpu_addr);
while (start <= end) {
auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1044,18 +1023,18 @@ private:
}
}
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const CacheAddr cache_addr_end = cache_addr + size;
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
const VAddr cpu_addr_end = cpu_addr + size;
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces;
while (start <= end) {
std::vector<TSurface>& list = registry[start];
for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
surface->MarkAsPicked(true);
surfaces.push_back(surface);
}
@@ -1144,14 +1123,14 @@ private:
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
std::unordered_map<VAddr, std::vector<TSurface>> registry;
static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF;
// The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuffs.
std::unordered_map<CacheAddr, TSurface> l1_cache;
std::unordered_map<VAddr, TSurface> l1_cache;
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and

View File

@@ -0,0 +1,80 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include "core/settings.h"
#include "video_core/textures/texture.h"
namespace Tegra::Texture {
namespace {
constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
};
unsigned SettingsMinimumAnisotropy() noexcept {
switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
default:
case Anisotropy::Default:
return 1U;
case Anisotropy::Filter2x:
return 2U;
case Anisotropy::Filter4x:
return 4U;
case Anisotropy::Filter8x:
return 8U;
case Anisotropy::Filter16x:
return 16U;
}
}
} // Anonymous namespace
std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}
float TSCEntry::GetMaxAnisotropy() const noexcept {
return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
}
} // namespace Tegra::Texture

View File

@@ -8,7 +8,6 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/settings.h"
namespace Tegra::Texture {
@@ -336,24 +335,9 @@ struct TSCEntry {
std::array<u8, 0x20> raw;
};
float GetMaxAnisotropy() const {
const u32 min_value = [] {
switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
default:
case Anisotropy::Default:
return 1U;
case Anisotropy::Filter2x:
return 2U;
case Anisotropy::Filter4x:
return 4U;
case Anisotropy::Filter8x:
return 8U;
case Anisotropy::Filter16x:
return 16U;
}
}();
return static_cast<float>(std::max(1U << max_anisotropy, min_value));
}
std::array<float, 4> GetBorderColor() const noexcept;
float GetMaxAnisotropy() const noexcept;
float GetMinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +352,6 @@ struct TSCEntry {
constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}
std::array<float, 4> GetBorderColor() const {
if (srgb_conversion) {
return {static_cast<float>(srgb_border_color_r) / 255.0f,
static_cast<float>(srgb_border_color_g) / 255.0f,
static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
}
return border_color;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");

View File

@@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets)
target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
if (ENABLE_VULKAN AND NOT WIN32)
target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
endif()
target_compile_definitions(yuzu PRIVATE
# Use QStringBuilder for string concatenation to reduce
# the overall number of temporary strings created.

View File

@@ -3,15 +3,22 @@
// Refer to the license.txt file included.
#include <QIcon>
#include <fmt/format.h>
#include "common/scm_rev.h"
#include "ui_aboutdialog.h"
#include "yuzu/about_dialog.h"
AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
const auto build_id = std::string(Common::g_build_id);
const auto fmt = std::string(Common::g_title_bar_format_idle);
const auto yuzu_build_version =
fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
std::string{}, std::string{}, std::string{}, build_id);
ui->setupUi(this);
ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch),
QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
}

View File

@@ -14,8 +14,9 @@
#include <QScreen>
#include <QStringList>
#include <QWindow>
#ifdef HAS_VULKAN
#include <QVulkanWindow>
#if !defined(WIN32) && HAS_VULKAN
#include <qpa/qplatformnativeinterface.h>
#endif
#include <fmt/format.h>
@@ -224,7 +225,6 @@ public:
}
context->MakeCurrent();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
if (Core::System::GetInstance().Renderer().TryPresent(100)) {
context->SwapBuffers();
glFinish();
@@ -238,16 +238,50 @@ private:
#ifdef HAS_VULKAN
class VulkanRenderWidget : public RenderWidget {
public:
explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance)
: RenderWidget(parent) {
explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {
windowHandle()->setSurfaceType(QWindow::VulkanSurface);
windowHandle()->setVulkanInstance(instance);
}
};
#endif
GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread)
: QWidget(parent_), emu_thread(emu_thread) {
static Core::Frontend::WindowSystemType GetWindowSystemType() {
// Determine WSI type based on Qt platform.
QString platform_name = QGuiApplication::platformName();
if (platform_name == QStringLiteral("windows"))
return Core::Frontend::WindowSystemType::Windows;
else if (platform_name == QStringLiteral("xcb"))
return Core::Frontend::WindowSystemType::X11;
else if (platform_name == QStringLiteral("wayland"))
return Core::Frontend::WindowSystemType::Wayland;
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
return Core::Frontend::WindowSystemType::Windows;
}
static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
Core::Frontend::EmuWindow::WindowSystemInfo wsi;
wsi.type = GetWindowSystemType();
#ifdef HAS_VULKAN
// Our Win32 Qt external doesn't have the private API.
#if defined(WIN32) || defined(__APPLE__)
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#else
QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
wsi.display_connection = pni->nativeResourceForWindow("display", window);
if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
else
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#endif
wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
#endif
return wsi;
}
GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_)
: QWidget(parent_), emu_thread(emu_thread_) {
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
.arg(QString::fromUtf8(Common::g_build_name),
QString::fromUtf8(Common::g_scm_branch),
@@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() {
break;
}
// Update the Window System information with the new render target
window_info = GetWindowSystemInfo(child_widget->windowHandle());
child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
layout()->addWidget(child_widget);
// Reset minimum required size to avoid resizing issues on the main window after restarting.
@@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() {
bool GRenderWindow::InitializeVulkan() {
#ifdef HAS_VULKAN
vk_instance = std::make_unique<QVulkanInstance>();
vk_instance->setApiVersion(QVersionNumber(1, 1, 0));
vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect);
if (Settings::values.renderer_debug) {
const auto supported_layers{vk_instance->supportedLayers()};
const bool found =
std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) {
constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation";
return layer.name == searched_layer;
});
if (found) {
vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation");
vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
}
if (!vk_instance->create()) {
QMessageBox::critical(
this, tr("Error while initializing Vulkan 1.1!"),
tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the "
"latest graphics drivers."));
return false;
}
auto child = new VulkanRenderWidget(this, vk_instance.get());
auto child = new VulkanRenderWidget(this);
child_widget = child;
child_widget->windowHandle()->create();
main_context = std::make_unique<DummyContext>();
@@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() {
#endif
}
void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
#ifdef HAS_VULKAN
const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr");
const VkInstance instance_copy = vk_instance->vkInstance();
const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle());
std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
std::memcpy(instance, &instance_copy, sizeof(instance_copy));
std::memcpy(surface, &surface_copy, sizeof(surface_copy));
#else
UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan");
#endif
}
bool GRenderWindow::LoadOpenGL() {
auto context = CreateSharedContext();
auto scope = context->Acquire();

View File

@@ -22,9 +22,6 @@ class GMainWindow;
class QKeyEvent;
class QTouchEvent;
class QStringList;
#ifdef HAS_VULKAN
class QVulkanInstance;
#endif
namespace VideoCore {
enum class LoadCallbackStage;
@@ -122,8 +119,6 @@ public:
// EmuWindow implementation.
void PollEvents() override;
bool IsShown() const override;
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
void BackupGeometry();
@@ -186,10 +181,6 @@ private:
// should instead be shared from
std::shared_ptr<Core::Frontend::GraphicsContext> main_context;
#ifdef HAS_VULKAN
std::unique_ptr<QVulkanInstance> vk_instance;
#endif
/// Temporary storage of the screenshot taken
QImage screenshot_image;

View File

@@ -15,6 +15,10 @@
#include "ui_configure_graphics.h"
#include "yuzu/configuration/configure_graphics.h"
#ifdef HAS_VULKAN
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#endif
namespace {
enum class Resolution : int {
Auto,
@@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
void ConfigureGraphics::RetrieveVulkanDevices() {
#ifdef HAS_VULKAN
QVulkanInstance instance;
instance.setApiVersion(QVersionNumber(1, 1, 0));
if (!instance.create()) {
LOG_INFO(Frontend, "Vulkan 1.1 not available");
return;
}
const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))};
if (vkEnumeratePhysicalDevices == nullptr) {
LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices");
return;
}
u32 physical_device_count;
if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) !=
VK_SUCCESS) {
LOG_INFO(Frontend, "Failed to get physical devices count");
return;
}
std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count,
physical_devices.data()) != VK_SUCCESS) {
LOG_INFO(Frontend, "Failed to get physical devices");
return;
}
const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))};
if (vkGetPhysicalDeviceProperties == nullptr) {
LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties");
return;
}
for (const auto physical_device : physical_devices) {
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physical_device, &properties);
vulkan_devices.push_back(QString::fromUtf8(properties.deviceName));
vulkan_devices.clear();
for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
vulkan_devices.push_back(QString::fromStdString(name));
}
#endif
}

View File

@@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick(
button->setText(tr("[press key]"));
button->setFocus();
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
if (want_keyboard_keys) {
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
}
input_setter = new_input_setter;
device_pollers = InputCommon::Polling::GetPollers(type);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
for (auto& poller : device_pollers) {
poller->Start();
}

View File

@@ -927,7 +927,7 @@
</item>
</layout>
</item>
<item row="2" column="0">
<item row="0" column="2">
<layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout">
<item>
<layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout">
@@ -949,7 +949,7 @@
</item>
</layout>
</item>
<item row="2" column="1">
<item row="1" column="2">
<layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout">
<item>
<layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout">

View File

@@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) {
// - Open any dialogs
// - Block in any way
constexpr std::size_t PLAYER_0_INDEX = 0;
constexpr std::size_t HANDHELD_INDEX = 8;
void HandheldOnProfileSelect() {
@@ -53,8 +54,8 @@ void HandheldOnProfileSelect() {
}
void DualJoyconsDockedOnProfileSelect() {
Settings::values.players[0].connected = true;
Settings::values.players[0].type = Settings::ControllerType::DualJoycon;
Settings::values.players[PLAYER_0_INDEX].connected = true;
Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon;
for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) {
Settings::values.players[player].connected = false;
@@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() {
Settings::values.keyboard_enabled = false;
Settings::values.mouse_enabled = false;
Settings::values.debug_pad_enabled = false;
Settings::values.touchscreen.enabled = false;
Settings::values.touchscreen.enabled = true;
}
// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure
@@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{
}},
{QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect,
[](ConfigureInputSimple* caller) {
CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false);
CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false);
}},
{QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>},
}};

View File

@@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick(
button->setText(tr("[press key]"));
button->setFocus();
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
if (want_keyboard_keys) {
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
}
input_setter = new_input_setter;
device_pollers = InputCommon::Polling::GetPollers(type);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
for (auto& poller : device_pollers) {
poller->Start();
}

View File

@@ -205,7 +205,13 @@ GMainWindow::GMainWindow()
ConnectMenuEvents();
ConnectWidgetEvents();
LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
const auto build_id = std::string(Common::g_build_id);
const auto fmt = std::string(Common::g_title_bar_format_idle);
const auto yuzu_build_version =
fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
std::string{}, std::string{}, std::string{}, build_id);
LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
Common::g_scm_desc);
#ifdef ARCHITECTURE_x86_64
LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);

View File

@@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() {
SDL_GL_DeleteContext(window_context);
}
void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
// Should not have been called from OpenGL
UNREACHABLE();
}
std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const {
return std::make_unique<SDLGLContext>();
}

View File

@@ -15,10 +15,6 @@ public:
void Present() override;
/// Ignored in OpenGL
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
private:

View File

@@ -2,102 +2,62 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
#include <SDL.h>
#include <SDL_vulkan.h>
#include <fmt/format.h>
#include <vulkan/vulkan.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
// Include these late to avoid polluting everything with Xlib macros
#include <SDL.h>
#include <SDL_syswm.h>
EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
: EmuWindow_SDL2{system, fullscreen} {
if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
exit(EXIT_FAILURE);
}
vkGetInstanceProcAddr =
reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());
if (vkGetInstanceProcAddr == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
exit(EXIT_FAILURE);
}
const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
Common::g_scm_branch, Common::g_scm_desc);
render_window =
SDL_CreateWindow(window_title.c_str(),
SDL_WINDOWPOS_UNDEFINED, // x position
SDL_WINDOWPOS_UNDEFINED, // y position
SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN);
SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr);
u32 extra_ext_count{};
if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) {
LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}",
SDL_GetError());
exit(1);
SDL_SysWMinfo wm;
if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) {
LOG_CRITICAL(Frontend, "Failed to get information from the window manager");
std::exit(EXIT_FAILURE);
}
auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count);
if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) {
LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError());
exit(1);
}
std::vector<const char*> enabled_extensions;
enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(),
extra_ext_names.get() + extra_ext_count);
std::vector<const char*> enabled_layers;
if (use_standard_layers) {
enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation");
}
VkApplicationInfo app_info{};
app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
app_info.apiVersion = VK_API_VERSION_1_1;
app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
app_info.pApplicationName = "yuzu-emu";
app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
app_info.pEngineName = "yuzu-emu";
VkInstanceCreateInfo instance_ci{};
instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instance_ci.pApplicationInfo = &app_info;
instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size());
instance_ci.ppEnabledExtensionNames = enabled_extensions.data();
if (Settings::values.renderer_debug) {
instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size());
instance_ci.ppEnabledLayerNames = enabled_layers.data();
}
const auto vkCreateInstance =
reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance"));
if (vkCreateInstance == nullptr ||
vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!");
exit(EXIT_FAILURE);
}
vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance"));
if (vkDestroyInstance == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
exit(EXIT_FAILURE);
}
if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) {
LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError());
exit(EXIT_FAILURE);
switch (wm.subsystem) {
#ifdef SDL_VIDEO_DRIVER_WINDOWS
case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
window_info.type = Core::Frontend::WindowSystemType::Windows;
window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window);
break;
#endif
#ifdef SDL_VIDEO_DRIVER_X11
case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
window_info.type = Core::Frontend::WindowSystemType::X11;
window_info.display_connection = wm.info.x11.display;
window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window);
break;
#endif
#ifdef SDL_VIDEO_DRIVER_WAYLAND
case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
window_info.type = Core::Frontend::WindowSystemType::Wayland;
window_info.display_connection = wm.info.wl.display;
window_info.render_surface = wm.info.wl.surface;
break;
#endif
default:
LOG_CRITICAL(Frontend, "Window manager subsystem not implemented");
std::exit(EXIT_FAILURE);
}
OnResize();
@@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
Common::g_scm_branch, Common::g_scm_desc);
}
EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() {
vkDestroyInstance(vk_instance, nullptr);
}
void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
const auto instance_proc_addr = vkGetInstanceProcAddr;
std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
std::memcpy(instance, &vk_instance, sizeof(vk_instance));
std::memcpy(surface, &vk_surface, sizeof(vk_surface));
}
EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default;
std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
return nullptr;
}
bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const {
if (!Settings::values.renderer_debug) {
return false;
}
const auto vkEnumerateInstanceLayerProperties =
reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>(
vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties"));
if (vkEnumerateInstanceLayerProperties == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
return false;
}
u32 available_layers_count{};
if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
return false;
}
std::vector<VkLayerProperties> layers(available_layers_count);
if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
return false;
}
return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) {
return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
}) != layers.end();
}
void EmuWindow_SDL2_VK::Present() {
// TODO (bunnei): ImplementMe
}

View File

@@ -4,27 +4,21 @@
#pragma once
#include <vulkan/vulkan.h>
#include <memory>
#include "core/frontend/emu_window.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
namespace Core {
class System;
}
class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
public:
explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen);
~EmuWindow_SDL2_VK();
void Present() override;
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
private:
bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const;
VkInstance vk_instance{};
VkSurfaceKHR vk_surface{};
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
PFN_vkDestroyInstance vkDestroyInstance{};
};

View File

@@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const {
return false;
}
void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const {
UNREACHABLE();
}
class SDLGLContext : public Core::Frontend::GraphicsContext {
public:
explicit SDLGLContext() {

View File

@@ -19,10 +19,6 @@ public:
/// Whether the screen is being shown or not.
bool IsShown() const override;
/// Retrieves Vulkan specific handlers from the window
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
private: