Compare commits
77 Commits
| SHA1 |
|---|
| 94b0e2e5da |
| 51c6688e21 |
| 8adf66f9fd |
| f570b129a2 |
| 7182ef31c9 |
| 36f607217f |
| b96fd0bd0e |
| 7cd6daf115 |
| 1c18dc6577 |
| 913f42a3a7 |
| e00d992848 |
| 449255675d |
| 848d619aec |
| b128beb3a9 |
| c6ea0d010b |
| a209d464f9 |
| d7db088180 |
| 26ed65495d |
| 863f7385dc |
| f316911248 |
| 5a66ca4697 |
| 6b512d78c9 |
| bf1d66b7c0 |
| 487f9ba525 |
| 22b5d5211e |
| 935648ffa9 |
| bc1b4b85b0 |
| 7104e01bb3 |
| bf1174c114 |
| 27f122c48c |
| f9d5718c4b |
| ea535d9470 |
| 3dd5c07454 |
| 7fcd0fee6d |
| 6ee316cb8f |
| 9c0f40a1f5 |
| 588a20be3f |
| 2c98e14d13 |
| 9efa51311f |
| 7f5696513f |
| 2906372ba1 |
| 69277de29d |
| 1633fbf99a |
| 730f9b55b3 |
| 9f6ebccf06 |
| 6f2b7087c2 |
| 69657ff19c |
| 24cc64c5b3 |
| aa6214feb7 |
| fb8afee077 |
| acd3f0ab37 |
| 8370188b3c |
| 3e3afa9be6 |
| 5cd5857000 |
| 658112783d |
| 3ad06e9b2b |
| f24c2e1103 |
| 58bcb86af5 |
| 2cefdd92bd |
| 1f3d142875 |
| 08db60392d |
| ed1d8beb13 |
| 6d235b8631 |
| 59e75f4372 |
| 60106531b4 |
| 8b719e9e1d |
| 9d15feb892 |
| 16ae98dbb3 |
| c02a2dc24a |
| 80c4fee4ec |
| 2339fe199f |
| dd1232755b |
| 2f0da10dc3 |
| b6571ca9f0 |
| 16270dcfe4 |
| bd0c56c6e7 |
| 1c5e2b60a7 |
```diff
@@ -4,7 +4,7 @@ parameters:
   version: ''
 
 steps:
-- script: mkdir build && cd build && cmake -G "Visual Studio 15 2017 Win64" --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
+- script: mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
   displayName: 'Configure CMake'
 - task: MSBuild@1
   displayName: 'Build'
```
```diff
@@ -45,7 +45,7 @@ stages:
 - job: build
   displayName: 'msvc'
   pool:
-    vmImage: vs2017-win2016
+    vmImage: windows-2019
   steps:
   - template: ./templates/sync-source.yml
     parameters:
```
```diff
@@ -22,7 +22,7 @@ stages:
 - job: build
   displayName: 'windows-msvc'
   pool:
-    vmImage: vs2017-win2016
+    vmImage: windows-2019
   steps:
   - template: ./templates/sync-source.yml
     parameters:
```
```diff
@@ -106,6 +106,8 @@ add_library(common STATIC
     common_funcs.h
     common_paths.h
     common_types.h
+    dynamic_library.cpp
+    dynamic_library.h
     file_util.cpp
     file_util.h
     hash.h
```
src/common/dynamic_library.cpp (new file, 106 lines)

```cpp
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <cstring>
#include <string>
#include <utility>

#include <fmt/format.h>

#include "common/dynamic_library.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif

namespace Common {

DynamicLibrary::DynamicLibrary() = default;

DynamicLibrary::DynamicLibrary(const char* filename) {
    Open(filename);
}

DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept
    : handle{std::exchange(rhs.handle, nullptr)} {}

DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
    Close();
    handle = std::exchange(rhs.handle, nullptr);
    return *this;
}

DynamicLibrary::~DynamicLibrary() {
    Close();
}

std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) {
#if defined(_WIN32)
    return std::string(filename) + ".dll";
#elif defined(__APPLE__)
    return std::string(filename) + ".dylib";
#else
    return std::string(filename) + ".so";
#endif
}

std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) {
#if defined(_WIN32)
    if (major >= 0 && minor >= 0)
        return fmt::format("{}-{}-{}.dll", libname, major, minor);
    else if (major >= 0)
        return fmt::format("{}-{}.dll", libname, major);
    else
        return fmt::format("{}.dll", libname);
#elif defined(__APPLE__)
    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
    if (major >= 0 && minor >= 0)
        return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
    else if (major >= 0)
        return fmt::format("{}{}.{}.dylib", prefix, libname, major);
    else
        return fmt::format("{}{}.dylib", prefix, libname);
#else
    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
    if (major >= 0 && minor >= 0)
        return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
    else if (major >= 0)
        return fmt::format("{}{}.so.{}", prefix, libname, major);
    else
        return fmt::format("{}{}.so", prefix, libname);
#endif
}

bool DynamicLibrary::Open(const char* filename) {
#ifdef _WIN32
    handle = reinterpret_cast<void*>(LoadLibraryA(filename));
#else
    handle = dlopen(filename, RTLD_NOW);
#endif
    return handle != nullptr;
}

void DynamicLibrary::Close() {
    if (!IsOpen())
        return;

#ifdef _WIN32
    FreeLibrary(reinterpret_cast<HMODULE>(handle));
#else
    dlclose(handle);
#endif
    handle = nullptr;
}

void* DynamicLibrary::GetSymbolAddress(const char* name) const {
#ifdef _WIN32
    return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
#else
    return reinterpret_cast<void*>(dlsym(handle, name));
#endif
}

} // namespace Common
```
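For reference, a few outputs implied by the formatting branches above (the library name is illustrative):

```cpp
// Illustrative results of GetVersionedFilename, derived from the branches above:
//   GetVersionedFilename("vulkan", 1)    -> "vulkan-1.dll"        (Windows)
//   GetVersionedFilename("vulkan", 1)    -> "libvulkan.so.1"      (Linux)
//   GetVersionedFilename("vulkan", 1, 2) -> "libvulkan.1.2.dylib" (macOS)
//   GetVersionedFilename("libfoo")       -> "libfoo.so"           (Linux; "lib" is not doubled)
```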
src/common/dynamic_library.h (new file, 75 lines)

```cpp
// Copyright 2019 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <string>

namespace Common {

/**
 * Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
 * The interface maintains an internal reference count to allow one handle to be shared between
 * multiple users.
 */
class DynamicLibrary final {
public:
    /// Default constructor, does not load a library.
    explicit DynamicLibrary();

    /// Automatically loads the specified library. Call IsOpen() to check validity before use.
    explicit DynamicLibrary(const char* filename);

    /// Moves the library.
    DynamicLibrary(DynamicLibrary&&) noexcept;
    DynamicLibrary& operator=(DynamicLibrary&&) noexcept;

    /// Delete copies, we can't copy a dynamic library.
    DynamicLibrary(const DynamicLibrary&) = delete;
    DynamicLibrary& operator=(const DynamicLibrary&) = delete;

    /// Closes the library.
    ~DynamicLibrary();

    /// Returns the specified library name with the platform-specific suffix added.
    static std::string GetUnprefixedFilename(const char* filename);

    /// Returns the specified library name in platform-specific format.
    /// Major/minor versions will not be included if set to -1.
    /// If libname already contains the "lib" prefix, it will not be added again.
    /// Windows: LIBNAME-MAJOR-MINOR.dll
    /// Linux: libLIBNAME.so.MAJOR.MINOR
    /// Mac: libLIBNAME.MAJOR.MINOR.dylib
    static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1);

    /// Returns true if a module is loaded, otherwise false.
    bool IsOpen() const {
        return handle != nullptr;
    }

    /// Loads (or replaces) the handle with the specified library file name.
    /// Returns true if the library was loaded and can be used.
    bool Open(const char* filename);

    /// Unloads the library, any function pointers from this library are no longer valid.
    void Close();

    /// Returns the address of the specified symbol (function or variable) as an untyped pointer.
    /// If the specified symbol does not exist in this library, nullptr is returned.
    void* GetSymbolAddress(const char* name) const;

    /// Obtains the address of the specified symbol, automatically casting to the correct type.
    /// Returns true if the symbol was found and assigned, otherwise false.
    template <typename T>
    bool GetSymbol(const char* name, T* ptr) const {
        *ptr = reinterpret_cast<T>(GetSymbolAddress(name));
        return *ptr != nullptr;
    }

private:
    /// Platform-dependent data type representing a dynamic library handle.
    void* handle = nullptr;
};

} // namespace Common
```
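A minimal usage sketch of the API above (the library and symbol names are hypothetical, not part of this change):

```cpp
#include "common/dynamic_library.h"

// Hypothetical loader: open a versioned library and resolve one symbol.
using ExampleFn = int (*)(int);

bool LoadExample() {
    const std::string name = Common::DynamicLibrary::GetVersionedFilename("example", 1);
    Common::DynamicLibrary library(name.c_str());
    if (!library.IsOpen()) {
        return false; // library not found or failed to load
    }
    ExampleFn fn = nullptr;
    // GetSymbol casts the untyped address and reports whether it was found.
    return library.GetSymbol("example_entry_point", &fn);
}
```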
```diff
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <array>
+#include <limits>
 #include <memory>
 #include <sstream>
 #include <unordered_map>
```
```diff
@@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
 std::optional<std::string> GetCurrentDir() {
     // Get the current working directory (getcwd uses malloc)
 #ifdef _WIN32
-    wchar_t* dir;
-    if (!(dir = _wgetcwd(nullptr, 0))) {
+    wchar_t* dir = _wgetcwd(nullptr, 0);
+    if (!dir) {
 #else
-    char* dir;
-    if (!(dir = getcwd(nullptr, 0))) {
+    char* dir = getcwd(nullptr, 0);
+    if (!dir) {
 #endif
         LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
         return {};
```
```diff
@@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept {
 
 bool IOFile::Open(const std::string& filename, const char openmode[], int flags) {
     Close();
+    bool m_good;
 #ifdef _WIN32
     if (flags != 0) {
         m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(),
                           Common::UTF8ToUTF16W(openmode).c_str(), flags);
+        m_good = m_file != nullptr;
     } else {
-        _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
-                  Common::UTF8ToUTF16W(openmode).c_str());
+        m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
+                           Common::UTF8ToUTF16W(openmode).c_str()) == 0;
     }
 #else
-    m_file = fopen(filename.c_str(), openmode);
+    m_file = std::fopen(filename.c_str(), openmode);
+    m_good = m_file != nullptr;
 #endif
 
-    return IsOpen();
+    return m_good;
 }
 
 bool IOFile::Close() {
```
```diff
@@ -956,7 +960,7 @@ u64 IOFile::Tell() const {
     if (IsOpen())
         return ftello(m_file);
 
-    return -1;
+    return std::numeric_limits<u64>::max();
 }
 
 bool IOFile::Flush() {
```
```diff
@@ -28,11 +28,8 @@ namespace Common {
 #ifdef _MSC_VER
 
 // Sets the debugger-visible name of the current thread.
-// Uses undocumented (actually, it is now documented) trick.
-// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
-
-// This is implemented much nicer in upcoming msvc++, see:
-// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
+// Uses trick documented in:
+// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
 void SetCurrentThreadName(const char* name) {
     static const DWORD MS_VC_EXCEPTION = 0x406D1388;
```
```diff
@@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) {
 
     info.dwType = 0x1000;
     info.szName = name;
-    info.dwThreadID = -1; // dwThreadID;
+    info.dwThreadID = std::numeric_limits<DWORD>::max();
     info.dwFlags = 0;
 
     __try {
```
```diff
@@ -5,6 +5,7 @@
 #include <memory>
 
 #include "common/common_types.h"
+#include "common/string_util.h"
 #include "common/swap.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
 #include "core/file_sys/romfs.h"
```
```diff
@@ -126,7 +127,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
         return out->GetSubdirectories().front();
 
     while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
-        if (out->GetSubdirectories().front()->GetName() == "data" &&
+        if (Common::ToLower(out->GetSubdirectories().front()->GetName()) == "data" &&
             type == RomFSExtractionType::Truncated)
             break;
         out = out->GetSubdirectories().front();
```
```diff
@@ -12,6 +12,15 @@
 
 namespace Core::Frontend {
 
+/// Information for the Graphics Backends signifying what type of screen pointer is in
+/// WindowInformation
+enum class WindowSystemType {
+    Headless,
+    Windows,
+    X11,
+    Wayland,
+};
+
 /**
  * Represents a drawing context that supports graphics operations.
  */
```
```diff
@@ -76,6 +85,23 @@ public:
         std::pair<unsigned, unsigned> min_client_area_size;
     };
 
+    /// Data describing host window system information
+    struct WindowSystemInfo {
+        // Window system type. Determines which GL context or Vulkan WSI is used.
+        WindowSystemType type = WindowSystemType::Headless;
+
+        // Connection to a display server. This is used on X11 and Wayland platforms.
+        void* display_connection = nullptr;
+
+        // Render surface. This is a pointer to the native window handle, which depends
+        // on the platform. e.g. HWND for Windows, Window for X11. If the surface is
+        // set to nullptr, the video backend will run in headless mode.
+        void* render_surface = nullptr;
+
+        // Scale of the render surface. For hidpi systems, this will be >1.
+        float render_surface_scale = 1.0f;
+    };
+
     /// Polls window events
     virtual void PollEvents() = 0;
```
```diff
@@ -87,10 +113,6 @@ public:
     /// Returns if window is shown (not minimized)
     virtual bool IsShown() const = 0;
 
-    /// Retrieves Vulkan specific handlers from the window
-    virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
-                                        void* surface) const = 0;
-
     /**
      * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
      * @param framebuffer_x Framebuffer x-coordinate that was pressed
```
```diff
@@ -127,6 +149,13 @@ public:
         config = val;
     }
 
+    /**
+     * Returns system information about the drawing area.
+     */
+    const WindowSystemInfo& GetWindowInfo() const {
+        return window_info;
+    }
+
     /**
      * Gets the framebuffer layout (width, height, and screen regions)
      * @note This method is thread-safe
```
```diff
@@ -142,7 +171,7 @@ public:
     void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
 
 protected:
-    EmuWindow();
+    explicit EmuWindow();
     virtual ~EmuWindow();
 
     /**
```
```diff
@@ -179,6 +208,8 @@ protected:
         client_area_height = size.second;
     }
 
+    WindowSystemInfo window_info;
+
 private:
     /**
     * Handler called when the minimal client area was requested to be changed via SetConfig.
```
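A sketch of how a frontend subclass might fill the new window_info member before a Vulkan backend reads it through GetWindowInfo() (the subclass and handle are hypothetical; the real frontends live elsewhere in the tree):

```cpp
// Hypothetical Windows frontend; only the window_info assignments matter here.
class ExampleWindow : public Core::Frontend::EmuWindow {
public:
    void InitializeWindowInfo(void* native_hwnd) {
        window_info.type = Core::Frontend::WindowSystemType::Windows;
        window_info.render_surface = native_hwnd; // nullptr would mean headless
        window_info.render_surface_scale = 1.0f;  // >1 on hidpi displays
    }
    // ... pure-virtual overrides omitted ...
};
```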
```diff
@@ -242,7 +242,52 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 std::memcpy(dest_buffer, host_ptr, copy_amount);
                 break;
             }
             default:
                 UNREACHABLE();
             }
 
             page_index++;
             page_offset = 0;
             dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
             remaining_size -= copy_amount;
         }
     }
 
+    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+                         const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+
+        std::size_t remaining_size = size;
+        std::size_t page_index = src_addr >> PAGE_BITS;
+        std::size_t page_offset = src_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, src_addr, size);
+                std::memset(dest_buffer, 0, copy_amount);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
+                std::memcpy(dest_buffer, host_ptr, copy_amount);
+                break;
+            }
```
```diff
@@ -261,6 +306,10 @@ struct Memory::Impl {
         ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
     }
 
+    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+    }
+
     void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                     const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
```
```diff
@@ -290,7 +339,50 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                 std::memcpy(host_ptr, src_buffer, copy_amount);
                 break;
             }
             default:
                 UNREACHABLE();
             }
 
             page_index++;
             page_offset = 0;
             src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
             remaining_size -= copy_amount;
         }
     }
 
+    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
+                          const void* src_buffer, const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+        std::size_t remaining_size = size;
+        std::size_t page_index = dest_addr >> PAGE_BITS;
+        std::size_t page_offset = dest_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, dest_addr, size);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_ptr, src_buffer, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
+                std::memcpy(host_ptr, src_buffer, copy_amount);
+                break;
+            }
```
```diff
@@ -309,6 +401,10 @@ struct Memory::Impl {
         WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
     }
 
+    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
+        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+    }
+
     void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
         std::size_t remaining_size = size;
```
```diff
@@ -337,7 +433,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                 std::memset(host_ptr, 0, copy_amount);
                 break;
             }
```
```diff
@@ -384,7 +480,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 WriteBlock(process, dest_addr, host_ptr, copy_amount);
                 break;
             }
```
```diff
@@ -545,7 +641,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().FlushRegion(vaddr, sizeof(T));
             T value;
             std::memcpy(&value, host_ptr, sizeof(T));
             return value;
```
```diff
@@ -587,7 +683,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
             std::memcpy(host_ptr, &data, sizeof(T));
             break;
         }
```
```diff
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
     impl->ReadBlock(src_addr, dest_buffer, size);
 }
 
+void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
+                             void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
+}
+
+void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
+}
+
 void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                         std::size_t size) {
     impl->WriteBlock(process, dest_addr, src_buffer, size);
```
```diff
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
     impl->WriteBlock(dest_addr, src_buffer, size);
 }
 
+void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
+                              const void* src_buffer, std::size_t size) {
+    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
+}
+
+void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
+                              const std::size_t size) {
+    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
+}
+
 void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
     impl->ZeroBlock(process, dest_addr, size);
 }
```
```diff
@@ -294,6 +294,27 @@ public:
     void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
                    std::size_t size);
 
+    /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                         std::size_t size);
+
     /**
      * Reads a contiguous block of bytes from the current process' address space.
      *
@@ -312,6 +333,25 @@ public:
     */
     void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 
+    /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
+
     /**
      * Writes a range of bytes into a given process' address space at the specified
      * virtual address.
@@ -335,6 +375,26 @@ public:
     void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                     std::size_t size);
 
+    /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                          std::size_t size);
+
     /**
      * Writes a range of bytes into the current process' address space at the specified
      * virtual address.
@@ -356,6 +416,24 @@ public:
     */
     void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
 
+    /**
+     * Writes a range of bytes into the current process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the current process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the current process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
+
     /**
      * Fills the specified address range within a process' address space with zeroes.
     *
```
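A short sketch of the contract difference documented above (the caller code is hypothetical):

```cpp
// ReadBlock flushes the GPU cache for rasterizer-cached pages first, so it
// observes GPU-modified data. ReadBlockUnsafe skips that flush and is only
// appropriate when the caller already guarantees coherency, as the buffer
// cache below does when staging uploads.
std::vector<u8> buffer(size);
memory.ReadBlock(src_addr, buffer.data(), buffer.size());       // safe: flushes GPU
memory.ReadBlockUnsafe(src_addr, buffer.data(), buffer.size()); // fast: no GPU flush
```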
```diff
@@ -148,6 +148,7 @@ add_library(video_core STATIC
     textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
+    textures/texture.cpp
     textures/texture.h
     video_core.cpp
     video_core.h
```
```diff
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
```
```diff
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
```
```diff
@@ -37,28 +38,45 @@ public:
                                          bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
+                    }
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                 }
             }
         }
 
-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
             }
         }
 
-        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
 
         return {ToHandle(block), offset};
     }
```
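The upload pattern introduced above, isolated into one helper for clarity (a sketch only; `UploadGpuRange` and its parameters are stand-ins for the members used in UploadMemory):

```cpp
// If the GPU range is backed by one contiguous host allocation, upload
// straight from the host pointer; otherwise gather it through a staging
// buffer with ReadBlockUnsafe (no GPU flush is needed, since the cache
// handles coherency itself).
template <typename UploadFn>
void UploadGpuRange(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, std::size_t size,
                    std::vector<u8>& staging_buffer, UploadFn&& upload) {
    if (memory_manager.IsGranularRange(gpu_addr, size)) {
        upload(memory_manager.GetPointer(gpu_addr));
    } else {
        staging_buffer.resize(size);
        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
        upload(staging_buffer.data());
    }
}
```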
```diff
@@ -112,7 +130,7 @@ public:
     }
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
```
```diff
@@ -152,7 +170,7 @@ protected:
 
     virtual void WriteBarrier() = 0;
 
-    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
     virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
```
```diff
@@ -169,20 +187,17 @@ protected:
 
     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const CacheAddr cache_ptr = new_map->GetStart();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
-        if (!cache_ptr || !cpu_addr) {
+        const VAddr cpu_addr = new_map->GetStart();
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                          new_map->GetGpuAddress());
             return;
         }
         const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->SetCpuAddress(*cpu_addr);
         new_map->MarkAsRegistered(true);
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
     /// Unregisters an object from the cache
     void Unregister(MapInterval& map) {
         const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
         map->MarkAsRegistered(false);
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
```
```diff
@@ -202,32 +217,39 @@ protected:
     }
 
 private:
-    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
         return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
     }
 
-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
-                           const CacheAddr cache_addr, const std::size_t size) {
-
-        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            const CacheAddr cache_addr_end = cache_addr + size;
-            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
-            u8* host_ptr = FromCacheAddr(cache_addr);
-            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            auto& memory_manager = system.GPU().MemoryManager();
+            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
+            if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            } else {
+                staging_buffer.resize(size);
+                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+            }
             Register(new_map);
             return new_map;
         }
 
-        const CacheAddr cache_addr_end = cache_addr + size;
+        const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
             MapInterval& current_map = overlaps[0];
-            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
         }
-        CacheAddr new_start = cache_addr;
-        CacheAddr new_end = cache_addr_end;
+        VAddr new_start = cpu_addr;
+        VAddr new_end = cpu_addr_end;
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
```
```diff
@@ -237,7 +259,7 @@ private:
             write_inheritance |= overlap->IsWritten();
             modified_inheritance |= overlap->IsModified();
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
```
```diff
@@ -250,7 +272,7 @@ private:
         return new_map;
     }
 
-    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
                      std::vector<MapInterval>& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
         for (auto& interval : interval_set) {
             std::size_t size = interval.upper() - interval.lower();
             if (size > 0) {
-                u8* host_ptr = FromCacheAddr(interval.lower());
-                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
             }
         }
     }
 
-    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
         if (size == 0) {
             return {};
         }
```
```diff
@@ -290,8 +314,9 @@ private:
     void FlushMap(MapInterval map) {
         std::size_t size = map->GetEnd() - map->GetStart();
         TBuffer block = blocks[map->GetStart() >> block_page_bits];
-        u8* host_ptr = FromCacheAddr(map->GetStart());
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        staging_buffer.resize(size);
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
```
```diff
@@ -316,14 +341,14 @@ private:
     TBuffer EnlargeBlock(TBuffer buffer) {
         const std::size_t old_size = buffer->GetSize();
         const std::size_t new_size = old_size + block_page_size;
-        const CacheAddr cache_addr = buffer->GetCacheAddr();
-        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
         CopyBlock(buffer, new_buffer, 0, 0, old_size);
         buffer->SetEpoch(epoch);
         pending_destruction.push_back(buffer);
-        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -334,9 +359,9 @@ private:
     TBuffer MergeBlocks(TBuffer first, TBuffer second) {
         const std::size_t size_1 = first->GetSize();
         const std::size_t size_2 = second->GetSize();
-        const CacheAddr first_addr = first->GetCacheAddr();
-        const CacheAddr second_addr = second->GetCacheAddr();
-        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
+        const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
         TBuffer new_buffer = CreateBlock(new_addr, new_size);
         CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
         second->SetEpoch(epoch);
         pending_destruction.push_back(first);
         pending_destruction.push_back(second);
-        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        const VAddr cpu_addr_end = new_addr + new_size - 1;
         u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
```
```diff
@@ -355,18 +380,18 @@ private:
         return new_buffer;
     }
 
-    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
         TBuffer found{};
-        const CacheAddr cache_addr_end = cache_addr + size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
                 } else {
-                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    const VAddr start_addr = (page_start << block_page_bits);
                     found = CreateBlock(start_addr, block_page_size);
                     blocks[page_start] = found;
                 }
```
```diff
@@ -386,7 +411,7 @@ private:
         return found;
     }
 
-    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
         }
     }
 
-    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
```
```diff
@@ -416,7 +441,7 @@ private:
        }
     }
 
-    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
```
```diff
@@ -440,8 +465,8 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;
 
-    using IntervalSet = boost::icl::interval_set<CacheAddr>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
     using IntervalType = typename IntervalCache::interval_type;
     IntervalCache mapped_addresses;
 
@@ -456,6 +481,8 @@ private:
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
+    std::vector<u8> staging_buffer;
+
     std::recursive_mutex mutex;
 };
```
```diff
@@ -11,7 +11,7 @@ namespace VideoCommon {
 
 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}
 
     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }
 
@@ -46,11 +46,11 @@ public:
         return is_registered;
     }
 
-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }
 
-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }
 
@@ -76,8 +76,8 @@ public:
     }
 
 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
```
```diff
@@ -1712,6 +1712,7 @@ public:
         BRK,
         DEPBAR,
         VOTE,
+        VOTE_VTG,
         SHFL,
         FSWZADD,
         BFE_C,
@@ -1758,6 +1759,7 @@ public:
         IPA,
         OUT_R, // Emit vertex/primitive
         ISBERD,
+        BAR,
         MEMBAR,
         VMAD,
         VSETP,
@@ -1842,7 +1844,7 @@ public:
         MOV_C,
         MOV_R,
         MOV_IMM,
-        MOV_SYS,
+        S2R,
         MOV32_IMM,
         SHL_C,
         SHL_R,
@@ -2026,6 +2028,7 @@ private:
            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+           INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
            INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
            INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2063,6 +2066,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+           INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
@@ -2134,7 +2138,7 @@ private:
            INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
            INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
            INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-           INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
+           INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
            INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
            INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
            INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
```
```diff
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <array>
+#include <optional>
+
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -16,7 +19,7 @@ enum class OutputTopology : u32 {
     TriangleStrip = 7,
 };
 
-enum class AttributeUse : u8 {
+enum class PixelImap : u8 {
     Unused = 0,
     Constant = 1,
     Perspective = 2,
@@ -24,7 +27,7 @@ enum class AttributeUse : u8 {
 };
 
 // Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
 struct Header {
     union {
         BitField<0, 5, u32> sph_type;
@@ -59,8 +62,8 @@ struct Header {
     union {
         BitField<0, 12, u32> max_output_vertices;
         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<24, 4, u32> reserved;
-        BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
     } common4{};
 
     union {
@@ -93,17 +96,20 @@ struct Header {
         struct {
             INSERT_UNION_PADDING_BYTES(3);  // ImapSystemValuesA
             INSERT_UNION_PADDING_BYTES(1);  // ImapSystemValuesB
+
             union {
-                BitField<0, 2, AttributeUse> x;
-                BitField<2, 2, AttributeUse> y;
-                BitField<4, 2, AttributeUse> w;
-                BitField<6, 2, AttributeUse> z;
+                BitField<0, 2, PixelImap> x;
+                BitField<2, 2, PixelImap> y;
+                BitField<4, 2, PixelImap> z;
+                BitField<6, 2, PixelImap> w;
                 u8 raw;
             } imap_generic_vector[32];
+
             INSERT_UNION_PADDING_BYTES(2);  // ImapColor
             INSERT_UNION_PADDING_BYTES(2);  // ImapSystemValuesC
             INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
             INSERT_UNION_PADDING_BYTES(2);  // ImapReserved
+
             struct {
                 u32 target;
                 union {
@@ -112,31 +118,30 @@ struct Header {
                     BitField<2, 30, u32> reserved;
                 };
             } omap;
+
             bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
                 const u32 bit = render_target * 4 + component;
                 return omap.target & (1 << bit);
             }
-            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
-                return static_cast<AttributeUse>(
-                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
-            }
-            AttributeUse GetAttributeUse(u32 attribute) const {
-                AttributeUse result = AttributeUse::Unused;
-                for (u32 i = 0; i < 4; i++) {
-                    const auto index = GetAttributeIndexUse(attribute, i);
-                    if (index == AttributeUse::Unused) {
+
+            PixelImap GetPixelImap(u32 attribute) const {
+                const auto get_index = [this, attribute](u32 index) {
+                    return static_cast<PixelImap>(
+                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
+                };
+
+                std::optional<PixelImap> result;
+                for (u32 component = 0; component < 4; ++component) {
+                    const PixelImap index = get_index(component);
+                    if (index == PixelImap::Unused) {
                         continue;
                     }
-                    if (result == AttributeUse::Unused || result == index) {
-                        result = index;
-                        continue;
-                    }
-                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
-                    if (index == AttributeUse::Perspective) {
-                        result = index;
+                    if (result && result != index) {
+                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
                     }
+                    result = index;
                 }
-                return result;
+                return result.value_or(PixelImap::Unused);
             }
         } ps;
```
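A worked example of the 2-bit decode performed by get_index above (the raw value is made up):

```cpp
// Suppose imap_generic_vector[a].raw == 0b10'10'01'00:
//   x = (raw >> 0) & 3 = 0 -> PixelImap::Unused
//   y = (raw >> 2) & 3 = 1 -> PixelImap::Constant
//   z = (raw >> 4) & 3 = 2 -> PixelImap::Perspective
//   w = (raw >> 6) & 3 = 2 -> PixelImap::Perspective
// GetPixelImap would log a conflict (Constant vs. Perspective) and return
// the last non-Unused mode it saw, Perspective.
```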
```diff
@@ -270,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
```
```diff
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }
 
-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
```
```diff
@@ -27,9 +27,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
 protected:
```
```diff
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     renderer->SwapBuffers(framebuffer);
 }
 
-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
```
@@ -26,9 +26,9 @@ public:
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override {}

protected:

@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
}

void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size));
}

void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().InvalidateRegion(addr, size);
}

void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size);
}

@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {

/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

CacheAddr addr;
VAddr addr;
u64 size;
};

/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

CacheAddr addr;
VAddr addr;
u64 size;
};

/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {}

CacheAddr addr;
VAddr addr;
u64 size;
};

@@ -111,13 +111,13 @@ public:
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size);
void FlushRegion(VAddr addr, u64 size);

/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(CacheAddr addr, u64 size);
void InvalidateRegion(VAddr addr, u64 size);

/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
void FlushAndInvalidateRegion(VAddr addr, u64 size);

// Wait until the gpu thread is idle.
void WaitIdle() const;

@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & page_mask) == 0);

const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);

// Flush and invalidate through the GPU interface, to be asynchronous if possible.
system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);

UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()

@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
return {};
}

const u8* page_pointer{page_table.pointers[addr >> page_bits]};
const u8* page_pointer{GetPointer(addr)};
if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
std::memcpy(&value, page_pointer, sizeof(T));
return value;
}

@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
return;
}

u8* page_pointer{page_table.pointers[addr >> page_bits]};
u8* page_pointer{GetPointer(addr)};
if (page_pointer) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
std::memcpy(page_pointer, &data, sizeof(T));
return;
}

@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}

u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
auto& memory = system.Memory();

const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};

if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
}

LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);

@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {};
}

const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
const auto& memory = system.Memory();

const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};

if (page_addr != 0) {
return memory.GetPointer(page_addr + (addr & page_mask));
}

LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);

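The rewritten GetPointer no longer stores host pointers per GPU page; it keeps the guest CPU address and resolves the host pointer on demand. A simplified sketch of that two-step translation (page size and table layout here are illustrative assumptions, not the exact yuzu values):

```cpp
#include <cstdint>
#include <vector>

using VAddr = std::uint64_t;
using GPUVAddr = std::uint64_t;

constexpr std::uint64_t page_bits = 16; // hypothetical 64 KiB GPU pages
constexpr std::uint64_t page_mask = (std::uint64_t{1} << page_bits) - 1;

// Hypothetical GPU page table: the guest CPU address backing each GPU page,
// or 0 when the page is unmapped.
struct PageTable {
    std::vector<VAddr> backing_addr;
};

// GPU address -> guest CPU address, mirroring the backing_addr lookup above;
// the host pointer is then obtained from the CPU memory subsystem.
VAddr GpuToCpu(const PageTable& table, GPUVAddr addr) {
    const VAddr page_addr = table.backing_addr[addr >> page_bits];
    return page_addr != 0 ? page_addr + (addr & page_mask) : 0;
}
```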
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};

auto& memory = system.Memory();

while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};

switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
rasterizer.FlushRegion(src_addr, copy_amount);
memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
break;
}
default:

@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};

auto& memory = system.Memory();

while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
const u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
const u8* src_ptr{page_pointer + page_offset};
std::memcpy(dest_buffer, src_ptr, copy_amount);
const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
} else {
std::memset(dest_buffer, 0, copy_amount);
}

@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};

auto& memory = system.Memory();

while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};

switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
// Invalidate must happen on the rasterizer interface, such that memory is always
// synchronous when it is written (even when in asynchronous GPU mode).
rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
rasterizer.InvalidateRegion(dest_addr, copy_amount);
memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
break;
}
default:

@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};

auto& memory = system.Memory();

while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
u8* page_pointer = page_table.pointers[page_index];
if (page_pointer) {
u8* dest_ptr{page_pointer + page_offset};
std::memcpy(dest_ptr, src_buffer, copy_amount);
const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}
page_index++;
page_offset = 0;

@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
}

void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};

while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};

switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is copied (even when in asynchronous GPU mode).
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}

page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
std::vector<u8> tmp_buffer(size);
ReadBlock(src_addr, tmp_buffer.data(), size);
WriteBlock(dest_addr, tmp_buffer.data(), size);
}

void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {

@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
}

bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
const std::size_t page = (addr & Memory::PAGE_MASK) + size;
return page <= Memory::PAGE_SIZE;
}

void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr) {
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,

@@ -97,6 +97,11 @@ public:
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

/**
* IsGranularRange checks if a gpu region can be simply read with a pointer
*/
bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);

private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
using VMAHandle = VMAMap::const_iterator;

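IsGranularRange reduces to a page-crossing check: a range is "granular" when it fits inside one guest page, so its host backing is contiguous and can be read through a single pointer. A self-contained sketch of the same arithmetic (4 KiB page size assumed):

```cpp
#include <cstddef>
#include <cstdint>

constexpr std::uint64_t PAGE_SIZE = 0x1000; // assuming 4 KiB guest pages
constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

// True when [addr, addr + size) stays inside a single guest page,
// so the whole range can be read through one raw pointer.
bool IsGranular(std::uint64_t addr, std::size_t size) {
    return (addr & PAGE_MASK) + size <= PAGE_SIZE;
}
```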
@@ -98,12 +98,12 @@ public:
static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}

void InvalidateRegion(CacheAddr addr, std::size_t size) {
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}

void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}

@@ -117,14 +117,16 @@ public:
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr_opt);
VAddr cpu_addr = *cpu_addr_opt;

CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
CachedQuery* query = TryGet(cpu_addr);
if (!query) {
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
ASSERT_OR_EXECUTE(cpu_addr, return;);
ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);

query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
}

query->BindCounter(Stream(type).Current(), timestamp);

@@ -173,11 +175,11 @@ protected:

private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
const u64 addr_begin = static_cast<u64>(addr);
const u64 addr_end = addr_begin + static_cast<u64>(size);
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
const u64 cache_begin = query.GetCacheAddr();
const u64 cache_begin = query.GetCpuAddr();
const u64 cache_end = cache_begin + query.SizeInBytes();
return cache_begin < addr_end && addr_begin < cache_end;
};

@@ -193,7 +195,7 @@ private:
if (!in_range(query)) {
continue;
}
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush();
}
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),

@@ -204,22 +206,21 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}

/// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(CacheAddr addr) {
CachedQuery* TryGet(VAddr addr) {
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
}
auto& contents = it->second;
const auto found =
std::find_if(std::begin(contents), std::end(contents),
[addr](auto& query) { return query.GetCacheAddr() == addr; });
const auto found = std::find_if(std::begin(contents), std::end(contents),
[addr](auto& query) { return query.GetCpuAddr() == addr; });
return found != std::end(contents) ? &*found : nullptr;
}

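Register and TryGet above bucket cached queries by the page of their guest address, so a lookup only has to scan entries sharing that page. A minimal sketch of the scheme (the PAGE_SHIFT value is an assumption for illustration):

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::uint64_t PAGE_SHIFT = 12; // assuming 4 KiB pages

struct Entry {
    VAddr cpu_addr;
};

std::unordered_map<std::uint64_t, std::vector<Entry>> buckets;

// Exact-address lookup: hash on the page, then linearly scan that bucket.
Entry* TryGet(VAddr addr) {
    const auto it = buckets.find(addr >> PAGE_SHIFT);
    if (it == buckets.end()) {
        return nullptr;
    }
    for (Entry& entry : it->second) {
        if (entry.cpu_addr == addr) {
            return &entry;
        }
    }
    return nullptr;
}
```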
@@ -323,14 +324,10 @@ public:
timestamp = timestamp_;
}

VAddr CpuAddr() const noexcept {
VAddr GetCpuAddr() const noexcept {
return cpu_addr;
}

CacheAddr GetCacheAddr() const noexcept {
return ToCacheAddr(host_ptr);
}

u64 SizeInBytes() const noexcept {
return SizeInBytes(timestamp.has_value());
}

@@ -18,22 +18,14 @@

class RasterizerCacheObject {
public:
explicit RasterizerCacheObject(const u8* host_ptr)
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}

virtual ~RasterizerCacheObject();

CacheAddr GetCacheAddr() const {
return cache_addr;
VAddr GetCpuAddr() const {
return cpu_addr;
}

const u8* GetHostPtr() const {
return host_ptr;
}

/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetCpuAddr() const = 0;

/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;

@@ -68,8 +60,7 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
};

template <class T>

@@ -80,7 +71,7 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}

/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) {
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};

const auto& objects{GetSortedObjectsFromRegion(addr, size)};

@@ -90,7 +81,7 @@ public:
}

/// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) {
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};

const auto& objects{GetSortedObjectsFromRegion(addr, size)};

@@ -114,27 +105,20 @@ public:

protected:
/// Tries to get an object from the cache with the specified cache address
T TryGet(CacheAddr addr) const {
T TryGet(VAddr addr) const {
const auto iter = map_cache.find(addr);
if (iter != map_cache.end())
return iter->second;
return nullptr;
}

T TryGet(const void* addr) const {
const auto iter = map_cache.find(ToCacheAddr(addr));
if (iter != map_cache.end())
return iter->second;
return nullptr;
}

/// Register an object into the cache
virtual void Register(const T& object) {
std::lock_guard lock{mutex};

object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCacheAddr(), object});
map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
}

@@ -144,7 +128,7 @@ protected:

object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr();
const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr);
}

@@ -173,7 +157,7 @@ protected:

private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) {
return {};
}

@@ -197,13 +181,13 @@ private:
}

using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<CacheAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type;

static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetCacheAddr(),
object->GetCacheAddr() + object->GetSizeInBytes());
return ObjectInterval::right_open(object->GetCpuAddr(),
object->GetCpuAddr() + object->GetSizeInBytes());
}

ObjectCache map_cache;

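After this change the generic rasterizer cache is keyed purely by guest VAddr. A reduced sketch of the same register/lookup/invalidate shape, using a plain std::map in place of the boost interval map, to show what switching the key type entails:

```cpp
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <memory>

using VAddr = std::uint64_t;

struct CachedObject {
    VAddr cpu_addr;
    std::size_t size;
};

class MiniCache {
public:
    void Register(std::shared_ptr<CachedObject> object) {
        map_cache.emplace(object->cpu_addr, std::move(object));
    }

    std::shared_ptr<CachedObject> TryGet(VAddr addr) const {
        const auto it = map_cache.find(addr);
        return it != map_cache.end() ? it->second : nullptr;
    }

    // Drop every object overlapping [addr, addr + size).
    void InvalidateRegion(VAddr addr, std::size_t size) {
        for (auto it = map_cache.begin(); it != map_cache.end();) {
            const CachedObject& object = *it->second;
            const bool overlaps =
                object.cpu_addr < addr + size && addr < object.cpu_addr + object.size;
            it = overlaps ? map_cache.erase(it) : std::next(it);
        }
    }

private:
    std::map<VAddr, std::shared_ptr<CachedObject>> map_cache;
};
```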
@@ -53,14 +53,14 @@ public:
virtual void FlushAll() = 0;

/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushRegion(VAddr addr, u64 size) = 0;

/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;

/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;

/// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;

@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;

MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));

CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
}

@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}

Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cpu_addr, size);
}

void OGLBufferCache::WriteBarrier() {

@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf

class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock();

const GLuint* GetHandle() const {

@@ -55,7 +55,7 @@ public:
}

protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;

void WriteBarrier() override;

@@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
return bindings;
}

bool IsASTCSupported() {
static constexpr std::array formats = {
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
GLint supported;
glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
&supported);
return supported == GL_TRUE;
}) == formats.end();
}

} // Anonymous namespace

Device::Device() : base_bindings{BuildBaseBindings()} {

@@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();

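IsASTCSupported probes each ASTC internal format at runtime instead of trusting the extension string alone. The same probe generalizes to any format list; a hedged sketch (requires a current GL 4.3+ context and assumes the glad loader this tree already uses):

```cpp
#include <algorithm>
#include <array>

#include <glad/glad.h>

// True only if the driver reports support for every listed internal format.
bool AllFormatsSupported(const std::array<GLenum, 2>& formats) {
    return std::all_of(formats.begin(), formats.end(), [](GLenum format) {
        GLint supported = GL_FALSE;
        glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, &supported);
        return supported == GL_TRUE;
    });
}
```

A caller could, for example, pass one RGBA/sRGB pair such as GL_COMPRESSED_RGBA_ASTC_4x4_KHR and GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR to test a single block size.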
@@ -64,6 +64,10 @@ public:
return has_image_load_formatted;
}

bool HasASTC() const {
return has_astc;
}

bool HasVariableAoffi() const {
return has_variable_aoffi;
}

@@ -97,6 +101,7 @@ private:
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};

@@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {

texture_cache.GuardRenderTargets(true);

View depth_surface = texture_cache.GetDepthBufferSurface(true);
View depth_surface = texture_cache.GetDepthBufferSurface();

const auto& regs = gpu.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);

@@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
FramebufferCacheKey key;
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < colors_count; ++index) {
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
View color_surface{texture_cache.GetColorBufferSurface(index)};
if (!color_surface) {
continue;
}

@@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
texture_cache.GuardRenderTargets(true);
View color_surface;
if (using_color_fb) {
color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
const std::size_t index = regs.clear_buffers.RT;
color_surface = texture_cache.GetColorBufferSurface(index);
texture_cache.MarkColorBufferInUse(index);
}
View depth_surface;
if (using_depth_fb || using_stencil_fb) {
depth_surface = texture_cache.GetDepthBufferSurface(false);
depth_surface = texture_cache.GetDepthBufferSurface();
texture_cache.MarkDepthBufferInUse();
}
texture_cache.GuardRenderTargets(false);

@@ -653,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,

void RasterizerOpenGL::FlushAll() {}

void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.FlushRegion(addr, size);

@@ -663,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
query_cache.FlushRegion(addr, size);
}

void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (!addr || !size) {
if (addr == 0 || size == 0) {
return;
}
texture_cache.InvalidateRegion(addr, size);

@@ -674,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
query_cache.InvalidateRegion(addr, size);
}

void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::values.use_accurate_gpu_emulation) {
FlushRegion(addr, size);
}

@@ -713,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,

MICROPROFILE_SCOPE(OpenGL_CacheManagement);

const auto surface{
texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
if (!surface) {
return {};
}

@@ -65,9 +65,9 @@ public:
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void FlushAll() override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,

@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {

} // Anonymous namespace

CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
size_in_bytes{size_in_bytes}, program{std::move(program)} {}

CachedShader::~CachedShader() = default;

@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));

return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}

Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {

@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));

return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
return std::shared_ptr<CachedShader>(new CachedShader(
params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
}

Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
return std::shared_ptr<CachedShader>(
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
}

ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,

@@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const GPUVAddr address{GetShaderAddress(system, program)};

// Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(address)};
Shader shader{TryGet(host_ptr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}

const auto host_ptr{memory_manager.GetPointer(address)};

// No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b;

@@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {

const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};

const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};

const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {

@@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {

Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)};
auto kernel = TryGet(host_ptr);
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};

auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (kernel) {
return kernel;
}

const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};

const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};

const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {

@@ -65,11 +65,6 @@ public:
/// Gets the GL program handle for the shader
GLuint GetHandle() const;

/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}

/// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
return size_in_bytes;

@@ -90,13 +85,12 @@ public:
std::size_t size_in_bytes);

private:
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program);

std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program;
};

@@ -31,11 +31,11 @@ namespace {

using Tegra::Engines::ShaderType;
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;

@@ -702,20 +702,19 @@ private:
code.AddNewLine();
}

std::string GetInputFlags(AttributeUse attribute) {
const char* GetInputFlags(PixelImap attribute) {
switch (attribute) {
case AttributeUse::Perspective:
// Default, Smooth
return {};
case AttributeUse::Constant:
return "flat ";
case AttributeUse::ScreenLinear:
return "noperspective ";
default:
case AttributeUse::Unused:
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
return {};
case PixelImap::Perspective:
return "smooth";
case PixelImap::Constant:
return "flat";
case PixelImap::ScreenLinear:
return "noperspective";
case PixelImap::Unused:
break;
}
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
return {};
}

void DeclareInputAttributes() {

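The rewritten GetInputFlags is now a total switch over PixelImap returning the GLSL interpolation qualifier; "smooth" is the explicit spelling of the default perspective-correct mode. A standalone sketch of the same mapping:

```cpp
#include <cassert>

enum class PixelImap { Unused, Constant, Perspective, ScreenLinear };

// PixelImap -> GLSL interpolation qualifier.
const char* QualifierFor(PixelImap mode) {
    switch (mode) {
    case PixelImap::Perspective:
        return "smooth";
    case PixelImap::Constant:
        return "flat";
    case PixelImap::ScreenLinear:
        return "noperspective";
    case PixelImap::Unused:
        break;
    }
    assert(false && "unused attributes are never declared");
    return "";
}
```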
@@ -749,8 +748,8 @@ private:

std::string suffix;
if (stage == ShaderType::Fragment) {
const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
const auto input_mode{header.ps.GetPixelImap(location)};
if (input_mode == PixelImap::Unused) {
return;
}
suffix = GetInputFlags(input_mode);

@@ -927,7 +926,7 @@ private:
const u32 address{generic_base + index * generic_stride + element * element_stride};

const bool declared = stage != ShaderType::Fragment ||
header.ps.GetAttributeUse(index) != AttributeUse::Unused;
header.ps.GetPixelImap(index) != PixelImap::Unused;
const std::string value =
declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
code.AddLine("case 0x{:X}U: return {};", address, value);

@@ -1142,8 +1141,7 @@ private:
GetSwizzle(element)),
Type::Float};
case ShaderType::Fragment:
return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
Type::Float};
return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
default:
UNREACHABLE();
}

@@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource;
using VideoCore::MortonSwizzleMode;

using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;

@@ -37,102 +36,100 @@ namespace {

struct FormatTuple {
GLint internal_format;
GLenum format;
GLenum type;
bool compressed;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};

constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
{GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT, false}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F
{GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U
{GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI
{GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
{GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
{GL_COMPRESSED_RED_RGTC1}, // DXN1
{GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
{GL_R32F, GL_RED, GL_FLOAT}, // R32F
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
// Compressed sRGB formats
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F

// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16

// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
}};

const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
return format;
return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
}

GLenum GetTextureTarget(const SurfaceTarget& target) {

@@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte

} // Anonymous namespace

CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
: VideoCommon::SurfaceBase<View>(gpu_addr, params) {
const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
is_compressed = tuple.compressed;
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
bool is_astc_supported)
: VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
if (is_converted) {
internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
format = GL_RGBA;
type = GL_UNSIGNED_BYTE;
} else {
const auto& tuple{GetFormatTuple(params.pixel_format)};
internal_format = tuple.internal_format;
format = tuple.format;
type = tuple.type;
is_compressed = params.IsCompressed();
}
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();

@@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {

if (params.IsBuffer()) {
glGetNamedBufferSubData(texture_buffer.handle, 0,
static_cast<GLsizeiptr>(params.GetHostSizeInBytes()),
static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
staging_buffer.data());
return;
}

@@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });

for (u32 level = 0; level < params.emulated_levels; ++level) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);

u8* const mip_data = staging_buffer.data() + mip_offset;
const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {

@@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
}

void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));

auto compression_type = params.GetCompressionType();

const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
? params.GetConvertedMipmapOffset(level)
: params.GetHostMipmapLevelOffset(level);
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
const u8* buffer{staging_buffer.data() + mip_offset};
if (is_compressed) {
const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};

@@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
const Device& device, StateTracker& state_tracker)
: TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
: TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
src_framebuffer.Create();
dst_framebuffer.Create();
}

@@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
TextureCacheOpenGL::~TextureCacheOpenGL() = default;

Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
return std::make_shared<CachedSurface>(gpu_addr, params);
return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
}

void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,

@@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)

glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);

if (source_format.compressed) {
if (src_surface->IsCompressed()) {
glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
nullptr);
} else {

@@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
const GLsizei width = static_cast<GLsizei>(dst_params.width);
const GLsizei height = static_cast<GLsizei>(dst_params.height);
const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
if (dest_format.compressed) {
if (dst_surface->IsCompressed()) {
LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
UNREACHABLE();
} else {

@@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;

public:
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
~CachedSurface();

void UploadTexture(const std::vector<u8>& staging_buffer) override;

@@ -51,6 +51,10 @@ public:
return texture.handle;
}

bool IsCompressed() const {
return is_compressed;
}

protected:
void DecorateSurfaceName() override;

@@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>;
|
||||
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
|
||||
using UniqueImage = UniqueHandle<vk::Image>;
|
||||
using UniqueImageView = UniqueHandle<vk::ImageView>;
|
||||
using UniqueInstance = UniqueHandle<vk::Instance>;
|
||||
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
|
||||
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
|
||||
using UniquePipeline = UniqueHandle<vk::Pipeline>;
|
||||
@@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>;
|
||||
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
|
||||
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
|
||||
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
|
||||
using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>;
|
||||
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
|
||||
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
|
||||
using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
|
||||
|
||||
@@ -2,13 +2,18 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/dynamic_library.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/telemetry.h"
|
||||
#include "core/core.h"
|
||||
@@ -30,15 +35,30 @@
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
// ensure include order
|
||||
#include <vulkan/vulkan_win32.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <X11/Xlib.h>
|
||||
#include <vulkan/vulkan_wayland.h>
|
||||
#include <vulkan/vulkan_xlib.h>
|
||||
#endif
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
using Core::Frontend::WindowSystemType;
|
||||
|
||||
VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
|
||||
VkDebugUtilsMessageTypeFlagsEXT type,
|
||||
const VkDebugUtilsMessengerCallbackDataEXT* data,
|
||||
[[maybe_unused]] void* user_data) {
|
||||
const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
|
||||
const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)};
|
||||
const char* message{data->pMessage};
|
||||
|
||||
if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
|
||||
@@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
|
||||
return VK_FALSE;
|
||||
}
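The body of DebugCallback between these two hunks is elided by the compare view; the remaining severities are presumably routed to matching log levels. A minimal sketch of that dispatch, assuming yuzu's LOG_* macros and the Render_Vulkan channel used elsewhere in this diff (not the verbatim source):

    // Sketch only: the hunk header "@@ -53,6 +73,110" hides the real branches.
    if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
        LOG_CRITICAL(Render_Vulkan, "{}", message);
    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) {
        LOG_WARNING(Render_Vulkan, "{}", message);
    } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) {
        LOG_INFO(Render_Vulkan, "{}", message);
    } else {
        LOG_DEBUG(Render_Vulkan, "{}", message);
    }
    // Returning VK_FALSE tells the driver not to abort the call that triggered
    // the message, which is what validation layers expect from this callback.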

Common::DynamicLibrary OpenVulkanLibrary() {
Common::DynamicLibrary library;
#ifdef __APPLE__
// Check if a path to a specific Vulkan library has been specified.
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
library.Open(filename.c_str());
}
#endif
return library;
}
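OpenVulkanLibrary tries the versioned loader name first and falls back to the unversioned one. The filenames below illustrate what Common::DynamicLibrary::GetVersionedFilename is expected to resolve per platform; this is an assumption based on the Dolphin-derived dynamic_library code added in this change set, not output quoted from it:

    // Assumed per-platform results of GetVersionedFilename (illustration only):
    //   GetVersionedFilename("vulkan", 1) -> "vulkan-1.dll"   (Windows)
    //                                     -> "libvulkan.so.1" (Linux/Android)
    //   GetVersionedFilename("vulkan")    -> "vulkan.dll" / "libvulkan.so"
    Common::DynamicLibrary library = OpenVulkanLibrary();
    if (!library.IsOpen()) {
        LOG_ERROR(Render_Vulkan, "Unable to load the Vulkan loader");
    }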

UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld,
WindowSystemType window_type = WindowSystemType::Headless,
bool enable_layers = false) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
return UniqueInstance{};
}
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr;
if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) {
LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
return UniqueInstance{};
}
dld.init(vkGetInstanceProcAddr);

std::vector<const char*> extensions;
extensions.reserve(4);
switch (window_type) {
case Core::Frontend::WindowSystemType::Headless:
break;
#ifdef _WIN32
case Core::Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#ifdef __linux__
case Core::Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Core::Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_layers) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}

u32 num_properties;
if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties");
return UniqueInstance{};
}
std::vector<vk::ExtensionProperties> properties(num_properties);
if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(),
dld) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return UniqueInstance{};
}

for (const char* extension : extensions) {
const auto it =
std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) {
return !std::strcmp(extension, prop.extensionName);
});
if (it == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return UniqueInstance{};
}
}

const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
"yuzu Emulator", VK_MAKE_VERSION(0, 1, 0),
VK_API_VERSION_1_1);
const std::array layers = {"VK_LAYER_LUNARG_standard_validation"};
const vk::InstanceCreateInfo instance_ci(
{}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(),
static_cast<u32>(extensions.size()), extensions.data());
vk::Instance unsafe_instance;
if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
return UniqueInstance{};
}
dld.init(unsafe_instance);
return UniqueInstance(unsafe_instance, {nullptr, dld});
}
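Note the two-phase initialization of the dispatcher: before an instance exists only vkGetInstanceProcAddr and the global entry points can be resolved, so dld.init(vkGetInstanceProcAddr) runs first, and dld.init(unsafe_instance) re-resolves the instance-level pointers once creation succeeds. The same bootstrap in isolation, a minimal sketch reusing instance_ci and the function pointer loaded in the function above:

    // Minimal sketch of Vulkan-Hpp's two-phase dynamic-dispatch bootstrap.
    vk::DispatchLoaderDynamic dld;
    dld.init(vkGetInstanceProcAddr); // Phase 1: only global functions resolve.
    vk::Instance instance;
    if (vk::createInstance(&instance_ci, nullptr, &instance, dld) == vk::Result::eSuccess) {
        dld.init(instance);          // Phase 2: instance-level pointers load.
    }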

std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
@@ -147,27 +271,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
}

bool RendererVulkan::Init() {
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);

std::optional<vk::DebugUtilsMessengerEXT> callback;
if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
callback = CreateDebugCallback(dldi);
if (!callback) {
return false;
}
}

if (!PickDevices(dldi)) {
if (callback) {
instance.destroy(*callback, nullptr, dldi);
}
library = OpenVulkanLibrary();
instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
Settings::values.renderer_debug);
if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
return false;
}
debug_callback = UniqueDebugUtilsMessengerEXT(
*callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
instance, nullptr, device->GetDispatchLoader()));

Report();

@@ -176,7 +285,7 @@ bool RendererVulkan::Init() {
resource_manager = std::make_unique<VKResourceManager>(*device);

const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(surface, *device);
swapchain = std::make_unique<VKSwapchain>(*surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false);

state_tracker = std::make_unique<StateTracker>(system);
@@ -213,8 +322,10 @@ void RendererVulkan::ShutDown() {
device.reset();
}

std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi) {
bool RendererVulkan::CreateDebugCallback() {
if (!Settings::values.renderer_debug) {
return true;
}
const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
{},
vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
@@ -225,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
&DebugCallback, nullptr);
vk::DebugUtilsMessengerEXT callback;
if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
vk::DebugUtilsMessengerEXT unsafe_callback;
if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
return {};
return false;
}
return callback;
debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld});
return true;
}

bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
const auto devices = instance.enumeratePhysicalDevices(dldi);
bool RendererVulkan::CreateSurface() {
[[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
VkSurfaceKHR unsafe_surface = nullptr;

#ifdef _WIN32
if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
nullptr, 0, nullptr, hWnd};
const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr,
&unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
return false;
}
}
#endif
#ifdef __linux__
if (window_info.type == Core::Frontend::WindowSystemType::X11) {
const VkXlibSurfaceCreateInfoKHR xlib_ci{
VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr,
&unsafe_surface) != VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
return false;
}
}
if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
const VkWaylandSurfaceCreateInfoKHR wayland_ci{
VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
if (!vkCreateWaylandSurfaceKHR ||
vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) !=
VK_SUCCESS) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
return false;
}
}
#endif
if (!unsafe_surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
return false;
}

surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld});
return true;
}

bool RendererVulkan::PickDevices() {
const auto devices = instance->enumeratePhysicalDevices(dld);

// TODO(Rodrigo): Choose device from config file
const s32 device_index = Settings::values.vulkan_device;
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
}
const vk::PhysicalDevice physical_device = devices[device_index];
const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)];

if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
if (!VKDevice::IsSuitable(physical_device, *surface, dld)) {
return false;
}

device = std::make_unique<VKDevice>(dldi, physical_device, surface);
return device->Create(dldi, instance);
device = std::make_unique<VKDevice>(dld, physical_device, *surface);
return device->Create(*instance);
}

void RendererVulkan::Report() const {
@@ -276,4 +443,33 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}

std::vector<std::string> RendererVulkan::EnumerateDevices() {
// Avoid putting DispatchLoaderDynamic, it's too large
auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>();
auto& dld = *dld_memory;

Common::DynamicLibrary library = OpenVulkanLibrary();
UniqueInstance instance = CreateInstance(library, dld);
if (!instance) {
return {};
}

u32 num_devices;
if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) {
return {};
}
std::vector<vk::PhysicalDevice> devices(num_devices);
if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) !=
vk::Result::eSuccess) {
return {};
}

std::vector<std::string> names;
names.reserve(num_devices);
for (auto& device : devices) {
names.push_back(device.getProperties(dld).deviceName);
}
return names;
}
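Because EnumerateDevices is static and builds a throwaway headless instance of its own, a frontend can list adapters before any renderer exists. A hypothetical consumer; the Qt widget name is illustrative and not taken from this change:

    // Hypothetical settings-UI usage of the static device query.
    const std::vector<std::string> gpus = Vulkan::RendererVulkan::EnumerateDevices();
    for (const std::string& name : gpus) {
        // Settings::values.vulkan_device later indexes this same ordering
        // inside PickDevices().
        vulkan_device_combobox->addItem(QString::fromStdString(name));
    }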

} // namespace Vulkan

@@ -6,8 +6,11 @@

#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "common/dynamic_library.h"

#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/declarations.h"

@@ -44,18 +47,24 @@ public:
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
bool TryPresent(int timeout_ms) override;

private:
std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi);
static std::vector<std::string> EnumerateDevices();

bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
private:
bool CreateDebugCallback();

bool CreateSurface();

bool PickDevices();

void Report() const;

Core::System& system;

vk::Instance instance;
vk::SurfaceKHR surface;
Common::DynamicLibrary library;
vk::DispatchLoaderDynamic dld;

UniqueInstance instance;
UniqueSurfaceKHR surface;

VKScreenInfo screen_info;

@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
} // Anonymous namespace

CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S

VKBufferCache::~VKBufferCache() = default;

Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
}

const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {

@@ -30,7 +30,7 @@ class VKScheduler;
class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size);
VAddr cpu_addr, std::size_t size);
~CachedBufferBlock();

const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
protected:
void WriteBarrier() override {}

Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;

const vk::Buffer* ToHandle(const Buffer& buffer) override;

@@ -10,6 +10,7 @@
#include <string_view>
#include <thread>
#include <vector>

#include "common/assert.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) {
}

template <typename T>
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
vk::PhysicalDeviceFeatures2 features;
T extension_features;
features.pNext = &extension_features;
physical.getFeatures2(&features, dldi);
physical.getFeatures2(&features, dld);
return extension_features;
}

template <typename T>
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) {
vk::PhysicalDeviceProperties2 properties;
T extension_properties;
properties.pNext = &extension_properties;
physical.getProperties2(&properties, dldi);
physical.getProperties2(&properties, dld);
return extension_properties;
}
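Both helpers lean on Vulkan's pNext chaining: the extension struct is linked into the base query struct so a single getFeatures2 or getProperties2 call fills both. For instance, mirroring a use that appears further down in this diff:

    // Query an extension feature struct through the pNext chain helper above.
    const auto float16_int8 =
        GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld);
    if (float16_int8.shaderFloat16) {
        // The device can execute SPIR-V code that uses 16-bit floats.
    }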

@@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format

} // Anonymous namespace

VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, properties{physical.getProperties(dldi)},
format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupFeatures(dldi);
: dld{dld}, physical{physical}, properties{physical.getProperties(dld)},
format_properties{GetFormatProperties(dld, physical)} {
SetupFamilies(surface);
SetupFeatures();
}

VKDevice::~VKDevice() = default;

bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
bool VKDevice::Create(vk::Instance instance) {
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions(dldi);
const std::vector extensions = LoadExtensions();

vk::PhysicalDeviceFeatures2 features2;
void** next = &features2.pNext;
@@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
nullptr);
device_ci.pNext = &features2;

vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
vk::Device unsafe_logical;
if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
return false;
}

dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical);
logical = UniqueDevice(unsafe_logical, {nullptr, dld});

CollectTelemetryParameters();

@@ -235,20 +234,23 @@ void VKDevice::ReportLoss() const {
// *(VKGraphicsPipeline*)data[0]
}

bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const {
bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const {
// Disable for now to avoid converting ASTC twice.
return false;
static constexpr std::array astc_formats = {
vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock,
vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock};
vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock,
vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock,
vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock};
if (!features.textureCompressionASTC_LDR) {
return false;
}
@@ -257,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
for (const auto format : astc_formats) {
const auto format_properties{physical.getFormatProperties(format, dldi)};
const auto format_properties{physical.getFormatProperties(format, dld)};
if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
return false;
}
@@ -276,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
return (supported_usage & wanted_usage) == wanted_usage;
}

bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
bool is_suitable = true;

constexpr std::array required_extensions = {
bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
const vk::DispatchLoaderDynamic& dld) {
static constexpr std::array required_extensions = {
VK_KHR_SWAPCHAIN_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
@@ -290,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};
bool is_suitable = true;
std::bitset<required_extensions.size()> available_extensions{};

for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
for (std::size_t i = 0; i < required_extensions.size(); ++i) {
if (available_extensions[i]) {
continue;
@@ -312,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}

bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
if (family.queueCount == 0) {
@@ -320,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0;
}
if (!has_graphics || !has_present) {
LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
@@ -328,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
}

// TODO(Rodrigo): Check if the device matches all requeriments.
const auto properties{physical.getProperties(dldi)};
const auto properties{physical.getProperties(dld)};
const auto& limits{properties.limits};

constexpr u32 required_ubo_size = 65536;
@@ -345,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
is_suitable = false;
}

const auto features{physical.getFeatures(dldi)};
const auto features{physical.getFeatures(dld)};
const std::array feature_report = {
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
std::make_pair(features.independentBlend, "independentBlend"),
@@ -377,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
return is_suitable;
}

std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> VKDevice::LoadExtensions() {
std::vector<const char*> extensions;
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
@@ -408,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -430,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami

if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}

if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld);
const auto properties =
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld);

is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;

@@ -453,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami

if (has_ext_transform_feedback) {
const auto features =
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld);
const auto properties =
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld);

if (features.transformFeedback && features.geometryStreams &&
properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
@@ -468,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
return extensions;
}

void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
void VKDevice::SetupFamilies(vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;

const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
const auto queue_family_properties = physical.getQueueFamilyProperties(dld);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
if (graphics_family_ && present_family_)
break;
@@ -480,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
if (queue_family.queueCount == 0)
continue;

if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) {
graphics_family_ = i;
if (physical.getSurfaceSupportKHR(i, surface, dldi))
}
if (physical.getSurfaceSupportKHR(i, surface, dld)) {
present_family_ = i;
}
}
ASSERT(graphics_family_ && present_family_);

@@ -491,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
present_family = *present_family_;
}

void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
void VKDevice::SetupFeatures() {
const auto supported_features{physical.getFeatures(dld)};
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
}

void VKDevice::CollectTelemetryParameters() {
@@ -522,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
}

std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) {
static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
vk::Format::eA8B8G8R8UintPack32,
vk::Format::eA8B8G8R8SnormPack32,
@@ -572,28 +575,38 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eBc2SrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4UnormBlock,
vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc5x4UnormBlock,
vk::Format::eAstc5x4SrgbBlock,
vk::Format::eAstc5x5UnormBlock,
vk::Format::eAstc5x5SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc6x5UnormBlock,
vk::Format::eAstc6x5SrgbBlock,
vk::Format::eAstc6x6UnormBlock,
vk::Format::eAstc6x6SrgbBlock,
vk::Format::eAstc8x5UnormBlock,
vk::Format::eAstc8x5SrgbBlock,
vk::Format::eAstc8x6UnormBlock,
vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc8x8UnormBlock,
vk::Format::eAstc8x8SrgbBlock,
vk::Format::eAstc10x5UnormBlock,
vk::Format::eAstc10x5SrgbBlock,
vk::Format::eAstc10x6UnormBlock,
vk::Format::eAstc10x6SrgbBlock,
vk::Format::eAstc10x8UnormBlock,
vk::Format::eAstc10x8SrgbBlock,
vk::Format::eAstc10x10UnormBlock,
vk::Format::eAstc10x10SrgbBlock,
vk::Format::eAstc12x10UnormBlock,
vk::Format::eAstc12x10SrgbBlock,
vk::Format::eAstc12x12UnormBlock,
vk::Format::eAstc12x12SrgbBlock,
vk::Format::eE5B9G9R9UfloatPack32};
std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
format_properties.emplace(format, physical.getFormatProperties(format, dld));
}
return format_properties;
}
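Querying every format of interest once and caching the vk::FormatProperties in an unordered_map turns later support checks into hash lookups instead of repeated driver calls. The lookup side is mostly elided above; simplified to optimal tiling, it plausibly reduces to this sketch:

    // Sketch of the lookup side of the cache built by GetFormatProperties();
    // format_properties is the member initialized in VKDevice's constructor.
    const auto it = format_properties.find(wanted_format);
    ASSERT_MSG(it != format_properties.end(), "Format not in the static query list");
    const vk::FormatFeatureFlags supported = it->second.optimalTilingFeatures;
    const bool is_supported = (supported & wanted_usage) == wanted_usage;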

@@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32;
/// Handles data specific to a physical device.
class VKDevice final {
public:
explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
~VKDevice();

/// Initializes the device. Returns true on success.
bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
bool Create(vk::Instance instance);

/**
* Returns a format supported by the device for the passed requeriments.
@@ -188,18 +188,18 @@ public:
}

/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface,
const vk::DispatchLoaderDynamic& dld);

private:
/// Loads extensions into a vector and stores available ones in this object.
std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
std::vector<const char*> LoadExtensions();

/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
void SetupFamilies(vk::SurfaceKHR surface);

/// Sets up device features.
void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
void SetupFeatures();

/// Collects telemetry information from the device.
void CollectTelemetryParameters();
@@ -208,8 +208,7 @@ private:
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
const vk::DispatchLoaderDynamic& dldi) const;
bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const;

/// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
@@ -217,10 +216,10 @@ private:

/// Returns the device properties for Vulkan formats.
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical);

const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.

@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace

CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, registry},
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
u32 main_offset)
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}

CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {

auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};

// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);

const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);

shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
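This hunk, and the compute-pipeline one below, switch the shader cache key from host pointers to guest CPU addresses: every lookup first translates the program's GPU address, and only a miss pays for pointer retrieval and shader construction. The shared pattern, condensed:

    // Condensed sketch of the VAddr-keyed lookup used by GetShaders() and
    // GetComputePipeline() in this diff.
    const GPUVAddr program_addr = GetShaderAddress(system, program);
    const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
    ASSERT(cpu_addr);
    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; // Cache hit: no decoding.
    if (!shader) {
        const u8* host_ptr = memory_manager.GetPointer(program_addr); // Miss only.
        // ... decode program_code from host_ptr and Register() a CachedShader
        // keyed on *cpu_addr ...
    }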
|
||||
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto program_addr = key.shader;
|
||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||
|
||||
auto shader = TryGet(host_ptr);
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
|
||||
if (!shader) {
|
||||
// No shader found - create a new one
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||
|
||||
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
|
||||
constexpr u32 kernel_main_offset = 0;
|
||||
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
||||
program_addr, *cpu_addr, host_ptr, std::move(code),
|
||||
program_addr, *cpu_addr, std::move(code),
|
||||
kernel_main_offset);
|
||||
Register(shader);
|
||||
}
|
||||
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
||||
}
|
||||
|
||||
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
const auto shader = TryGet(host_ptr);
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
const auto shader = TryGet(*cpu_addr);
|
||||
ASSERT(shader);
|
||||
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
||||
|
||||
@@ -113,17 +113,13 @@ namespace Vulkan {
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
|
||||
VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
|
||||
~CachedShader();
|
||||
|
||||
GPUVAddr GetGpuAddr() const {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
VAddr GetCpuAddr() const override {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return program_code.size() * sizeof(u64);
|
||||
}
|
||||
@@ -149,7 +145,6 @@ private:
|
||||
Tegra::Engines::ShaderType stage);
|
||||
|
||||
GPUVAddr gpu_addr{};
|
||||
VAddr cpu_addr{};
|
||||
ProgramCode program_code;
|
||||
VideoCommon::Shader::Registry registry;
|
||||
VideoCommon::Shader::ShaderIR shader_ir;
|
||||
|
||||
@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||
|
||||
void RasterizerVulkan::FlushAll() {}
|
||||
|
||||
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
|
||||
void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.FlushRegion(addr, size);
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
||||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.InvalidateRegion(addr, size);
|
||||
pipeline_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||
FlushRegion(addr, size);
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
return false;
|
||||
}
|
||||
|
||||
const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
|
||||
const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
|
||||
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
|
||||
if (!surface) {
|
||||
return false;
|
||||
}
|
||||
@@ -594,7 +599,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
|
||||
Texceptions texceptions;
|
||||
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
|
||||
if (update_rendertargets) {
|
||||
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
|
||||
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
|
||||
}
|
||||
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
|
||||
texceptions[rt] = true;
|
||||
@@ -602,7 +607,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
|
||||
}
|
||||
|
||||
if (update_rendertargets) {
|
||||
zeta_attachment = texture_cache.GetDepthBufferSurface(true);
|
||||
zeta_attachment = texture_cache.GetDepthBufferSurface();
|
||||
}
|
||||
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
|
||||
texceptions[ZETA_TEXCEPTION_INDEX] = true;
|
||||
|
||||
@@ -118,9 +118,9 @@ public:
|
||||
void ResetCounter(VideoCore::QueryType type) override;
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
|
||||
void FlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
|
||||
@@ -35,7 +35,7 @@ namespace {
|
||||
using Sirit::Id;
|
||||
using Tegra::Engines::ShaderType;
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::AttributeUse;
|
||||
using Tegra::Shader::PixelImap;
|
||||
using Tegra::Shader::Register;
|
||||
using namespace VideoCommon::Shader;
|
||||
|
||||
@@ -752,16 +752,16 @@ private:
|
||||
if (stage != ShaderType::Fragment) {
|
||||
continue;
|
||||
}
|
||||
switch (header.ps.GetAttributeUse(location)) {
|
||||
case AttributeUse::Constant:
|
||||
switch (header.ps.GetPixelImap(location)) {
|
||||
case PixelImap::Constant:
|
||||
Decorate(id, spv::Decoration::Flat);
|
||||
break;
|
||||
case AttributeUse::ScreenLinear:
|
||||
Decorate(id, spv::Decoration::NoPerspective);
|
||||
break;
|
||||
case AttributeUse::Perspective:
|
||||
case PixelImap::Perspective:
|
||||
// Default
|
||||
break;
|
||||
case PixelImap::ScreenLinear:
|
||||
Decorate(id, spv::Decoration::NoPerspective);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unused attribute being fetched");
|
||||
}
|
||||
@@ -1145,9 +1145,6 @@ private:
|
||||
switch (attribute) {
|
||||
case Attribute::Index::Position: {
|
||||
if (stage == ShaderType::Fragment) {
|
||||
if (element == 3) {
|
||||
return {Constant(t_float, 1.0f), Type::Float};
|
||||
}
|
||||
return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
|
||||
Type::Float};
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode;
|
||||
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::SurfaceCompression;
|
||||
using VideoCore::Surface::SurfaceTarget;
|
||||
|
||||
namespace {
|
||||
@@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) {
|
||||
return {};
|
||||
}
|
||||
|
||||
UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
|
||||
UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
|
||||
std::size_t host_memory_size) {
|
||||
// TODO(Rodrigo): Move texture buffer creation to the buffer cache
|
||||
const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(),
|
||||
const vk::BufferCreateInfo buffer_ci({}, host_memory_size,
|
||||
vk::BufferUsageFlagBits::eUniformTexelBuffer |
|
||||
vk::BufferUsageFlagBits::eTransferSrc |
|
||||
vk::BufferUsageFlagBits::eTransferDst,
|
||||
@@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
|
||||
|
||||
vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
|
||||
const SurfaceParams& params,
|
||||
vk::Buffer buffer) {
|
||||
                                                  vk::Buffer buffer,
                                                  std::size_t host_memory_size) {
    ASSERT(params.IsBuffer());

    const auto format =
        MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
    return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes());
    return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size);
}

vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {

@@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
                             VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
                             GPUVAddr gpu_addr, const SurfaceParams& params)
    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
      staging_pool{staging_pool} {
    : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
      device{device}, resource_manager{resource_manager},
      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
    if (params.IsBuffer()) {
        buffer = CreateBuffer(device, params);
        buffer = CreateBuffer(device, params, host_memory_size);
        commit = memory_manager.Commit(*buffer, false);

        const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
        const auto buffer_view_ci =
            GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
        format = buffer_view_ci.format;

        const auto dev = device.GetLogical();

@@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
    std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);

    scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
                      size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
                      size = host_memory_size](auto cmdbuf, auto& dld) {
        const vk::BufferCopy copy(0, 0, size);
        cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);

@@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {

vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
    const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
    const auto compression_type = params.GetCompressionType();
    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
                                       ? params.GetConvertedMipmapOffset(level)
                                       : params.GetHostMipmapLevelOffset(level);
    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);

    return vk::BufferImageCopy(
        mip_offset, 0, 0,

@@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf
                               const VKDevice& device, VKResourceManager& resource_manager,
                               VKMemoryManager& memory_manager, VKScheduler& scheduler,
                               VKStagingBufferPool& staging_pool)
    : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
    : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
      staging_pool{staging_pool} {}

VKTextureCache::~VKTextureCache() = default;
@@ -13,13 +13,247 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/textures/texture.h"

namespace VideoCommon::Shader {

using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::PredCondition;
using Tegra::Shader::StoreType;
using Tegra::Texture::ComponentType;
using Tegra::Texture::TextureFormat;
using Tegra::Texture::TICEntry;

namespace {

ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                               std::size_t component) {
    const TextureFormat format{descriptor.format};
    switch (format) {
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        if (component == 0) {
            return descriptor.r_type;
        }
        if (component == 1) {
            return descriptor.g_type;
        }
        if (component == 2) {
            return descriptor.b_type;
        }
        if (component == 3) {
            return descriptor.a_type;
        }
        break;
    case TextureFormat::A8R8G8B8:
        if (component == 0) {
            return descriptor.a_type;
        }
        if (component == 1) {
            return descriptor.r_type;
        }
        if (component == 2) {
            return descriptor.g_type;
        }
        if (component == 3) {
            return descriptor.b_type;
        }
        break;
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        if (component == 0) {
            return descriptor.a_type;
        }
        if (component == 1) {
            return descriptor.b_type;
        }
        if (component == 2) {
            return descriptor.g_type;
        }
        if (component == 3) {
            return descriptor.r_type;
        }
        break;
    case TextureFormat::R32_B24G8:
        if (component == 0) {
            return descriptor.r_type;
        }
        if (component == 1) {
            return descriptor.b_type;
        }
        if (component == 2) {
            return descriptor.g_type;
        }
        break;
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        if (component == 0) {
            return descriptor.b_type;
        }
        if (component == 1) {
            return descriptor.g_type;
        }
        if (component == 2) {
            return descriptor.r_type;
        }
        break;
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        if (component == 0) {
            return descriptor.g_type;
        }
        if (component == 1) {
            return descriptor.r_type;
        }
        break;
    }
    UNIMPLEMENTED_MSG("texture format not implemented={}", format);
    return ComponentType::FLOAT;
}
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 R = 0b0001;
    constexpr u8 G = 0b0010;
    constexpr u8 B = 0b0100;
    constexpr u8 A = 0b1000;
    constexpr std::array<u8, 16> mask = {
        0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
    return std::bitset<4>{mask.at(component_mask)}.test(component);
}
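An observation on IsComponentEnabled above: entry i of the 16-entry mask table is just the value i, so the lookup is equivalent to a plain bit test. A standalone sketch verifying that equivalence (illustrative only, not part of the change):

#include <array>
#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
    constexpr std::uint8_t R = 0b0001, G = 0b0010, B = 0b0100, A = 0b1000;
    constexpr std::array<std::uint8_t, 16> mask = {
        0, R, G, R | G, B, R | B, G | B, R | G | B,
        A, R | A, G | A, R | G | A, B | A, R | B | A, G | B | A, R | G | B | A};
    for (std::size_t component_mask = 0; component_mask < mask.size(); ++component_mask) {
        for (std::size_t component = 0; component < 4; ++component) {
            const bool via_table = std::bitset<4>{mask[component_mask]}.test(component);
            const bool via_bit = ((component_mask >> component) & 1) != 0;
            assert(via_table == via_bit); // the table encodes the identity mapping
        }
    }
}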
u32 GetComponentSize(TextureFormat format, std::size_t component) {
    switch (format) {
    case TextureFormat::R32_G32_B32_A32:
        return 32;
    case TextureFormat::R16_G16_B16_A16:
        return 16;
    case TextureFormat::R32_G32_B32:
        return component <= 2 ? 32 : 0;
    case TextureFormat::R32_G32:
        return component <= 1 ? 32 : 0;
    case TextureFormat::R16_G16:
        return component <= 1 ? 16 : 0;
    case TextureFormat::R32:
        return component == 0 ? 32 : 0;
    case TextureFormat::R16:
        return component == 0 ? 16 : 0;
    case TextureFormat::R8:
        return component == 0 ? 8 : 0;
    case TextureFormat::R1:
        return component == 0 ? 1 : 0;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A2B10G10R10:
        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
    case TextureFormat::A4B4G4R4:
        return 4;
    case TextureFormat::A5B5G5R1:
        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
    case TextureFormat::R32_B24G8:
        if (component == 0) {
            return 32;
        }
        if (component == 1) {
            return 24;
        }
        if (component == 2) {
            return 8;
        }
        return 0;
    case TextureFormat::B5G6R5:
        if (component == 0 || component == 2) {
            return 5;
        }
        if (component == 1) {
            return 6;
        }
        return 0;
    case TextureFormat::B6G5R5:
        if (component == 1 || component == 2) {
            return 5;
        }
        if (component == 0) {
            return 6;
        }
        return 0;
    case TextureFormat::G8R24:
        if (component == 0) {
            return 8;
        }
        if (component == 1) {
            return 24;
        }
        return 0;
    case TextureFormat::G24R8:
        if (component == 0) {
            return 8;
        }
        if (component == 1) {
            return 24;
        }
        return 0;
    case TextureFormat::G8R8:
        return (component == 0 || component == 1) ? 8 : 0;
    case TextureFormat::G4R4:
        return (component == 0 || component == 1) ? 4 : 0;
    default:
        UNIMPLEMENTED_MSG("texture format not implemented={}", format);
        return 0;
    }
}
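As a sanity check on the packed formats handled by GetComponentSize, the per-component widths should sum to the texel width. A few compile-time checks of that arithmetic (illustrative only, not code from the patch):

int main() {
    static_assert(2 + 10 + 10 + 10 == 32, "A2B10G10R10 packs into 32 bits");
    static_assert(5 + 6 + 5 == 16, "B5G6R5 packs into 16 bits");
    static_assert(1 + 5 + 5 + 5 == 16, "A1B5G5R5 packs into 16 bits");
    static_assert(32 + 24 + 8 == 64, "R32_B24G8 packs into 64 bits");
}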
std::size_t GetImageComponentMask(TextureFormat format) {
    constexpr u8 R = 0b0001;
    constexpr u8 G = 0b0010;
    constexpr u8 B = 0b0100;
    constexpr u8 A = 0b1000;
    switch (format) {
    case TextureFormat::R32_G32_B32_A32:
    case TextureFormat::R16_G16_B16_A16:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        return std::size_t{R | G | B | A};
    case TextureFormat::R32_G32_B32:
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
        return std::size_t{R | G | B};
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
    case TextureFormat::G8R24:
    case TextureFormat::G24R8:
    case TextureFormat::G8R8:
    case TextureFormat::G4R4:
        return std::size_t{R | G};
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        return std::size_t{R};
    default:
        UNIMPLEMENTED_MSG("texture format not implemented={}", format);
        return std::size_t{R | G | B | A};
    }
}
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    switch (image_type) {
    case Tegra::Shader::ImageType::Texture1D:

@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
}

} // Anonymous namespace
std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                                  Node original_value) {
    switch (component_type) {
    case ComponentType::SNORM: {
        // range [-1.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        bool is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(static_cast<float>(1 << component_size) - 1.f));
        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
                is_signed};
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        return {std::move(original_value), false};
    case ComponentType::FLOAT:
        if (component_size == 16) {
            return {Operation(OperationCode::HCastFloat, original_value), true};
        } else {
            return {std::move(original_value), true};
        }
    default:
        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
        return {std::move(original_value), true};
    }
}
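For reference, the scalar math behind GetComponentValue: SNORM rescales a float in [-1, 1] by (2^bits)/2 - 1 and masks the result to the field width, while UNORM rescales [0, 1] by (2^bits) - 1. A standalone CPU-side sketch of the two conversions (hypothetical helper names, not code from the patch):

#include <cstdint>
#include <cstdio>

// SNORM: mirrors the FMul -> ICastFloat -> BitfieldExtract chain in the IR above.
std::uint32_t EncodeSnorm(float value, unsigned bits) {
    const float scale = static_cast<float>(1u << bits) / 2.0f - 1.0f;
    const std::int32_t scaled = static_cast<std::int32_t>(value * scale);
    return static_cast<std::uint32_t>(scaled) & ((1u << bits) - 1u);
}

// UNORM: mirrors the FMul -> ICastFloat chain in the IR above.
std::uint32_t EncodeUnorm(float value, unsigned bits) {
    const float scale = static_cast<float>(1u << bits) - 1.0f;
    return static_cast<std::uint32_t>(value * scale);
}

int main() {
    std::printf("%u\n", EncodeUnorm(1.0f, 8));  // prints 255
    std::printf("%u\n", EncodeSnorm(-1.0f, 8)); // prints 129, two's complement of -127 in 8 bits
}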
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {

    switch (opcode->get().GetId()) {
    case OpCode::Id::SULD: {
        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);

@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
                                                  : GetBindlessImage(instr.gpr39, type)};
        image.MarkRead();

        u32 indexer = 0;
        for (u32 element = 0; element < 4; ++element) {
            if (!instr.suldst.IsComponentEnabled(element)) {
                continue;
        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.suldst.IsComponentEnabled(element)) {
                    continue;
                }
                MetaImage meta{image, {}, element};
                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                SetTemporary(bb, indexer++, std::move(value));
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);

            auto descriptor = [this, instr] {
                std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
                if (instr.suldst.is_immediate) {
                    descriptor =
                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
                } else {
                    const Node image_register = GetRegister(instr.gpr39);
                    const auto [base_image, buffer, offset] = TrackCbuf(
                        image_register, global_code, static_cast<s64>(global_code.size()));
                    descriptor = registry.ObtainBindlessSampler(buffer, offset);
                }
                if (!descriptor) {
                    UNREACHABLE_MSG("Failed to obtain image descriptor");
                }
                return *descriptor;
            }();

            const auto comp_mask = GetImageComponentMask(descriptor.format);

            switch (instr.suldst.GetStoreDataLayout()) {
            case StoreType::Bits32:
            case StoreType::Bits64: {
                u32 indexer = 0;
                u32 shifted_counter = 0;
                Node value = Immediate(0);
                for (u32 element = 0; element < 4; ++element) {
                    if (!IsComponentEnabled(comp_mask, element)) {
                        continue;
                    }
                    const auto component_type = GetComponentType(descriptor, element);
                    const auto component_size = GetComponentSize(descriptor.format, element);
                    MetaImage meta{image, {}, element};

                    auto [converted_value, is_signed] = GetComponentValue(
                        component_type, component_size,
                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));

                    // Shift the element to its correct position
                    const auto shifted = shifted_counter;
                    if (shifted > 0) {
                        converted_value =
                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
                                            std::move(converted_value), Immediate(shifted));
                    }
                    shifted_counter += component_size;

                    // OR the converted value into the result
                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));

                    // Once a full 32-bit word has been packed, save it into a temporary
                    if (shifted_counter >= 32) {
                        SetTemporary(bb, indexer++, std::move(value));
                        // Reset the counter and the value to prepare packing the next word
                        value = Immediate(0);
                        shifted_counter = 0;
                    }
                }
                for (u32 i = 0; i < indexer; ++i) {
                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
                }
                break;
            }
            default:
                UNREACHABLE();
                break;
            }
            MetaImage meta{image, {}, element};
            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
            SetTemporary(bb, indexer++, std::move(value));
        }
        for (u32 i = 0; i < indexer; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
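The D_BA path above packs per-component loads into 32-bit words by shifting and ORing, flushing a word whenever 32 bits have been filled. A plain-integer sketch of the same packing (hypothetical helper, assuming component widths fill whole words and values are pre-masked to their width; not code from the patch):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint32_t> PackComponents(const std::vector<std::uint32_t>& values,
                                          const std::vector<std::uint32_t>& sizes) {
    std::vector<std::uint32_t> words;
    std::uint32_t word = 0;
    std::uint32_t shifted = 0; // bits consumed in the current word
    for (std::size_t i = 0; i < values.size(); ++i) {
        word |= values[i] << shifted; // shift the element to its position and OR it in
        shifted += sizes[i];
        if (shifted >= 32) { // a full 32-bit word is ready, emit it
            words.push_back(word);
            word = 0;
            shifted = 0;
        }
    }
    return words;
}

int main() {
    // Pack a B5G6R5 texel (5 + 6 + 5 bits) plus a 16-bit filler to complete one word.
    const auto words = PackComponents({31, 63, 31, 0}, {5, 6, 5, 16});
    return words.size() == 1 ? 0 : 1;
}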
@@ -11,12 +11,17 @@

namespace VideoCommon::Shader {

using std::move;
using Tegra::Shader::ConditionCode;
using Tegra::Shader::Instruction;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::OpCode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
using Tegra::Shader::SystemVariable;

using Index = Tegra::Shader::Attribute::Index;

u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
    case OpCode::Id::S2R: {
        const Node value = [this, instr] {
            switch (instr.sys20) {
            case SystemVariable::LaneId:
                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
                return Immediate(0U);
            case SystemVariable::InvocationId:
                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
                return Immediate(0U);
            case SystemVariable::WscaleFactorXY:
                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
                return Immediate(0U);
            case SystemVariable::WscaleFactorZ:
                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                return Immediate(0U);
            case SystemVariable::Tid: {
                Node value = Immediate(0);

@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    }
    case OpCode::Id::IPA: {
        const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;

        const auto attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};
        const Index index = attribute.index;

        Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
                                 : GetInputAttribute(attribute.index, attribute.element);
        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
        if (is_generic || is_physical) {
            // TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
            // In theory, by setting them as perspective, OpenGL does the perspective correction.
            // A way must be figured out to reverse the last step of it.
            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
                                 : GetInputAttribute(index, attribute.element);

        // Code taken from Ryujinx.
        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
                Node position_w = GetInputAttribute(Index::Position, 3);
                value = Operation(OperationCode::FMul, move(value), move(position_w));
            }
        }
        value = GetSaturatedFloat(value, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
        }

        value = GetSaturatedFloat(move(value), instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::OUT_R: {
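On the IPA change above: for perspective-mapped generic attributes the hardware hands the shader attribute/w, and the added FMul against position.w (the approach credited to Ryujinx in the comment) recovers the original value. A scalar illustration with hypothetical numbers, not code from the patch:

#include <cassert>

int main() {
    const float attribute = 0.75f;              // value the guest shader expects
    const float w = 2.0f;                       // interpolated clip-space w
    const float handed_over = attribute / w;    // what the noperspective path delivers
    const float recovered = handed_over * w;    // the FMul the patch inserts
    assert(recovered == attribute);             // exact here since w is a power of two
}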
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
    switch (cc) {
    case Tegra::Shader::ConditionCode::NEU:
        return GetInternalFlag(InternalFlag::Zero, true);
    case Tegra::Shader::ConditionCode::FCSM_TR:
        UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
    default:
        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
@@ -312,6 +312,10 @@ private:
    /// Conditionally saturates a half float pair
    Node GetSaturatedHalfFloat(Node value, bool saturate = true);

    /// Gets an image component value by type and size
    std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
                                            u32 component_size, Node original_value);

    /// Returns a predicate comparing two floats
    Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
    /// Returns a predicate comparing two integers
@@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
    return GetFormatBpp(pixel_format) / CHAR_BIT;
}

enum class SurfaceCompression {
    None,       // Not compressed
    Compressed, // Texture is compressed
    Converted,  // Texture is converted before upload or after download
    Rearranged, // Texture is swizzled before upload or after download
};

constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
    SurfaceCompression::None,       // ABGR8U
    SurfaceCompression::None,       // ABGR8S
    SurfaceCompression::None,       // ABGR8UI
    SurfaceCompression::None,       // B5G6R5U
    SurfaceCompression::None,       // A2B10G10R10U
    SurfaceCompression::None,       // A1B5G5R5U
    SurfaceCompression::None,       // R8U
    SurfaceCompression::None,       // R8UI
    SurfaceCompression::None,       // RGBA16F
    SurfaceCompression::None,       // RGBA16U
    SurfaceCompression::None,       // RGBA16S
    SurfaceCompression::None,       // RGBA16UI
    SurfaceCompression::None,       // R11FG11FB10F
    SurfaceCompression::None,       // RGBA32UI
    SurfaceCompression::Compressed, // DXT1
    SurfaceCompression::Compressed, // DXT23
    SurfaceCompression::Compressed, // DXT45
    SurfaceCompression::Compressed, // DXN1
    SurfaceCompression::Compressed, // DXN2UNORM
    SurfaceCompression::Compressed, // DXN2SNORM
    SurfaceCompression::Compressed, // BC7U
    SurfaceCompression::Compressed, // BC6H_UF16
    SurfaceCompression::Compressed, // BC6H_SF16
    SurfaceCompression::Converted,  // ASTC_2D_4X4
    SurfaceCompression::None,       // BGRA8
    SurfaceCompression::None,       // RGBA32F
    SurfaceCompression::None,       // RG32F
    SurfaceCompression::None,       // R32F
    SurfaceCompression::None,       // R16F
    SurfaceCompression::None,       // R16U
    SurfaceCompression::None,       // R16S
    SurfaceCompression::None,       // R16UI
    SurfaceCompression::None,       // R16I
    SurfaceCompression::None,       // RG16
    SurfaceCompression::None,       // RG16F
    SurfaceCompression::None,       // RG16UI
    SurfaceCompression::None,       // RG16I
    SurfaceCompression::None,       // RG16S
    SurfaceCompression::None,       // RGB32F
    SurfaceCompression::None,       // RGBA8_SRGB
    SurfaceCompression::None,       // RG8U
    SurfaceCompression::None,       // RG8S
    SurfaceCompression::None,       // RG32UI
    SurfaceCompression::None,       // RGBX16F
    SurfaceCompression::None,       // R32UI
    SurfaceCompression::None,       // R32I
    SurfaceCompression::Converted,  // ASTC_2D_8X8
    SurfaceCompression::Converted,  // ASTC_2D_8X5
    SurfaceCompression::Converted,  // ASTC_2D_5X4
    SurfaceCompression::None,       // BGRA8_SRGB
    SurfaceCompression::Compressed, // DXT1_SRGB
    SurfaceCompression::Compressed, // DXT23_SRGB
    SurfaceCompression::Compressed, // DXT45_SRGB
    SurfaceCompression::Compressed, // BC7U_SRGB
    SurfaceCompression::None,       // R4G4B4A4U
    SurfaceCompression::Converted,  // ASTC_2D_4X4_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_8X8_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_8X5_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_5X4_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_5X5
    SurfaceCompression::Converted,  // ASTC_2D_5X5_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_10X8
    SurfaceCompression::Converted,  // ASTC_2D_10X8_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_6X6
    SurfaceCompression::Converted,  // ASTC_2D_6X6_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_10X10
    SurfaceCompression::Converted,  // ASTC_2D_10X10_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_12X12
    SurfaceCompression::Converted,  // ASTC_2D_12X12_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_8X6
    SurfaceCompression::Converted,  // ASTC_2D_8X6_SRGB
    SurfaceCompression::Converted,  // ASTC_2D_6X5
    SurfaceCompression::Converted,  // ASTC_2D_6X5_SRGB
    SurfaceCompression::None,       // E5B9G9R9F
    SurfaceCompression::None,       // Z32F
    SurfaceCompression::None,       // Z16
    SurfaceCompression::None,       // Z24S8
    SurfaceCompression::Rearranged, // S8Z24
    SurfaceCompression::None,       // Z32FS8
}};

constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
    if (format == PixelFormat::Invalid) {
        return SurfaceCompression::None;
    }
    DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
    return compression_type_table[static_cast<std::size_t>(format)];
}

SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);

bool SurfaceTargetIsLayered(SurfaceTarget target);
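For context, this is how the removed classification used to be consumed: a constexpr table lookup keyed by pixel format. A compilable miniature with illustrative enum values (not the real PixelFormat enum):

#include <array>
#include <cstddef>

enum class PixelFormat : std::size_t { ABGR8U, DXT1, ASTC_2D_4X4, Max };
enum class SurfaceCompression { None, Compressed, Converted, Rearranged };

// One entry per pixel format, indexed by the format's enum value.
constexpr std::array<SurfaceCompression, static_cast<std::size_t>(PixelFormat::Max)> table{
    {SurfaceCompression::None, SurfaceCompression::Compressed, SurfaceCompression::Converted}};

constexpr SurfaceCompression Classify(PixelFormat format) {
    return table[static_cast<std::size_t>(format)];
}

static_assert(Classify(PixelFormat::ASTC_2D_4X4) == SurfaceCompression::Converted);

int main() {}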
@@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192,

using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceCompression;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::PixelFormat;

StagingCache::StagingCache() = default;

StagingCache::~StagingCache() = default;

SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
    : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
      mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
                                 bool is_astc_supported)
    : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
      mipmap_offsets(params.num_levels) {
    is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
    host_memory_size = params.GetHostSizeInBytes(is_converted);

    std::size_t offset = 0;
    for (u32 level = 0; level < params.num_levels; ++level) {
        const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};

@@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf

    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        std::size_t host_offset{0};
        std::size_t host_offset = 0;
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {

@@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
    // Use an extra temporary buffer
    auto& tmp_buffer = staging_cache.GetBuffer(1);
    tmp_buffer.resize(guest_memory_size);
    host_ptr = tmp_buffer.data();
    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }

@@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
        } else {
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};

@@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
        }
    }

    auto compression_type = params.GetCompressionType();
    if (compression_type == SurfaceCompression::None ||
        compression_type == SurfaceCompression::Compressed)
    if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
        return;
    }

    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
        const u32 level = level_up - 1;
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
                                                ? in_host_offset
                                                : params.GetConvertedMipmapOffset(level);
        u8* in_buffer = staging_buffer.data() + in_host_offset;
        u8* out_buffer = staging_buffer.data() + out_host_offset;
    for (u32 level = params.num_levels; level--;) {
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
        const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
        u8* const in_buffer = staging_buffer.data() + in_host_offset;
        u8* const out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
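A note on the reversed mip loop in the hunk above: converted (RGBA8) offsets are at least as large as the native offsets they are decoded from, so walking levels from last to first avoids overwriting input that has not been read yet when converting in place inside one staging buffer. That is my reading of the loop; the patch itself does not spell it out. A toy analogue of in-place, back-to-front expansion:

#include <cassert>
#include <vector>

int main() {
    std::vector<int> buf{1, 2, 3, 0, 0, 0};
    // Expand element i into positions 2i and 2i+1, iterating backwards so the
    // writes never clobber elements that are still pending as input.
    for (int i = 2; i >= 0; --i) {
        buf[2 * i] = buf[i];
        buf[2 * i + 1] = buf[i];
    }
    assert((buf == std::vector<int>{1, 1, 2, 2, 3, 3}));
}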
@@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
    }
    // Use an extra temporary buffer
    auto& tmp_buffer = staging_cache.GetBuffer(1);
    tmp_buffer.resize(guest_memory_size);
    host_ptr = tmp_buffer.data();

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }

@@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
            }
        }
    }
    if (!is_continuous) {
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}

} // namespace VideoCommon
@@ -68,8 +68,8 @@ public:
        return gpu_addr;
    }

    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
        return (cache_addr < end) && (cache_addr_end > start);
    bool Overlaps(const VAddr start, const VAddr end) const {
        return (cpu_addr < end) && (cpu_addr_end > start);
    }

    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {

@@ -86,21 +86,13 @@ public:
        return cpu_addr;
    }

    VAddr GetCpuAddrEnd() const {
        return cpu_addr_end;
    }

    void SetCpuAddr(const VAddr new_addr) {
        cpu_addr = new_addr;
    }

    CacheAddr GetCacheAddr() const {
        return cache_addr;
    }

    CacheAddr GetCacheAddrEnd() const {
        return cache_addr_end;
    }

    void SetCacheAddr(const CacheAddr new_addr) {
        cache_addr = new_addr;
        cache_addr_end = new_addr + guest_memory_size;
        cpu_addr_end = new_addr + guest_memory_size;
    }

    const SurfaceParams& GetSurfaceParams() const {

@@ -119,18 +111,14 @@ public:
        return mipmap_sizes[level];
    }

    void MarkAsContinuous(const bool is_continuous) {
        this->is_continuous = is_continuous;
    }

    bool IsContinuous() const {
        return is_continuous;
    }

    bool IsLinear() const {
        return !params.is_tiled;
    }

    bool IsConverted() const {
        return is_converted;
    }

    bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
        return params.pixel_format == pixel_format;
    }

@@ -160,7 +148,8 @@ public:
    }

protected:
    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
                             bool is_astc_supported);
    ~SurfaceBaseImpl() = default;

    virtual void DecorateSurfaceName() = 0;

@@ -168,12 +157,11 @@ protected:
    const SurfaceParams params;
    std::size_t layer_size;
    std::size_t guest_memory_size;
    const std::size_t host_memory_size;
    std::size_t host_memory_size;
    GPUVAddr gpu_addr{};
    CacheAddr cache_addr{};
    CacheAddr cache_addr_end{};
    VAddr cpu_addr{};
    bool is_continuous{};
    VAddr cpu_addr_end{};
    bool is_converted{};

    std::vector<std::size_t> mipmap_sizes;
    std::vector<std::size_t> mipmap_offsets;

@@ -288,8 +276,9 @@ public:
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
                         bool is_astc_supported)
        : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}

    ~SurfaceBase() = default;
@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
    params.height = tic.Height();
    params.depth = tic.Depth();
    params.pitch = params.is_tiled ? 0 : tic.Pitch();
    if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
        params.depth = 1;
    } else if (params.target == SurfaceTarget::TextureCubemap ||
               params.target == SurfaceTarget::TextureCubeArray) {
    if (params.target == SurfaceTarget::TextureCubemap ||
        params.target == SurfaceTarget::TextureCubeArray) {
        params.depth *= 6;
    }
    params.num_levels = tic.max_mip_level + 1;

@@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
    return offset;
}

std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
    std::size_t offset = 0;
    for (u32 i = 0; i < level; i++) {
        offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
    }
    return offset;
}

std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
    std::size_t offset = 0;
    for (u32 i = 0; i < level; i++) {
        offset += GetConvertedMipmapSize(i);
    if (is_converted) {
        for (u32 i = 0; i < level; ++i) {
            offset += GetConvertedMipmapSize(i) * GetNumLayers();
        }
    } else {
        for (u32 i = 0; i < level; ++i) {
            offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
        }
    }
    return offset;
}
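A small sketch of the merged offset computation above, assuming per-level sizes are already known: both paths accumulate level sizes scaled by the layer count and differ only in which size table they draw from. Hypothetical byte sizes for illustration, not code from the patch:

#include <cstddef>
#include <vector>

std::size_t HostMipOffset(const std::vector<std::size_t>& mip_sizes, std::size_t num_layers,
                          std::size_t level) {
    std::size_t offset = 0;
    for (std::size_t i = 0; i < level; ++i) {
        offset += mip_sizes[i] * num_layers; // every earlier level, repeated per layer
    }
    return offset;
}

int main() {
    // A 2-layer surface with per-level sizes of 4096, 1024 and 256 bytes:
    // level 2 starts at (4096 + 1024) * 2 = 10240 bytes.
    const std::vector<std::size_t> sizes{4096, 1024, 256};
    return HostMipOffset(sizes, 2, 2) == 10240 ? 0 : 1;
}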
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
    constexpr std::size_t rgba8_bpp = 4ULL;
    const std::size_t width_t = GetMipWidth(level);
    const std::size_t height_t = GetMipHeight(level);
    const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
    return width_t * height_t * depth_t * rgba8_bpp;
    const std::size_t mip_width = GetMipWidth(level);
    const std::size_t mip_height = GetMipHeight(level);
    const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
    return mip_width * mip_height * mip_depth * rgba8_bpp;
}

std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
@@ -20,8 +20,6 @@ namespace VideoCommon {

class FormatLookupTable;

using VideoCore::Surface::SurfaceCompression;

class SurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.

@@ -67,16 +65,14 @@ public:
        return GetInnerMemorySize(false, false, false);
    }

    std::size_t GetHostSizeInBytes() const {
        std::size_t host_size_in_bytes;
        if (GetCompressionType() == SurfaceCompression::Converted) {
            // ASTC is decompressed in software and emulated as RGBA8
            host_size_in_bytes = 0;
            for (u32 level = 0; level < num_levels; ++level) {
                host_size_in_bytes += GetConvertedMipmapSize(level);
            }
        } else {
            host_size_in_bytes = GetInnerMemorySize(true, false, false);
    std::size_t GetHostSizeInBytes(bool is_converted) const {
        if (!is_converted) {
            return GetInnerMemorySize(true, false, false);
        }
        // ASTC is decompressed in software and emulated as RGBA8
        std::size_t host_size_in_bytes = 0;
        for (u32 level = 0; level < num_levels; ++level) {
            host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
        }
        return host_size_in_bytes;
    }
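A worked example of the converted path, assuming the RGBA8 (4 bytes per texel) emulation noted in the comment: a hypothetical single-layer 64x64 ASTC texture with two mip levels yields 64*64*4 + 32*32*4 = 20480 host bytes. A standalone sketch of that arithmetic, not code from the patch:

#include <algorithm>
#include <cstddef>

std::size_t ConvertedSize(std::size_t width, std::size_t height, std::size_t levels) {
    std::size_t total = 0;
    for (std::size_t level = 0; level < levels; ++level) {
        const std::size_t w = std::max<std::size_t>(width >> level, 1);
        const std::size_t h = std::max<std::size_t>(height >> level, 1);
        total += w * h * 4; // decoded as RGBA8, 4 bytes per texel
    }
    return total;
}

int main() {
    return ConvertedSize(64, 64, 2) == 20480 ? 0 : 1;
}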
@@ -107,9 +103,8 @@ public:
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the best possible row/pitch alignment for the surface.
    u32 GetRowAlignment(u32 level) const {
        const u32 bpp =
            GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
    u32 GetRowAlignment(u32 level, bool is_converted) const {
        const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
        return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
    }

@@ -117,11 +112,7 @@ public:
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level
    /// for a texture that is converted on the host GPU.
    std::size_t GetConvertedMipmapOffset(u32 level) const;
    std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;

    /// Returns the size in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapSize(u32 level) const {

@@ -196,11 +187,6 @@ public:
               pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
    }

    /// Returns how the compression should be handled for this texture.
    SurfaceCompression GetCompressionType() const {
        return VideoCore::Surface::GetFormatCompressionType(pixel_format);
    }

    /// Returns whether the surface is a TextureBuffer type of surface.
    bool IsBuffer() const {
        return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
|
||||
|
||||
template <typename TSurface, typename TView>
|
||||
class TextureCache {
|
||||
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
|
||||
using IntervalType = typename IntervalMap::interval_type;
|
||||
|
||||
public:
|
||||
void InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
for (const auto& surface : GetSurfacesInRegion(addr, size)) {
|
||||
@@ -76,7 +74,7 @@ public:
|
||||
guard_samplers = new_guard;
|
||||
}
|
||||
|
||||
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
void FlushRegion(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
auto surfaces = GetSurfacesInRegion(addr, size);
|
||||
@@ -99,9 +97,9 @@ public:
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
const std::optional<VAddr> cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
|
||||
@@ -110,7 +108,7 @@ public:
|
||||
}
|
||||
|
||||
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
|
||||
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
|
||||
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
|
||||
if (guard_samplers) {
|
||||
sampled_textures.push_back(surface);
|
||||
}
|
||||
@@ -124,13 +122,13 @@ public:
|
||||
if (!gpu_addr) {
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
const std::optional<VAddr> cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
|
||||
}
|
||||
const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
|
||||
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
|
||||
const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
|
||||
if (guard_samplers) {
|
||||
sampled_textures.push_back(surface);
|
||||
}
|
||||
@@ -145,7 +143,7 @@ public:
|
||||
return any_rt;
|
||||
}
|
||||
|
||||
TView GetDepthBufferSurface(bool preserve_contents) {
|
||||
TView GetDepthBufferSurface() {
|
||||
std::lock_guard lock{mutex};
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
|
||||
@@ -159,14 +157,14 @@ public:
|
||||
SetEmptyDepthBuffer();
|
||||
return {};
|
||||
}
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
const std::optional<VAddr> cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
SetEmptyDepthBuffer();
|
||||
return {};
|
||||
}
|
||||
const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
|
||||
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
|
||||
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
|
||||
if (depth_buffer.target)
|
||||
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
|
||||
depth_buffer.target = surface_view.first;
|
||||
@@ -176,7 +174,7 @@ public:
|
||||
return surface_view.second;
|
||||
}
|
||||
|
||||
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
|
||||
TView GetColorBufferSurface(std::size_t index) {
|
||||
std::lock_guard lock{mutex};
|
||||
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
|
||||
auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
@@ -199,16 +197,15 @@ public:
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||
if (!cache_addr) {
|
||||
const std::optional<VAddr> cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
SetEmptyColorBuffer(index);
|
||||
return {};
|
||||
}
|
||||
|
||||
auto surface_view =
|
||||
GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index),
|
||||
preserve_contents, true);
|
||||
auto surface_view = GetSurface(gpu_addr, *cpu_addr,
|
||||
SurfaceParams::CreateForFramebuffer(system, index), true);
|
||||
if (render_targets[index].target)
|
||||
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
|
||||
render_targets[index].target = surface_view.first;
|
||||
@@ -257,27 +254,26 @@ public:
|
||||
const GPUVAddr src_gpu_addr = src_config.Address();
|
||||
const GPUVAddr dst_gpu_addr = dst_config.Address();
|
||||
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
|
||||
const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)};
|
||||
const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)};
|
||||
const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)};
|
||||
const auto src_cache_addr{ToCacheAddr(src_host_ptr)};
|
||||
const std::optional<VAddr> dst_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
|
||||
const std::optional<VAddr> src_cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
|
||||
std::pair<TSurface, TView> dst_surface =
|
||||
GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false);
|
||||
GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
|
||||
std::pair<TSurface, TView> src_surface =
|
||||
GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false);
|
||||
GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
|
||||
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
||||
dst_surface.first->MarkAsModified(true, Tick());
|
||||
}
|
||||
|
||||
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
|
||||
const CacheAddr cache_addr = ToCacheAddr(host_ptr);
|
||||
if (!cache_addr) {
|
||||
TSurface TryFindFramebufferSurface(VAddr addr) {
|
||||
if (!addr) {
|
||||
return nullptr;
|
||||
}
|
||||
const CacheAddr page = cache_addr >> registry_page_bits;
|
||||
const VAddr page = addr >> registry_page_bits;
|
||||
std::vector<TSurface>& list = registry[page];
|
||||
for (auto& surface : list) {
|
||||
if (surface->GetCacheAddr() == cache_addr) {
|
||||
if (surface->GetCpuAddr() == addr) {
|
||||
return surface;
|
||||
}
|
||||
}
|
||||
@@ -289,8 +285,9 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||
: system{system}, rasterizer{rasterizer} {
|
||||
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
bool is_astc_supported)
|
||||
: system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
|
||||
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
|
||||
SetEmptyColorBuffer(i);
|
||||
}
|
||||
@@ -337,18 +334,14 @@ protected:
|
||||
|
||||
void Register(TSurface surface) {
|
||||
const GPUVAddr gpu_addr = surface->GetGpuAddr();
|
||||
const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
|
||||
const std::size_t size = surface->GetSizeInBytes();
|
||||
const std::optional<VAddr> cpu_addr =
|
||||
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
|
||||
if (!cache_ptr || !cpu_addr) {
|
||||
if (!cpu_addr) {
|
||||
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
|
||||
gpu_addr);
|
||||
return;
|
||||
}
|
||||
const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
|
||||
surface->MarkAsContinuous(continuous);
|
||||
surface->SetCacheAddr(cache_ptr);
|
||||
surface->SetCpuAddr(*cpu_addr);
|
||||
RegisterInnerCache(surface);
|
||||
surface->MarkAsRegistered(true);
|
||||
@@ -381,6 +374,7 @@ protected:
|
||||
}
|
||||
|
||||
Core::System& system;
|
||||
const bool is_astc_supported;
|
||||
|
||||
private:
|
||||
enum class RecycleStrategy : u32 {
|
||||
@@ -456,22 +450,18 @@ private:
|
||||
* @param overlaps The overlapping surfaces registered in the cache.
|
||||
* @param params The parameters for the new surface.
|
||||
* @param gpu_addr The starting address of the new surface.
|
||||
* @param preserve_contents Indicates that the new surface should be loaded from memory or left
|
||||
* blank.
|
||||
* @param untopological Indicates to the recycler that the texture has no way to match the
|
||||
* overlaps due to topological reasons.
|
||||
**/
|
||||
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
|
||||
const SurfaceParams& params, const GPUVAddr gpu_addr,
|
||||
const bool preserve_contents,
|
||||
const MatchTopologyResult untopological) {
|
||||
const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
|
||||
for (auto& surface : overlaps) {
|
||||
Unregister(surface);
|
||||
}
|
||||
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
|
||||
case RecycleStrategy::Ignore: {
|
||||
return InitializeSurface(gpu_addr, params, do_load);
|
||||
return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
|
||||
}
|
||||
case RecycleStrategy::Flush: {
|
||||
std::sort(overlaps.begin(), overlaps.end(),
|
||||
@@ -481,7 +471,7 @@ private:
|
||||
for (auto& surface : overlaps) {
|
||||
FlushSurface(surface);
|
||||
}
|
||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||
return InitializeSurface(gpu_addr, params);
|
||||
}
|
||||
case RecycleStrategy::BufferCopy: {
|
||||
auto new_surface = GetUncachedSurface(gpu_addr, params);
|
||||
@@ -490,7 +480,7 @@ private:
|
||||
}
|
||||
default: {
|
||||
UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
|
||||
return InitializeSurface(gpu_addr, params, do_load);
|
||||
return InitializeSurface(gpu_addr, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -626,14 +616,11 @@ private:
|
||||
* @param params The parameters on the new surface.
|
||||
* @param gpu_addr The starting address of the new surface.
|
||||
* @param cache_addr The starting address of the new surface on physical memory.
|
||||
* @param preserve_contents Indicates that the new surface should be loaded from memory or
|
||||
* left blank.
|
||||
*/
|
||||
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
|
||||
const SurfaceParams& params,
|
||||
const GPUVAddr gpu_addr,
|
||||
const CacheAddr cache_addr,
|
||||
bool preserve_contents) {
|
||||
const VAddr cpu_addr) {
|
||||
if (params.target == SurfaceTarget::Texture3D) {
|
||||
bool failed = false;
|
||||
if (params.num_levels > 1) {
|
||||
@@ -657,7 +644,7 @@ private:
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
|
||||
const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
|
||||
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
|
||||
modified |= surface->IsModified();
|
||||
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
|
||||
@@ -677,23 +664,23 @@ private:
|
||||
} else {
|
||||
for (const auto& surface : overlaps) {
|
||||
if (!surface->MatchTarget(params.target)) {
|
||||
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
|
||||
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
|
||||
if (Settings::values.use_accurate_gpu_emulation) {
|
||||
return std::nullopt;
|
||||
}
|
||||
Unregister(surface);
|
||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||
return InitializeSurface(gpu_addr, params);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
if (surface->GetCacheAddr() != cache_addr) {
|
||||
if (surface->GetCpuAddr() != cpu_addr) {
|
||||
continue;
|
||||
}
|
||||
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
|
||||
return {{surface, surface->GetMainView()}};
|
||||
}
|
||||
}
|
||||
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||
return InitializeSurface(gpu_addr, params);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -716,23 +703,19 @@ private:
|
||||
*
|
||||
* @param gpu_addr The starting address of the candidate surface.
|
||||
* @param params The parameters on the candidate surface.
|
||||
* @param preserve_contents Indicates that the new surface should be loaded from memory or
|
||||
* left blank.
|
||||
* @param is_render Whether or not the surface is a render target.
|
||||
**/
|
||||
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
|
||||
const SurfaceParams& params, bool preserve_contents,
|
||||
bool is_render) {
|
||||
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
|
||||
const SurfaceParams& params, bool is_render) {
|
||||
// Step 1
|
||||
// Check Level 1 Cache for a fast structural match. If candidate surface
|
||||
// matches at certain level we are pretty much done.
|
||||
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
|
||||
if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
|
||||
TSurface& current_surface = iter->second;
|
||||
const auto topological_result = current_surface->MatchesTopology(params);
|
||||
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||
std::vector<TSurface> overlaps{current_surface};
|
||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||
topological_result);
|
||||
return RecycleSurface(overlaps, params, gpu_addr, topological_result);
|
||||
}
|
||||
|
||||
const auto struct_result = current_surface->MatchesStructure(params);
|
||||
@@ -753,11 +736,11 @@ private:
|
||||
// Step 2
|
||||
// Obtain all possible overlaps in the memory region
|
||||
const std::size_t candidate_size = params.GetGuestSizeInBytes();
|
||||
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
|
||||
auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
|
||||
|
||||
// If none are found, we are done. we just load the surface and create it.
if (overlaps.empty()) {
return InitializeSurface(gpu_addr, params, preserve_contents);
return InitializeSurface(gpu_addr, params);
}

// Step 3
@@ -767,15 +750,13 @@ private:
for (const auto& surface : overlaps) {
const auto topological_result = surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
return RecycleSurface(overlaps, params, gpu_addr, topological_result);
}
}

// Check if it's a 3D texture
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
if (surface) {
return *surface;
}
@@ -795,8 +776,7 @@ private:
return *view;
}
}
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
}
// Now we check if the candidate is a mipmap/layer of the overlap
std::optional<TView> view =
@@ -820,7 +800,7 @@ private:
pair.first->EmplaceView(params, gpu_addr, candidate_size);
if (mirage_view)
return {pair.first, *mirage_view};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
return RecycleSurface(overlaps, params, gpu_addr,
MatchTopologyResult::FullMatch);
}
return {current_surface, *view};
@@ -836,8 +816,7 @@ private:
}
}
// We failed all the tests, recycle the overlaps into a new texture.
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
}

/**
@@ -850,16 +829,16 @@ private:
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
const auto cache_addr{ToCacheAddr(host_ptr)};
const std::optional<VAddr> cpu_addr =
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);

if (!cache_addr) {
if (!cpu_addr) {
Deduction result{};
result.type = DeductionType::DeductionFailed;
return result;
}

if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
@@ -878,7 +857,7 @@ private:
}

const std::size_t candidate_size = params.GetGuestSizeInBytes();
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};

if (overlaps.empty()) {
Deduction result{};
@@ -995,10 +974,10 @@ private:
}

std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
bool preserve_contents) {
bool do_load = true) {
auto new_surface{GetUncachedSurface(gpu_addr, params)};
Register(new_surface);
if (preserve_contents) {
if (do_load) {
LoadSurface(new_surface);
}
return {new_surface, new_surface->GetMainView()};
@@ -1022,10 +1001,10 @@ private:
}

void RegisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache[cache_addr] = surface;
const VAddr cpu_addr = surface->GetCpuAddr();
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache[cpu_addr] = surface;
while (start <= end) {
registry[start].push_back(surface);
start++;
@@ -1033,10 +1012,10 @@ private:
}

void UnregisterInnerCache(TSurface& surface) {
const CacheAddr cache_addr = surface->GetCacheAddr();
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cache_addr);
const VAddr cpu_addr = surface->GetCpuAddr();
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
l1_cache.erase(cpu_addr);
while (start <= end) {
auto& reg{registry[start]};
reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1044,18 +1023,18 @@ private:
}
}

std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const CacheAddr cache_addr_end = cache_addr + size;
CacheAddr start = cache_addr >> registry_page_bits;
const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
const VAddr cpu_addr_end = cpu_addr + size;
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces;
while (start <= end) {
std::vector<TSurface>& list = registry[start];
for (auto& surface : list) {
if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
surface->MarkAsPicked(true);
surfaces.push_back(surface);
}
@@ -1144,14 +1123,14 @@ private:
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
std::unordered_map<VAddr, std::vector<TSurface>> registry;

static constexpr u32 DEPTH_RT = 8;
static constexpr u32 NO_RT = 0xFFFFFFFF;

// The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other stuff.
std::unordered_map<CacheAddr, TSurface> l1_cache;
std::unordered_map<VAddr, TSurface> l1_cache;

/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
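The registry above buckets every surface by each 1 MiB page (registry_page_bits = 20) that its CPU address range touches, so a region query only walks the pages covering the query range. A minimal standalone sketch of that scheme, with illustrative names rather than the cache's real types:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using VAddr = std::uint64_t;
    struct Surface; // stand-in for the cache's TSurface

    constexpr std::uint64_t page_bits = 20; // registry_page_bits: 1 MiB pages

    std::unordered_map<VAddr, std::vector<Surface*>> page_registry;

    // Insert a surface into every page bucket that [begin, end) overlaps;
    // assumes a non-empty range, mirroring the (end - 1) >> bits pattern above.
    void RegisterRange(Surface* surface, VAddr begin, VAddr end) {
        for (VAddr page = begin >> page_bits; page <= (end - 1) >> page_bits; ++page) {
            page_registry[page].push_back(surface);
        }
    }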
80
src/video_core/textures/texture.cpp
Normal file
@@ -0,0 +1,80 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>

#include "core/settings.h"
#include "video_core/textures/texture.h"

namespace Tegra::Texture {

namespace {

constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
};

unsigned SettingsMinimumAnisotropy() noexcept {
switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
default:
case Anisotropy::Default:
return 1U;
case Anisotropy::Filter2x:
return 2U;
case Anisotropy::Filter4x:
return 4U;
case Anisotropy::Filter8x:
return 8U;
case Anisotropy::Filter16x:
return 16U;
}
}

} // Anonymous namespace

std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
if (!srgb_conversion) {
return border_color;
}
return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
}

float TSCEntry::GetMaxAnisotropy() const noexcept {
return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
}

} // namespace Tegra::Texture
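GetMaxAnisotropy combines the sampler's own request with the user setting: the TSC field stores log2 of the requested anisotropy, and the setting only ever raises the floor, never lowers a game's own request. A small worked sketch of that clamping, using a hypothetical helper name but the same logic as above:

    #include <algorithm>
    #include <cstdint>

    // tsc_log2 = 0 -> 1x, 1 -> 2x, ... 4 -> 16x, mirroring the TSC encoding.
    float ResolveMaxAnisotropy(std::uint32_t tsc_log2, unsigned settings_minimum) {
        return static_cast<float>(std::max(1U << tsc_log2, settings_minimum));
    }

    // ResolveMaxAnisotropy(0, 8) == 8.0f:  the game asked for 1x, the user forces at least 8x.
    // ResolveMaxAnisotropy(4, 2) == 16.0f: the game's own 16x request wins over the 2x floor.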
@@ -8,7 +8,6 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/settings.h"

namespace Tegra::Texture {

@@ -336,24 +335,9 @@ struct TSCEntry {
std::array<u8, 0x20> raw;
};

float GetMaxAnisotropy() const {
const u32 min_value = [] {
switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
default:
case Anisotropy::Default:
return 1U;
case Anisotropy::Filter2x:
return 2U;
case Anisotropy::Filter4x:
return 4U;
case Anisotropy::Filter8x:
return 8U;
case Anisotropy::Filter16x:
return 16U;
}
}();
return static_cast<float>(std::max(1U << max_anisotropy, min_value));
}
std::array<float, 4> GetBorderColor() const noexcept;

float GetMaxAnisotropy() const noexcept;

float GetMinLod() const {
return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +352,6 @@ struct TSCEntry {
constexpr u32 mask = 1U << (13 - 1);
return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
}

std::array<float, 4> GetBorderColor() const {
if (srgb_conversion) {
return {static_cast<float>(srgb_border_color_r) / 255.0f,
static_cast<float>(srgb_border_color_g) / 255.0f,
static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
}
return border_color;
}
};
static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
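The `(mip_lod_bias ^ mask) - mask` expression in the hunk above is a standard sign-extension trick: mip_lod_bias is a 13-bit two's-complement field, so XOR-ing with the sign bit and subtracting it extends the value to a full 32-bit signed integer. A worked sketch with a hypothetical helper name:

    #include <cstdint>

    std::int32_t SignExtend13(std::uint32_t value) {
        constexpr std::uint32_t mask = 1U << (13 - 1); // sign bit of a 13-bit field
        return static_cast<std::int32_t>((value ^ mask) - mask);
    }

    // SignExtend13(0x0001) ==     1
    // SignExtend13(0x1FFF) ==    -1   (all ones in 13 bits)
    // SignExtend13(0x1000) == -4096   (most negative 13-bit value)
    // The result is then divided by 256.0f: the bias is fixed-point with 8 fractional bits.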
@@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets)
target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)

if (ENABLE_VULKAN AND NOT WIN32)
target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
endif()

target_compile_definitions(yuzu PRIVATE
# Use QStringBuilder for string concatenation to reduce
# the overall number of temporary strings created.
@@ -3,15 +3,22 @@
// Refer to the license.txt file included.

#include <QIcon>
#include <fmt/format.h>
#include "common/scm_rev.h"
#include "ui_aboutdialog.h"
#include "yuzu/about_dialog.h"

AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
const auto build_id = std::string(Common::g_build_id);
const auto fmt = std::string(Common::g_title_bar_format_idle);
const auto yuzu_build_version =
fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
std::string{}, std::string{}, std::string{}, build_id);

ui->setupUi(this);
ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch),
QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
}
@@ -14,8 +14,9 @@
#include <QScreen>
#include <QStringList>
#include <QWindow>
#ifdef HAS_VULKAN
#include <QVulkanWindow>

#if !defined(WIN32) && HAS_VULKAN
#include <qpa/qplatformnativeinterface.h>
#endif

#include <fmt/format.h>
@@ -224,7 +225,6 @@ public:
}

context->MakeCurrent();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
if (Core::System::GetInstance().Renderer().TryPresent(100)) {
context->SwapBuffers();
glFinish();
@@ -238,16 +238,50 @@ private:
#ifdef HAS_VULKAN
class VulkanRenderWidget : public RenderWidget {
public:
explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance)
: RenderWidget(parent) {
explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {
windowHandle()->setSurfaceType(QWindow::VulkanSurface);
windowHandle()->setVulkanInstance(instance);
}
};
#endif

GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread)
: QWidget(parent_), emu_thread(emu_thread) {
static Core::Frontend::WindowSystemType GetWindowSystemType() {
// Determine WSI type based on Qt platform.
QString platform_name = QGuiApplication::platformName();
if (platform_name == QStringLiteral("windows"))
return Core::Frontend::WindowSystemType::Windows;
else if (platform_name == QStringLiteral("xcb"))
return Core::Frontend::WindowSystemType::X11;
else if (platform_name == QStringLiteral("wayland"))
return Core::Frontend::WindowSystemType::Wayland;

LOG_CRITICAL(Frontend, "Unknown Qt platform!");
return Core::Frontend::WindowSystemType::Windows;
}

static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
Core::Frontend::EmuWindow::WindowSystemInfo wsi;
wsi.type = GetWindowSystemType();

#ifdef HAS_VULKAN
// Our Win32 Qt external doesn't have the private API.
#if defined(WIN32) || defined(__APPLE__)
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#else
QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
wsi.display_connection = pni->nativeResourceForWindow("display", window);
if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
else
wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
#endif
wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
#endif

return wsi;
}

GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_)
: QWidget(parent_), emu_thread(emu_thread_) {
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
.arg(QString::fromUtf8(Common::g_build_name),
QString::fromUtf8(Common::g_scm_branch),
@@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() {
break;
}

// Update the Window System information with the new render target
window_info = GetWindowSystemInfo(child_widget->windowHandle());

child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
layout()->addWidget(child_widget);
// Reset minimum required size to avoid resizing issues on the main window after restarting.
@@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() {

bool GRenderWindow::InitializeVulkan() {
#ifdef HAS_VULKAN
vk_instance = std::make_unique<QVulkanInstance>();
vk_instance->setApiVersion(QVersionNumber(1, 1, 0));
vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect);
if (Settings::values.renderer_debug) {
const auto supported_layers{vk_instance->supportedLayers()};
const bool found =
std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) {
constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation";
return layer.name == searched_layer;
});
if (found) {
vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation");
vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
}
if (!vk_instance->create()) {
QMessageBox::critical(
this, tr("Error while initializing Vulkan 1.1!"),
tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the "
"latest graphics drivers."));
return false;
}

auto child = new VulkanRenderWidget(this, vk_instance.get());
auto child = new VulkanRenderWidget(this);
child_widget = child;
child_widget->windowHandle()->create();
main_context = std::make_unique<DummyContext>();
@@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() {
#endif
}

void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
#ifdef HAS_VULKAN
const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr");
const VkInstance instance_copy = vk_instance->vkInstance();
const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle());

std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
std::memcpy(instance, &instance_copy, sizeof(instance_copy));
std::memcpy(surface, &surface_copy, sizeof(surface_copy));
#else
UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan");
#endif
}

bool GRenderWindow::LoadOpenGL() {
auto context = CreateSharedContext();
auto scope = context->Acquire();
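The WindowSystemInfo filled in by GetWindowSystemInfo above carries raw native handles (on X11, display_connection is the Display* and render_surface the X11 Window id from QWindow::winId()). A minimal sketch of how a Vulkan backend could consume them to create a surface, assuming an X11 target; the function name is illustrative, not the renderer's actual code:

    #define VK_USE_PLATFORM_XLIB_KHR
    #include <vulkan/vulkan.h>

    VkSurfaceKHR CreateXlibSurface(VkInstance instance, void* display, void* window) {
        VkXlibSurfaceCreateInfoKHR ci{};
        ci.sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR;
        ci.dpy = static_cast<Display*>(display);          // WindowSystemInfo::display_connection
        ci.window = reinterpret_cast<Window>(window);     // WindowSystemInfo::render_surface
        VkSurfaceKHR surface = VK_NULL_HANDLE;
        if (vkCreateXlibSurfaceKHR(instance, &ci, nullptr, &surface) != VK_SUCCESS) {
            return VK_NULL_HANDLE; // caller decides how to report the failure
        }
        return surface;
    }

Windows and Wayland would use vkCreateWin32SurfaceKHR and vkCreateWaylandSurfaceKHR with the corresponding handles.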
@@ -22,9 +22,6 @@ class GMainWindow;
class QKeyEvent;
class QTouchEvent;
class QStringList;
#ifdef HAS_VULKAN
class QVulkanInstance;
#endif

namespace VideoCore {
enum class LoadCallbackStage;
@@ -122,8 +119,6 @@ public:
// EmuWindow implementation.
void PollEvents() override;
bool IsShown() const override;
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;
std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;

void BackupGeometry();
@@ -186,10 +181,6 @@ private:
// should instead be shared from
std::shared_ptr<Core::Frontend::GraphicsContext> main_context;

#ifdef HAS_VULKAN
std::unique_ptr<QVulkanInstance> vk_instance;
#endif

/// Temporary storage of the screenshot taken
QImage screenshot_image;
@@ -15,6 +15,10 @@
#include "ui_configure_graphics.h"
#include "yuzu/configuration/configure_graphics.h"

#ifdef HAS_VULKAN
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#endif

namespace {
enum class Resolution : int {
Auto,
@@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() {

void ConfigureGraphics::RetrieveVulkanDevices() {
#ifdef HAS_VULKAN
QVulkanInstance instance;
instance.setApiVersion(QVersionNumber(1, 1, 0));
if (!instance.create()) {
LOG_INFO(Frontend, "Vulkan 1.1 not available");
return;
}
const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))};
if (vkEnumeratePhysicalDevices == nullptr) {
LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices");
return;
}
u32 physical_device_count;
if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) !=
VK_SUCCESS) {
LOG_INFO(Frontend, "Failed to get physical devices count");
return;
}
std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count,
physical_devices.data()) != VK_SUCCESS) {
LOG_INFO(Frontend, "Failed to get physical devices");
return;
}

const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))};
if (vkGetPhysicalDeviceProperties == nullptr) {
LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties");
return;
}
for (const auto physical_device : physical_devices) {
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physical_device, &properties);
vulkan_devices.push_back(QString::fromUtf8(properties.deviceName));
vulkan_devices.clear();
for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
vulkan_devices.push_back(QString::fromStdString(name));
}
#endif
}
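The Qt-based enumeration removed above is replaced by Vulkan::RendererVulkan::EnumerateDevices(), whose implementation is not shown in this diff. A standalone sketch of the enumeration it presumably performs, using only core Vulkan calls and following the same pattern as the deleted code:

    #include <string>
    #include <vector>
    #include <vulkan/vulkan.h>

    std::vector<std::string> EnumerateDeviceNames() {
        VkApplicationInfo app_info{};
        app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
        app_info.apiVersion = VK_API_VERSION_1_1;

        VkInstanceCreateInfo instance_ci{};
        instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
        instance_ci.pApplicationInfo = &app_info;

        // Create a throwaway instance just long enough to list adapters.
        VkInstance instance = VK_NULL_HANDLE;
        if (vkCreateInstance(&instance_ci, nullptr, &instance) != VK_SUCCESS) {
            return {};
        }

        uint32_t count = 0;
        vkEnumeratePhysicalDevices(instance, &count, nullptr);
        std::vector<VkPhysicalDevice> devices(count);
        vkEnumeratePhysicalDevices(instance, &count, devices.data());

        std::vector<std::string> names;
        for (const VkPhysicalDevice device : devices) {
            VkPhysicalDeviceProperties properties;
            vkGetPhysicalDeviceProperties(device, &properties);
            names.emplace_back(properties.deviceName);
        }
        vkDestroyInstance(instance, nullptr);
        return names;
    }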
@@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick(
button->setText(tr("[press key]"));
button->setFocus();

const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
if (want_keyboard_keys) {
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
}

input_setter = new_input_setter;

device_pollers = InputCommon::Polling::GetPollers(type);

// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;

for (auto& poller : device_pollers) {
poller->Start();
}
@@ -927,7 +927,7 @@
</item>
</layout>
</item>
<item row="2" column="0">
<item row="0" column="2">
<layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout">
<item>
<layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout">
@@ -949,7 +949,7 @@
</item>
</layout>
</item>
<item row="2" column="1">
<item row="1" column="2">
<layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout">
<item>
<layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout">
@@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) {
// - Open any dialogs
// - Block in any way

constexpr std::size_t PLAYER_0_INDEX = 0;
constexpr std::size_t HANDHELD_INDEX = 8;

void HandheldOnProfileSelect() {
@@ -53,8 +54,8 @@ void HandheldOnProfileSelect() {
}

void DualJoyconsDockedOnProfileSelect() {
Settings::values.players[0].connected = true;
Settings::values.players[0].type = Settings::ControllerType::DualJoycon;
Settings::values.players[PLAYER_0_INDEX].connected = true;
Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon;

for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) {
Settings::values.players[player].connected = false;
@@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() {
Settings::values.keyboard_enabled = false;
Settings::values.mouse_enabled = false;
Settings::values.debug_pad_enabled = false;
Settings::values.touchscreen.enabled = false;
Settings::values.touchscreen.enabled = true;
}

// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure
@@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{
}},
{QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect,
[](ConfigureInputSimple* caller) {
CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false);
CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false);
}},
{QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>},
}};
@@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick(
button->setText(tr("[press key]"));
button->setFocus();

const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
if (want_keyboard_keys) {
const auto iter = std::find(button_map.begin(), button_map.end(), button);
ASSERT(iter != button_map.end());
const auto index = std::distance(button_map.begin(), iter);
ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
}

input_setter = new_input_setter;

device_pollers = InputCommon::Polling::GetPollers(type);

// Keyboard keys can only be used as button devices
want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;

for (auto& poller : device_pollers) {
poller->Start();
}
@@ -205,7 +205,13 @@ GMainWindow::GMainWindow()
ConnectMenuEvents();
ConnectWidgetEvents();

LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
const auto build_id = std::string(Common::g_build_id);
const auto fmt = std::string(Common::g_title_bar_format_idle);
const auto yuzu_build_version =
fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
std::string{}, std::string{}, std::string{}, build_id);

LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
Common::g_scm_desc);
#ifdef ARCHITECTURE_x86_64
LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
@@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() {
SDL_GL_DeleteContext(window_context);
}

void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
// Should not have been called from OpenGL
UNREACHABLE();
}

std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const {
return std::make_unique<SDLGLContext>();
}
@@ -15,10 +15,6 @@ public:

void Present() override;

/// Ignored in OpenGL
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;

std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;

private:
@@ -2,102 +2,62 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
#include <SDL.h>
#include <SDL_vulkan.h>

#include <fmt/format.h>
#include <vulkan/vulkan.h>

#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"

// Include these late to avoid polluting everything with Xlib macros
#include <SDL.h>
#include <SDL_syswm.h>

EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
: EmuWindow_SDL2{system, fullscreen} {
if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
exit(EXIT_FAILURE);
}

vkGetInstanceProcAddr =
reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());
if (vkGetInstanceProcAddr == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
exit(EXIT_FAILURE);
}

const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
Common::g_scm_branch, Common::g_scm_desc);
render_window =
SDL_CreateWindow(window_title.c_str(),
SDL_WINDOWPOS_UNDEFINED, // x position
SDL_WINDOWPOS_UNDEFINED, // y position
SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN);
SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);

const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr);

u32 extra_ext_count{};
if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) {
LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}",
SDL_GetError());
exit(1);
SDL_SysWMinfo wm;
if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) {
LOG_CRITICAL(Frontend, "Failed to get information from the window manager");
std::exit(EXIT_FAILURE);
}

auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count);
if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) {
LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError());
exit(1);
}
std::vector<const char*> enabled_extensions;
enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(),
extra_ext_names.get() + extra_ext_count);

std::vector<const char*> enabled_layers;
if (use_standard_layers) {
enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation");
}

VkApplicationInfo app_info{};
app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
app_info.apiVersion = VK_API_VERSION_1_1;
app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
app_info.pApplicationName = "yuzu-emu";
app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
app_info.pEngineName = "yuzu-emu";

VkInstanceCreateInfo instance_ci{};
instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instance_ci.pApplicationInfo = &app_info;
instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size());
instance_ci.ppEnabledExtensionNames = enabled_extensions.data();
if (Settings::values.renderer_debug) {
instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size());
instance_ci.ppEnabledLayerNames = enabled_layers.data();
}

const auto vkCreateInstance =
reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance"));
if (vkCreateInstance == nullptr ||
vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!");
exit(EXIT_FAILURE);
}

vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance"));
if (vkDestroyInstance == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
exit(EXIT_FAILURE);
}

if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) {
LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError());
exit(EXIT_FAILURE);
switch (wm.subsystem) {
#ifdef SDL_VIDEO_DRIVER_WINDOWS
case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
window_info.type = Core::Frontend::WindowSystemType::Windows;
window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window);
break;
#endif
#ifdef SDL_VIDEO_DRIVER_X11
case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
window_info.type = Core::Frontend::WindowSystemType::X11;
window_info.display_connection = wm.info.x11.display;
window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window);
break;
#endif
#ifdef SDL_VIDEO_DRIVER_WAYLAND
case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
window_info.type = Core::Frontend::WindowSystemType::Wayland;
window_info.display_connection = wm.info.wl.display;
window_info.render_surface = wm.info.wl.surface;
break;
#endif
default:
LOG_CRITICAL(Frontend, "Window manager subsystem not implemented");
std::exit(EXIT_FAILURE);
}

OnResize();
@@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
Common::g_scm_branch, Common::g_scm_desc);
}

EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() {
vkDestroyInstance(vk_instance, nullptr);
}

void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const {
const auto instance_proc_addr = vkGetInstanceProcAddr;
std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
std::memcpy(instance, &vk_instance, sizeof(vk_instance));
std::memcpy(surface, &vk_surface, sizeof(vk_surface));
}
EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default;

std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
return nullptr;
}

bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const {
if (!Settings::values.renderer_debug) {
return false;
}

const auto vkEnumerateInstanceLayerProperties =
reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>(
vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties"));
if (vkEnumerateInstanceLayerProperties == nullptr) {
LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
return false;
}

u32 available_layers_count{};
if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
return false;
}
std::vector<VkLayerProperties> layers(available_layers_count);
if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) {
LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
return false;
}

return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) {
return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
}) != layers.end();
}

void EmuWindow_SDL2_VK::Present() {
// TODO (bunnei): ImplementMe
}
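For contrast with the deleted RetrieveVulkanHandlers above: the old interface smuggled typed Vulkan handles through untyped pointers, which both sides had to interpret identically. A compressed illustration of that pattern and the shape of its typed replacement, with illustrative names only:

    #include <cstring>

    // Old pattern: the caller passes a void* slot, the window memcpys the
    // handle value into it, and any type mismatch is silent.
    void RetrieveHandle(void* out_slot, const void* handle) {
        std::memcpy(out_slot, &handle, sizeof(handle));
    }

    // New pattern: hand the renderer a plain struct of native window handles
    // and let it create and own its VkInstance/VkSurfaceKHR itself.
    struct NativeWindowHandles {
        void* display_connection = nullptr; // Display*, wl_display*, ...
        void* render_surface = nullptr;     // X11 Window, wl_surface*, HWND, ...
    };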
@@ -4,27 +4,21 @@

#pragma once

#include <vulkan/vulkan.h>
#include <memory>

#include "core/frontend/emu_window.h"
#include "yuzu_cmd/emu_window/emu_window_sdl2.h"

namespace Core {
class System;
}

class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
public:
explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen);
~EmuWindow_SDL2_VK();

void Present() override;
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;

std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;

private:
bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const;

VkInstance vk_instance{};
VkSurfaceKHR vk_surface{};

PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
PFN_vkDestroyInstance vkDestroyInstance{};
};
@@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const {
return false;
}

void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const {
UNREACHABLE();
}

class SDLGLContext : public Core::Frontend::GraphicsContext {
public:
explicit SDLGLContext() {
@@ -19,10 +19,6 @@ public:
/// Whether the screen is being shown or not.
bool IsShown() const override;

/// Retrieves Vulkan specific handlers from the window
void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const override;

std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;

private: