Compare commits

..

24 Commits

Author SHA1 Message Date
Fernando Sahmkow
1c13c74295 Memory manager: Fix possible softlock 2023-05-04 00:15:21 +02:00
bunnei
a661c547d8 Merge pull request #10088 from FernandoS27/100-gelato-flavor-test-builds-later
Y.F.C Implement Asynchronous Fence manager and Rework Query async downloads
2023-05-03 15:10:22 -07:00
bunnei
737e1ca101 Merge pull request #10117 from liamwhite/sync-register
kernel: match calls to Register and Unregister
2023-05-03 09:07:19 -07:00
liamwhite
ffeb5cdd8d Merge pull request #10151 from GPUCode/no-softlocks-please
Fix softlocks when disabling async present
2023-05-03 10:54:24 -04:00
Morph
3ba95402fd Merge pull request #10146 from liamwhite/catch3
catch2: update to 3.3.1
2023-05-03 10:53:12 -04:00
Morph
8dd3baa562 Merge pull request #10144 from liamwhite/dont-turbo
vulkan: disable turbo when debugging tool is attached
2023-05-03 10:53:03 -04:00
Morph
daf7936095 Merge pull request #10143 from liamwhite/fruit-company-moment
video_core: fix build on Apple Clang
2023-05-03 10:52:56 -04:00
Morph
627022bef9 Merge pull request #10124 from liamwhite/pebkac
settings: rename extended memory layout to unsafe, move from general to system
2023-05-03 10:52:45 -04:00
GPUCode
f3fcc15ad5 vk_present_manager: Fix softlocks when disabling async present 2023-05-03 07:50:10 +03:00
bunnei
8f43b05d6b Merge pull request #9973 from GPUCode/async-present
Implement asynchronous presentation
2023-05-02 17:54:57 -07:00
Liam
4df49631de vulkan: disable turbo when debugging tool is attached 2023-05-02 18:14:57 -04:00
Liam
e1c74cea10 video_core: fix build on Apple Clang 2023-05-02 18:05:30 -04:00
GPUCode
f403d27941 vk_present_manager: Add toggle for async presentation 2023-05-01 23:13:24 +03:00
GPUCode
1d7abac84b vk_blit_screen: Recreate FSR when frame is recreated
* Depends on the layout dimensions and thus should be recreated as well
2023-05-01 23:13:24 +03:00
GPUCode
50791cb974 renderer_vulkan: Fix crashing when updating descriptors
* During pipeline configure the function would acquire some payload space from the descriptor update queue,
  write the descriptor data on the GPU thread and give the scheduler a pointer to the beginning of said space to update it later.
  TickFrame resets the payload cursor, used to track acquires, back to the beginning of the buffer.
  This wasn't a problem before since WaitWorker was called at the end of the frame but now it is.
  If a frame writes to a cursor before the scheduler catches up, it will crash

* To fix this the payload buffer has been increased to account for the in flight frames that are allowed to exist now.
  TickFrame will switch between the payload spaces instead of resetting
2023-05-01 23:13:24 +03:00
GPUCode
2ad9acf795 renderer_vulkan: Async presentation 2023-05-01 23:13:24 +03:00
Liam
2cd9e1ecb6 settings: rename extended memory layout to unsafe, move from general to system 2023-04-30 14:24:22 -04:00
Liam
1b5c87ab6a kernel: match calls to Register and Unregister 2023-04-29 21:52:26 -04:00
Fernando Sahmkow
2f15876524 QueryCache: Fix write invalidation. 2023-04-28 23:53:46 +02:00
Fernando Sahmkow
9a7c172f76 MemoryManager: Fix race conditions. 2023-04-28 23:53:02 +02:00
Fernando Sahmkow
e4dc73f61e Clang format and address feedback 2023-04-24 12:38:47 +02:00
Fernando Sahmkow
e29ced29fa QueryCache: rework async downloads. 2023-04-23 22:04:14 +02:00
Fernando Sahmkow
7e76c1642c Accuracy Normal: reduce accuracy further for perf improvements in Project Lime 2023-04-23 22:03:44 +02:00
Fernando Sahmkow
fca72beb2d Fence Manager: implement async fence management in a separate thread. 2023-04-23 04:48:50 +02:00
55 changed files with 1156 additions and 347 deletions

View File

@@ -45,6 +45,7 @@ void LogSettings() {
log_setting("System_LanguageIndex", values.language_index.GetValue());
log_setting("System_RegionIndex", values.region_index.GetValue());
log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue());
log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
@@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Core
values.use_multi_core.SetGlobal(true);
values.use_extended_memory_layout.SetGlobal(true);
values.use_unsafe_extended_memory_layout.SetGlobal(true);
// CPU
values.cpu_accuracy.SetGlobal(true);
@@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.async_presentation.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.fullscreen_mode.SetGlobal(true);

View File

@@ -388,7 +388,8 @@ struct Values {
// Core
SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"};
SwitchableSetting<bool> use_unsafe_extended_memory_layout{false,
"use_unsafe_extended_memory_layout"};
// Cpu
SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
@@ -422,6 +423,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> async_presentation{false, "async_presentation"};
SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};

View File

@@ -137,7 +137,7 @@ struct System::Impl {
device_memory = std::make_unique<Core::DeviceMemory>();
is_multicore = Settings::values.use_multi_core.GetValue();
extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
core_timing.SetMulticore(is_multicore);
core_timing.Initialize([&system]() { system.RegisterHostThread(); });
@@ -169,7 +169,7 @@ struct System::Impl {
void ReinitializeIfNecessary(System& system) {
const bool must_reinitialize =
is_multicore != Settings::values.use_multi_core.GetValue() ||
extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue();
if (!must_reinitialize) {
return;
@@ -178,7 +178,7 @@ struct System::Impl {
LOG_DEBUG(Kernel, "Re-initializing");
is_multicore = Settings::values.use_multi_core.GetValue();
extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
Initialize(system);
}
@@ -293,6 +293,7 @@ struct System::Impl {
ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
Kernel::KProcess::ProcessType::Userland, resource_limit)
.IsSuccess());
Kernel::KProcess::Register(system.Kernel(), main_process);
kernel.MakeApplicationProcess(main_process);
const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
if (load_result != Loader::ResultStatus::Success) {

View File

@@ -35,12 +35,13 @@ namespace {
using namespace Common::Literals;
u32 GetMemorySizeForInit() {
return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB;
return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB
: Smc::MemorySize_4GB;
}
Smc::MemoryArrangement GetMemoryArrangeForInit() {
return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB
: Smc::MemoryArrangement_4GB;
return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB
: Smc::MemoryArrangement_4GB;
}
} // namespace

View File

@@ -182,8 +182,8 @@ public:
explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}
static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
const u64 lid = lhs.GetId();
const u64 rid = rhs.GetId();
const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));
if (lid < rid) {
return -1;

View File

@@ -95,7 +95,7 @@ struct KernelCore::Impl {
pt_heap_region.GetSize());
}
InitializeHackSharedMemory();
InitializeHackSharedMemory(kernel);
RegisterHostThread(nullptr);
}
@@ -216,10 +216,12 @@ struct KernelCore::Impl {
auto* main_thread{Kernel::KThread::Create(system.Kernel())};
main_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
KThread::Register(system.Kernel(), main_thread);
auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
idle_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
KThread::Register(system.Kernel(), idle_thread);
schedulers[i]->Initialize(main_thread, idle_thread, core);
}
@@ -230,6 +232,7 @@ struct KernelCore::Impl {
const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
system_resource_limit->Initialize(&core_timing);
KResourceLimit::Register(kernel, system_resource_limit);
const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
const auto total_size{sizes.first};
@@ -355,6 +358,7 @@ struct KernelCore::Impl {
ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
core_id)
.IsSuccess());
KThread::Register(system.Kernel(), shutdown_threads[core_id]);
}
}
@@ -729,7 +733,7 @@ struct KernelCore::Impl {
memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
}
void InitializeHackSharedMemory() {
void InitializeHackSharedMemory(KernelCore& kernel) {
// Setup memory regions for emulated processes
// TODO(bunnei): These should not be hardcoded regions initialized within the kernel
constexpr std::size_t hid_size{0x40000};
@@ -746,14 +750,23 @@ struct KernelCore::Impl {
hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hid_size);
KSharedMemory::Register(kernel, hid_shared_mem);
font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, font_size);
KSharedMemory::Register(kernel, font_shared_mem);
irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, irs_size);
KSharedMemory::Register(kernel, irs_shared_mem);
time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, time_size);
KSharedMemory::Register(kernel, time_shared_mem);
hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hidbus_size);
KSharedMemory::Register(kernel, hidbus_shared_mem);
}
std::mutex registered_objects_lock;
@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
// Commit the thread reservation.
thread_reservation.Commit();
// Register the thread.
KThread::Register(kernel, thread);
return std::jthread(
[&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
// Set the thread name.
Common::SetCurrentThreadName(thread_name.c_str());
// Register the thread.
// Set the thread as current.
kernel.RegisterHostThread(thread);
// Run the callback.
@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });
// Register the new process.
KProcess::Register(*this, process);
// Run the host thread.
return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
}
@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });
// Register the new process.
KProcess::Register(*this, process);
// Reserve a new thread from the process resource limit.
KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
ASSERT(thread_reservation.Succeeded());
@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Commit the thread reservation.
thread_reservation.Commit();
// Register the new thread.
KThread::Register(*this, thread);
// Begin running the thread.
ASSERT(R_SUCCEEDED(thread->Run()));
}

View File

@@ -156,6 +156,7 @@ public:
auto* session = Kernel::KSession::Create(kernel);
session->Initialize(nullptr, 0);
Kernel::KSession::Register(kernel, session);
auto next_manager = std::make_shared<Service::SessionRequestManager>(
kernel, manager->GetServerManager());

View File

@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
Kernel::KProcess::ProcessType::KernelInternal,
kernel.GetSystemResourceLimit())
.IsSuccess());
// Register the process.
Kernel::KProcess::Register(kernel, process);
process_created = true;
}

View File

@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);
// Register the event.
Kernel::KEvent::Register(system.Kernel(), m_event);
ASSERT(R_SUCCEEDED(m_event->Signal()));
}

View File

@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
// Initialize event.
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);
// Register event.
Kernel::KEvent::Register(system.Kernel(), m_event);
}
ServerManager::~ServerManager() {
@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
// Initialize the event.
m_deferral_event->Initialize(nullptr);
// Register the event.
Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);
// Set the output.
*out_event = m_deferral_event;

View File

@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
auto* port = Kernel::KPort::Create(kernel);
port->Initialize(ServerSessionCountMax, false, 0);
// Register the port.
Kernel::KPort::Register(kernel, port);
service_ports.emplace(name, port);
registered_services.emplace(name, handler);
if (deferral_event) {

View File

@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
// Commit the session reservation.
session_reservation.Commit();
// Register the session.
Kernel::KSession::Register(system.Kernel(), session);
// Register with server manager.
session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
session_manager);

View File

@@ -462,7 +462,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}

View File

@@ -179,6 +179,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_present_manager.cpp
renderer_vulkan/vk_present_manager.h
renderer_vulkan/vk_query_cache.cpp
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp

View File

@@ -1426,7 +1426,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
.size = sub_size,
});
total_size_bytes += sub_size;
largest_copy = std::max(largest_copy, sub_size);
largest_copy = std::max<u64>(largest_copy, sub_size);
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);

View File

@@ -170,7 +170,8 @@ private:
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
while (remaining_size > 0) {
const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
if (manager) {
if constexpr (BOOL_BREAK) {
@@ -206,7 +207,8 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
while (remaining_size > 0) {
const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
const std::size_t copy_amount{
std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
auto* manager{top_tier[page_index]};
const auto execute = [&] {
auto [new_begin, new_end] = func(manager, page_offset, copy_amount);

View File

@@ -4,13 +4,20 @@
#pragma once
#include <algorithm>
#include <condition_variable>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <queue>
#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
@@ -23,15 +30,26 @@ class FenceBase {
public:
explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
/// Returns the is_stubbed flag this fence was constructed with.
bool IsStubbed() const {
return is_stubbed;
}
protected:
bool is_stubbed;
};
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
template <typename Traits>
class FenceManager {
using TFence = typename Traits::FenceType;
using TTextureCache = typename Traits::TextureCacheType;
using TBufferCache = typename Traits::BufferCacheType;
using TQueryCache = typename Traits::QueryCacheType;
static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
public:
/// Notify the fence manager about a new frame
void TickFrame() {
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Tick();
}
@@ -46,17 +64,33 @@ public:
}
void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences();
rasterizer.InvalidateGPUCache();
bool delay_fence = Settings::IsGPULevelHigh();
if constexpr (!can_async_check) {
TryReleasePendingFences<false>();
}
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
uncommitted_operations.emplace_back(std::move(func));
CommitOperations();
TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence);
if constexpr (can_async_check) {
guard.lock();
}
if (delay_fence) {
uncommitted_operations.emplace_back(std::move(func));
}
pending_operations.emplace_back(std::move(uncommitted_operations));
QueueFence(new_fence);
if (!delay_fence) {
func();
}
fences.push(std::move(new_fence));
if (should_flush) {
rasterizer.FlushCommands();
}
if constexpr (can_async_check) {
guard.unlock();
cv.notify_all();
}
}
void SignalSyncPoint(u32 value) {
@@ -66,29 +100,30 @@ public:
}
void WaitPendingFences() {
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
operation();
}
PopFence();
if constexpr (!can_async_check) {
TryReleasePendingFences<true>();
}
}
protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
TQueryCache& query_cache_)
: rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
if constexpr (can_async_check) {
fence_thread =
std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
}
}
virtual ~FenceManager() = default;
virtual ~FenceManager() {
if constexpr (can_async_check) {
fence_thread.request_stop();
cv.notify_all();
fence_thread.join();
}
}
/// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
@@ -104,15 +139,20 @@ protected:
Tegra::GPU& gpu;
Tegra::Host1x::SyncpointManager& syncpoint_manager;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
template <bool force_wait>
void TryReleasePendingFences() {
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
return;
if constexpr (force_wait) {
WaitFence(current_fence);
} else {
return;
}
}
PopAsyncFlushes();
auto operations = std::move(pending_operations.front());
@@ -120,7 +160,49 @@ private:
for (auto& operation : operations) {
operation();
}
PopFence();
{
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
fences.pop();
}
}
/// Body of the fence thread: repeatedly takes the oldest queued fence together with its
/// pending operations, waits for the fence (unless it is stubbed), pops async flushes,
/// runs the operations and finally hands the fence to the delayed destruction ring.
/// @param stop_token Cooperative stop signal; once a stop is requested the function returns
///                   without processing fences that are still queued.
void ReleaseThreadFunc(std::stop_token stop_token) {
std::string name = "GPUFencingThread";
MicroProfileOnThreadCreate(name.c_str());
// Cleanup
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
Common::SetCurrentThreadName(name.c_str());
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
TFence current_fence;
std::deque<std::function<void()>> current_operations;
while (!stop_token.stop_requested()) {
{
// `guard` protects `fences` and `pending_operations` while an entry is dequeued.
std::unique_lock lock(guard);
// Sleep until there is a fence to process or a stop has been requested.
cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
if (stop_token.stop_requested()) [[unlikely]] {
return;
}
// Move the front entries out so the work below runs without holding `guard`.
current_fence = std::move(fences.front());
current_operations = std::move(pending_operations.front());
fences.pop();
pending_operations.pop_front();
}
// Stubbed fences are not waited on.
if (!current_fence->IsStubbed()) {
WaitFence(current_fence);
}
PopAsyncFlushes();
for (auto& operation : current_operations) {
operation();
}
{
// `ring_guard` is also taken by TickFrame() when it ticks the ring.
std::unique_lock lock(ring_guard);
delayed_destruction_ring.Push(std::move(current_fence));
}
}
}
@@ -154,19 +236,16 @@ private:
query_cache.CommitAsyncFlushes();
}
void PopFence() {
delayed_destruction_ring.Push(std::move(fences.front()));
fences.pop();
}
void CommitOperations() {
pending_operations.emplace_back(std::move(uncommitted_operations));
}
std::queue<TFence> fences;
std::deque<std::function<void()>> uncommitted_operations;
std::deque<std::deque<std::function<void()>>> pending_operations;
std::mutex guard;
std::mutex ring_guard;
std::condition_variable cv;
std::jthread fence_thread;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};

View File

@@ -82,6 +82,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
}
PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
std::unique_lock<std::mutex> lock(guard);
return kind_map.GetValueAt(gpu_addr);
}
@@ -160,7 +161,10 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
}
remaining_size -= big_page_size;
}
kind_map.Map(gpu_addr, gpu_addr + size, kind);
{
std::unique_lock<std::mutex> lock(guard);
kind_map.Map(gpu_addr, gpu_addr + size, kind);
}
return gpu_addr;
}
@@ -553,6 +557,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
}
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
std::unique_lock<std::mutex> lock(guard);
return kind_map.GetContinuousSizeFrom(gpu_addr);
}
@@ -745,10 +750,10 @@ void MemoryManager::FlushCaching() {
return;
}
accumulator->Callback([this](GPUVAddr addr, size_t size) {
GetSubmappedRangeImpl<false>(addr, size, page_stash);
GetSubmappedRangeImpl<false>(addr, size, page_stash2);
});
rasterizer->InnerInvalidation(page_stash);
page_stash.clear();
rasterizer->InnerInvalidation(page_stash2);
page_stash2.clear();
accumulator->Clear();
}

View File

@@ -5,6 +5,7 @@
#include <atomic>
#include <map>
#include <mutex>
#include <optional>
#include <vector>
@@ -215,6 +216,9 @@ private:
std::vector<u64> big_page_continuous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
mutable std::mutex guard;
static constexpr size_t continuous_bits = 64;

View File

@@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <iterator>
#include <list>
#include <memory>
@@ -17,13 +18,19 @@
#include "common/assert.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/texture_cache/slot_vector.h"
namespace VideoCommon {
using AsyncJobId = SlotId;
static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
@@ -93,9 +100,13 @@ private:
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
: rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_)
: rasterizer{rasterizer_},
cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {
(void)slot_async_jobs.insert(); // Null value
}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@ public:
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushQuery(*cpu_addr);
auto result = query->BindCounter(Stream(type).Current(), timestamp);
if (result) {
auto async_job_id = query->GetAsyncJob();
auto& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = *result;
query->SetAsyncJob(NULL_ASYNC_JOB_ID);
}
AsyncFlushQuery(query, timestamp, lock);
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@ public:
}
void CommitAsyncFlushes() {
std::unique_lock lock{mutex};
committed_flushes.push_back(uncommitted_flushes);
uncommitted_flushes.reset();
}
bool HasUncommittedFlushes() const {
std::unique_lock lock{mutex};
return uncommitted_flushes != nullptr;
}
bool ShouldWaitAsyncFlushes() const {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return false;
}
@@ -189,6 +208,7 @@ public:
}
void PopAsyncFlushes() {
std::unique_lock lock{mutex};
if (committed_flushes.empty()) {
return;
}
@@ -197,15 +217,25 @@ public:
committed_flushes.pop_front();
return;
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
for (AsyncJobId async_job_id : *flush_list) {
AsyncJob& async_job = slot_async_jobs[async_job_id];
if (!async_job.collected) {
FlushAndRemoveRegion(async_job.query_location, 2, true);
}
}
committed_flushes.pop_front();
}
private:
/// Bookkeeping for one asynchronous query flush, stored in slot_async_jobs and addressed
/// by AsyncJobId.
struct AsyncJob {
// Set to true once `value` has been filled in; PopAsyncFlushes only flushes jobs
// that are not yet collected.
bool collected = false;
// Query result recorded when the job is collected.
u64 value = 0;
// Address taken from the query's GetCpuAddr() when the job is created.
VAddr query_location = 0;
// NOTE(review): never assigned in the visible hunks; the deferred write-back uses the
// timestamp captured by the lambda instead — confirm whether this field is needed.
std::optional<u64> timestamp{};
};
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
const u64 addr_begin = addr;
const u64 addr_end = addr_begin + size;
const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@ private:
continue;
}
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush();
AsyncJobId async_job_id = query.GetAsyncJob();
auto flush_result = query.Flush(async);
if (async_job_id == NULL_ASYNC_JOB_ID) {
ASSERT_MSG(false, "This should not be reachable at all");
continue;
}
AsyncJob& async_job = slot_async_jobs[async_job_id];
async_job.collected = true;
async_job.value = flush_result;
query.SetAsyncJob(NULL_ASYNC_JOB_ID);
}
std::erase_if(contents, in_range);
}
@@ -253,26 +292,60 @@ private:
return found != std::end(contents) ? &*found : nullptr;
}
void AsyncFlushQuery(VAddr addr) {
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
std::unique_lock<std::recursive_mutex>& lock) {
const AsyncJobId new_async_job_id = slot_async_jobs.insert();
{
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
query->SetAsyncJob(new_async_job_id);
async_job.query_location = query->GetCpuAddr();
async_job.collected = false;
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
}
uncommitted_flushes->push_back(new_async_job_id);
}
uncommitted_flushes->push_back(addr);
lock.unlock();
std::function<void()> operation([this, new_async_job_id, timestamp] {
std::unique_lock local_lock{mutex};
AsyncJob& async_job = slot_async_jobs[new_async_job_id];
u64 value = async_job.value;
VAddr address = async_job.query_location;
slot_async_jobs.erase(new_async_job_id);
local_lock.unlock();
if (timestamp) {
u64 timestamp_value = *timestamp;
cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
VideoCommon::CacheType::NoQueryCache);
} else {
u32 small_value = static_cast<u32>(value);
cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
rasterizer.InvalidateRegion(address, sizeof(u32),
VideoCommon::CacheType::NoQueryCache);
}
});
rasterizer.SyncOperation(std::move(operation));
}
static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
SlotVector<AsyncJob> slot_async_jobs;
std::recursive_mutex mutex;
VideoCore::RasterizerInterface& rasterizer;
Core::Memory::Memory& cpu_memory;
mutable std::recursive_mutex mutex;
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
};
template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@ public:
virtual ~HostCounterBase() = default;
/// Returns the current value of the query.
u64 Query() {
u64 Query(bool async = false) {
if (result) {
return *result;
}
u64 value = BlockingQuery() + base_result;
u64 value = BlockingQuery(async) + base_result;
if (dependency) {
value += dependency->Query();
dependency = nullptr;
@@ -317,7 +390,7 @@ public:
protected:
/// Returns the value of query from the backend API blocking as needed.
virtual u64 BlockingQuery() const = 0;
virtual u64 BlockingQuery(bool async = false) const = 0;
private:
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@ public:
CachedQueryBase& operator=(const CachedQueryBase&) = delete;
/// Flushes the query to guest memory.
virtual void Flush() {
virtual u64 Flush(bool async = false) {
// When counter is nullptr it means that it's just been reset. We are supposed to write a
// zero in these cases.
const u64 value = counter ? counter->Query() : 0;
const u64 value = counter ? counter->Query(async) : 0;
if (async) {
return value;
}
std::memcpy(host_ptr, &value, sizeof(u64));
if (timestamp) {
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
}
return value;
}
/// Binds a counter to this query.
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
std::optional<u64> timestamp_) {
std::optional<u64> result{};
if (counter) {
// If there's an old counter set it means the query is being rewritten by the game.
// To avoid losing the data forever, flush here.
Flush();
result = std::make_optional(Flush());
}
counter = std::move(counter_);
timestamp = timestamp_;
return result;
}
VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@ public:
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
}
void SetAsyncJob(AsyncJobId assigned_async_job_) {
assigned_async_job = assigned_async_job_;
}
AsyncJobId GetAsyncJob() const {
return assigned_async_job;
}
protected:
/// Returns true when querying the counter may potentially block.
bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@ private:
u8* host_ptr; ///< Writable host pointer.
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
AsyncJobId assigned_async_job;
};
} // namespace VideoCommon

View File

@@ -30,7 +30,17 @@ private:
};
using Fence = std::shared_ptr<GLInnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = false;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManagerOpenGL final : public GenericFenceManager {
public:

View File

@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
: QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
glEndQuery(GetTarget(type));
}
u64 HostCounter::BlockingQuery() const {
u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
GLint64 value;
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
return *this;
}
void CachedQuery::Flush() {
u64 CachedQuery::Flush([[maybe_unused]] bool async) {
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
// To avoid this disable and re-enable keeping the dependency stream.
// But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
stream.Update(false);
}
VideoCommon::CachedQueryBase<HostCounter>::Flush();
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
if (slice_counter) {
stream.Update(true);
}
return result;
}
} // namespace OpenGL

View File

@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(RasterizerOpenGL& rasterizer_);
explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery() const override;
u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
u64 Flush(bool async = false) override;
private:
QueryCache* cache;

View File

@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache, texture_cache),
query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}

View File

@@ -93,8 +93,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
state_tracker(), scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
present_manager(render_window, device, memory_allocator, scheduler, swapchain),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
scheduler, screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
return;
}
// Update screen info if the framebuffer size has changed.
if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) {
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
}
screen_info.width = framebuffer->width;
screen_info.height = framebuffer->height;
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false;
const auto recreate_swapchain = [&](u32 width, u32 height) {
if (!has_been_recreated) {
has_been_recreated = true;
scheduler.Finish();
}
swapchain.Create(width, height, is_srgb);
};
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
swapchain.GetHeight() != layout.height) {
recreate_swapchain(layout.width, layout.height);
}
bool is_outdated;
do {
swapchain.AcquireNextImage();
is_outdated = swapchain.IsOutDated();
if (is_outdated) {
recreate_swapchain(layout.width, layout.height);
}
} while (is_outdated);
if (has_been_recreated) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);
Frame* frame = present_manager.GetRenderFrame();
blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
scheduler.Flush(*frame->render_ready);
present_manager.Present(frame);
gpu.RendererFrameEndNotify();
rasterizer.TickFrame();
@@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
@@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,

View File

@@ -9,6 +9,7 @@
#include "common/dynamic_library.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -76,6 +77,7 @@ private:
StateTracker state_tracker;
Scheduler scheduler;
Swapchain swapchain;
PresentManager present_manager;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;

View File

@@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
const Device& device_, MemoryAllocator& memory_allocator_,
Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_)
Swapchain& swapchain_, PresentManager& present_manager_,
Scheduler& scheduler_, const ScreenInfo& screen_info_)
: cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_},
image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
resource_ticks.resize(image_count);
CreateStaticResources();
@@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
BlitScreen::~BlitScreen() = default;
void BlitScreen::Recreate() {
present_manager.WaitPresent();
scheduler.Finish();
device.GetLogical().WaitIdle();
CreateDynamicResources();
}
VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated) {
void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
VkExtent2D render_area, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
image_count = swapchain_images;
Recreate();
}
const std::size_t image_index = swapchain.GetImageIndex();
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
@@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
const u64 image_offset = GetRawImageOffset(framebuffer);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
.depth = 1,
},
};
scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[image_index];
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
const VkImage image = *raw_images[index];
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
UpdateAADescriptorSet(image_index, source_image_view, false);
UpdateAADescriptorSet(source_image_view, false);
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,
.height = (up_scale * framebuffer.height) >> down_shift,
};
scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) {
scheduler.Record([this, index = image_index, size,
anti_alias_pass](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier base_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
aa_descriptor_sets[image_index], {});
aa_descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
@@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
};
VkImageView fsr_image_view =
fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
UpdateDescriptorSet(image_index, fsr_image_view, true);
UpdateDescriptorSet(fsr_image_view, true);
} else {
const bool is_nn =
Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
UpdateDescriptorSet(image_index, source_image_view, is_nn);
UpdateDescriptorSet(source_image_view, is_nn);
}
scheduler.Record(
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
scheduler.Record([this, host_framebuffer, index = image_index,
size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
const VkClearValue clear_color{
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
};
const VkRenderPassBeginInfo renderpass_bi{
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.pNext = nullptr,
.renderPass = *renderpass,
.framebuffer = host_framebuffer,
.renderArea =
{
.offset = {0, 0},
.extent = size,
},
.clearValueCount = 1,
.pClearValues = &clear_color,
};
const VkViewport viewport{
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(size.width),
.height = static_cast<float>(size.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const VkRect2D scissor{
.offset = {0, 0},
.extent = size,
};
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
auto graphics_pipeline = [this]() {
switch (Settings::values.scaling_filter.GetValue()) {
case Settings::ScalingFilter::NearestNeighbor:
case Settings::ScalingFilter::Bilinear:
return *bilinear_pipeline;
case Settings::ScalingFilter::Bicubic:
return *bicubic_pipeline;
case Settings::ScalingFilter::Gaussian:
return *gaussian_pipeline;
case Settings::ScalingFilter::ScaleForce:
return *scaleforce_pipeline;
default:
return *bilinear_pipeline;
}
}();
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
return *semaphores[image_index];
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[index], {});
cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass();
});
}
VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb) {
// Recreate dynamic resources if the image count or colorspace changed
if (const std::size_t swapchain_images = swapchain.GetImageCount();
swapchain_images != image_count || current_srgb != is_srgb) {
current_srgb = is_srgb;
image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
image_count = swapchain_images;
Recreate();
}
// Recreate the presentation frame if the dimensions of the window changed
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
if (layout.width != frame->width || layout.height != frame->height ||
is_srgb != frame->is_srgb) {
Recreate();
present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
image_view_format, *renderpass);
}
const VkExtent2D render_area{frame->width, frame->height};
Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
if (++image_index >= image_count) {
image_index = 0;
}
}
vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() {
}
void BlitScreen::CreateDynamicResources() {
CreateSemaphores();
CreateDescriptorPool();
CreateDescriptorSetLayout();
CreateDescriptorSets();
CreatePipelineLayout();
CreateRenderPass();
CreateFramebuffers();
CreateGraphicsPipeline();
fsr.reset();
smaa.reset();
@@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() {
}
}
void BlitScreen::CreateSemaphores() {
semaphores.resize(image_count);
std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
}
void BlitScreen::CreateDescriptorPool() {
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
{
@@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() {
}
void BlitScreen::CreateRenderPass() {
renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat());
renderpass = CreateRenderPassImpl(image_view_format);
}
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) {
vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = format,
@@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
const VkAttachmentReference color_attachment_ref{
@@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() {
nn_sampler = device.GetLogical().CreateSampler(ci_nn);
}
void BlitScreen::CreateFramebuffers() {
const VkExtent2D size{swapchain.GetSize()};
framebuffers.resize(image_count);
for (std::size_t i = 0; i < image_count; ++i) {
const VkImageView image_view{swapchain.GetImageViewIndex(i)};
framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
}
}
void BlitScreen::ReleaseRawImages() {
for (const u64 tick : resource_ticks) {
scheduler.Wait(tick);
@@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
return;
}
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false);
aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
}
void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorImageInfo image_info{
.sampler = nn ? *nn_sampler : *sampler,
.imageView = image_view,
@@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
}
void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view,
bool nn) const {
void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = offsetof(BufferData, uniform),
@@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
}
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const {
u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
}

View File

@@ -5,6 +5,7 @@
#include <memory>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -42,6 +43,9 @@ class RasterizerVulkan;
class Scheduler;
class SMAA;
class Swapchain;
class PresentManager;
struct Frame;
struct ScreenInfo {
VkImage image{};
@@ -55,18 +59,17 @@ class BlitScreen {
public:
explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
Scheduler& scheduler, const ScreenInfo& screen_info);
PresentManager& present_manager, Scheduler& scheduler,
const ScreenInfo& screen_info);
~BlitScreen();
void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated);
void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated, bool is_srgb);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
@@ -79,10 +82,9 @@ private:
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true);
vk::RenderPass CreateRenderPassImpl(VkFormat format);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
@@ -90,15 +92,14 @@ private:
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const;
void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
@@ -107,16 +108,17 @@ private:
void CreateFSR();
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Swapchain& swapchain;
PresentManager& present_manager;
Scheduler& scheduler;
std::size_t image_count;
std::size_t image_index{};
const ScreenInfo& screen_info;
vk::ShaderModule vertex_shader;
@@ -135,7 +137,6 @@ private:
vk::Pipeline gaussian_pipeline;
vk::Pipeline scaleforce_pipeline;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
vk::DescriptorSets descriptor_sets;
vk::Sampler nn_sampler;
vk::Sampler sampler;
@@ -145,7 +146,6 @@ private:
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
@@ -164,6 +164,8 @@ private:
u32 raw_width = 0;
u32 raw_height = 0;
Service::android::PixelFormat pixel_format{};
bool current_srgb;
VkFormat image_view_format;
std::unique_ptr<FSR> fsr;
std::unique_ptr<SMAA> smaa;

View File

@@ -5,6 +5,7 @@
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_fence_manager.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"

View File

@@ -40,7 +40,16 @@ private:
};
using Fence = std::shared_ptr<InnerFence>;
using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
struct FenceManagerParams {
using FenceType = Fence;
using BufferCacheType = BufferCache;
using TextureCacheType = TextureCache;
using QueryCacheType = QueryCache;
static constexpr bool HAS_ASYNC_CHECK = true;
};
using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
class FenceManager final : public GenericFenceManager {
public:

View File

@@ -0,0 +1,457 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_present_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
namespace {
/// Reports whether images of `format` may be used as a blit destination when optimally tiled.
/// @param physical_device Physical device whose format properties are queried.
/// @param format Format to inspect.
/// @return true when VK_FORMAT_FEATURE_BLIT_DST_BIT is present in the optimal tiling features.
bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
    const auto optimal_features = physical_device.GetFormatProperties(format).optimalTilingFeatures;
    return (optimal_features & VK_FORMAT_FEATURE_BLIT_DST_BIT) != 0;
}
/// Builds the subresource description shared by every copy/blit in this file:
/// the color aspect of mip level 0, array layer 0, one layer.
[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
    VkImageSubresourceLayers layers{};
    layers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    layers.mipLevel = 0;
    layers.baseArrayLayer = 0;
    layers.layerCount = 1;
    return layers;
}
/// Describes a blit of the whole frame image onto the whole swapchain image.
/// The source region spans (0,0,0)-(frame_width, frame_height, 1) and the destination region
/// spans (0,0,0)-(swapchain_width, swapchain_height, 1), so differing sizes result in scaling.
/// @param frame_width Width of the source frame in pixels.
/// @param frame_height Height of the source frame in pixels.
/// @param swapchain_width Width of the destination swapchain image in pixels.
/// @param swapchain_height Height of the destination swapchain image in pixels.
/// @return The filled VkImageBlit region.
[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
                                        s32 swapchain_height) {
    // Both regions start at the origin; only the far corner differs.
    const VkOffset3D origin{.x = 0, .y = 0, .z = 0};
    const VkOffset3D frame_corner{.x = frame_width, .y = frame_height, .z = 1};
    const VkOffset3D swapchain_corner{.x = swapchain_width, .y = swapchain_height, .z = 1};

    return VkImageBlit{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffsets = {origin, frame_corner},
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffsets = {origin, swapchain_corner},
    };
}
/// Describes an unscaled copy from the frame image to the swapchain image.
/// Both offsets are the origin and the extent is clamped to the smaller of the two images in
/// each dimension, so the copy never reads or writes outside either image.
/// @param frame_width Width of the source frame in pixels.
/// @param frame_height Height of the source frame in pixels.
/// @param swapchain_width Width of the destination swapchain image in pixels.
/// @param swapchain_height Height of the destination swapchain image in pixels.
/// @return The filled VkImageCopy region.
[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
                                        u32 swapchain_height) {
    const u32 copy_width = std::min(frame_width, swapchain_width);
    const u32 copy_height = std::min(frame_height, swapchain_height);

    return VkImageCopy{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffset = {.x = 0, .y = 0, .z = 0},
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffset = {.x = 0, .y = 0, .z = 0},
        .extent = {.width = copy_width, .height = copy_height, .depth = 1},
    };
}
} // Anonymous namespace
/// Sets up the presentation frames and, when enabled, the presentation thread.
///
/// One Frame is created per swapchain image. Each frame receives a command buffer from a
/// dedicated command pool on the graphics queue family, a `render_ready` semaphore and a
/// `present_done` fence, and is placed in the free queue. Whether a presentation thread is used
/// is read once from Settings::values.async_presentation at construction.
///
/// @param render_window_ Window the frames are presented to.
/// @param device_ Vulkan device used to create the per-frame objects.
/// @param memory_allocator_ Allocator stored for later frame image allocations.
/// @param scheduler_ Scheduler stored for later use when presenting.
/// @param swapchain_ Swapchain whose image count and view format are queried here.
PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
                               MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
                               Swapchain& swapchain_)
    : render_window{render_window_}, device{device_},
      memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
      blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
      use_present_thread{Settings::values.async_presentation.GetValue()},
      image_count{swapchain.GetImageCount()} {
    auto& logical = device.GetLogical();

    const VkCommandPoolCreateInfo pool_ci{
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .pNext = nullptr,
        .flags =
            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = device.GetGraphicsFamily(),
    };
    cmdpool = logical.CreateCommandPool(pool_ci);
    auto command_buffers = cmdpool.Allocate(image_count);

    // Every frame uses identical creation parameters for its synchronization objects.
    const VkSemaphoreCreateInfo semaphore_ci{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
    };
    // The fence starts signaled so that the first GetRenderFrame() wait on it returns at once.
    const VkFenceCreateInfo fence_ci{
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_FENCE_CREATE_SIGNALED_BIT,
    };

    frames.resize(image_count);
    for (u32 index = 0; index < frames.size(); ++index) {
        Frame& frame = frames[index];
        frame.cmdbuf = vk::CommandBuffer{command_buffers[index], device.GetDispatchLoader()};
        frame.render_ready = logical.CreateSemaphore(semaphore_ci);
        frame.present_done = logical.CreateFence(fence_ci);
        free_queue.push(&frame);
    }

    if (!use_present_thread) {
        return;
    }
    present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
}
/// Defaulted destructor: cleanup is left entirely to the members' own destructors.
/// NOTE(review): presumably `present_thread` is declared as std::jthread (it is assigned one in
/// the constructor), which would stop and join the thread here - confirm in the header.
PresentManager::~PresentManager() = default;
/// Obtains a frame that is safe to render into.
///
/// Blocks until the free queue holds a frame, removes the front frame, then waits on that
/// frame's `present_done` fence and resets it before returning. `free_mutex` is held for the
/// whole call, including the fence wait.
///
/// @return The frame taken from the free queue, with its `present_done` fence reset.
Frame* PresentManager::GetRenderFrame() {
    MICROPROFILE_SCOPE(Vulkan_WaitPresent);

    std::unique_lock lock{free_mutex};

    // Sleep until at least one presentation frame has been returned to the free queue.
    while (free_queue.empty()) {
        free_cv.wait(lock);
    }

    Frame* const next_frame = free_queue.front();
    free_queue.pop();

    // The frame's resources are only reusable once its previous presentation has finished.
    next_frame->present_done.Wait();
    next_frame->present_done.Reset();

    return next_frame;
}
/// Hands a rendered frame over for presentation.
///
/// With the presentation thread enabled, a scheduler command is recorded that pushes the frame
/// onto the present queue under `queue_mutex` and notifies `frame_cv`. Without it, the call
/// waits for the scheduler worker, copies the frame to the swapchain on the calling thread and
/// puts the frame straight back into the free queue.
///
/// @param frame Frame previously obtained from GetRenderFrame().
void PresentManager::Present(Frame* frame) {
    if (use_present_thread) {
        // NOTE(review): presumably the recorded callback runs on the scheduler's worker once
        // the preceding commands have been processed - confirm against Scheduler::Record.
        scheduler.Record([this, frame](vk::CommandBuffer) {
            std::unique_lock lock{queue_mutex};
            present_queue.push(frame);
            frame_cv.notify_one();
        });
        return;
    }

    // Synchronous path: no presentation thread exists, so do the work inline.
    scheduler.WaitWorker();
    CopyToSwapchain(frame);
    free_queue.push(frame);
}
/// Rebuilds the image, image view and framebuffer of a frame for new presentation parameters.
/// @param frame             Frame whose resources are replaced
/// @param width             New width of the frame image
/// @param height            New height of the frame image
/// @param is_srgb           Colorspace flag stored in the frame
/// @param image_view_format Format used for the frame's image view
/// @param rd                Render pass the framebuffer is created for
void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
                                   VkFormat image_view_format, VkRenderPass rd) {
    auto& logical = device.GetLogical();

    // Remember the parameters this frame is being built for
    frame->width = width;
    frame->height = height;
    frame->is_srgb = is_srgb;

    // Backing image: rendered to as a color attachment and later used as a transfer source.
    // MUTABLE_FORMAT allows the view below to use a format different from the image format.
    const VkImageCreateInfo image_ci{
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
        .imageType = VK_IMAGE_TYPE_2D,
        .format = swapchain.GetImageFormat(),
        .extent =
            {
                .width = width,
                .height = height,
                .depth = 1,
            },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };
    frame->image = logical.CreateImage(image_ci);
    frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);

    // View over the single mip level / array layer of the new image
    const VkImageViewCreateInfo view_ci{
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .image = *frame->image,
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
        .format = image_view_format,
        .components =
            {
                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
                .g = VK_COMPONENT_SWIZZLE_IDENTITY,
                .b = VK_COMPONENT_SWIZZLE_IDENTITY,
                .a = VK_COMPONENT_SWIZZLE_IDENTITY,
            },
        .subresourceRange =
            {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
    };
    frame->image_view = logical.CreateImageView(view_ci);

    // Framebuffer with the new view as its only attachment
    const VkImageView attachment{*frame->image_view};
    const VkFramebufferCreateInfo framebuffer_ci{
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .renderPass = rd,
        .attachmentCount = 1,
        .pAttachments = &attachment,
        .width = width,
        .height = height,
        .layers = 1,
    };
    frame->framebuffer = logical.CreateFramebuffer(framebuffer_ci);
}
// Blocks the caller until every queued frame has been taken by the present thread and the
// frame currently being copied to the swapchain (if any) has finished CopyToSwapchain.
// Returns immediately when the present thread is disabled.
void PresentManager::WaitPresent() {
if (!use_present_thread) {
return;
}
// Wait for the present queue to be empty
{
// frame_cv is notified by PresentThread each time it pops a frame.
std::unique_lock queue_lock{queue_mutex};
frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
}
// The above condition will be satisfied when the last frame is taken from the queue.
// To ensure that frame has been presented as well take hold of the swapchain
// mutex.
// The lock is released again right away at the end of this function; acquiring it is
// only used as a barrier against the present thread's CopyToSwapchain call.
std::scoped_lock swapchain_lock{swapchain_mutex};
}
// Entry point of the present thread: repeatedly takes a frame from the present queue, copies it
// to the swapchain and returns it to the free queue, until a stop is requested through `token`.
void PresentManager::PresentThread(std::stop_token token) {
Common::SetCurrentThreadName("VulkanPresent");
while (!token.stop_requested()) {
std::unique_lock lock{queue_mutex};
// Wait for presentation frames
Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
// The wait can also end because of a stop request; do not touch the queue in that case.
if (token.stop_requested()) {
return;
}
// Take the frame and notify anyone waiting
Frame* frame = present_queue.front();
present_queue.pop();
// Wakes a WaitPresent caller that is waiting for the queue to become empty.
frame_cv.notify_one();
// By exchanging the lock ownership we take the swapchain lock
// before the queue lock goes out of scope. This way the swapchain
// lock in WaitPresent is guaranteed to occur after here.
std::exchange(lock, std::unique_lock{swapchain_mutex});
// From here on `lock` owns swapchain_mutex (until the end of this iteration).
CopyToSwapchain(frame);
// Free the frame for reuse
std::scoped_lock fl{free_mutex};
free_queue.push(frame);
// Wakes GetRenderFrame if it is blocked waiting for a free frame.
free_cv.notify_one();
}
}
// Copies (or blits) the rendered frame image into the current swapchain image and presents it.
// Steps: recreate the swapchain if it no longer matches the frame, acquire a swapchain image,
// record the transfer into the frame's command buffer, submit it on the graphics queue with
// frame->present_done as the fence, then present.
void PresentManager::CopyToSwapchain(Frame* frame) {
MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
// Rebuilds the swapchain from the frame's parameters and refreshes the cached image count.
const auto recreate_swapchain = [&] {
swapchain.Create(frame->width, frame->height, frame->is_srgb);
image_count = swapchain.GetImageCount();
};
// If the size or colorspace of the incoming frames has changed, recreate the swapchain
// to account for that.
const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
if (srgb_changed || size_changed) {
recreate_swapchain();
}
// AcquireNextImage returns true when the swapchain has to be recreated; retry until an
// image is acquired from a usable swapchain.
while (swapchain.AcquireNextImage()) {
recreate_swapchain();
}
// Record the transfer into the command buffer owned by this frame.
const vk::CommandBuffer cmdbuf{frame->cmdbuf};
cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
const VkImage image{swapchain.CurrentImage()};
const VkExtent2D extent = swapchain.GetExtent();
// Barriers executed before the transfer:
//  [0] swapchain image: UNDEFINED -> TRANSFER_DST_OPTIMAL (previous contents are discarded)
//  [1] frame image:     GENERAL   -> TRANSFER_SRC_OPTIMAL (makes color writes visible to the read)
const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
// Barriers executed after the transfer:
//  [0] swapchain image: TRANSFER_DST_OPTIMAL -> PRESENT_SRC_KHR
//  [1] frame image:     TRANSFER_SRC_OPTIMAL -> GENERAL (the layout pre_barriers expects next time)
const std::array post_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *frame->image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
{}, {}, pre_barriers);
// Use a blit (frame size -> swapchain extent, linear filter) when blit_supported is set,
// otherwise fall back to an image copy.
if (blit_supported) {
cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
VK_FILTER_LINEAR);
} else {
cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
}
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
{}, {}, post_barriers);
cmdbuf.End();
// The submission waits on two semaphores: the swapchain's present semaphore (the one passed
// to image acquisition) and the frame's render_ready semaphore. It signals the swapchain's
// render semaphore, which Present() below is given.
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
// One wait stage per entry of wait_semaphores, in the same order.
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
// Must match the number of entries in wait_semaphores / wait_stage_masks.
.waitSemaphoreCount = 2U,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = 1U,
.pSignalSemaphores = &render_semaphore,
};
// Submit the image copy/blit to the swapchain
{
// submit_mutex is the scheduler's public mutex that it also holds around its own queue
// submissions, so this submit cannot overlap with one made by the scheduler.
std::scoped_lock lock{scheduler.submit_mutex};
// present_done is signalled when this submission completes; GetRenderFrame waits on it
// before handing the frame out again.
switch (const VkResult result =
device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
// Report the device loss first, then let vk::Check handle the error code.
device.ReportLoss();
[[fallthrough]];
default:
vk::Check(result);
break;
}
}
// Present
swapchain.Present(render_semaphore);
}
} // namespace Vulkan

View File

@@ -0,0 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core::Frontend {
class EmuWindow;
} // namespace Core::Frontend
namespace Vulkan {
class Device;
class Scheduler;
class Swapchain;
/// A single presentation frame: an off-screen image that is rendered to and later copied to
/// the swapchain, together with the objects needed to record and synchronize that copy.
struct Frame {
    /// Width of the frame image; zero until RecreateFrame has been called
    u32 width{};
    /// Height of the frame image; zero until RecreateFrame has been called
    u32 height{};
    /// Colorspace flag the frame was created with (used when (re)creating the swapchain)
    bool is_srgb{};
    /// Image that is rendered to and used as transfer source for the swapchain copy
    vk::Image image;
    /// View over `image`
    vk::ImageView image_view;
    /// Framebuffer with `image_view` as its only attachment
    vk::Framebuffer framebuffer;
    /// Device memory commit backing `image`
    MemoryCommit image_commit;
    /// Command buffer the swapchain copy/blit is recorded into
    vk::CommandBuffer cmdbuf;
    /// Semaphore the swapchain copy submission waits on before executing
    vk::Semaphore render_ready;
    /// Fence signalled when the swapchain copy submission completes; waited on before reuse
    vk::Fence present_done;
};
/// Owns a pool of presentation frames and moves them between a free queue (available for
/// rendering) and a present queue (waiting to be copied to the swapchain). Presentation is done
/// either on a dedicated present thread or inline in Present(), depending on the
/// async_presentation setting read at construction.
class PresentManager {
public:
PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
~PresentManager();
/// Waits until a presentation frame is free and returns it; the frame's previous
/// presentation has completed when this returns
Frame* GetRenderFrame();
/// Pushes a frame for presentation
Frame* /* see above */ ;
void Present(Frame* frame);
/// Recreates the present frame to match the provided parameters
void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
VkFormat image_view_format, VkRenderPass rd);
/// Waits for the present thread to finish presenting all queued frames.
void WaitPresent();
private:
/// Body of the present thread; runs until a stop is requested through the token
void PresentThread(std::stop_token token);
/// Copies or blits the frame image to the current swapchain image and presents it
void CopyToSwapchain(Frame* frame);
private:
Core::Frontend::EmuWindow& render_window;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Swapchain& swapchain;
/// Pool the per-frame command buffers are allocated from
vk::CommandPool cmdpool;
/// Storage for all frames; both queues below hold pointers into this vector
std::vector<Frame> frames;
/// Frames waiting to be presented by the present thread (guarded by queue_mutex)
std::queue<Frame*> present_queue;
/// Frames available to GetRenderFrame (guarded by free_mutex when the present thread is used)
std::queue<Frame*> free_queue;
/// Notified whenever a frame is pushed to or popped from present_queue
std::condition_variable_any frame_cv;
/// Notified when the present thread returns a frame to free_queue
std::condition_variable free_cv;
/// Held by the present thread while it copies a frame to the swapchain
std::mutex swapchain_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
/// Whether CopyToSwapchain may use a blit instead of an image copy
bool blit_supported;
/// Value of the async_presentation setting at construction time
bool use_present_thread;
/// Number of swapchain images; also the number of frames created at construction
std::size_t image_count;
};
} // namespace Vulkan

View File

@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
}
}
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_)
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
: QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
const vk::Device* logical = &cache.GetDevice().GetLogical();
cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
const bool use_precise = Settings::IsGPULevelHigh();
logical->ResetQueryPool(query.first, query.second, 1);
cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
cmdbuf.BeginQuery(query.first, query.second,
use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
}
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
[query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
}
u64 HostCounter::BlockingQuery() const {
cache.GetScheduler().Wait(tick);
u64 HostCounter::BlockingQuery(bool async) const {
if (!async) {
cache.GetScheduler().Wait(tick);
}
u64 data;
const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
query.first, query.second, 1, sizeof(data), &data, sizeof(data),

View File

@@ -52,7 +52,8 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
Core::Memory::Memory& cpu_memory_, const Device& device_,
Scheduler& scheduler_);
~QueryCache();
@@ -83,7 +84,7 @@ public:
void EndQuery();
private:
u64 BlockingQuery() const override;
u64 BlockingQuery(bool async = false) const override;
QueryCache& cache;
const VideoCore::QueryType type;

View File

@@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler),
query_cache{*this, cpu_memory_, device, scheduler},
accelerate_dma(buffer_cache, texture_cache, scheduler),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache)) {
VideoCommon::CacheType::BufferCache |
VideoCommon::CacheType::QueryCache)) {
return true;
}
return false;

View File

@@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
Scheduler::~Scheduler() = default;
void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
// When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal_semaphore, wait_semaphore);
const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
return signal_value;
}
void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() {
});
}
void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
@@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
on_submit();
}
std::scoped_lock lock{submit_mutex};
switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
@@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
});
chunk->MarkSubmit();
DispatchWork();
return signal_value;
}
void Scheduler::AllocateNewContext() {

View File

@@ -34,7 +34,7 @@ public:
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -106,6 +106,8 @@ public:
return *master_semaphore;
}
std::mutex submit_mutex;
private:
class Command {
public:
@@ -201,7 +203,7 @@ private:
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();

View File

@@ -99,18 +99,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
return;
}
device.GetLogical().WaitIdle();
Destroy();
CreateSwapchain(capabilities, srgb);
CreateSemaphores();
CreateImageViews();
resource_ticks.clear();
resource_ticks.resize(image_count);
}
void Swapchain::AcquireNextImage() {
bool Swapchain::AcquireNextImage() {
const VkResult result = device.GetLogical().AcquireNextImageKHR(
*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
VK_NULL_HANDLE, &image_index);
@@ -127,8 +125,11 @@ void Swapchain::AcquireNextImage() {
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
break;
}
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
return is_suboptimal || is_outdated;
}
void Swapchain::Present(VkSemaphore render_semaphore) {
@@ -143,6 +144,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
.pImageIndices = &image_index,
.pResults = nullptr,
};
std::scoped_lock lock{scheduler.submit_mutex};
switch (const VkResult result = present_queue.Present(present_info)) {
case VK_SUCCESS:
break;
@@ -168,7 +170,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
surface_format = ChooseSwapSurfaceFormat(formats);
present_mode = ChooseSwapPresentMode(present_modes);
u32 requested_image_count{capabilities.minImageCount + 1};
@@ -193,7 +195,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.imageColorSpace = surface_format.colorSpace,
.imageExtent = {},
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
@@ -241,45 +243,14 @@ void Swapchain::CreateSemaphores() {
present_semaphores.resize(image_count);
std::ranges::generate(present_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::CreateImageViews() {
VkImageViewCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange =
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
image_views.resize(image_count);
for (std::size_t i = 0; i < image_count; i++) {
ci.image = images[i];
image_views[i] = device.GetLogical().CreateImageView(ci);
}
render_semaphores.resize(image_count);
std::ranges::generate(render_semaphores,
[this] { return device.GetLogical().CreateSemaphore(); });
}
void Swapchain::Destroy() {
frame_index = 0;
present_semaphores.clear();
framebuffers.clear();
image_views.clear();
swapchain.reset();
}

View File

@@ -27,7 +27,7 @@ public:
void Create(u32 width, u32 height, bool srgb);
/// Acquires the next image in the swapchain, waits as needed.
void AcquireNextImage();
bool AcquireNextImage();
/// Presents the rendered image to the swapchain.
void Present(VkSemaphore render_semaphore);
@@ -52,6 +52,11 @@ public:
return is_suboptimal;
}
/// Returns true when the swapchain format is in the srgb color space
bool IsSrgb() const {
return current_srgb;
}
VkExtent2D GetSize() const {
return extent;
}
@@ -64,22 +69,34 @@ public:
return image_index;
}
std::size_t GetFrameIndex() const {
return frame_index;
}
VkImage GetImageIndex(std::size_t index) const {
return images[index];
}
VkImageView GetImageViewIndex(std::size_t index) const {
return *image_views[index];
VkImage CurrentImage() const {
return images[image_index];
}
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkFormat GetImageFormat() const {
return surface_format.format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
VkSemaphore CurrentRenderSemaphore() const {
return *render_semaphores[frame_index];
}
u32 GetWidth() const {
return width;
}
@@ -88,6 +105,10 @@ public:
return height;
}
VkExtent2D GetExtent() const {
return extent;
}
private:
void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
void CreateSemaphores();
@@ -107,10 +128,9 @@ private:
std::size_t image_count{};
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
std::vector<vk::Semaphore> render_semaphores;
u32 width;
u32 height;
@@ -121,6 +141,7 @@ private:
VkFormat image_view_format{};
VkExtent2D extent{};
VkPresentModeKHR present_mode{};
VkSurfaceFormatKHR surface_format{};
bool current_srgb{};
bool current_fps_unlocked{};

View File

@@ -14,13 +14,18 @@ namespace Vulkan {
UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
: device{device_}, scheduler{scheduler_} {
payload_start = payload.data();
payload_cursor = payload.data();
}
UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload.data();
if (++frame_index >= FRAMES_IN_FLIGHT) {
frame_index = 0;
}
payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
@@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() {
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload_cursor = payload.data();
payload_cursor = payload_start;
}
upload_start = payload_cursor;
}

View File

@@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
};
class UpdateDescriptorQueue final {
// This should be plenty for the vast majority of cases. Most desktop platforms only
// provide up to 3 swapchain images.
static constexpr size_t FRAMES_IN_FLIGHT = 5;
static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000;
static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
public:
explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
~UpdateDescriptorQueue();
@@ -73,9 +79,11 @@ private:
const Device& device;
Scheduler& scheduler;
size_t frame_index{0};
DescriptorUpdateEntry* payload_cursor = nullptr;
DescriptorUpdateEntry* payload_start = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
std::array<DescriptorUpdateEntry, 0x10000> payload;
std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
};
} // namespace Vulkan

View File

@@ -888,7 +888,7 @@ void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* i
buffer,
download_map.buffer,
};
std::array buffer_offsets{
std::array<u64, 2> buffer_offsets{
buffer_offset,
download_map.offset,
};

View File

@@ -617,7 +617,9 @@ bool Device::ShouldBoostClocks() const {
const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
return validated_driver && !is_steam_deck;
const bool is_debugging = this->HasDebuggingToolAttached();
return validated_driver && !is_steam_deck && !is_debugging;
}
bool Device::GetSuitability(bool requires_swapchain) {

View File

@@ -497,7 +497,7 @@ void Config::ReadCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
ReadGlobalSetting(Settings::values.use_multi_core);
ReadGlobalSetting(Settings::values.use_extended_memory_layout);
ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
qt_config->endGroup();
}
@@ -692,6 +692,7 @@ void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
ReadGlobalSetting(Settings::values.renderer_backend);
ReadGlobalSetting(Settings::values.async_presentation);
ReadGlobalSetting(Settings::values.renderer_force_max_clock);
ReadGlobalSetting(Settings::values.vulkan_device);
ReadGlobalSetting(Settings::values.fullscreen_mode);
@@ -1161,7 +1162,7 @@ void Config::SaveCoreValues() {
qt_config->beginGroup(QStringLiteral("Core"));
WriteGlobalSetting(Settings::values.use_multi_core);
WriteGlobalSetting(Settings::values.use_extended_memory_layout);
WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
qt_config->endGroup();
}
@@ -1313,6 +1314,7 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
Settings::values.renderer_backend.UsingGlobal());
WriteGlobalSetting(Settings::values.async_presentation);
WriteGlobalSetting(Settings::values.renderer_force_max_clock);
WriteGlobalSetting(Settings::values.vulkan_device);
WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),

View File

@@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() {
ui->use_multi_core->setEnabled(runtime_lock);
ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue());
ui->use_extended_memory_layout->setEnabled(runtime_lock);
ui->use_extended_memory_layout->setChecked(
Settings::values.use_extended_memory_layout.GetValue());
ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue());
ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue());
@@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() {
void ConfigureGeneral::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core,
use_multi_core);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout,
ui->use_extended_memory_layout,
use_extended_memory_layout);
if (Settings::IsConfiguringGlobal()) {
UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
@@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() {
Settings::values.use_speed_limit, use_speed_limit);
ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core,
use_multi_core);
ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout,
Settings::values.use_extended_memory_layout,
use_extended_memory_layout);
connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() {
ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() &&

View File

@@ -47,7 +47,6 @@ private:
ConfigurationShared::CheckState use_speed_limit;
ConfigurationShared::CheckState use_multi_core;
ConfigurationShared::CheckState use_extended_memory_layout;
const Core::System& system;
};

View File

@@ -61,13 +61,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_extended_memory_layout">
<property name="text">
<string>Extended memory layout (8GB DRAM)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="toggle_check_exit">
<property name="text">

View File

@@ -22,11 +22,13 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !system.IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
ui->async_present->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
ui->async_astc->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->async_present->setChecked(Settings::values.async_presentation.GetValue());
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
@@ -54,6 +56,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
ui->async_present, async_present);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
ui->renderer_force_max_clock,
renderer_force_max_clock);
@@ -90,6 +94,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
// Disable if not global (only happens during game)
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal());
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
@@ -107,6 +112,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
return;
}
ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation,
async_present);
ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);

View File

@@ -36,6 +36,7 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState async_present;
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
ConfigurationShared::CheckState async_astc;

View File

@@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>404</width>
<height>321</height>
<height>376</height>
</rect>
</property>
<property name="windowTitle">
@@ -69,6 +69,13 @@
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="async_present">
<property name="text">
<string>Enable asynchronous presentation (Vulkan only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="renderer_force_max_clock">
<property name="toolTip">
@@ -112,7 +119,7 @@
<item>
<widget class="QCheckBox" name="use_fast_gpu_time">
<property name="toolTip">
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
<string>Use Fast GPU Time (Hack)</string>
@@ -122,7 +129,7 @@
<item>
<widget class="QCheckBox" name="use_pessimistic_flushes">
<property name="toolTip">
<string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
<string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
</property>
<property name="text">
<string>Use pessimistic buffer flushes (Hack)</string>
@@ -132,7 +139,7 @@
<item>
<widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
<property name="toolTip">
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
<string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
</property>
<property name="text">
<string>Use Vulkan pipeline cache</string>

View File

@@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() {
ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time));
ui->device_name_edit->setText(
QString::fromUtf8(Settings::values.device_name.GetValue().c_str()));
ui->use_unsafe_extended_memory_layout->setEnabled(enabled);
ui->use_unsafe_extended_memory_layout->setChecked(
Settings::values.use_unsafe_extended_memory_layout.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue());
@@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index,
ui->combo_time_zone);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout,
ui->use_unsafe_extended_memory_layout,
use_unsafe_extended_memory_layout);
if (Settings::IsConfiguringGlobal()) {
// Guard if during game and set to game-specific value
@@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() {
Settings::values.rng_seed.GetValue().has_value(),
Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed);
ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout,
Settings::values.use_unsafe_extended_memory_layout,
use_unsafe_extended_memory_layout);
ui->custom_rtc_checkbox->setVisible(false);
ui->custom_rtc_edit->setVisible(false);
}

View File

@@ -41,6 +41,7 @@ private:
bool enabled = false;
ConfigurationShared::CheckState use_rng_seed;
ConfigurationShared::CheckState use_unsafe_extended_memory_layout;
Core::System& system;
};

View File

@@ -478,6 +478,13 @@
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="use_unsafe_extended_memory_layout">
<property name="text">
<string>Unsafe extended memory layout (8GB DRAM)</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>

View File

@@ -274,7 +274,7 @@ void Config::ReadValues() {
// Core
ReadSetting("Core", Settings::values.use_multi_core);
ReadSetting("Core", Settings::values.use_extended_memory_layout);
ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout);
// Cpu
ReadSetting("Cpu", Settings::values.cpu_accuracy);
@@ -300,6 +300,7 @@ void Config::ReadValues() {
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.async_presentation);
ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);

View File

@@ -163,9 +163,9 @@ keyboard_enabled =
# 0: Disabled, 1 (default): Enabled
use_multi_core =
# Enable extended guest system memory layout (8GB DRAM)
# Enable unsafe extended guest system memory layout (8GB DRAM)
# 0 (default): Disabled, 1: Enabled
use_extended_memory_layout =
use_unsafe_extended_memory_layout =
[Cpu]
# Adjusts various optimizations.
@@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor =
# 0: OpenGL, 1 (default): Vulkan
backend =
# Whether to enable asynchronous presentation (Vulkan only)
# 0 (default): Off, 1: On
async_presentation =
# Enable graphics API debugging mode.
# 0 (default): Disabled, 1: Enabled
debug =