Compare commits
57 Commits
| Author | SHA1 | Date |
|---|---|---|
| | e2a2a556b9 | |
| | 80436c1330 | |
| | 319c4d2108 | |
| | 6888d776ff | |
| | 2effdeb924 | |
| | dc96a59fa0 | |
| | b392a5986e | |
| | 3142f1b597 | |
| | 9c548146ca | |
| | 5be00cba15 | |
| | ee9b4a7f9a | |
| | 5aeff9aff5 | |
| | 322d6a0311 | |
| | 5b01f80a12 | |
| | ceb851b590 | |
| | 85bb6a6f08 | |
| | 984563b773 | |
| | 8306703a7d | |
| | 09908207fb | |
| | 89fc75d769 | |
| | 56e450a3f7 | |
| | 6fe51f398f | |
| | cd0a7dfdbc | |
| | 3dd6b55851 | |
| | 64c5631579 | |
| | 6e347d8d1b | |
| | 624a0f7f3f | |
| | c332c66eb2 | |
| | 0d6d8129c4 | |
| | ae0e481677 | |
| | 1fe7df4517 | |
| | 0986caa8d8 | |
| | 028b2718ed | |
| | b3371ed09e | |
| | 7bd447355f | |
| | 4cbb363d3f | |
| | 287d5921cf | |
| | cb9dd01ffd | |
| | f2c61bbe13 | |
| | f846e3d6d0 | |
| | 8a76f816a4 | |
| | 5b989f189f | |
| | 3813af2f3c | |
| | c83bf7cd1e | |
| | a5bb1ac6e3 | |
| | 5619d24377 | |
| | 4af569ee47 | |
| | 91d35559e5 | |
| | 38d3a48873 | |
| | cf27b59493 | |
| | da0aa4da6b | |
| | e09c1fbc1f | |
| | 844e4a297b | |
| | a87c85eba2 | |
| | 3d2c44848b | |
| | 3d9fff82c0 | |
| | 3c95e49c42 | |
Submodule externals/dynarmic updated: 087a74417a...f6ae9e1c33
```diff
@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
     config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
     config.page_table_address_space_bits = address_space_bits;
     config.silently_mirror_page_table = false;
+    config.absolute_offset_page_table = true;
 
     // Multi-process state
     config.processor_id = core_index;
@@ -46,7 +46,6 @@
 #include "core/settings.h"
 #include "core/telemetry_session.h"
 #include "core/tools/freezer.h"
-#include "video_core/debug_utils/debug_utils.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
@@ -341,7 +340,6 @@ struct System::Impl {
     std::unique_ptr<Loader::AppLoader> app_loader;
     std::unique_ptr<VideoCore::RendererBase> renderer;
     std::unique_ptr<Tegra::GPU> gpu_core;
-    std::shared_ptr<Tegra::DebugContext> debug_context;
     std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
     Memory::Memory memory;
     CpuCoreManager cpu_core_manager;
@@ -580,14 +578,6 @@ Loader::AppLoader& System::GetAppLoader() const {
     return *impl->app_loader;
 }
 
-void System::SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
-    impl->debug_context = std::move(context);
-}
-
-Tegra::DebugContext* System::GetGPUDebugContext() const {
-    return impl->debug_context.get();
-}
-
 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
     impl->virtual_filesystem = std::move(vfs);
 }
 
@@ -307,10 +307,6 @@ public:
    Service::SM::ServiceManager& ServiceManager();
    const Service::SM::ServiceManager& ServiceManager() const;
 
-    void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context);
-
-    Tegra::DebugContext* GetGPUDebugContext() const;
-
    void SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs);
 
    std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
 
@@ -9,6 +9,7 @@
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nifm/nifm.h"
 #include "core/hle/service/service.h"
+#include "core/settings.h"
 
 namespace Service::NIFM {
 
@@ -86,7 +87,12 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.PushEnum(RequestState::Connected);
+
+        if (Settings::values.bcat_backend == "none") {
+            rb.PushEnum(RequestState::NotSubmitted);
+        } else {
+            rb.PushEnum(RequestState::Connected);
+        }
     }
 
     void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
     }
     void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
         LOG_WARNING(Service_NIFM, "(STUBBED) called");
 
         IPC::ResponseBuilder rb{ctx, 3};
         rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
     }
     Core::System& system;
 };
 
@@ -104,10 +104,12 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
 
         ASSERT(object->status == nvmap::Object::Status::Allocated);
 
-        u64 size = static_cast<u64>(entry.pages) << 0x10;
+        const u64 size = static_cast<u64>(entry.pages) << 0x10;
         ASSERT(size <= object->size);
+        const u64 map_offset = static_cast<u64>(entry.map_offset) << 0x10;
 
-        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        const GPUVAddr returned =
+            gpu.MemoryManager().MapBufferEx(object->addr + map_offset, offset, size);
         ASSERT(returned == offset);
     }
     std::memcpy(output.data(), entries.data(), output.size());
 
@@ -62,7 +62,7 @@ private:
         u16_le flags;
         u16_le kind;
         u32_le nvmap_handle;
-        INSERT_PADDING_WORDS(1);
+        u32_le map_offset;
         u32_le offset;
         u32_le pages;
     };
 
@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
     return layer_id;
 }
 
+void NVFlinger::CloseLayer(u64 layer_id) {
+    for (auto& display : displays) {
+        display.CloseLayer(layer_id);
+    }
+}
+
 std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
     const auto* const layer = FindLayer(display_id, layer_id);
 
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
 
     const auto& igbp_buffer = buffer->get().igbp_buffer;
 
-    const auto& gpu = system.GPU();
+    auto& gpu = system.GPU();
     const auto& multi_fence = buffer->get().multi_fence;
     for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
         const auto& fence = multi_fence.fences[fence_id];
 
@@ -54,6 +54,9 @@ public:
     /// If an invalid display ID is specified, then an empty optional is returned.
     std::optional<u64> CreateLayer(u64 display_id);
 
+    /// Closes a layer on all displays for the given layer ID.
+    void CloseLayer(u64 layer_id);
+
     /// Finds the buffer queue ID of the specified layer in the specified display.
     ///
     /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
 Display::~Display() = default;
 
 Layer& Display::GetLayer(std::size_t index) {
-    return layers.at(index);
+    return *layers.at(index);
 }
 
 const Layer& Display::GetLayer(std::size_t index) const {
-    return layers.at(index);
+    return *layers.at(index);
 }
 
 std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
     // TODO(Subv): Support more than 1 layer.
     ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
 
-    layers.emplace_back(id, buffer_queue);
+    layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
+}
+
+void Display::CloseLayer(u64 id) {
+    layers.erase(
+        std::remove_if(layers.begin(), layers.end(),
+                       [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
+        layers.end());
 }
 
 Layer* Display::FindLayer(u64 id) {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
 
     if (itr == layers.end()) {
         return nullptr;
     }
 
-    return &*itr;
+    return itr->get();
 }
 
 const Layer* Display::FindLayer(u64 id) const {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
 
     if (itr == layers.end()) {
         return nullptr;
     }
 
-    return &*itr;
+    return itr->get();
 }
 
 } // namespace Service::VI
 
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -69,6 +70,12 @@ public:
     ///
     void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
 
+    /// Closes and removes a layer from this display with the given ID.
+    ///
+    /// @param id The ID assigned to the layer to close.
+    ///
+    void CloseLayer(u64 id);
+
     /// Attempts to find a layer with the given ID.
     ///
     /// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
     u64 id;
     std::string name;
 
-    std::vector<Layer> layers;
+    std::vector<std::shared_ptr<Layer>> layers;
     Kernel::EventPair vsync_event;
 };
 
@@ -1066,6 +1066,18 @@ private:
         rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
     }
 
+    void CloseLayer(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto layer_id{rp.Pop<u64>()};
+
+        LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
+
+        nv_flinger->CloseLayer(layer_id);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
     void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
         IPC::RequestParser rp{ctx};
         const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
         {1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
         {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
         {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
-        {2021, nullptr, "CloseLayer"},
+        {2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
         {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
         {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
         {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
 
@@ -146,7 +146,7 @@ struct Memory::Impl {
     u8* GetPointer(const VAddr vaddr) {
         u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
         if (page_pointer != nullptr) {
-            return page_pointer + (vaddr & PAGE_MASK);
+            return page_pointer + vaddr;
         }
 
         if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +229,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
+            const u8* const src_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memcpy(dest_buffer, src_ptr, copy_amount);
             break;
         }
@@ -276,7 +277,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
+            u8* const dest_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memcpy(dest_ptr, src_buffer, copy_amount);
             break;
         }
@@ -322,7 +324,8 @@ struct Memory::Impl {
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
 
-            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
+            u8* dest_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             std::memset(dest_ptr, 0, copy_amount);
             break;
         }
@@ -368,7 +371,8 @@ struct Memory::Impl {
         }
         case Common::PageType::Memory: {
             DEBUG_ASSERT(page_table.pointers[page_index]);
-            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
+            const u8* src_ptr =
+                page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
             WriteBlock(process, dest_addr, src_ptr, copy_amount);
             break;
         }
@@ -446,7 +450,8 @@ struct Memory::Impl {
                 page_type = Common::PageType::Unmapped;
             } else {
                 page_type = Common::PageType::Memory;
-                current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
+                current_page_table->pointers[vaddr >> PAGE_BITS] =
+                    pointer - (vaddr & ~PAGE_MASK);
             }
             break;
         }
@@ -493,7 +498,9 @@ struct Memory::Impl {
                           memory);
         } else {
             while (base != end) {
-                page_table.pointers[base] = memory;
+                page_table.pointers[base] = memory - (base << PAGE_BITS);
+                ASSERT_MSG(page_table.pointers[base],
+                           "memory mapping base yield a nullptr within the table");
 
                 base += 1;
                 memory += PAGE_SIZE;
@@ -518,7 +525,7 @@ struct Memory::Impl {
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
            T value;
-            std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
+            std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
            return value;
        }
 
@@ -559,7 +566,7 @@ struct Memory::Impl {
        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
-            std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
+            std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
            return;
        }
 
@@ -4,8 +4,6 @@ add_library(video_core STATIC
     buffer_cache/map_interval.h
     dma_pusher.cpp
     dma_pusher.h
-    debug_utils/debug_utils.cpp
-    debug_utils/debug_utils.h
     engines/const_buffer_engine_interface.h
     engines/const_buffer_info.h
     engines/engine_upload.cpp
@@ -157,10 +155,23 @@ if (ENABLE_VULKAN)
         renderer_vulkan/maxwell_to_vk.h
         renderer_vulkan/vk_buffer_cache.cpp
         renderer_vulkan/vk_buffer_cache.h
+        renderer_vulkan/vk_compute_pipeline.cpp
+        renderer_vulkan/vk_compute_pipeline.h
+        renderer_vulkan/vk_descriptor_pool.cpp
+        renderer_vulkan/vk_descriptor_pool.h
         renderer_vulkan/vk_device.cpp
         renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_graphics_pipeline.cpp
+        renderer_vulkan/vk_graphics_pipeline.h
+        renderer_vulkan/vk_image.cpp
+        renderer_vulkan/vk_image.h
         renderer_vulkan/vk_memory_manager.cpp
         renderer_vulkan/vk_memory_manager.h
+        renderer_vulkan/vk_pipeline_cache.cpp
+        renderer_vulkan/vk_pipeline_cache.h
+        renderer_vulkan/vk_rasterizer.h
+        renderer_vulkan/vk_renderpass_cache.cpp
+        renderer_vulkan/vk_renderpass_cache.h
         renderer_vulkan/vk_resource_manager.cpp
         renderer_vulkan/vk_resource_manager.h
         renderer_vulkan/vk_sampler_cache.cpp
@@ -169,10 +180,14 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_scheduler.h
         renderer_vulkan/vk_shader_decompiler.cpp
         renderer_vulkan/vk_shader_decompiler.h
+        renderer_vulkan/vk_staging_buffer_pool.cpp
+        renderer_vulkan/vk_staging_buffer_pool.h
         renderer_vulkan/vk_stream_buffer.cpp
         renderer_vulkan/vk_stream_buffer.h
         renderer_vulkan/vk_swapchain.cpp
-        renderer_vulkan/vk_swapchain.h)
+        renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_update_descriptor.cpp
+        renderer_vulkan/vk_update_descriptor.h)
 
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
 
@@ -1,49 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#include <mutex>
-
-#include "video_core/debug_utils/debug_utils.h"
-
-namespace Tegra {
-
-void DebugContext::DoOnEvent(Event event, void* data) {
-    {
-        std::unique_lock lock{breakpoint_mutex};
-
-        // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
-        // show on debug widgets
-
-        // TODO: Should stop the CPU thread here once we multithread emulation.
-
-        active_breakpoint = event;
-        at_breakpoint = true;
-
-        // Tell all observers that we hit a breakpoint
-        for (auto& breakpoint_observer : breakpoint_observers) {
-            breakpoint_observer->OnMaxwellBreakPointHit(event, data);
-        }
-
-        // Wait until another thread tells us to Resume()
-        resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
-    }
-}
-
-void DebugContext::Resume() {
-    {
-        std::lock_guard lock{breakpoint_mutex};
-
-        // Tell all observers that we are about to resume
-        for (auto& breakpoint_observer : breakpoint_observers) {
-            breakpoint_observer->OnMaxwellResume();
-        }
-
-        // Resume the waiting thread (i.e. OnEvent())
-        at_breakpoint = false;
-    }
-
-    resume_from_breakpoint.notify_one();
-}
-
-} // namespace Tegra
@@ -1,157 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <condition_variable>
-#include <list>
-#include <memory>
-#include <mutex>
-
-namespace Tegra {
-
-class DebugContext {
-public:
-    enum class Event {
-        FirstEvent = 0,
-
-        MaxwellCommandLoaded = FirstEvent,
-        MaxwellCommandProcessed,
-        IncomingPrimitiveBatch,
-        FinishedPrimitiveBatch,
-
-        NumEvents
-    };
-
-    /**
-     * Inherit from this class to be notified of events registered to some debug context.
-     * Most importantly this is used for our debugger GUI.
-     *
-     * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
-     * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
-     * access
-     * @todo Evaluate an alternative interface, in which there is only one managing observer and
-     * multiple child observers running (by design) on the same thread.
-     */
-    class BreakPointObserver {
-    public:
-        /// Constructs the object such that it observes events of the given DebugContext.
-        explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
-            : context_weak(debug_context) {
-            std::unique_lock lock{debug_context->breakpoint_mutex};
-            debug_context->breakpoint_observers.push_back(this);
-        }
-
-        virtual ~BreakPointObserver() {
-            auto context = context_weak.lock();
-            if (context) {
-                {
-                    std::unique_lock lock{context->breakpoint_mutex};
-                    context->breakpoint_observers.remove(this);
-                }
-
-                // If we are the last observer to be destroyed, tell the debugger context that
-                // it is free to continue. In particular, this is required for a proper yuzu
-                // shutdown, when the emulation thread is waiting at a breakpoint.
-                if (context->breakpoint_observers.empty())
-                    context->Resume();
-            }
-        }
-
-        /**
-         * Action to perform when a breakpoint was reached.
-         * @param event Type of event which triggered the breakpoint
-         * @param data Optional data pointer (if unused, this is a nullptr)
-         * @note This function will perform nothing unless it is overridden in the child class.
-         */
-        virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
-
-        /**
-         * Action to perform when emulation is resumed from a breakpoint.
-         * @note This function will perform nothing unless it is overridden in the child class.
-         */
-        virtual void OnMaxwellResume() {}
-
-    protected:
-        /**
-         * Weak context pointer. This need not be valid, so when requesting a shared_ptr via
-         * context_weak.lock(), always compare the result against nullptr.
-         */
-        std::weak_ptr<DebugContext> context_weak;
-    };
-
-    /**
-     * Simple structure defining a breakpoint state
-     */
-    struct BreakPoint {
-        bool enabled = false;
-    };
-
-    /**
-     * Static constructor used to create a shared_ptr of a DebugContext.
-     */
-    static std::shared_ptr<DebugContext> Construct() {
-        return std::shared_ptr<DebugContext>(new DebugContext);
-    }
-
-    /**
-     * Used by the emulation core when a given event has happened. If a breakpoint has been set
-     * for this event, OnEvent calls the event handlers of the registered breakpoint observers.
-     * The current thread then is halted until Resume() is called from another thread (or until
-     * emulation is stopped).
-     * @param event Event which has happened
-     * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
-     * Resume() is called.
-     */
-    void OnEvent(Event event, void* data) {
-        // This check is left in the header to allow the compiler to inline it.
-        if (!breakpoints[(int)event].enabled)
-            return;
-        // For the rest of event handling, call a separate function.
-        DoOnEvent(event, data);
-    }
-
-    void DoOnEvent(Event event, void* data);
-
-    /**
-     * Resume from the current breakpoint.
-     * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
-     * Calling from any other thread is safe.
-     */
-    void Resume();
-
-    /**
-     * Delete all set breakpoints and resume emulation.
-     */
-    void ClearBreakpoints() {
-        for (auto& bp : breakpoints) {
-            bp.enabled = false;
-        }
-        Resume();
-    }
-
-    // TODO: Evaluate if access to these members should be hidden behind a public interface.
-    std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints;
-    Event active_breakpoint{};
-    bool at_breakpoint = false;
-
-private:
-    /**
-     * Private default constructor to make sure people always construct this through Construct()
-     * instead.
-     */
-    DebugContext() = default;
-
-    /// Mutex protecting current breakpoint state and the observer list.
-    std::mutex breakpoint_mutex;
-
-    /// Used by OnEvent to wait for resumption.
-    std::condition_variable resume_from_breakpoint;
-
-    /// List of registered observers
-    std::list<BreakPointObserver*> breakpoint_observers;
-};
-
-} // namespace Tegra
@@ -7,7 +7,6 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
@@ -88,11 +87,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
         color_mask.A.Assign(1);
     }
 
-    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
+    // NVN games expect these values to be enabled at boot
+    regs.rasterize_enable = 1;
     regs.rt_separate_frag_data = 1;
 
     // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
     regs.framebuffer_srgb = 1;
 
     mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
     mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
     mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
@@ -273,8 +272,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
 }
 
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
-    auto debug_context = system.GetGPUDebugContext();
-
     const u32 method = method_call.method;
 
     if (method == cb_data_state.current) {
@@ -315,10 +312,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Maxwell3D register, increase the size of the Regs structure");
 
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
-    }
-
     if (regs.reg_array[method] != method_call.argument) {
         regs.reg_array[method] = method_call.argument;
         const std::size_t dirty_reg = dirty_pointers[method];
@@ -424,10 +417,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     default:
         break;
     }
 
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
-    }
-
 }
 
 void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
@@ -485,12 +474,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
     ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
 
-    auto debug_context = system.GetGPUDebugContext();
-
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-    }
-
     // Both instance configuration registers can not be set at the same time.
     ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
                "Illegal combination of instancing parameters");
@@ -500,10 +483,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
         rasterizer.DrawMultiBatch(is_indexed);
     }
 
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-    }
-
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
     // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
     // it's possible that it is incorrect and that there is some other register used to specify the
@@ -650,12 +629,6 @@ void Maxwell3D::DrawArrays() {
               regs.vertex_buffer.count);
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
-    auto debug_context = system.GetGPUDebugContext();
-
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-    }
-
     // Both instance configuration registers can not be set at the same time.
     ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
                "Illegal combination of instancing parameters");
@@ -673,10 +646,6 @@ void Maxwell3D::DrawArrays() {
         rasterizer.DrawBatch(is_indexed);
     }
 
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-    }
-
     // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
     // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
     // it's possible that it is incorrect and that there is some other register used to specify the
 
@@ -657,7 +657,11 @@ public:
             std::array<f32, 4> tess_level_outer;
             std::array<f32, 2> tess_level_inner;
 
-            INSERT_UNION_PADDING_WORDS(0x102);
+            INSERT_UNION_PADDING_WORDS(0x10);
+
+            u32 rasterize_enable;
+
+            INSERT_UNION_PADDING_WORDS(0xF1);
 
             u32 tfb_enabled;
 
@@ -1427,6 +1431,7 @@ ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tess_mode, 0xC8);
 ASSERT_REG_POSITION(tess_level_outer, 0xC9);
 ASSERT_REG_POSITION(tess_level_inner, 0xCD);
+ASSERT_REG_POSITION(rasterize_enable, 0xDF);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 
@@ -1051,7 +1051,7 @@ union Instruction {
             BitField<40, 1, R2pMode> mode;
             BitField<41, 2, u64> byte;
             BitField<20, 7, u64> immediate_mask;
-        } r2p;
+        } p2r_r2p;
 
         union {
             BitField<39, 3, u64> pred39;
@@ -1239,7 +1239,7 @@ union Instruction {
             BitField<35, 1, u64> ndv_flag;
             BitField<49, 1, u64> nodep_flag;
             BitField<50, 1, u64> dc_flag;
-            BitField<54, 2, u64> info;
+            BitField<54, 2, u64> offset_mode;
             BitField<56, 2, u64> component;
 
             bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1251,9 +1251,9 @@ union Instruction {
                 case TextureMiscMode::DC:
                     return dc_flag != 0;
                 case TextureMiscMode::AOFFI:
-                    return info == 1;
+                    return offset_mode == 1;
                 case TextureMiscMode::PTP:
-                    return info == 2;
+                    return offset_mode == 2;
                 default:
                     break;
                 }
@@ -1265,7 +1265,7 @@ union Instruction {
             BitField<35, 1, u64> ndv_flag;
             BitField<49, 1, u64> nodep_flag;
             BitField<50, 1, u64> dc_flag;
-            BitField<33, 2, u64> info;
+            BitField<33, 2, u64> offset_mode;
             BitField<37, 2, u64> component;
 
             bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1277,9 +1277,9 @@ union Instruction {
                 case TextureMiscMode::DC:
                     return dc_flag != 0;
                 case TextureMiscMode::AOFFI:
-                    return info == 1;
+                    return offset_mode == 1;
                 case TextureMiscMode::PTP:
-                    return info == 2;
+                    return offset_mode == 2;
                 default:
                     break;
                 }
@@ -1801,6 +1801,7 @@ public:
         PSET,
         CSETP,
         R2P_IMM,
+        P2R_IMM,
        XMAD_IMM,
        XMAD_CR,
        XMAD_RC,
@@ -2106,6 +2107,7 @@ private:
            INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
            INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
+            INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
            INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
            INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
            INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
 
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     // Synced GPU, is always in sync
     if (!is_async) {
         return;
     }
     MICROPROFILE_SCOPE(GPU_wait);
-    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
-    }
+    std::unique_lock lock{sync_mutex};
+    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
 }
 
 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
     syncpoints[syncpoint_id]++;
+    std::lock_guard lock{sync_mutex};
+    sync_cv.notify_all();
     if (!syncpt_interrupts[syncpoint_id].empty()) {
         u32 value = syncpoints[syncpoint_id].load();
         auto it = syncpt_interrupts[syncpoint_id].begin();
 
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <atomic>
+#include <condition_variable>
 #include <list>
 #include <memory>
 #include <mutex>
@@ -181,7 +182,7 @@ public:
     virtual void WaitIdle() const = 0;
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) const;
+    void WaitFence(u32 syncpoint_id, u32 value);
 
     void IncrementSyncPoint(u32 syncpoint_id);
 
@@ -312,6 +313,8 @@ private:
 
     std::mutex sync_mutex;
 
+    std::condition_variable sync_cv;
+
     const bool is_async;
 };
 
@@ -271,6 +271,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        case Maxwell::ShaderProgram::Geometry:
            shader_program_manager->UseTrivialGeometryShader();
            break;
+        case Maxwell::ShaderProgram::Fragment:
+            shader_program_manager->UseTrivialFragmentShader();
+            break;
        default:
            break;
        }
@@ -514,6 +517,7 @@ void RasterizerOpenGL::Clear() {
     ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
 
     SyncViewport(clear_state);
+    SyncRasterizeEnable(clear_state);
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
     }
@@ -541,6 +545,7 @@
 void RasterizerOpenGL::DrawPrelude() {
     auto& gpu = system.GPU().Maxwell3D();
 
+    SyncRasterizeEnable(state);
     SyncColorMask();
     SyncFragmentColorClampState();
     SyncMultiSampleState();
@@ -1133,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
     }
 }
 
+void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    current_state.rasterizer_discard = regs.rasterize_enable == 0;
+}
+
 void RasterizerOpenGL::SyncColorMask() {
     auto& maxwell3d = system.GPU().Maxwell3D();
     if (!maxwell3d.dirty.color_mask) {
 
@@ -168,6 +168,9 @@ private:
     /// Syncs the point state to match the guest state
     void SyncPointState();
 
+    /// Syncs the rasterizer enable state to match the guest state
+    void SyncRasterizeEnable(OpenGLState& current_state);
+
     /// Syncs Color Mask
     void SyncColorMask();
 
@@ -48,10 +48,10 @@ class ExprDecompiler;
 
 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
 
-struct TextureAoffi {};
+struct TextureOffset {};
 struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
+using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
 
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
     static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -751,6 +751,9 @@ private:
 
     Expression Visit(const Node& node) {
         if (const auto operation = std::get_if<OperationNode>(&*node)) {
+            if (const auto amend_index = operation->GetAmendIndex()) {
+                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+            }
             const auto operation_index = static_cast<std::size_t>(operation->GetCode());
             if (operation_index >= operation_decompilers.size()) {
                 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -872,6 +875,9 @@ private:
         }
 
         if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+            if (const auto amend_index = conditional->GetAmendIndex()) {
+                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+            }
             // It's invalid to call conditional on nested nodes, use an operation instead
             code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
             ++code.scope;
@@ -1077,7 +1083,7 @@ private:
     }
 
     std::string GenerateTexture(Operation operation, const std::string& function_suffix,
-                                const std::vector<TextureIR>& extras, bool sepparate_dc = false) {
+                                const std::vector<TextureIR>& extras, bool separate_dc = false) {
        constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
 
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1090,10 +1096,12 @@ private:
        std::string expr = "texture" + function_suffix;
        if (!meta->aoffi.empty()) {
            expr += "Offset";
+        } else if (!meta->ptp.empty()) {
+            expr += "Offsets";
        }
        expr += '(' + GetSampler(meta->sampler) + ", ";
        expr += coord_constructors.at(count + (has_array ? 1 : 0) +
-                                      (has_shadow && !sepparate_dc ? 1 : 0) - 1);
+                                      (has_shadow && !separate_dc ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
            expr += Visit(operation[i]).AsFloat();
@@ -1106,7 +1114,7 @@ private:
            expr += ", float(" + Visit(meta->array).AsInt() + ')';
        }
        if (has_shadow) {
-            if (sepparate_dc) {
+            if (separate_dc) {
                expr += "), " + Visit(meta->depth_compare).AsFloat();
            } else {
                expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
@@ -1118,8 +1126,12 @@ private:
        for (const auto& variant : extras) {
            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                expr += GenerateTextureArgument(*argument);
-            } else if (std::holds_alternative<TextureAoffi>(variant)) {
-                expr += GenerateTextureAoffi(meta->aoffi);
+            } else if (std::holds_alternative<TextureOffset>(variant)) {
+                if (!meta->aoffi.empty()) {
+                    expr += GenerateTextureAoffi(meta->aoffi);
+                } else if (!meta->ptp.empty()) {
+                    expr += GenerateTexturePtp(meta->ptp);
+                }
            } else if (std::holds_alternative<TextureDerivates>(variant)) {
                expr += GenerateTextureDerivates(meta->derivates);
            } else {
@@ -1160,6 +1172,20 @@ private:
        return expr;
    }
 
+    std::string ReadTextureOffset(const Node& value) {
+        if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
+            // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+            // to be constant by the standard).
+            return std::to_string(static_cast<s32>(immediate->GetValue()));
+        } else if (device.HasVariableAoffi()) {
+            // Avoid using variable AOFFI on unsupported devices.
+            return Visit(value).AsInt();
+        } else {
+            // Insert 0 on devices not supporting variable AOFFI.
+            return "0";
+        }
+    }
+
    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
        if (aoffi.empty()) {
            return {};
@@ -1170,18 +1196,7 @@ private:
        expr += '(';
 
        for (std::size_t index = 0; index < aoffi.size(); ++index) {
-            const auto operand{aoffi.at(index)};
-            if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
-                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
-                // to be constant by the standard).
-                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
-            } else if (device.HasVariableAoffi()) {
-                // Avoid using variable AOFFI on unsupported devices.
-                expr += Visit(operand).AsInt();
-            } else {
-                // Insert 0 on devices not supporting variable AOFFI.
-                expr += '0';
-            }
+            expr += ReadTextureOffset(aoffi.at(index));
            if (index + 1 < aoffi.size()) {
                expr += ", ";
            }
@@ -1191,6 +1206,20 @@ private:
        return expr;
    }
 
+    std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
+        static constexpr std::size_t num_vectors = 4;
+        ASSERT(ptp.size() == num_vectors * 2);
+
+        std::string expr = ", ivec2[](";
+        for (std::size_t vector = 0; vector < num_vectors; ++vector) {
+            const bool has_next = vector + 1 < num_vectors;
+            expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
+                                ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
+        }
+        expr += ')';
+        return expr;
+    }
+
    std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
        if (derivates.empty()) {
            return {};
@@ -1689,7 +1718,7 @@ private:
        ASSERT(meta);
 
        std::string expr = GenerateTexture(
-            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+            operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1701,7 +1730,7 @@ private:
        ASSERT(meta);
 
        std::string expr = GenerateTexture(
-            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1709,21 +1738,19 @@ private:
    }
 
    Expression TextureGather(Operation operation) {
-        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        ASSERT(meta);
+        const auto& meta = std::get<MetaTexture>(operation.GetMeta());
 
-        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        if (meta->sampler.IsShadow()) {
-            return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) +
-                        GetSwizzle(meta->element),
-                    Type::Float};
+        const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
+        const bool separate_dc = meta.sampler.IsShadow();
+
+        std::vector<TextureIR> ir;
+        if (meta.sampler.IsShadow()) {
+            ir = {TextureOffset{}};
        } else {
-            return {GenerateTexture(operation, "Gather",
-                                    {TextureAoffi{}, TextureArgument{type, meta->component}},
-                                    false) +
-                        GetSwizzle(meta->element),
-                    Type::Float};
+            ir = {TextureOffset{}, TextureArgument{type, meta.component}};
        }
+        return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
+                Type::Float};
    }
 
    Expression TextureQueryDimensions(Operation operation) {
@@ -1794,7 +1821,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);
 
-        std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
+        std::string expr =
+            GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
        return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
    }
 
@@ -50,6 +50,10 @@ public:
        current_state.geometry_shader = 0;
    }
 
+    void UseTrivialFragmentShader() {
+        current_state.fragment_shader = 0;
+    }
+
private:
    struct PipelineState {
        bool operator==(const PipelineState& rhs) const {
 
@@ -182,6 +182,10 @@ void OpenGLState::ApplyCulling() {
    }
}
 
+void OpenGLState::ApplyRasterizerDiscard() {
+    Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
+}
+
void OpenGLState::ApplyColorMask() {
    if (!dirty.color_mask) {
        return;
@@ -455,6 +459,7 @@ void OpenGLState::Apply() {
    ApplyPointSize();
    ApplyFragmentColorClamp();
    ApplyMultisample();
+    ApplyRasterizerDiscard();
    ApplyColorMask();
    ApplyDepthClamp();
    ApplyViewport();
 
@@ -48,6 +48,8 @@ public:
        GLuint index = 0;
    } primitive_restart; // GL_PRIMITIVE_RESTART
 
+    bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
+
    struct ColorMask {
        GLboolean red_enabled = GL_TRUE;
        GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
    };
    std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        color_mask; // GL_COLOR_WRITEMASK
+
    struct {
        bool test_enabled = false; // GL_STENCIL_TEST
        struct {
@@ -174,6 +177,7 @@ public:
    void ApplyMultisample();
    void ApplySRgb();
    void ApplyCulling();
+    void ApplyRasterizerDiscard();
    void ApplyColorMask();
    void ApplyDepth();
    void ApplyPrimitiveRestart();
 
@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_POINTS;
    case Maxwell::PrimitiveTopology::Lines:
        return GL_LINES;
+    case Maxwell::PrimitiveTopology::LineLoop:
+        return GL_LINE_LOOP;
    case Maxwell::PrimitiveTopology::LineStrip:
        return GL_LINE_STRIP;
    case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_TRIANGLE_FAN;
    case Maxwell::PrimitiveTopology::Quads:
        return GL_QUADS;
-    default:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
-        UNREACHABLE();
-        return {};
+    case Maxwell::PrimitiveTopology::QuadStrip:
+        return GL_QUAD_STRIP;
+    case Maxwell::PrimitiveTopology::Polygon:
+        return GL_POLYGON;
+    case Maxwell::PrimitiveTopology::LinesAdjacency:
+        return GL_LINES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::LineStripAdjacency:
+        return GL_LINE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+        return GL_TRIANGLES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+        return GL_TRIANGLE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::Patches:
+        return GL_PATCHES;
    }
+    UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+    return GL_POINTS;
}
 
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
 
@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
    const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
    const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
 
+    const auto& clip = regs.view_volume_clip_control;
+    const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
+
    Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
    if (regs.screen_y_control.triangle_rast_flip != 0 &&
        regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
    }
 
    const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
-    return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
-                                          regs.cull.cull_face, front_face);
+    return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
+                                          depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
+                                          front_face);
}
 
} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
    return static_cast<std::size_t>(cull_enable) ^
           (static_cast<std::size_t>(depth_bias_enable) << 1) ^
-           (static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
+           (static_cast<std::size_t>(depth_clamp_enable) << 2) ^
+           (static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
           (static_cast<std::size_t>(cull_face) << 24) ^
           (static_cast<std::size_t>(front_face) << 48);
}
 
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
-    return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) ==
-           std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face,
-                    rhs.front_face);
+    return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
+                    cull_face, front_face) ==
+           std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
+                    rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
 
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
 
@@ -170,15 +170,17 @@ struct FixedPipelineState {
    };
 
    struct Rasterizer {
-        constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
-                             Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
+        constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
+                             bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
+                             Maxwell::Cull::FrontFace front_face)
            : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
-              ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
-                                                                                    front_face} {}
+              depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
+              cull_face{cull_face}, front_face{front_face} {}
        Rasterizer() = default;
 
        bool cull_enable;
        bool depth_bias_enable;
+        bool depth_clamp_enable;
        bool ndc_minus_one_to_one;
        Maxwell::Cull::CullFace cull_face;
        Maxwell::Cull::FrontFace front_face;
 
@@ -2,124 +2,145 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/renderer_vulkan/declarations.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
|
||||
std::size_t alignment, u8* host_ptr)
|
||||
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
|
||||
alignment{alignment} {}
|
||||
namespace {
|
||||
|
||||
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
|
||||
Memory::Memory& cpu_memory_,
|
||||
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
|
||||
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
|
||||
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
|
||||
cpu_memory_} {
|
||||
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
|
||||
vk::BufferUsageFlagBits::eIndexBuffer |
|
||||
vk::BufferUsageFlagBits::eUniformBuffer;
|
||||
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
|
||||
vk::AccessFlagBits::eUniformRead;
|
||||
stream_buffer =
|
||||
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
|
||||
vk::PipelineStageFlagBits::eAllCommands);
|
||||
buffer_handle = stream_buffer->GetBuffer();
|
||||
const auto BufferUsage =
|
||||
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
|
||||
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
|
||||
|
||||
const auto UploadPipelineStage =
|
||||
vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
|
||||
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
|
||||
vk::PipelineStageFlagBits::eComputeShader;
|
||||
|
||||
const auto UploadAccessBarriers =
|
||||
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
|
||||
vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
|
||||
vk::AccessFlagBits::eIndexRead;
|
||||
|
||||
auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
|
||||
return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
CacheAddr cache_addr, std::size_t size)
|
||||
: VideoCommon::BufferBlock{cache_addr, size} {
|
||||
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
|
||||
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
|
||||
vk::BufferUsageFlagBits::eTransferDst,
|
||||
vk::SharingMode::eExclusive, 0, nullptr);
|
||||
|
||||
const auto& dld{device.GetDispatchLoader()};
|
||||
const auto dev{device.GetLogical()};
|
||||
buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
|
||||
buffer.commit = memory_manager.Commit(*buffer.handle, false);
|
||||
}
|
||||
|
||||
CachedBufferBlock::~CachedBufferBlock() = default;
|
||||
|
||||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
|
||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
|
||||
: VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
|
||||
CreateStreamBuffer(device,
|
||||
scheduler)},
|
||||
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
|
||||
staging_pool} {}
|
||||
|
||||
VKBufferCache::~VKBufferCache() = default;
|
||||
|
||||
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    cache &= size >= 2048;

    u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
    if (cache) {
        const auto entry = TryGet(host_ptr);
        if (entry) {
            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
                return entry->GetOffset();
            }
            Unregister(entry);
        }
    }

    AlignBuffer(alignment);
    const u64 uploaded_offset = buffer_offset;

    if (host_ptr == nullptr) {
        return uploaded_offset;
    }

    std::memcpy(buffer_ptr, host_ptr, size);
    buffer_ptr += size;
    buffer_offset += size;

    if (cache) {
        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
                                                         alignment, host_ptr);
        Register(entry);
    }

    return uploaded_offset;
}

Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
}

u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
    AlignBuffer(alignment);
    std::memcpy(buffer_ptr, raw_pointer, size);
    const u64 uploaded_offset = buffer_offset;

    buffer_ptr += size;
    buffer_offset += size;
    return uploaded_offset;
}

const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
    return buffer->GetHandle();
}

std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
    AlignBuffer(alignment);
    u8* const uploaded_ptr = buffer_ptr;
    const u64 uploaded_offset = buffer_offset;

    buffer_ptr += size;
    buffer_offset += size;
    return {uploaded_ptr, uploaded_offset};
}

const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
    size = std::max(size, std::size_t(4));
    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
    });
    return &*empty.handle;
}

void VKBufferCache::Reserve(std::size_t max_size) {
    bool invalidate;
    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
    buffer_offset = buffer_offset_base;

    if (invalidate) {
        InvalidateAll();
    }
}

void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                    const u8* data) {
    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
    std::memcpy(staging.commit->Map(size), data, size);

    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
                      size](auto cmdbuf, auto& dld) {
        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
        cmdbuf.pipelineBarrier(
            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
                                     offset, size)},
            {}, dld);
    });
}

void VKBufferCache::Send() {
    stream_buffer->Send(buffer_offset - buffer_offset_base);
}

void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                      u8* data) {
    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
                      size](auto cmdbuf, auto& dld) {
        cmdbuf.pipelineBarrier(
            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
                vk::PipelineStageFlagBits::eComputeShader,
            vk::PipelineStageFlagBits::eTransfer, {}, {},
            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
            {}, dld);
        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
    });
    scheduler.Finish();

    std::memcpy(data, staging.commit->Map(size), size);
}

void VKBufferCache::AlignBuffer(std::size_t alignment) {
    // Align the offset, not the mapped pointer
    const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
    buffer_ptr += offset_aligned - buffer_offset;
    buffer_offset = offset_aligned;
}

void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                              std::size_t dst_offset, std::size_t size) {
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
                      dst_offset, size](auto cmdbuf, auto& dld) {
        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
        cmdbuf.pipelineBarrier(
            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
                                     dst_offset, size)},
            {}, dld);
    });
}

} // namespace Vulkan
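For readers unfamiliar with the pattern, UploadBlockData and CopyBlock above both follow the same recipe: record a transfer, then fence it off with a vk::BufferMemoryBarrier so later pipeline stages observe the result. A minimal, self-contained sketch of that recipe with plain handles (the function name, parameters, and the eAllGraphics destination stage are illustrative, not part of this patch):

// Sketch only: record "copy staging -> dst, then make it visible to shaders".
// Assumes `cmdbuf` is in the recording state outside of a render pass.
void RecordStagedUpload(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Buffer dst,
                        vk::DeviceSize offset, vk::DeviceSize size,
                        const vk::DispatchLoaderDynamic& dld) {
    cmdbuf.copyBuffer(staging, dst, {vk::BufferCopy(0, offset, size)}, dld);

    // Without this barrier, a subsequent draw could read the destination range
    // before the transfer write has completed.
    const vk::BufferMemoryBarrier barrier(
        vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eShaderRead,
        VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst, offset, size);
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eAllGraphics, {}, {}, {barrier}, {}, dld);
}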
@@ -5,105 +5,74 @@
#pragma once

#include <memory>
#include <tuple>
#include <unordered_map>
#include <vector>

#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

namespace Memory {
class Memory;
}

namespace Tegra {
class MemoryManager;
}

namespace Core {
class System;
}

namespace Vulkan {

class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
class VKScheduler;

class CachedBufferEntry final : public RasterizerCacheObject {
public:
    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
                               u8* host_ptr);

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

    std::size_t GetSize() const {
        return size;
    }

    u64 GetOffset() const {
        return offset;
    }

    std::size_t GetAlignment() const {
        return alignment;
    }

private:
    VAddr cpu_addr{};
    std::size_t size{};
    u64 offset{};
    std::size_t alignment{};
};

class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
                               CacheAddr cache_addr, std::size_t size);
    ~CachedBufferBlock();

    const vk::Buffer* GetHandle() const {
        return &*buffer.handle;
    }

private:
    VKBuffer buffer;
};

class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
    ~VKBufferCache();

    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
    /// allocated.
    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);

    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);

    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);

    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
    void Reserve(std::size_t max_size);

    /// Ensures that the set data is sent to the device.
    void Send();

    /// Returns the buffer cache handle.
    vk::Buffer GetBuffer() const {
        return buffer_handle;
    }

protected:
    // We do not have to flush this cache as things in it are never modified by us.
    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}

private:
    void AlignBuffer(std::size_t alignment);

    Tegra::MemoryManager& tegra_memory_manager;
    Memory::Memory& cpu_memory;

    std::unique_ptr<VKStreamBuffer> stream_buffer;
    vk::Buffer buffer_handle;

    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;
    u64 buffer_offset_base = 0;
};

using Buffer = std::shared_ptr<CachedBufferBlock>;

class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
public:
    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                           const VKDevice& device, VKMemoryManager& memory_manager,
                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
    ~VKBufferCache();

    const vk::Buffer* GetEmptyBuffer(std::size_t size) override;

protected:
    void WriteBarrier() override {}

    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;

    const vk::Buffer* ToHandle(const Buffer& buffer) override;

    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                         const u8* data) override;

    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                           u8* data) override;

    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                   std::size_t dst_offset, std::size_t size) override;

private:
    const VKDevice& device;
    VKMemoryManager& memory_manager;
    VKScheduler& scheduler;
    VKStagingBufferPool& staging_pool;
};

} // namespace Vulkan
112  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp  Normal file
@@ -0,0 +1,112 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <memory>
#include <vector>

#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"

namespace Vulkan {

VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
                                     VKDescriptorPool& descriptor_pool,
                                     VKUpdateDescriptorQueue& update_descriptor_queue,
                                     const SPIRVShader& shader)
    : device{device}, scheduler{scheduler}, entries{shader.entries},
      descriptor_set_layout{CreateDescriptorSetLayout()},
      descriptor_allocator{descriptor_pool, *descriptor_set_layout},
      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
      descriptor_template{CreateDescriptorUpdateTemplate()},
      shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}

VKComputePipeline::~VKComputePipeline() = default;

vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
    if (!descriptor_template) {
        return {};
    }
    const auto set = descriptor_allocator.Commit(scheduler.GetFence());
    update_descriptor_queue.Send(*descriptor_template, set);
    return set;
}

UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    u32 binding = 0;
    const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
        // TODO(Rodrigo): Maybe make individual bindings here?
        for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
            bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
                                  nullptr);
        }
    };
    AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
    AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
    AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
    AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
    AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());

    const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
        {}, static_cast<u32>(bindings.size()), bindings.data());

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}

UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
    const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
    const auto dev = device.GetLogical();
    return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
}

UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
    std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
    u32 binding = 0;
    u32 offset = 0;
    FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
    if (template_entries.empty()) {
        // If the shader doesn't use descriptor sets, skip template creation.
        return UniqueDescriptorUpdateTemplate{};
    }

    const vk::DescriptorUpdateTemplateCreateInfo template_ci(
        {}, static_cast<u32>(template_entries.size()), template_entries.data(),
        vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
        vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}

UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
    const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
    const auto dev = device.GetLogical();
    return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
}

UniquePipeline VKComputePipeline::CreatePipeline() const {
    vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
                                                      *shader_module, "main", nullptr);
    vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
    subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
    if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
        shader_stage_ci.pNext = &subgroup_size_ci;
    }

    const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
    const auto dev = device.GetLogical();
    return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
}

} // namespace Vulkan
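CommitDescriptorSet above never writes descriptors directly: it hands the template and the freshly committed set to VKUpdateDescriptorQueue, which flushes them later. The eventual effect is equivalent to a single templated update; a hedged sketch of that direct form (the packed `DescriptorUpdateEntry` layout is an assumed stand-in for whatever FillDescriptorUpdateTemplateEntries described, not part of this patch):

// Sketch only: what one queued update amounts to once it is flushed.
union DescriptorUpdateEntry {
    vk::DescriptorBufferInfo buffer;
    vk::DescriptorImageInfo image;
    vk::BufferView texel_buffer;
};

void FlushOne(vk::Device dev, vk::DescriptorSet set, vk::DescriptorUpdateTemplate templ,
              const DescriptorUpdateEntry* payload, const vk::DispatchLoaderDynamic& dld) {
    // One call performs every write the template entries describe, reading each
    // binding's data at its byte offset inside `payload`.
    dev.updateDescriptorSetWithTemplate(set, templ, payload, dld);
}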
66  src/video_core/renderer_vulkan/vk_compute_pipeline.h  Normal file
@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"

namespace Vulkan {

class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;

class VKComputePipeline final {
public:
    explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
                               VKDescriptorPool& descriptor_pool,
                               VKUpdateDescriptorQueue& update_descriptor_queue,
                               const SPIRVShader& shader);
    ~VKComputePipeline();

    vk::DescriptorSet CommitDescriptorSet();

    vk::Pipeline GetHandle() const {
        return *pipeline;
    }

    vk::PipelineLayout GetLayout() const {
        return *layout;
    }

    const ShaderEntries& GetEntries() {
        return entries;
    }

private:
    UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;

    UniquePipelineLayout CreatePipelineLayout() const;

    UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;

    UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;

    UniquePipeline CreatePipeline() const;

    const VKDevice& device;
    VKScheduler& scheduler;
    ShaderEntries entries;

    UniqueDescriptorSetLayout descriptor_set_layout;
    DescriptorAllocator descriptor_allocator;
    VKUpdateDescriptorQueue& update_descriptor_queue;
    UniquePipelineLayout layout;
    UniqueDescriptorUpdateTemplate descriptor_template;
    UniqueShaderModule shader_module;
    UniquePipeline pipeline;
};

} // namespace Vulkan
89  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp  Normal file
@@ -0,0 +1,89 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <memory>
#include <vector>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"

namespace Vulkan {

// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;

DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
                                         vk::DescriptorSetLayout layout)
    : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}

DescriptorAllocator::~DescriptorAllocator() = default;

vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
    return *descriptors[CommitResource(fence)];
}

void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
    auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
    descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
                       std::make_move_iterator(new_sets.end()));
}

VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
    : device{device}, active_pool{AllocateNewPool()} {}

VKDescriptorPool::~VKDescriptorPool() = default;

vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
    static constexpr u32 num_sets = 0x20000;
    static constexpr vk::DescriptorPoolSize pool_sizes[] = {
        {vk::DescriptorType::eUniformBuffer, num_sets * 90},
        {vk::DescriptorType::eStorageBuffer, num_sets * 60},
        {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
        {vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
        {vk::DescriptorType::eStorageImage, num_sets * 40}};

    const vk::DescriptorPoolCreateInfo create_info(
        vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
        static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
    const auto dev = device.GetLogical();
    return *pools.emplace_back(
        dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
}

std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
    vk::DescriptorSetLayout layout, std::size_t count) {
    std::vector layout_copies(count, layout);
    vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
                                                layout_copies.data());

    std::vector<vk::DescriptorSet> sets(count);
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
    case vk::Result::eSuccess:
        break;
    case vk::Result::eErrorOutOfPoolMemory:
        active_pool = AllocateNewPool();
        allocate_info.descriptorPool = active_pool;
        if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
            break;
        }
        [[fallthrough]];
    default:
        vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
    }

    vk::PoolFree deleter(dev, active_pool, dld);
    std::vector<UniqueDescriptorSet> unique_sets;
    unique_sets.reserve(count);
    for (const auto set : sets) {
        unique_sets.push_back(UniqueDescriptorSet{set, deleter});
    }
    return unique_sets;
}

} // namespace Vulkan
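Worth noting is the ownership model this file sets up: pools are append-only, sets are handed out through per-layout allocators, and reuse is gated on fences. A hedged usage sketch of the per-dispatch call pattern (VKFence and VKScheduler semantics are assumed from their use elsewhere in this change set):

// Sketch only: how a pipeline object is expected to drive the allocator.
vk::DescriptorSet CommitSetForThisDispatch(DescriptorAllocator& allocator,
                                           VKScheduler& scheduler) {
    // Commit() recycles the first set whose guarding fence has signalled;
    // when none is free it grows the backing storage by SETS_GROW_RATE sets.
    return allocator.Commit(scheduler.GetFence());
}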
56  src/video_core/renderer_vulkan/vk_descriptor_pool.h  Normal file
@@ -0,0 +1,56 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <vector>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"

namespace Vulkan {

class VKDescriptorPool;

class DescriptorAllocator final : public VKFencedPool {
public:
    explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
    ~DescriptorAllocator() override;

    DescriptorAllocator(const DescriptorAllocator&) = delete;

    vk::DescriptorSet Commit(VKFence& fence);

protected:
    void Allocate(std::size_t begin, std::size_t end) override;

private:
    VKDescriptorPool& descriptor_pool;
    const vk::DescriptorSetLayout layout;

    std::vector<UniqueDescriptorSet> descriptors;
};

class VKDescriptorPool final {
    friend DescriptorAllocator;

public:
    explicit VKDescriptorPool(const VKDevice& device);
    ~VKDescriptorPool();

private:
    vk::DescriptorPool AllocateNewPool();

    std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
                                                         std::size_t count);

    const VKDevice& device;

    std::vector<UniqueDescriptorPool> pools;
    vk::DescriptorPool active_pool;
};

} // namespace Vulkan
271  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp  Normal file
@@ -0,0 +1,271 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"

namespace Vulkan {

MICROPROFILE_DECLARE(Vulkan_PipelineCache);

namespace {

vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
    return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
                              MaxwellToVK::StencilOp(face.action_depth_pass),
                              MaxwellToVK::StencilOp(face.action_depth_fail),
                              MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
}

bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
    static constexpr std::array unsupported_topologies = {
        vk::PrimitiveTopology::ePointList,
        vk::PrimitiveTopology::eLineList,
        vk::PrimitiveTopology::eTriangleList,
        vk::PrimitiveTopology::eLineListWithAdjacency,
        vk::PrimitiveTopology::eTriangleListWithAdjacency,
        vk::PrimitiveTopology::ePatchList};
    return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
                     topology) == std::end(unsupported_topologies);
}

} // Anonymous namespace

VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
                                       VKDescriptorPool& descriptor_pool,
                                       VKUpdateDescriptorQueue& update_descriptor_queue,
                                       VKRenderPassCache& renderpass_cache,
                                       const GraphicsPipelineCacheKey& key,
                                       const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
                                       const SPIRVProgram& program)
    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
      descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
      descriptor_allocator{descriptor_pool, *descriptor_set_layout},
      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
      descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                        program)},
      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
                                                                             key.renderpass_params,
                                                                             program)} {}

VKGraphicsPipeline::~VKGraphicsPipeline() = default;

vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
    if (!descriptor_template) {
        return {};
    }
    const auto set = descriptor_allocator.Commit(scheduler.GetFence());
    update_descriptor_queue.Send(*descriptor_template, set);
    return set;
}

UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
    const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
    const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
        {}, static_cast<u32>(bindings.size()), bindings.data());

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}

UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
    const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
                                                          nullptr);
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}

UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
    const SPIRVProgram& program) const {
    std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
    u32 binding = 0;
    u32 offset = 0;
    for (const auto& stage : program) {
        if (stage) {
            FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
                                                template_entries);
        }
    }
    if (template_entries.empty()) {
        // If the shader doesn't use descriptor sets, skip template creation.
        return UniqueDescriptorUpdateTemplate{};
    }

    const vk::DescriptorUpdateTemplateCreateInfo template_ci(
        {}, static_cast<u32>(template_entries.size()), template_entries.data(),
        vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
        vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}

std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
    const SPIRVProgram& program) const {
    std::vector<UniqueShaderModule> modules;
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
        const auto& stage = program[i];
        if (!stage) {
            continue;
        }
        const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
                                                   stage->code.data());
        modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
    }
    return modules;
}

UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                  const SPIRVProgram& program) const {
    const auto& vi = fixed_state.vertex_input;
    const auto& ia = fixed_state.input_assembly;
    const auto& ds = fixed_state.depth_stencil;
    const auto& cd = fixed_state.color_blending;
    const auto& ts = fixed_state.tessellation;
    const auto& rs = fixed_state.rasterizer;

    std::vector<vk::VertexInputBindingDescription> vertex_bindings;
    std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
    for (std::size_t i = 0; i < vi.num_bindings; ++i) {
        const auto& binding = vi.bindings[i];
        const bool instanced = binding.divisor != 0;
        const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
        vertex_bindings.emplace_back(binding.index, binding.stride, rate);
        if (instanced) {
            vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
        }
    }

    std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
    const auto& input_attributes = program[0]->entries.attributes;
    for (std::size_t i = 0; i < vi.num_attributes; ++i) {
        const auto& attribute = vi.attributes[i];
        if (input_attributes.find(attribute.index) == input_attributes.end()) {
            // Skip attributes not used by the vertex shaders.
            continue;
        }
        vertex_attributes.emplace_back(attribute.index, attribute.buffer,
                                       MaxwellToVK::VertexFormat(attribute.type, attribute.size),
                                       attribute.offset);
    }

    vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
        {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
        static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());

    const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
        static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
    if (!vertex_binding_divisors.empty()) {
        vertex_input_ci.pNext = &vertex_input_divisor_ci;
    }

    const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
    const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
        {}, primitive_topology,
        ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));

    const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);

    const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
                                                          Maxwell::NumViewports, nullptr);

    // TODO(Rodrigo): Find out what's the default register value for front face
    const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
        {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
        rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
        rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
        rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);

    const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
        {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);

    const vk::CompareOp depth_test_compare = ds.depth_test_enable
                                                 ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
                                                 : vk::CompareOp::eAlways;

    const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
        {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
        ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
        GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);

    std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
    const std::size_t num_attachments =
        std::min(cd.attachments_count, renderpass_params.color_attachments.size());
    for (std::size_t i = 0; i < num_attachments; ++i) {
        constexpr std::array component_table{
            vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
            vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
        const auto& blend = cd.attachments[i];

        vk::ColorComponentFlags color_components{};
        for (std::size_t j = 0; j < component_table.size(); ++j) {
            if (blend.components[j])
                color_components |= component_table[j];
        }

        cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
            blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
            MaxwellToVK::BlendFactor(blend.dst_rgb_func),
            MaxwellToVK::BlendEquation(blend.rgb_equation),
            MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
            MaxwellToVK::BlendEquation(blend.a_equation), color_components);
    }
    const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
                                                                  static_cast<u32>(num_attachments),
                                                                  cb_attachments.data(), {});

    constexpr std::array dynamic_states = {
        vk::DynamicState::eViewport,         vk::DynamicState::eScissor,
        vk::DynamicState::eDepthBias,        vk::DynamicState::eBlendConstants,
        vk::DynamicState::eDepthBounds,      vk::DynamicState::eStencilCompareMask,
        vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
    const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
        {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());

    vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
    subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;

    std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
    std::size_t module_index = 0;
    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        if (!program[stage]) {
            continue;
        }
        const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
        const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
        auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
                                                    *modules[module_index++], "main", nullptr);
        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
            stage_ci.pNext = &subgroup_size_ci;
        }
    }

    const vk::GraphicsPipelineCreateInfo create_info(
        {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
        &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
        &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
}

} // namespace Vulkan
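One consequence of the dynamic state list above is that viewport, scissor, depth bias, blend constants, depth bounds and the stencil masks are not baked into the pipeline object and must be recorded per draw. A hedged sketch of the caller's side (the concrete values are placeholders, not taken from this patch):

// Sketch only: state that must now be set on the command buffer before drawing.
void RecordDynamicState(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) {
    const vk::Viewport viewport(0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f);
    const vk::Rect2D scissor({0, 0}, {1280, 720});
    cmdbuf.setViewport(0, {viewport}, dld);
    cmdbuf.setScissor(0, {scissor}, dld);
    cmdbuf.setDepthBias(0.0f, 0.0f, 0.0f, dld);
    const float blend_constants[4] = {1.0f, 1.0f, 1.0f, 1.0f};
    cmdbuf.setBlendConstants(blend_constants, dld);
    cmdbuf.setDepthBounds(0.0f, 1.0f, dld);
    cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, 0xFF, dld);
    cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, 0xFF, dld);
    cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, 0, dld);
}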
90  src/video_core/renderer_vulkan/vk_graphics_pipeline.h  Normal file
@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>

#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"

namespace Vulkan {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;

struct GraphicsPipelineCacheKey;

class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;

using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;

class VKGraphicsPipeline final {
public:
    explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
                                VKDescriptorPool& descriptor_pool,
                                VKUpdateDescriptorQueue& update_descriptor_queue,
                                VKRenderPassCache& renderpass_cache,
                                const GraphicsPipelineCacheKey& key,
                                const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
                                const SPIRVProgram& program);
    ~VKGraphicsPipeline();

    vk::DescriptorSet CommitDescriptorSet();

    vk::Pipeline GetHandle() const {
        return *pipeline;
    }

    vk::PipelineLayout GetLayout() const {
        return *layout;
    }

    vk::RenderPass GetRenderPass() const {
        return renderpass;
    }

private:
    UniqueDescriptorSetLayout CreateDescriptorSetLayout(
        const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;

    UniquePipelineLayout CreatePipelineLayout() const;

    UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
        const SPIRVProgram& program) const;

    std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;

    UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
                                  const SPIRVProgram& program) const;

    const VKDevice& device;
    VKScheduler& scheduler;
    const FixedPipelineState fixed_state;
    const u64 hash;

    UniqueDescriptorSetLayout descriptor_set_layout;
    DescriptorAllocator descriptor_allocator;
    VKUpdateDescriptorQueue& update_descriptor_queue;
    UniquePipelineLayout layout;
    UniqueDescriptorUpdateTemplate descriptor_template;
    std::vector<UniqueShaderModule> modules;

    vk::RenderPass renderpass;
    UniquePipeline pipeline;
};

} // namespace Vulkan
106  src/video_core/renderer_vulkan/vk_image.cpp  Normal file
@@ -0,0 +1,106 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <memory>
#include <vector>

#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"

namespace Vulkan {

VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
                 const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask)
    : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
      image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
    UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
                         "Queue family tracking is not implemented");

    const auto dev = device.GetLogical();
    image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader());

    const u32 num_ranges = image_num_layers * image_num_levels;
    barriers.resize(num_ranges);
    subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
}

VKImage::~VKImage() = default;

void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
                         vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
                         vk::ImageLayout new_layout) {
    if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
        return;
    }

    std::size_t cursor = 0;
    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
        for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
            const u32 layer = base_layer + layer_it;
            const u32 level = base_level + level_it;
            auto& state = GetSubrangeState(layer, level);
            barriers[cursor] = vk::ImageMemoryBarrier(
                state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED,
                VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1});
            state.access = new_access;
            state.layout = new_layout;
        }
    }

    scheduler.RequestOutsideRenderPassOperationContext();

    scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) {
        // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
        constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands;
        cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr,
                               static_cast<u32>(cursor), barriers.data(), dld);
    });
}

bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
                         vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept {
    const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
                               base_level == 0 && num_levels == image_num_levels;
    if (!is_full_range) {
        state_diverged = true;
    }

    if (!state_diverged) {
        auto& state = GetSubrangeState(0, 0);
        if (state.access != new_access || state.layout != new_layout) {
            return true;
        }
    }

    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
        for (u32 level_it = 0; level_it < num_levels; ++level_it) {
            const u32 layer = base_layer + layer_it;
            const u32 level = base_level + level_it;
            auto& state = GetSubrangeState(layer, level);
            if (state.access != new_access || state.layout != new_layout) {
                return true;
            }
        }
    }
    return false;
}

void VKImage::CreatePresentView() {
    // Image type has to be 2D to be presented.
    const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {},
                                                {aspect_mask, 0, 1, 0, 1});
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld);
}

VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
    return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
                           static_cast<std::size_t>(level)];
}

} // namespace Vulkan
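For context, Transition is the only path through which the tracked state is supposed to change; callers pass the subresource window plus the stage, access and layout they are about to use. A hedged usage sketch for a single-layer, single-level texture that is about to be sampled:

// Sketch only: prepare a 1-layer, 1-level image for fragment-shader sampling.
image.Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader,
                 vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eShaderReadOnlyOptimal);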
84  src/video_core/renderer_vulkan/vk_image.h  Normal file
@@ -0,0 +1,84 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <vector>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"

namespace Vulkan {

class VKDevice;
class VKScheduler;

class VKImage {
public:
    explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
                     const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask);
    ~VKImage();

    /// Records in the passed command buffer an image transition and updates the state of the image.
    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
                    vk::ImageLayout new_layout);

    /// Returns a view compatible with presentation; the image has to be 2D.
    vk::ImageView GetPresentView() {
        if (!present_view) {
            CreatePresentView();
        }
        return *present_view;
    }

    /// Returns the Vulkan image handler.
    vk::Image GetHandle() const {
        return *image;
    }

    /// Returns the Vulkan format for this image.
    vk::Format GetFormat() const {
        return format;
    }

    /// Returns the Vulkan aspect mask.
    vk::ImageAspectFlags GetAspectMask() const {
        return aspect_mask;
    }

private:
    struct SubrangeState final {
        vk::AccessFlags access{};                             ///< Current access bits.
        vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout.
    };

    bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
                    vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept;

    /// Creates a presentation view.
    void CreatePresentView();

    /// Returns the subrange state for a layer and level.
    SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;

    const VKDevice& device; ///< Device handler.
    VKScheduler& scheduler; ///< Device scheduler.

    const vk::Format format;                ///< Vulkan format.
    const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
    const u32 image_num_layers;             ///< Number of layers.
    const u32 image_num_levels;             ///< Number of mipmap levels.

    UniqueImage image;            ///< Image handle.
    UniqueImageView present_view; ///< Image view compatible with presentation.

    std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers.
    std::vector<SubrangeState> subrange_states;   ///< Current subrange state.

    bool state_diverged = false; ///< True when subresources mismatch in layout.
};

} // namespace Vulkan
@@ -6,6 +6,7 @@
#include <optional>
#include <tuple>
#include <vector>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -16,34 +17,32 @@

namespace Vulkan {

// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;

namespace {

u64 GetAllocationChunkSize(u64 required_size) {
    static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
    auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
    return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
}

} // Anonymous namespace
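Replacing the fixed 64 MiB ALLOC_CHUNK_SIZE with GetAllocationChunkSize makes small workloads cheaper and removes the hard ceiling on huge requests. A self-contained sketch with worked values (ChunkSize reimplements the bucketing locally, with Common::AlignUp spelled out as plain arithmetic):

// Sketch only: the bucketing behaviour of GetAllocationChunkSize, checked locally.
#include <algorithm>
#include <cassert>
#include <cstdint>

static std::uint64_t ChunkSize(std::uint64_t required) {
    static constexpr std::uint64_t sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
    const auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required);
    const std::uint64_t align = 256ULL << 20; // beyond the table, round up to 256 MiB
    return it != std::end(sizes) ? *it : (required + align - 1) / align * align;
}

int main() {
    assert(ChunkSize(10ULL << 20) == (16ULL << 20));   // 10 MiB rounds up to the 16 MiB bucket
    assert(ChunkSize(64ULL << 20) == (64ULL << 20));   // exact bucket hit
    assert(ChunkSize(200ULL << 20) == (256ULL << 20)); // past the table: one 256 MiB multiple
}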
class VKMemoryAllocation final {
public:
    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
          shifted_type{ShiftType(type)}, is_mappable{properties &
                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
        if (is_mappable) {
            const auto dev = device.GetLogical();
            const auto& dld = device.GetDispatchLoader();
            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
        }
    }
                                vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
        : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
          shifted_type{ShiftType(type)} {}

    ~VKMemoryAllocation() {
        const auto dev = device.GetLogical();
        const auto& dld = device.GetDispatchLoader();
        if (is_mappable)
            dev.unmapMemory(memory, dld);
        dev.free(memory, nullptr, dld);
    }

    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
                                        static_cast<u64>(alignment));
        auto found = TryFindFreeSection(free_iterator, allocation_size,
                                        static_cast<u64>(commit_size), static_cast<u64>(alignment));
        if (!found) {
            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
                                       static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
                return nullptr;
            }
        }
        u8* address = is_mappable ? base_address + *found : nullptr;
        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
        auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
                                                           *found + commit_size);
        commits.push_back(commit.get());

@@ -65,12 +63,10 @@ public:

    void Free(const VKMemoryCommitImpl* commit) {
        ASSERT(commit);
        const auto it =
            std::find_if(commits.begin(), commits.end(),
                         [&](const auto& stored_commit) { return stored_commit == commit; });

        const auto it = std::find(std::begin(commits), std::end(commits), commit);
        if (it == commits.end()) {
            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
            UNREACHABLE();
            UNREACHABLE_MSG("Freeing unallocated commit!");
            return;
        }
        commits.erase(it);
@@ -88,11 +84,11 @@ private:
    }

    /// A memory allocator, it may return a free region between "start" and "end" with the solicited
    /// requeriments.
    /// requirements.
    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
        u64 iterator = start;
        while (iterator + size < end) {
            const u64 try_left = Common::AlignUp(iterator, alignment);
        u64 iterator = Common::AlignUp(start, alignment);
        while (iterator + size <= end) {
            const u64 try_left = iterator;
            const u64 try_right = try_left + size;

            bool overlap = false;
@@ -100,7 +96,7 @@ private:
                const auto [commit_left, commit_right] = commit->interval;
                if (try_left < commit_right && commit_left < try_right) {
                    // There's an overlap, continue the search where the overlapping commit ends.
                    iterator = commit_right;
                    iterator = Common::AlignUp(commit_right, alignment);
                    overlap = true;
                    break;
                }
@@ -110,6 +106,7 @@ private:
                return try_left;
            }
        }

        // No free regions were found, return an empty optional.
        return std::nullopt;
    }
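The rewritten search also fixes two subtle bounds bugs: the old loop compared the unaligned iterator against `end`, so a region that fit exactly at the tail was rejected, and it could return an aligned offset whose end spilled past the allocation. A self-contained sketch of the corrected first-fit check with worked values (FindFirstFit models only the empty-allocation case, with no existing commits):

// Sketch only: the corrected bounds logic, ignoring existing commits.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<std::uint64_t> FindFirstFit(std::uint64_t start, std::uint64_t end,
                                                 std::uint64_t size, std::uint64_t alignment) {
    const std::uint64_t aligned = (start + alignment - 1) / alignment * alignment;
    // New condition: <= lets a region that ends exactly at `end` be used.
    return aligned + size <= end ? std::optional<std::uint64_t>{aligned} : std::nullopt;
}

int main() {
    assert(FindFirstFit(0, 64, 64, 16) == 0); // exact fit, rejected by the old `<` check
    assert(!FindFirstFit(8, 64, 64, 16));     // AlignUp(8, 16) = 16 and 16 + 64 > 64
}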
@@ -117,12 +114,8 @@ private:
    const VKDevice& device;                   ///< Vulkan device.
    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
    const u64 alloc_size;                     ///< Size of this allocation.
    const u64 allocation_size;                ///< Size of this allocation.
    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
    const bool is_mappable;                   ///< Whether the allocation is mappable.

    /// Base address of the mapped pointer.
    u8* base_address{};

    /// Hints where the next free region is likely going to be.
    u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
};

VKMemoryManager::VKMemoryManager(const VKDevice& device)
    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
      is_memory_unified{GetMemoryUnified(props)} {}
    : device{device}, properties{device.GetPhysical().getMemoryProperties(
                          device.GetDispatchLoader())},
      is_memory_unified{GetMemoryUnified(properties)} {}

VKMemoryManager::~VKMemoryManager() = default;

VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
                                       bool host_visible) {
    const u64 chunk_size = GetAllocationChunkSize(requirements.size);

    // When a host visible commit is asked, search for host visible and coherent, otherwise search
    // for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
            : vk::MemoryPropertyFlagBits::eDeviceLocal;

    const auto TryCommit = [&]() -> VKMemoryCommit {
        for (auto& alloc : allocs) {
            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
                continue;

            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
                return commit;
            }
        }
        return {};
    };

    if (auto commit = TryCommit(); commit) {
    if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
        return commit;
    }

    // Commit has failed, allocate more memory.
    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
        // TODO(Rodrigo): Try to use host memory.
        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
        UNREACHABLE();
    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
        // Allocation has failed, panic.
        UNREACHABLE_MSG("Ran out of VRAM!");
        return {};
    }

    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
    // there's a bug.
    auto commit = TryCommit();
    auto commit = TryAllocCommit(requirements, wanted_properties);
    ASSERT(commit);
    return commit;
}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool

VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
    auto commit = Commit(requeriments, host_visible);
    auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
    dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {

VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    const auto requeriments = dev.getImageMemoryRequirements(image, dld);
    auto commit = Commit(requeriments, host_visible);
    auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
    dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
}

bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
                                  u64 size) {
    const u32 type = [&]() {
        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
            const auto flags = props.memoryTypes[type_index].propertyFlags;
    const u32 type = [&] {
        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
            const auto flags = properties.memoryTypes[type_index].propertyFlags;
            if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
                // The type matches in type and in the wanted properties.
                return type_index;
            }
        }
        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
        UNREACHABLE();
        return 0u;
        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
        return 0U;
    }();

    const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    // Try to allocate found type.
    const vk::MemoryAllocateInfo memory_ai(size, type);
    vk::DeviceMemory memory;
    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
    if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
        res != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
        return false;
    }
    allocs.push_back(
    allocations.push_back(
        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
    return true;
}

/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
                                               vk::MemoryPropertyFlags wanted_properties) {
    for (auto& allocation : allocations) {
        if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
            continue;
        }
        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
            return commit;
        }
    }
    return {};
}

/*static*/ bool VKMemoryManager::GetMemoryUnified(
    const vk::PhysicalDeviceMemoryProperties& properties) {
    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
        if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
            // Memory is considered unified when heaps are device local only.
            return false;
        }
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    return true;
}

VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
                                       u8* data, u64 begin, u64 end)
    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
                                       vk::DeviceMemory memory, u64 begin, u64 end)
    : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
||||
|
||||
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
|
||||
allocation->Free(this);
|
||||
}
|
||||
|
||||
u8* VKMemoryCommitImpl::GetData() const {
|
||||
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
|
||||
return data;
|
||||
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
|
||||
const auto dev = device.GetLogical();
|
||||
const auto address = reinterpret_cast<u8*>(
|
||||
dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
|
||||
return MemoryMap{this, address};
|
||||
}
|
||||
|
||||
void VKMemoryCommitImpl::Unmap() const {
|
||||
const auto dev = device.GetLogical();
|
||||
dev.unmapMemory(memory, device.GetDispatchLoader());
|
||||
}
|
||||
|
||||
MemoryMap VKMemoryCommitImpl::Map() const {
|
||||
return Map(interval.second - interval.first);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
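With this change a commit no longer hands out a raw pointer through GetData; callers map explicitly and the returned MemoryMap unmaps itself on destruction. A hypothetical caller, not part of the diff, shows the intended lifetime:

// Hypothetical usage; `commit` is a VKMemoryCommit created with
// host_visible = true, and <cstring> provides std::memcpy.
void UploadToCommit(const VKMemoryCommit& commit, const u8* src, u64 size) {
    const MemoryMap map = commit->Map(size); // vkMapMemory over the commit's interval.
    std::memcpy(map.GetAddress(), src, size);
} // ~MemoryMap calls Unmap() here, so no manual cleanup is needed.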
@@ -12,6 +12,7 @@

namespace Vulkan {

class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
    explicit VKMemoryManager(const VKDevice& device);
    VKMemoryManager(const VKMemoryManager&) = delete;
    ~VKMemoryManager();

    /**
     * Commits memory with the specified requirements.
     * @param reqs Requeriments returned from a Vulkan call.
     * @param requirements Requirements returned from a Vulkan call.
     * @param host_visible Signals the allocator that it *must* use host visible and coherent
     *                     memory. When passing false, it will try to allocate device local memory.
     *                     memory. When passing false, it will try to allocate device local memory.
     * @returns A memory commit.
     */
    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
    /// Allocates a chunk of memory.
    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);

    /// Returns true if the device uses a unified memory model.
    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
    /// Tries to allocate a memory commit.
    VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
                                  vk::MemoryPropertyFlags wanted_properties);

    const VKDevice& device;                         ///< Device handler.
    const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
    const bool is_memory_unified;                   ///< True if memory model is unified.
    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
    /// Returns true if the device uses a unified memory model.
    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);

    const VKDevice& device;                              ///< Device handler.
    const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
    const bool is_memory_unified;                        ///< True if memory model is unified.
    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};

class VKMemoryCommitImpl final {
    friend VKMemoryAllocation;
    friend MemoryMap;

public:
    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
                                u64 begin, u64 end);
    explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
                                vk::DeviceMemory memory, u64 begin, u64 end);
    ~VKMemoryCommitImpl();

    /// Returns the writeable memory map. The commit has to be mappable.
    u8* GetData() const;
    /// Maps a memory region and returns a pointer to it.
    /// It's illegal to have more than one memory map at the same time.
    MemoryMap Map(u64 size, u64 offset = 0) const;

    /// Maps the whole commit and returns a pointer to it.
    /// It's illegal to have more than one memory map at the same time.
    MemoryMap Map() const;

    /// Returns the Vulkan memory handler.
    vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
    }

private:
    /// Unmaps memory.
    void Unmap() const;

    const VKDevice& device;           ///< Vulkan device.
    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
    vk::DeviceMemory memory;          ///< Vulkan device memory handler.
    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};

/// Holds ownership of a memory map.
class MemoryMap final {
public:
    explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
        : commit{commit}, address{address} {}

    ~MemoryMap() {
        if (commit) {
            commit->Unmap();
        }
    }

    /// Prematurely releases the memory map.
    void Release() {
        commit->Unmap();
        commit = nullptr;
    }

    /// Returns the address of the memory map.
    u8* GetAddress() const {
        return address;
    }

    /// Returns the address of the memory map.
    operator u8*() const {
        return address;
    }

private:
    const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
    u8* address{};                      ///< Address to the mapped memory.
};

} // namespace Vulkan
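Taken together, the interface above reduces backing a buffer to a create/commit pair, since Commit(vk::Buffer, bool) queries the requirements, sub-allocates and binds in one call. A sketch of the assumed sequence (dev, dld, buffer_ci and the sizes are illustrative, not from the diff):

// Hypothetical call sequence against VKMemoryManager.
UniqueBuffer buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
VKMemoryCommit commit = memory_manager.Commit(*buffer, /*host_visible=*/true);
MemoryMap map = commit->Map(upload_size);
std::memcpy(map.GetAddress(), source_data, upload_size);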
395
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>

#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"

namespace Vulkan {

MICROPROFILE_DECLARE(Vulkan_PipelineCache);

using Tegra::Engines::ShaderType;

namespace {

constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
    VideoCommon::Shader::CompileDepth::FullDecompile};

/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
    const auto& gpu{system.GPU().Maxwell3D()};
    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}

/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    // Sched instructions appear once every 4 instructions.
    constexpr std::size_t SchedPeriod = 4;
    const std::size_t absolute_offset = offset - main_offset;
    return (absolute_offset % SchedPeriod) == 0;
}

/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
    const std::size_t start_offset = is_compute ? 0 : 10;
    // This is the encoded version of BRA that jumps to itself. All Nvidia
    // shaders end with one.
    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const u64 instruction = program[offset];
        if (!IsSchedInstruction(offset, start_offset)) {
            if ((instruction & mask) == self_jumping_branch) {
                // End on Maxwell's "nop" instruction
                break;
            }
            if (instruction == 0) {
                break;
            }
        }
        ++offset;
    }
    // The last instruction is included in the program size
    return std::min(offset + 1, program.size());
}
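Because IsSchedInstruction is constexpr, the 4-word grouping it encodes can be checked at compile time. With start_offset = 10 (graphics shaders skip a 10-word header) the stream is laid out as [sched][op][op][op] repeating, which is why the size scan above ignores sched slots when looking for the terminator:

// Illustration of the sched grouping assumed by CalculateProgramSize.
static_assert(IsSchedInstruction(10, 10));  // (10 - 10) % 4 == 0: sched slot
static_assert(!IsSchedInstruction(11, 10)); // regular instruction word
static_assert(IsSchedInstruction(14, 10));  // next group's sched slot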
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
                          const u8* host_ptr, bool is_compute) {
    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
        std::fill(program_code.begin(), program_code.end(), 0);
        return program_code;
    });
    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
                                   program_code.size() * sizeof(u64));
    program_code.resize(CalculateProgramSize(program_code, is_compute));
    return program_code;
}

constexpr std::size_t GetStageFromProgram(std::size_t program) {
    return program == 0 ? 0 : program - 1;
}

constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}

ShaderType GetShaderType(Maxwell::ShaderProgram program) {
    switch (program) {
    case Maxwell::ShaderProgram::VertexB:
        return ShaderType::Vertex;
    case Maxwell::ShaderProgram::TesselationControl:
        return ShaderType::TesselationControl;
    case Maxwell::ShaderProgram::TesselationEval:
        return ShaderType::TesselationEval;
    case Maxwell::ShaderProgram::Geometry:
        return ShaderType::Geometry;
    case Maxwell::ShaderProgram::Fragment:
        return ShaderType::Fragment;
    default:
        UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
        return ShaderType::Vertex;
    }
}

u32 FillDescriptorLayout(const ShaderEntries& entries,
                         std::vector<vk::DescriptorSetLayoutBinding>& bindings,
                         Maxwell::ShaderProgram program_type, u32 base_binding) {
    const ShaderType stage = GetStageFromProgram(program_type);
    const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);

    u32 binding = base_binding;
    const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
        for (std::size_t i = 0; i < num_entries; ++i) {
            bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
        }
    };
    AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
    AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
    AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
    AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
    AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
    return binding;
}

} // Anonymous namespace

CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
                           ProgramCode program_code, u32 main_offset)
    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
      program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
      shader_ir{this->program_code, main_offset, compiler_settings, locker},
      entries{GenerateShaderEntries(shader_ir)} {}

CachedShader::~CachedShader() = default;

Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
    Core::System& system, Tegra::Engines::ShaderType stage) {
    if (stage == Tegra::Engines::ShaderType::Compute) {
        return system.GPU().KeplerCompute();
    } else {
        return system.GPU().Maxwell3D();
    }
}

VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                                 const VKDevice& device, VKScheduler& scheduler,
                                 VKDescriptorPool& descriptor_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue)
    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
      renderpass_cache(device) {}

VKPipelineCache::~VKPipelineCache() = default;

std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
    const auto& gpu = system.GPU().Maxwell3D();
    auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
    if (!dirty) {
        return last_shaders;
    }
    dirty = false;

    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = gpu.regs.shader_config[index];
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};

        // Skip stages that are not enabled
        if (!gpu.regs.IsShaderConfigEnabled(index)) {
            continue;
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
        const auto host_ptr{memory_manager.GetPointer(program_addr)};
        auto shader = TryGet(host_ptr);
        if (!shader) {
            // No shader found - create a new one
            constexpr u32 stage_offset = 10;
            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);

            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
            ASSERT(cpu_addr);

            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                    host_ptr, std::move(code), stage_offset);
            Register(shader);
        }
        shaders[index] = std::move(shader);
    }
    return last_shaders = shaders;
}

VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    if (last_graphics_pipeline && last_graphics_key == key) {
        return *last_graphics_pipeline;
    }
    last_graphics_key = key;

    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
        const auto [program, bindings] = DecompileShaders(key);
        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                     update_descriptor_queue, renderpass_cache, key,
                                                     bindings, program);
    }
    return *(last_graphics_pipeline = entry.get());
}

VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
    auto& entry = pair->second;
    if (!is_cache_miss) {
        return *entry;
    }
    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());

    auto& memory_manager = system.GPU().MemoryManager();
    const auto program_addr = key.shader;
    const auto host_ptr = memory_manager.GetPointer(program_addr);

    auto shader = TryGet(host_ptr);
    if (!shader) {
        // No shader found - create a new one
        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
        ASSERT(cpu_addr);

        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
        constexpr u32 kernel_main_offset = 0;
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                program_addr, *cpu_addr, host_ptr, std::move(code),
                                                kernel_main_offset);
        Register(shader);
    }

    Specialization specialization;
    specialization.workgroup_size = key.workgroup_size;
    specialization.shared_memory_size = key.shared_memory_size;

    const SPIRVShader spirv_shader{
        Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
        shader->GetEntries()};
    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
                                                update_descriptor_queue, spirv_shader);
    return *entry;
}

void VKPipelineCache::Unregister(const Shader& shader) {
    bool finished = false;
    const auto Finish = [&] {
        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
        // flush.
        if (finished) {
            return;
        }
        finished = true;
        scheduler.Finish();
    };

    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
        auto& entry = it->first;
        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
            entry.shaders.end()) {
            ++it;
            continue;
        }
        Finish();
        it = graphics_cache.erase(it);
    }
    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
        auto& entry = it->first;
        if (entry.shader != invalidated_addr) {
            ++it;
            continue;
        }
        Finish();
        it = compute_cache.erase(it);
    }

    RasterizerCache::Unregister(shader);
}

std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
    const auto& fixed_state = key.fixed_state;
    auto& memory_manager = system.GPU().MemoryManager();
    const auto& gpu = system.GPU().Maxwell3D();

    Specialization specialization;
    specialization.primitive_topology = fixed_state.input_assembly.topology;
    if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
        ASSERT(fixed_state.input_assembly.point_size != 0.0f);
        specialization.point_size = fixed_state.input_assembly.point_size;
    }
    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
        specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
    }
    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
    specialization.tessellation.primitive = fixed_state.tessellation.primitive;
    specialization.tessellation.spacing = fixed_state.tessellation.spacing;
    specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
    for (const auto& rt : key.renderpass_params.color_attachments) {
        specialization.enabled_rendertargets.set(rt.index);
    }

    SPIRVProgram program;
    std::vector<vk::DescriptorSetLayoutBinding> bindings;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);

        // Skip stages that are not enabled
        if (!gpu.regs.IsShaderConfigEnabled(index)) {
            continue;
        }

        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
        const auto shader = TryGet(host_ptr);
        ASSERT(shader);

        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
        const auto program_type = GetShaderType(program_enum);
        const auto& entries = shader->GetEntries();
        program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
                          entries};

        if (program_enum == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            ++index;
        }

        const u32 old_binding = specialization.base_binding;
        specialization.base_binding =
            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
    }
    return {std::move(program), std::move(bindings)};
}

void FillDescriptorUpdateTemplateEntries(
    const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
    std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
    static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
    const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
        const u32 count = static_cast<u32>(count_);
        if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
            device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
            // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
            // crash.
            for (u32 i = 0; i < count; ++i) {
                template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
                                              offset + i * entry_size, entry_size);
            }
        } else if (count != 0) {
            template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
        }
        offset += count * entry_size;
        binding += count;
    };

    AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
    AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
    AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
    AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
    AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}

} // namespace Vulkan
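The cache is driven per draw: refresh the shader array with GetShaders, fill a GraphicsPipelineCacheKey from the fixed state and render pass parameters, then fetch the pipeline. The rasterizer that performs this is only stubbed in this diff, so the sketch below is an assumed calling pattern with hypothetical helpers:

// Hypothetical driver of VKPipelineCache; GetFixedPipelineState and
// GetRenderPassParams stand in for the rasterizer's state tracking.
const auto shaders = pipeline_cache.GetShaders();
GraphicsPipelineCacheKey key;
key.fixed_state = GetFixedPipelineState(gpu.regs);
key.renderpass_params = GetRenderPassParams();
for (std::size_t i = 0; i < Maxwell::MaxShaderProgram; ++i) {
    key.shaders[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
}
VKGraphicsPipeline& pipeline = pipeline_cache.GetGraphicsPipeline(key);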
200
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
@@ -0,0 +1,200 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/functional/hash.hpp>

#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"

namespace Core {
class System;
}

namespace Vulkan {

class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;

class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;

using ProgramCode = std::vector<u64>;

struct GraphicsPipelineCacheKey {
    FixedPipelineState fixed_state;
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    RenderPassParams renderpass_params;

    std::size_t Hash() const noexcept {
        std::size_t hash = fixed_state.Hash();
        for (const auto& shader : shaders) {
            boost::hash_combine(hash, shader);
        }
        boost::hash_combine(hash, renderpass_params.Hash());
        return hash;
    }

    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
        return std::tie(fixed_state, shaders, renderpass_params) ==
               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
    }
};

struct ComputePipelineCacheKey {
    GPUVAddr shader{};
    u32 shared_memory_size{};
    std::array<u32, 3> workgroup_size{};

    std::size_t Hash() const noexcept {
        return static_cast<std::size_t>(shader) ^
               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
               static_cast<std::size_t>(workgroup_size[0]) ^
               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
               (static_cast<std::size_t>(workgroup_size[2]) << 24);
    }

    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
        return std::tie(shader, shared_memory_size, workgroup_size) ==
               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
    }
};

} // namespace Vulkan

namespace std {

template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
    std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
    std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
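These std::hash specializations, together with each key's operator==, are what let the cache maps declared below use the key structs directly. An illustrative declaration, the same shape the class itself uses:

// Legal without an explicit hasher thanks to the specializations above.
std::unordered_map<Vulkan::ComputePipelineCacheKey,
                   std::unique_ptr<Vulkan::VKComputePipeline>>
    compute_cache;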
namespace Vulkan {

class CachedShader final : public RasterizerCacheObject {
public:
    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
    ~CachedShader();

    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return program_code.size() * sizeof(u64);
    }

    VideoCommon::Shader::ShaderIR& GetIR() {
        return shader_ir;
    }

    const VideoCommon::Shader::ShaderIR& GetIR() const {
        return shader_ir;
    }

    const ShaderEntries& GetEntries() const {
        return entries;
    }

private:
    static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
                                                                 Tegra::Engines::ShaderType stage);

    GPUVAddr gpu_addr{};
    VAddr cpu_addr{};
    ProgramCode program_code;
    VideoCommon::Shader::ConstBufferLocker locker;
    VideoCommon::Shader::ShaderIR shader_ir;
    ShaderEntries entries;
};

class VKPipelineCache final : public RasterizerCache<Shader> {
public:
    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                             const VKDevice& device, VKScheduler& scheduler,
                             VKDescriptorPool& descriptor_pool,
                             VKUpdateDescriptorQueue& update_descriptor_queue);
    ~VKPipelineCache();

    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();

    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);

    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);

protected:
    void Unregister(const Shader& shader) override;

    void FlushObjectInner(const Shader& object) override {}

private:
    std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
        const GraphicsPipelineCacheKey& key);

    Core::System& system;
    const VKDevice& device;
    VKScheduler& scheduler;
    VKDescriptorPool& descriptor_pool;
    VKUpdateDescriptorQueue& update_descriptor_queue;

    VKRenderPassCache renderpass_cache;

    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;

    GraphicsPipelineCacheKey last_graphics_key;
    VKGraphicsPipeline* last_graphics_pipeline = nullptr;

    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
        graphics_cache;
    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};

void FillDescriptorUpdateTemplateEntries(
    const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
    std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);

} // namespace Vulkan
13
src/video_core/renderer_vulkan/vk_rasterizer.h
Normal file
@@ -0,0 +1,13 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "video_core/rasterizer_interface.h"

namespace Vulkan {

class RasterizerVulkan : public VideoCore::RasterizerInterface {};

} // namespace Vulkan
100
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
@@ -0,0 +1,100 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <memory>
#include <vector>

#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"

namespace Vulkan {

VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}

VKRenderPassCache::~VKRenderPassCache() = default;

vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
    const auto [pair, is_cache_miss] = cache.try_emplace(params);
    auto& entry = pair->second;
    if (is_cache_miss) {
        entry = CreateRenderPass(params);
    }
    return *entry;
}
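GetRenderPass leans on unordered_map::try_emplace to fold lookup and insertion into a single hash walk: on a miss the mapped value is default-constructed in place and then filled, on a hit the cached handle is returned untouched. The same idiom in isolation, as a generic sketch:

#include <unordered_map>

// Generic form of the try_emplace memoization used above.
template <typename Key, typename Value, typename Factory>
Value& Memoize(std::unordered_map<Key, Value>& map, const Key& key, Factory&& factory) {
    const auto [it, inserted] = map.try_emplace(key);
    if (inserted) {
        it->second = factory(key); // Build the cached value only on a miss.
    }
    return it->second;
}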
UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
    std::vector<vk::AttachmentDescription> descriptors;
    std::vector<vk::AttachmentReference> color_references;

    for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
        const auto attachment = params.color_attachments[rt];
        const auto format =
            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
                   static_cast<u32>(attachment.pixel_format));

        // TODO(Rodrigo): Add eMayAlias when it's needed.
        const auto color_layout = attachment.is_texception
                                      ? vk::ImageLayout::eGeneral
                                      : vk::ImageLayout::eColorAttachmentOptimal;
        descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
                                 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
                                 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
                                 vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
        color_references.emplace_back(static_cast<u32>(rt), color_layout);
    }

    vk::AttachmentReference zeta_attachment_ref;
    if (params.has_zeta) {
        const auto format =
            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
                   static_cast<u32>(params.zeta_pixel_format));

        const auto zeta_layout = params.zeta_texception
                                     ? vk::ImageLayout::eGeneral
                                     : vk::ImageLayout::eDepthStencilAttachmentOptimal;
        descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
                                 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
                                 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
                                 vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
        zeta_attachment_ref =
            vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
    }

    const vk::SubpassDescription subpass_description(
        {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
        color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
        nullptr);

    vk::AccessFlags access;
    vk::PipelineStageFlags stage;
    if (!color_references.empty()) {
        access |=
            vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
        stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
    }

    if (params.has_zeta) {
        access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
                  vk::AccessFlagBits::eDepthStencilAttachmentWrite;
        stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
    }

    const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
                                                   {});

    const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
                                               descriptors.data(), 1, &subpass_description, 1,
                                               &subpass_dependency);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    return dev.createRenderPassUnique(create_info, nullptr, dld);
}

} // namespace Vulkan
97
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
@@ -0,0 +1,97 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <tuple>
#include <unordered_map>

#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>

#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/surface.h"

namespace Vulkan {

class VKDevice;

// TODO(Rodrigo): Optimize this structure for faster hashing

struct RenderPassParams {
    struct ColorAttachment {
        u32 index = 0;
        VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
        bool is_texception = false;

        std::size_t Hash() const noexcept {
            return static_cast<std::size_t>(pixel_format) |
                   static_cast<std::size_t>(is_texception) << 6 |
                   static_cast<std::size_t>(index) << 7;
        }

        bool operator==(const ColorAttachment& rhs) const noexcept {
            return std::tie(index, pixel_format, is_texception) ==
                   std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
        }
    };

    boost::container::static_vector<ColorAttachment,
                                    Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        color_attachments{};
    // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
    VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
    bool has_zeta = false;
    bool zeta_texception = false;

    std::size_t Hash() const noexcept {
        std::size_t hash = 0;
        for (const auto& rt : color_attachments) {
            boost::hash_combine(hash, rt.Hash());
        }
        boost::hash_combine(hash, zeta_pixel_format);
        boost::hash_combine(hash, has_zeta);
        boost::hash_combine(hash, zeta_texception);
        return hash;
    }

    bool operator==(const RenderPassParams& rhs) const {
        return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
               std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
                        rhs.zeta_texception);
    }
};

} // namespace Vulkan

namespace std {

template <>
struct hash<Vulkan::RenderPassParams> {
    std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std

namespace Vulkan {

class VKRenderPassCache final {
public:
    explicit VKRenderPassCache(const VKDevice& device);
    ~VKRenderPassCache();

    vk::RenderPass GetRenderPass(const RenderPassParams& params);

private:
    UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;

    const VKDevice& device;
    std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
};

} // namespace Vulkan
@@ -954,6 +954,10 @@ private:

    Expression Visit(const Node& node) {
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
            if (const auto amend_index = operation->GetAmendIndex()) {
                [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
                ASSERT(type == Type::Void);
            }
            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
            const auto decompiler = operation_decompilers[operation_index];
            if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
        }

        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
            if (const auto amend_index = conditional->GetAmendIndex()) {
                [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
                ASSERT(type == Type::Void);
            }
            // It's invalid to call conditional on nested nodes, use an operation instead
            const Id true_label = OpLabel();
            const Id skip_label = OpLabel();
127
src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
Normal file
@@ -0,0 +1,127 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"

namespace Vulkan {

VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
                                                  u64 last_epoch)
    : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}

VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
    buffer = std::move(rhs.buffer);
    watch = std::move(rhs.watch);
    last_epoch = rhs.last_epoch;
}

VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;

VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
    StagingBuffer&& rhs) noexcept {
    buffer = std::move(rhs.buffer);
    watch = std::move(rhs.watch);
    last_epoch = rhs.last_epoch;
    return *this;
}

VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
                                         VKScheduler& scheduler)
    : device{device}, memory_manager{memory_manager}, scheduler{scheduler},
      is_device_integrated{device.IsIntegrated()} {}

VKStagingBufferPool::~VKStagingBufferPool() = default;

VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
    if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
        return *buffer;
    }
    return CreateStagingBuffer(size, host_visible);
}

void VKStagingBufferPool::TickFrame() {
    ++epoch;
    current_delete_level = (current_delete_level + 1) % NumLevels;

    ReleaseCache(true);
    if (!is_device_integrated) {
        ReleaseCache(false);
    }
}

VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
    for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
        if (entry.watch.TryWatch(scheduler.GetFence())) {
            entry.last_epoch = epoch;
            return &*entry.buffer;
        }
    }
    return nullptr;
}

VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
    const auto usage =
        vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
        vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
    const u32 log2 = Common::Log2Ceil64(size);
    const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
                                         nullptr);
    const auto dev = device.GetLogical();
    auto buffer = std::make_unique<VKBuffer>();
    buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader());
    buffer->commit = memory_manager.Commit(*buffer->handle, host_visible);

    auto& entries = GetCache(host_visible)[log2].entries;
    return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
}
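Staging buffers are grouped into power-of-two size classes: Common::Log2Ceil64(size) selects the bucket and the buffer is created with capacity 1ULL << log2, so any later request that rounds to the same class can reuse it. Worked values for the rounding:

// Size-class examples for the bucketing above (Log2Ceil64 rounds up):
//   size = 1000 -> log2 = 10 -> 1024-byte buffer in bucket 10
//   size = 4096 -> log2 = 12 -> 4096-byte buffer in bucket 12
//   size = 5000 -> log2 = 13 -> 8192-byte buffer in bucket 13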
VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
    return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
}

void VKStagingBufferPool::ReleaseCache(bool host_visible) {
    auto& cache = GetCache(host_visible);
    const u64 size = ReleaseLevel(cache, current_delete_level);
    if (size == 0) {
        return;
    }
}

u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
    static constexpr u64 epochs_to_destroy = 180;
    static constexpr std::size_t deletions_per_tick = 16;

    auto& staging = cache[log2];
    auto& entries = staging.entries;
    const std::size_t old_size = entries.size();

    const auto is_deleteable = [this](const auto& entry) {
        return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
    };
    const std::size_t begin_offset = staging.delete_index;
    const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
    const auto begin = std::begin(entries) + begin_offset;
    const auto end = std::begin(entries) + end_offset;
    entries.erase(std::remove_if(begin, end, is_deleteable), end);

    const std::size_t new_size = entries.size();
    staging.delete_index += deletions_per_tick;
    if (staging.delete_index >= new_size) {
        staging.delete_index = 0;
    }

    return (1ULL << log2) * (old_size - new_size);
}

} // namespace Vulkan
83
src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
Normal file
@@ -0,0 +1,83 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <climits>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/common_types.h"

#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"

namespace Vulkan {

class VKDevice;
class VKFenceWatch;
class VKScheduler;

struct VKBuffer final {
    UniqueBuffer handle;
    VKMemoryCommit commit;
};

class VKStagingBufferPool final {
public:
    explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
                                 VKScheduler& scheduler);
    ~VKStagingBufferPool();

    VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);

    void TickFrame();

private:
    struct StagingBuffer final {
        explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
        StagingBuffer(StagingBuffer&& rhs) noexcept;
        StagingBuffer(const StagingBuffer&) = delete;
        ~StagingBuffer();

        StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;

        std::unique_ptr<VKBuffer> buffer;
        VKFenceWatch watch;
        u64 last_epoch = 0;
    };

    struct StagingBuffers final {
        std::vector<StagingBuffer> entries;
        std::size_t delete_index = 0;
    };

    static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
    using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;

    VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);

    VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);

    StagingBuffersCache& GetCache(bool host_visible);

    void ReleaseCache(bool host_visible);

    u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);

    const VKDevice& device;
    VKMemoryManager& memory_manager;
    VKScheduler& scheduler;
    const bool is_device_integrated;

    StagingBuffersCache host_staging_buffers;
    StagingBuffersCache device_staging_buffers;

    u64 epoch = 0;

    std::size_t current_delete_level = 0;
};

} // namespace Vulkan
@@ -3,86 +3,144 @@
// Refer to the license.txt file included.

#include <algorithm>
#include <memory>
#include <optional>
#include <tuple>
#include <vector>

#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

namespace Vulkan {

namespace {

constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;

VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
                                                                                   pipeline_stage} {
    CreateBuffers(memory_manager, usage);
    ReserveWatches(WATCHES_INITIAL_RESERVE);
constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;

std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
                                  vk::MemoryPropertyFlags wanted) {
    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
        if (!(filter & (1 << i))) {
            continue;
        }
        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return i;
        }
    }
    return {};
}

} // Anonymous namespace

VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
                               vk::BufferUsageFlags usage)
    : device{device}, scheduler{scheduler} {
    CreateBuffers(usage);
    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}

VKStreamBuffer::~VKStreamBuffer() = default;

std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
    ASSERT(size <= buffer_size);
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
    ASSERT(size <= STREAM_BUFFER_SIZE);
    mapped_size = size;

    if (offset + size > buffer_size) {
        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
        // reset the state.
        invalidation_mark = used_watches;
        used_watches = 0;
    if (alignment > 0) {
        offset = Common::AlignUp(offset, alignment);
    }

    WaitPendingOperations(offset);

    bool invalidated = false;
    if (offset + size > STREAM_BUFFER_SIZE) {
        // The buffer would overflow, save the amount of used watches and reset the state.
        invalidation_mark = current_watch_cursor;
        current_watch_cursor = 0;
        offset = 0;
    }

        return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
    }
        // Swap watches and reset waiting cursors.
        std::swap(previous_watches, current_watches);
        wait_cursor = 0;
        wait_bound = 0;

void VKStreamBuffer::Send(u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

    if (invalidation_mark) {
        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
        // Ensure that we don't wait for uncommitted fences.
        scheduler.Flush();
        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
                      [&](auto& resource) { resource->Wait(); });
        invalidation_mark = std::nullopt;

        invalidated = true;
    }

    if (used_watches + 1 >= watches.size()) {
        // Ensure that there are enough watches.
        ReserveWatches(WATCHES_RESERVE_CHUNK);
    }
    // Add a watch for this allocation.
    watches[used_watches++]->Watch(scheduler.GetFence());

    offset += size;
}

void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
                                         nullptr);

    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
    commit = memory_manager.Commit(*buffer, true);
    mapped_pointer = commit->GetData();
    const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
    return {pointer, offset, invalidated};
}

void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
    const std::size_t previous_size = watches.size();
    watches.resize(previous_size + grow_size);
    std::generate(watches.begin() + previous_size, watches.end(),
                  []() { return std::make_unique<VKFenceWatch>(); });
void VKStreamBuffer::Unmap(u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");

    const auto dev = device.GetLogical();
    dev.unmapMemory(*memory, device.GetDispatchLoader());

    offset += size;

    if (current_watch_cursor + 1 >= current_watches.size()) {
        // Ensure that there are enough watches.
        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
    }
    auto& watch = current_watches[current_watch_cursor++];
    watch.upper_bound = offset;
    watch.fence.Watch(scheduler.GetFence());
}

void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
    const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
                                         0, nullptr);
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);

    const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
    // Prefer device local host visible allocations (this should hit AMD's pinned memory).
    auto type = FindMemoryType(device, requirements.memoryTypeBits,
                               vk::MemoryPropertyFlagBits::eHostVisible |
                                   vk::MemoryPropertyFlagBits::eHostCoherent |
                                   vk::MemoryPropertyFlagBits::eDeviceLocal);
    if (!type) {
        // Otherwise search for a host visible allocation.
        type = FindMemoryType(device, requirements.memoryTypeBits,
                              vk::MemoryPropertyFlagBits::eHostVisible |
                                  vk::MemoryPropertyFlagBits::eHostCoherent);
        ASSERT_MSG(type, "No host visible and coherent memory type found");
    }
    const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
    memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);

    dev.bindBufferMemory(*buffer, *memory, 0, dld);
}

void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    watches.resize(watches.size() + grow_size);
}

void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
    if (!invalidation_mark) {
        return;
    }
    while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
        auto& watch = previous_watches[wait_cursor];
        wait_bound = watch.upper_bound;
        watch.fence.Wait();
        ++wait_cursor;
    }
}

} // namespace Vulkan
|
||||
|
||||
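Note: the hunk above renames the Reserve/Send pair to Map/Unmap. A minimal caller sketch under the API shown in this diff; UploadData and its arguments are hypothetical:

#include <cstring>

// Stage "size" bytes through the stream buffer and commit them.
void UploadData(Vulkan::VKStreamBuffer& stream, const u8* data, u64 size) {
    // Map returns a host write pointer, the offset inside the buffer, and
    // whether a wrap-around invalidated the previous contents.
    const auto [pointer, buffer_offset, invalidated] = stream.Map(size, 4);
    if (invalidated) {
        // Any cached references into this buffer are stale and must be rebuilt.
    }
    std::memcpy(pointer, data, size);
    stream.Unmap(size); // Records a fence watch covering the written range.
}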
@@ -4,28 +4,24 @@

#pragma once

#include <memory>
#include <optional>
#include <tuple>
#include <vector>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"

namespace Vulkan {

class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;

class VKStreamBuffer {
class VKStreamBuffer final {
public:
    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
                            vk::BufferUsageFlags usage);
    ~VKStreamBuffer();

    /**
@@ -34,39 +30,47 @@ public:
     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
     * offset and a boolean that's true when buffer has been invalidated.
     */
    std::tuple<u8*, u64, bool> Reserve(u64 size);
    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);

    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
    void Send(u64 size);
    void Unmap(u64 size);

    vk::Buffer GetBuffer() const {
    vk::Buffer GetHandle() const {
        return *buffer;
    }

private:
    struct Watch final {
        VKFenceWatch fence;
        u64 upper_bound{};
    };

    /// Creates Vulkan buffer handles committing the required memory.
    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
    void CreateBuffers(vk::BufferUsageFlags usage);

    /// Increases the amount of watches available.
    void ReserveWatches(std::size_t grow_size);
    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);

    void WaitPendingOperations(u64 requested_upper_bound);

    const VKDevice& device;                      ///< Vulkan device manager.
    VKScheduler& scheduler;                      ///< Command scheduler.
    const u64 buffer_size;                       ///< Total size of the stream buffer.
    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.

    UniqueBuffer buffer;   ///< Mapped buffer.
    VKMemoryCommit commit; ///< Memory commit.
    u8* mapped_pointer{};  ///< Pointer to the host visible commit.
    UniqueBuffer buffer;       ///< Mapped buffer.
    UniqueDeviceMemory memory; ///< Memory allocation.

    u64 offset{};      ///< Buffer iterator.
    u64 mapped_size{}; ///< Size reserved for the current copy.

    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches.
    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
    std::optional<std::size_t>
        invalidation_mark{}; ///< Number of watches used in the current invalidation.
    std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration.
    std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation.
    std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.

    std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
    std::size_t wait_cursor{};           ///< Last watch being waited for completion.
    u64 wait_bound{};                    ///< Highest offset being watched for completion.
};

} // namespace Vulkan
57 src/video_core/renderer_vulkan/vk_update_descriptor.cpp Normal file
@@ -0,0 +1,57 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <variant>
#include <boost/container/static_vector.hpp>

#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"

namespace Vulkan {

VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
    : device{device}, scheduler{scheduler} {}

VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;

void VKUpdateDescriptorQueue::TickFrame() {
    payload.clear();
}

void VKUpdateDescriptorQueue::Acquire() {
    entries.clear();
}

void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
                                   vk::DescriptorSet set) {
    if (payload.size() + entries.size() >= payload.max_size()) {
        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
        scheduler.WaitWorker();
        payload.clear();
    }

    const auto payload_start = payload.data() + payload.size();
    for (const auto& entry : entries) {
        if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
            payload.push_back(*image);
        } else if (const auto buffer = std::get_if<Buffer>(&entry)) {
            payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
        } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
            payload.push_back(*texel);
        } else {
            UNREACHABLE();
        }
    }

    scheduler.Record([dev = device.GetLogical(), payload_start, set,
                      update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
        dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
    });
}

} // namespace Vulkan
86 src/video_core/renderer_vulkan/vk_update_descriptor.h Normal file
@@ -0,0 +1,86 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <type_traits>
#include <variant>
#include <boost/container/static_vector.hpp>

#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"

namespace Vulkan {

class VKDevice;
class VKScheduler;

class DescriptorUpdateEntry {
public:
    explicit DescriptorUpdateEntry() : image{} {}

    DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}

    DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
        : buffer{buffer, offset, size} {}

    DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}

private:
    union {
        vk::DescriptorImageInfo image;
        vk::DescriptorBufferInfo buffer;
        vk::BufferView texel_buffer;
    };
};

class VKUpdateDescriptorQueue final {
public:
    explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
    ~VKUpdateDescriptorQueue();

    void TickFrame();

    void Acquire();

    void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);

    void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
        entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
    }

    void AddImage(vk::ImageView image_view) {
        entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
    }

    void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
        entries.push_back(Buffer{buffer, offset, size});
    }

    void AddTexelBuffer(vk::BufferView texel_buffer) {
        entries.emplace_back(texel_buffer);
    }

    vk::ImageLayout* GetLastImageLayout() {
        return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
    }

private:
    struct Buffer {
        const vk::Buffer* buffer{};
        u64 offset{};
        std::size_t size{};
    };
    using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
    // Old gcc versions don't consider this trivially copyable.
    // static_assert(std::is_trivially_copyable_v<Variant>);

    const VKDevice& device;
    VKScheduler& scheduler;

    boost::container::static_vector<Variant, 0x400> entries;
    boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};

} // namespace Vulkan
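A per-draw usage sketch of the queue declared above; the set, template and resource handles are hypothetical:

// Gather descriptor writes for one draw, then flush them as a single
// templated update recorded on the scheduler.
void SetupDrawDescriptors(Vulkan::VKUpdateDescriptorQueue& update_queue,
                          vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set,
                          const vk::Buffer& uniform_buffer, vk::Sampler sampler,
                          vk::ImageView texture) {
    update_queue.Acquire();                            // Start a fresh entry list.
    update_queue.AddBuffer(&uniform_buffer, 0, 0x100); // Binding 0: uniform data.
    update_queue.AddSampledImage(sampler, texture);    // Binding 1: combined image sampler.
    update_queue.Send(update_template, set);           // Flatten entries and record the update.
}

Send copies the entries into the payload static_vector, whose storage never reallocates, so the payload_start pointer captured by the recorded lambda stays valid until TickFrame clears the payload at the end of the frame.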
@@ -6,6 +6,7 @@
#include <vector>
#include <fmt/format.h>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -22,34 +23,39 @@ using Tegra::Shader::Register;

namespace {

u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
    return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
           uniform_type == Tegra::Shader::UniformType::UnsignedShort;
}

u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
    case Tegra::Shader::UniformType::Single:
        return 1;
    case Tegra::Shader::UniformType::Double:
        return 2;
    case Tegra::Shader::UniformType::Quad:
    case Tegra::Shader::UniformType::UnsignedQuad:
        return 4;
        return 0b11;
    case Tegra::Shader::UniformType::UnsignedShort:
        return 0b10;
    default:
        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
        return 1;
        UNREACHABLE();
        return 0;
    }
}

u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
        return 8;
    case Tegra::Shader::UniformType::UnsignedShort:
        return 16;
    case Tegra::Shader::UniformType::Single:
        return 1;
        return 32;
    case Tegra::Shader::UniformType::Double:
        return 2;
        return 64;
    case Tegra::Shader::UniformType::Quad:
    case Tegra::Shader::UniformType::UnsignedQuad:
        return 4;
        return 128;
    default:
        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
        return 1;
        return 32;
    }
}

@@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    }();

    const auto [real_address_base, base_address, descriptor] =
        TrackGlobalMemory(bb, instr, false);
        TrackGlobalMemory(bb, instr, true, false);

    const u32 count = GetLdgMemorySize(type);
    const u32 size = GetMemorySize(type);
    const u32 count = Common::AlignUp(size, 32) / 32;
    if (!real_address_base || !base_address) {
        // Tracking failed, load zeroes.
        for (u32 i = 0; i < count; ++i) {
@@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);

        if (type == Tegra::Shader::UniformType::UnsignedByte) {
            // To handle unaligned loads get the byte used to dereference global memory
            // and extract that byte from the loaded uint32.
            Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
            byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
        // To handle unaligned loads get the bytes used to dereference global memory and extract
        // those bytes from the loaded u32.
        if (IsUnaligned(type)) {
            Node mask = Immediate(GetUnalignedMask(type));
            Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
            offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));

            gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
                             Immediate(8));
            gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
                             std::move(offset), Immediate(size));
        }

        SetTemporary(bb, i, gmem);
@@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
    }();

    // For unaligned reads we have to read memory too.
    const bool is_read = IsUnaligned(type);
    const auto [real_address_base, base_address, descriptor] =
        TrackGlobalMemory(bb, instr, true);
        TrackGlobalMemory(bb, instr, is_read, true);
    if (!real_address_base || !base_address) {
        // Tracking failed, skip the store.
        break;
    }

    const u32 count = GetStgMemorySize(type);
    const u32 size = GetMemorySize(type);
    const u32 count = Common::AlignUp(size, 32) / 32;
    for (u32 i = 0; i < count; ++i) {
        const Node it_offset = Immediate(i * 4);
        const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
        const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        const Node value = GetRegister(instr.gpr0.Value() + i);
        Node value = GetRegister(instr.gpr0.Value() + i);

        if (IsUnaligned(type)) {
            Node mask = Immediate(GetUnalignedMask(type));
            Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
            offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));

            value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
                              Immediate(size));
        }

        bb.push_back(Operation(OperationCode::Assign, gmem, value));
    }
    break;
@@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {

std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
                                                                     Instruction instr,
                                                                     bool is_write) {
                                                                     bool is_read, bool is_write) {
    const auto addr_register{GetRegister(instr.gmem.gpr)};
    const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};

@@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
    const GlobalMemoryBase descriptor{index, offset};
    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
    auto& usage = entry->second;
    if (is_write) {
        usage.is_written = true;
    } else {
        usage.is_read = true;
    }
    usage.is_written |= is_write;
    usage.is_read |= is_read;

    const auto real_address =
        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
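The unaligned path above is plain integer arithmetic on the loaded 32-bit word. A scalar sketch of the same math; the names are hypothetical, size is 8 or 16 and mask comes from GetUnalignedMask:

#include <cstdint>

// Extract an unaligned 8/16-bit value from the 32-bit word that contains it,
// mirroring the UBitwiseAnd/ULogicalShiftLeft/UBitfieldExtract chain above.
uint32_t UnalignedLoad(uint32_t word, uint32_t address, uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8; // Byte offset to bit offset.
    return (word >> bit_offset) & ((1u << size) - 1); // Bitfield extract.
}

// The store path is the inverse: insert the new bits into the loaded word,
// which is why unaligned stores now track the region as read as well.
uint32_t UnalignedStore(uint32_t word, uint32_t value, uint32_t address, uint32_t mask,
                        uint32_t size) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t field = ((1u << size) - 1) << bit_offset;
    return (word & ~field) | ((value << bit_offset) & field); // Bitfield insert.
}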
@@ -13,37 +13,65 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;

namespace {
constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
}

u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
    UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);

    const Node apply_mask = [&]() {
    const Node apply_mask = [&] {
        switch (opcode->get().GetId()) {
        case OpCode::Id::R2P_IMM:
            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
        case OpCode::Id::P2R_IMM:
            return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
        default:
            UNREACHABLE();
            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
            return Immediate(0);
        }
    }();
    const Node mask = GetRegister(instr.gpr8);
    const auto offset = static_cast<u32>(instr.r2p.byte) * 8;

    constexpr u32 programmable_preds = 7;
    for (u64 pred = 0; pred < programmable_preds; ++pred) {
        const auto shift = static_cast<u32>(pred);
    const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;

        const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
        const Node condition =
            Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
    switch (opcode->get().GetId()) {
    case OpCode::Id::R2P_IMM: {
        const Node mask = GetRegister(instr.gpr8);

        const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
        const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
            const auto shift = static_cast<u32>(pred);

        const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
        bb.push_back(Conditional(condition, {code}));
            const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
            const Node condition =
                Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));

            const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
            const Node value =
                Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));

            const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
            bb.push_back(Conditional(condition, {code}));
        }
        break;
    }
    case OpCode::Id::P2R_IMM: {
        Node value = Immediate(0);
        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
            Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
                                 Immediate(0));
            value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
        }
        value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
        value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);

        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
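P2R packs the seven programmable predicates into one byte of a register, masked by the immediate. A scalar sketch of the packing; the names are hypothetical:

#include <cstdint>

// Pack predicates P0..P6 into bits 0..6, apply the immediate mask, then
// insert the result into the selected byte of gpr8, as P2R_IMM does above.
uint32_t PackPredicates(const bool predicates[7], uint32_t apply_mask, uint32_t reg,
                        uint32_t byte_offset) {
    uint32_t value = 0;
    for (uint32_t pred = 0; pred < 7; ++pred) {
        value |= predicates[pred] ? (1u << pred) : 0u;  // Select + UBitwiseOr chain.
    }
    value &= apply_mask;                                // UBitwiseAnd with the mask.
    const uint32_t shift = byte_offset * 8;
    const uint32_t field = 0xFFu << shift;              // 8-bit destination field.
    return (reg & ~field) | ((value << shift) & field); // BitfieldInsert(..., offset, 8).
}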
@@ -89,59 +89,62 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        [[fallthrough]];
    }
    case OpCode::Id::TLD4: {
        ASSERT(instr.tld4.array == 0);
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                             "PTP is not implemented");

        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
        WriteTexInstructionFloat(
            bb, instr,
            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
                                             is_ptp, is_bindless));
        break;
    }
    case OpCode::Id::TLD4S: {
        const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
        UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented");

        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        constexpr std::size_t num_coords = 2;
        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        std::vector<Node> coords;
        Node dc_reg;
        if (depth_compare) {
        std::vector<Node> aoffi;
        Node depth_compare;
        if (is_depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b;
            if (is_aoffi) {
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
                depth_compare = GetRegister(instr.gpr20.Value() + 1);
            } else {
                depth_compare = op_b;
            }
        } else {
            // There's no depth compare
            coords.push_back(op_a);
            if (uses_aoffi) {
                const Node op_y = GetRegister(instr.gpr8.Value() + 1);
                coords.push_back(op_y);
            if (is_aoffi) {
                coords.push_back(GetRegister(instr.gpr8.Value() + 1));
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
            } else {
                coords.push_back(op_b);
            }
            dc_reg = {};
        }
        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));

        const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
        const Sampler& sampler = *GetSampler(instr.sampler, info);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto coords_copy = coords;
            MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element};
            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

@@ -190,7 +193,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    }

    for (u32 element = 0; element < values.size(); ++element) {
        MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element};
        MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
        values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
    }

@@ -230,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        if (!instr.txq.IsComponentEnabled(element)) {
            continue;
        }
        MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
        MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
        const Node value =
            Operation(OperationCode::TextureQueryDimensions, meta,
                      GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -299,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
            continue;
        }
        auto params = coords;
        MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
        MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
        const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
        SetTemporary(bb, indexer++, value);
    }
@@ -367,7 +370,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
    if (it != used_samplers.end()) {
        ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
               it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
        return &(*it);
        return &*it;
    }

    // Otherwise create a new mapping for this sampler
@@ -397,7 +400,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
    if (it != used_samplers.end()) {
        ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
               it->IsShadow() == info.is_shadow);
        return &(*it);
        return &*it;
    }

    // Otherwise create a new mapping for this sampler
@@ -538,7 +541,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,

    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

@@ -635,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}

Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array, bool is_aoffi, bool is_bindless) {
                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");

    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled, the array index is always stored in the gpr8 field
@@ -661,12 +666,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
        return values;
    }

    std::vector<Node> aoffi;
    std::vector<Node> aoffi, ptp;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
    } else if (is_ptp) {
        ptp = GetPtpCoordinates(
            {GetRegister(parameter_register++), GetRegister(parameter_register++)});
    }

    Node dc{};
    Node dc;
    if (depth_compare) {
        dc = GetRegister(parameter_register++);
    }
@@ -676,8 +684,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de

    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
                         element};
        MetaTexture meta{
            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

@@ -710,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }

@@ -760,7 +768,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
@@ -825,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
    return aoffi;
}

std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
    static constexpr u32 num_entries = 8;

    std::vector<Node> ptp;
    ptp.reserve(num_entries);

    const auto global_size = static_cast<s64>(global_code.size());
    const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
    const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
    if (!low || !high) {
        for (u32 entry = 0; entry < num_entries; ++entry) {
            const u32 reg = entry / 4;
            const u32 offset = entry % 4;
            const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
            const Node condition =
                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
            ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
        }
        return ptp;
    }

    const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
    for (u32 entry = 0; entry < num_entries; ++entry) {
        s32 value = (immediate >> (entry * 8)) & 0b111111;
        if (value >= 32) {
            value -= 64;
        }
        ptp.push_back(Immediate(value));
    }

    return ptp;
}

} // namespace VideoCommon::Shader
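GetPtpCoordinates above unpacks eight 6-bit two's-complement offsets, four per register. A scalar sketch of the constant path; DecodePtpOffsets is a hypothetical name:

#include <array>
#include <cstdint>

// Decode eight 6-bit signed PTP offsets from the packed 64-bit immediate;
// raw values of 32..63 wrap around to -32..-1 (6-bit sign extension).
std::array<int32_t, 8> DecodePtpOffsets(uint64_t immediate) {
    std::array<int32_t, 8> offsets{};
    for (uint32_t entry = 0; entry < 8; ++entry) {
        int32_t value = static_cast<int32_t>((immediate >> (entry * 8)) & 0b111111);
        if (value >= 32) {
            value -= 64;
        }
        offsets[entry] = value;
    }
    return offsets;
}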
@@ -374,6 +374,7 @@ struct MetaTexture {
    Node array;
    Node depth_compare;
    std::vector<Node> aoffi;
    std::vector<Node> ptp;
    std::vector<Node> derivates;
    Node bias;
    Node lod;
@@ -391,8 +392,30 @@ struct MetaImage {
using Meta =
    std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;

class AmendNode {
public:
    std::optional<std::size_t> GetAmendIndex() const {
        if (amend_index == amend_null_index) {
            return std::nullopt;
        }
        return {amend_index};
    }

    void SetAmendIndex(std::size_t index) {
        amend_index = index;
    }

    void ClearAmend() {
        amend_index = amend_null_index;
    }

private:
    static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
    std::size_t amend_index{amend_null_index};
};

/// Holds any kind of operation that can be done in the IR
class OperationNode final {
class OperationNode final : public AmendNode {
public:
    explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}

@@ -432,7 +455,7 @@ private:
};

/// Encloses conditionally-executed code behind a node that evaluates to a boolean
class ConditionalNode final {
class ConditionalNode final : public AmendNode {
public:
    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
        : condition{std::move(condition)}, code{std::move(code)} {}

@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
                     Immediate(bits));
}

std::size_t ShaderIR::DeclareAmend(Node new_amend) {
    const std::size_t id = amend_code.size();
    amend_code.push_back(new_amend);
    return id;
}

} // namespace VideoCommon::Shader
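AmendNode attaches optional extra IR to a node by storing only an index into the amend_code side table, with an all-ones sentinel meaning "no amend". A condensed sketch of that pattern with hypothetical names:

#include <cstddef>
#include <optional>

// Sentinel-backed index: cheaper than an std::optional<Node> member on every
// node, since only amended nodes pay for storage through the side table.
class AmendSlot {
public:
    std::optional<std::size_t> Get() const {
        if (index == null_index) {
            return std::nullopt;
        }
        return index;
    }
    void Set(std::size_t new_index) {
        index = new_index;
    }
    void Clear() {
        index = null_index;
    }

private:
    static constexpr std::size_t null_index = ~std::size_t{0};
    std::size_t index{null_index};
};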
@@ -176,6 +176,10 @@ public:
    /// Returns a condition code evaluated from internal flags
    Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;

    const Node& GetAmendNode(std::size_t index) const {
        return amend_code[index];
    }

private:
    friend class ASTDecoder;

@@ -350,7 +354,8 @@ private:
                     bool is_array);

    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
                      bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
                      bool is_bindless);

    Node4 GetTldCode(Tegra::Shader::Instruction instr);

@@ -363,6 +368,8 @@ private:

    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);

    std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);

    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
@@ -387,7 +394,10 @@ private:

    std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
                                                               Tegra::Shader::Instruction instr,
                                                               bool is_write);
                                                               bool is_read, bool is_write);

    /// Register new amending code and obtain the reference id.
    std::size_t DeclareAmend(Node new_amend);

    const ProgramCode& program_code;
    const u32 main_offset;
@@ -403,6 +413,7 @@ private:
    std::map<u32, NodeBlock> basic_blocks;
    NodeBlock global_code;
    ASTManager program_manager{true, true};
    std::vector<Node> amend_code;

    std::set<u32> used_registers;
    std::set<Tegra::Shader::Pred> used_predicates;
|
||||
configuration/configure_web.cpp
|
||||
configuration/configure_web.h
|
||||
configuration/configure_web.ui
|
||||
debugger/graphics/graphics_breakpoint_observer.cpp
|
||||
debugger/graphics/graphics_breakpoint_observer.h
|
||||
debugger/graphics/graphics_breakpoints.cpp
|
||||
debugger/graphics/graphics_breakpoints.h
|
||||
debugger/graphics/graphics_breakpoints_p.h
|
||||
debugger/console.cpp
|
||||
debugger/console.h
|
||||
debugger/profiler.cpp
|
||||
|
||||
@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
|
||||
}
|
||||
|
||||
void GRenderWindow::SwapBuffers() {
|
||||
// In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
|
||||
// since we never call `doneCurrent` in this thread.
|
||||
// However:
|
||||
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
|
||||
// since the last time `swapBuffers` was executed;
|
||||
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
|
||||
context->makeCurrent(child);
|
||||
|
||||
context->swapBuffers(child);
|
||||
|
||||
if (!first_frame) {
|
||||
emit FirstFrameDisplayed();
|
||||
first_frame = true;
|
||||
emit FirstFrameDisplayed();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <QMetaType>
#include "yuzu/debugger/graphics/graphics_breakpoint_observer.h"

BreakPointObserverDock::BreakPointObserverDock(std::shared_ptr<Tegra::DebugContext> debug_context,
                                               const QString& title, QWidget* parent)
    : QDockWidget(title, parent), BreakPointObserver(debug_context) {
    qRegisterMetaType<Tegra::DebugContext::Event>("Tegra::DebugContext::Event");

    connect(this, &BreakPointObserverDock::Resumed, this, &BreakPointObserverDock::OnResumed);

    // NOTE: This signal is emitted from a non-GUI thread, but connect() takes
    // care of delaying its handling to the GUI thread.
    connect(this, &BreakPointObserverDock::BreakPointHit, this,
            &BreakPointObserverDock::OnBreakPointHit, Qt::BlockingQueuedConnection);
}

void BreakPointObserverDock::OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) {
    emit BreakPointHit(event, data);
}

void BreakPointObserverDock::OnMaxwellResume() {
    emit Resumed();
}
@@ -1,33 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <QDockWidget>
#include "video_core/debug_utils/debug_utils.h"

/**
 * Utility class which forwards calls to OnMaxwellBreakPointHit and OnMaxwellResume to public slots.
 * This is because the Maxwell breakpoint callbacks are called from a non-GUI thread, while
 * the widget usually wants to perform reactions in the GUI thread.
 */
class BreakPointObserverDock : public QDockWidget,
                               protected Tegra::DebugContext::BreakPointObserver {
    Q_OBJECT

public:
    BreakPointObserverDock(std::shared_ptr<Tegra::DebugContext> debug_context, const QString& title,
                           QWidget* parent = nullptr);

    void OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
    void OnMaxwellResume() override;

signals:
    void Resumed();
    void BreakPointHit(Tegra::DebugContext::Event event, void* data);

private:
    virtual void OnBreakPointHit(Tegra::DebugContext::Event event, void* data) = 0;
    virtual void OnResumed() = 0;
};
@@ -1,221 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <QLabel>
#include <QMetaType>
#include <QPushButton>
#include <QTreeView>
#include <QVBoxLayout>
#include "common/assert.h"
#include "yuzu/debugger/graphics/graphics_breakpoints.h"
#include "yuzu/debugger/graphics/graphics_breakpoints_p.h"

BreakPointModel::BreakPointModel(std::shared_ptr<Tegra::DebugContext> debug_context,
                                 QObject* parent)
    : QAbstractListModel(parent), context_weak(debug_context),
      at_breakpoint(debug_context->at_breakpoint),
      active_breakpoint(debug_context->active_breakpoint) {}

int BreakPointModel::columnCount(const QModelIndex& parent) const {
    return 1;
}

int BreakPointModel::rowCount(const QModelIndex& parent) const {
    return static_cast<int>(Tegra::DebugContext::Event::NumEvents);
}

QVariant BreakPointModel::data(const QModelIndex& index, int role) const {
    const auto event = static_cast<Tegra::DebugContext::Event>(index.row());

    switch (role) {
    case Qt::DisplayRole: {
        if (index.column() == 0) {
            return DebugContextEventToString(event);
        }
        break;
    }

    case Qt::CheckStateRole: {
        if (index.column() == 0)
            return data(index, Role_IsEnabled).toBool() ? Qt::Checked : Qt::Unchecked;
        break;
    }

    case Qt::BackgroundRole: {
        if (at_breakpoint && index.row() == static_cast<int>(active_breakpoint)) {
            return QBrush(QColor(0xE0, 0xE0, 0x10));
        }
        break;
    }

    case Role_IsEnabled: {
        auto context = context_weak.lock();
        return context && context->breakpoints[(int)event].enabled;
    }

    default:
        break;
    }
    return QVariant();
}

Qt::ItemFlags BreakPointModel::flags(const QModelIndex& index) const {
    if (!index.isValid())
        return 0;

    Qt::ItemFlags flags = Qt::ItemIsEnabled;
    if (index.column() == 0)
        flags |= Qt::ItemIsUserCheckable;
    return flags;
}

bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, int role) {
    const auto event = static_cast<Tegra::DebugContext::Event>(index.row());

    switch (role) {
    case Qt::CheckStateRole: {
        if (index.column() != 0)
            return false;

        auto context = context_weak.lock();
        if (!context)
            return false;

        context->breakpoints[(int)event].enabled = value == Qt::Checked;
        QModelIndex changed_index = createIndex(index.row(), 0);
        emit dataChanged(changed_index, changed_index);
        return true;
    }
    }

    return false;
}

void BreakPointModel::OnBreakPointHit(Tegra::DebugContext::Event event) {
    auto context = context_weak.lock();
    if (!context)
        return;

    active_breakpoint = context->active_breakpoint;
    at_breakpoint = context->at_breakpoint;
    emit dataChanged(createIndex(static_cast<int>(event), 0),
                     createIndex(static_cast<int>(event), 0));
}

void BreakPointModel::OnResumed() {
    auto context = context_weak.lock();
    if (!context)
        return;

    at_breakpoint = context->at_breakpoint;
    emit dataChanged(createIndex(static_cast<int>(active_breakpoint), 0),
                     createIndex(static_cast<int>(active_breakpoint), 0));
    active_breakpoint = context->active_breakpoint;
}

QString BreakPointModel::DebugContextEventToString(Tegra::DebugContext::Event event) {
    switch (event) {
    case Tegra::DebugContext::Event::MaxwellCommandLoaded:
        return tr("Maxwell command loaded");
    case Tegra::DebugContext::Event::MaxwellCommandProcessed:
        return tr("Maxwell command processed");
    case Tegra::DebugContext::Event::IncomingPrimitiveBatch:
        return tr("Incoming primitive batch");
    case Tegra::DebugContext::Event::FinishedPrimitiveBatch:
        return tr("Finished primitive batch");
    case Tegra::DebugContext::Event::NumEvents:
        break;
    }

    return tr("Unknown debug context event");
}

GraphicsBreakPointsWidget::GraphicsBreakPointsWidget(
    std::shared_ptr<Tegra::DebugContext> debug_context, QWidget* parent)
    : QDockWidget(tr("Maxwell Breakpoints"), parent), Tegra::DebugContext::BreakPointObserver(
                                                          debug_context) {
    setObjectName(QStringLiteral("TegraBreakPointsWidget"));

    status_text = new QLabel(tr("Emulation running"));
    resume_button = new QPushButton(tr("Resume"));
    resume_button->setEnabled(false);

    breakpoint_model = new BreakPointModel(debug_context, this);
    breakpoint_list = new QTreeView;
    breakpoint_list->setRootIsDecorated(false);
    breakpoint_list->setHeaderHidden(true);
    breakpoint_list->setModel(breakpoint_model);

    qRegisterMetaType<Tegra::DebugContext::Event>("Tegra::DebugContext::Event");

    connect(breakpoint_list, &QTreeView::doubleClicked, this,
            &GraphicsBreakPointsWidget::OnItemDoubleClicked);

    connect(resume_button, &QPushButton::clicked, this,
            &GraphicsBreakPointsWidget::OnResumeRequested);

    connect(this, &GraphicsBreakPointsWidget::BreakPointHit, this,
            &GraphicsBreakPointsWidget::OnBreakPointHit, Qt::BlockingQueuedConnection);
    connect(this, &GraphicsBreakPointsWidget::Resumed, this, &GraphicsBreakPointsWidget::OnResumed);

    connect(this, &GraphicsBreakPointsWidget::BreakPointHit, breakpoint_model,
            &BreakPointModel::OnBreakPointHit, Qt::BlockingQueuedConnection);
    connect(this, &GraphicsBreakPointsWidget::Resumed, breakpoint_model,
            &BreakPointModel::OnResumed);

    connect(this, &GraphicsBreakPointsWidget::BreakPointsChanged,
            [this](const QModelIndex& top_left, const QModelIndex& bottom_right) {
                breakpoint_model->dataChanged(top_left, bottom_right);
            });

    QWidget* main_widget = new QWidget;
    auto main_layout = new QVBoxLayout;
    {
        auto sub_layout = new QHBoxLayout;
        sub_layout->addWidget(status_text);
        sub_layout->addWidget(resume_button);
        main_layout->addLayout(sub_layout);
    }
    main_layout->addWidget(breakpoint_list);
    main_widget->setLayout(main_layout);

    setWidget(main_widget);
}

void GraphicsBreakPointsWidget::OnMaxwellBreakPointHit(Event event, void* data) {
    // Process in GUI thread
    emit BreakPointHit(event, data);
}

void GraphicsBreakPointsWidget::OnBreakPointHit(Tegra::DebugContext::Event event, void* data) {
    status_text->setText(tr("Emulation halted at breakpoint"));
    resume_button->setEnabled(true);
}

void GraphicsBreakPointsWidget::OnMaxwellResume() {
    // Process in GUI thread
    emit Resumed();
}

void GraphicsBreakPointsWidget::OnResumed() {
    status_text->setText(tr("Emulation running"));
    resume_button->setEnabled(false);
}

void GraphicsBreakPointsWidget::OnResumeRequested() {
    if (auto context = context_weak.lock())
        context->Resume();
}

void GraphicsBreakPointsWidget::OnItemDoubleClicked(const QModelIndex& index) {
    if (!index.isValid())
        return;

    QModelIndex check_index = breakpoint_list->model()->index(index.row(), 0);
    QVariant enabled = breakpoint_list->model()->data(check_index, Qt::CheckStateRole);
    QVariant new_state = Qt::Unchecked;
    if (enabled == Qt::Unchecked)
        new_state = Qt::Checked;
    breakpoint_list->model()->setData(check_index, new_state, Qt::CheckStateRole);
}
@@ -1,45 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <QDockWidget>
#include "video_core/debug_utils/debug_utils.h"

class QLabel;
class QPushButton;
class QTreeView;

class BreakPointModel;

class GraphicsBreakPointsWidget : public QDockWidget, Tegra::DebugContext::BreakPointObserver {
    Q_OBJECT

    using Event = Tegra::DebugContext::Event;

public:
    explicit GraphicsBreakPointsWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
                                       QWidget* parent = nullptr);

    void OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
    void OnMaxwellResume() override;

signals:
    void Resumed();
    void BreakPointHit(Tegra::DebugContext::Event event, void* data);
    void BreakPointsChanged(const QModelIndex& topLeft, const QModelIndex& bottomRight);

private:
    void OnBreakPointHit(Tegra::DebugContext::Event event, void* data);
    void OnItemDoubleClicked(const QModelIndex&);
    void OnResumeRequested();
    void OnResumed();

    QLabel* status_text;
    QPushButton* resume_button;

    BreakPointModel* breakpoint_model;
    QTreeView* breakpoint_list;
};
@@ -1,37 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <memory>
#include <QAbstractListModel>
#include "video_core/debug_utils/debug_utils.h"

class BreakPointModel : public QAbstractListModel {
    Q_OBJECT

public:
    enum {
        Role_IsEnabled = Qt::UserRole,
    };

    BreakPointModel(std::shared_ptr<Tegra::DebugContext> context, QObject* parent);

    int columnCount(const QModelIndex& parent = QModelIndex()) const override;
    int rowCount(const QModelIndex& parent = QModelIndex()) const override;
    QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
    Qt::ItemFlags flags(const QModelIndex& index) const override;

    bool setData(const QModelIndex& index, const QVariant& value, int role = Qt::EditRole) override;

    void OnBreakPointHit(Tegra::DebugContext::Event event);
    void OnResumed();

private:
    static QString DebugContextEventToString(Tegra::DebugContext::Event event);

    std::weak_ptr<Tegra::DebugContext> context_weak;
    bool at_breakpoint;
    Tegra::DebugContext::Event active_breakpoint;
};
@@ -93,7 +93,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/debug_utils/debug_utils.h"
#include "yuzu/about_dialog.h"
#include "yuzu/bootmanager.h"
#include "yuzu/compatdb.h"
@@ -101,7 +100,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "yuzu/configuration/config.h"
#include "yuzu/configuration/configure_dialog.h"
#include "yuzu/debugger/console.h"
#include "yuzu/debugger/graphics/graphics_breakpoints.h"
#include "yuzu/debugger/profiler.h"
#include "yuzu/debugger/wait_tree.h"
#include "yuzu/discord.h"
@@ -187,8 +185,6 @@ GMainWindow::GMainWindow()
      provider(std::make_unique<FileSys::ManualContentProvider>()) {
    InitializeLogging();

    debug_context = Tegra::DebugContext::Construct();

    setAcceptDrops(true);
    ui.setupUi(this);
    statusBar()->hide();
@@ -495,11 +491,6 @@ void GMainWindow::InitializeDebugWidgets() {
    debug_menu->addAction(microProfileDialog->toggleViewAction());
#endif

    graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this);
    addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
    graphicsBreakpointsWidget->hide();
    debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());

    waitTreeWidget = new WaitTreeWidget(this);
    addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget);
    waitTreeWidget->hide();
@@ -869,8 +860,6 @@ bool GMainWindow::LoadROM(const QString& filename) {
    Core::System& system{Core::System::GetInstance()};
    system.SetFilesystem(vfs);

    system.SetGPUDebugContext(debug_context);

    system.SetAppletFrontendSet({
        nullptr, // Parental Controls
        std::make_unique<QtErrorDisplay>(*this), //

@@ -22,7 +22,6 @@ class Config;
class EmuThread;
class GameList;
class GImageInfo;
class GraphicsBreakPointsWidget;
class GRenderWindow;
class LoadingScreen;
class MicroProfileDialog;
@@ -42,10 +41,6 @@ class ManualContentProvider;
class VfsFilesystem;
} // namespace FileSys

namespace Tegra {
class DebugContext;
}

enum class EmulatedDirectoryTarget {
    NAND,
    SDMC,
@@ -223,8 +218,6 @@ private:

    Ui::MainWindow ui;

    std::shared_ptr<Tegra::DebugContext> debug_context;

    GRenderWindow* render_window;
    GameList* game_list;
    LoadingScreen* loading_screen;
@@ -255,7 +248,6 @@ private:
    // Debugger panes
    ProfilerWidget* profilerWidget;
    MicroProfileDialog* microProfileDialog;
    GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
    WaitTreeWidget* waitTreeWidget;

    QAction* actions_recent_files[max_recent_files_item];