vulkan: Move unswizzle to GPU
This commit is contained in:
@@ -2,11 +2,19 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "core/device_memory.h"
|
||||
|
||||
namespace Core {
|
||||
|
||||
DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {}
|
||||
/// Constructs `buffer` with DramMemoryMap::Size and 1ULL << 39, then validates the host address
/// of its backing storage. The process is aborted when that address is null or is not a
/// multiple of 0x1000 bytes.
DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {
    const auto backing_address = reinterpret_cast<std::size_t>(buffer.BackingBasePointer());
    const bool is_misaligned = (backing_address & 0xfff) != 0;
    const bool is_null = backing_address == 0;
    if (is_misaligned || is_null) {
        LOG_CRITICAL(HW_Memory, "Unaligned DeviceMemory");
        abort();
    }
}
|
||||
|
||||
DeviceMemory::~DeviceMemory() = default;
|
||||
|
||||
} // namespace Core
|
||||
|
||||
@@ -12,7 +12,9 @@ namespace Core {
|
||||
namespace DramMemoryMap {
|
||||
enum : u64 {
|
||||
Base = 0x80000000ULL,
|
||||
Size = 0x100000000ULL,
|
||||
GiB = 0x40000000ULL,
|
||||
GiBs = 4,
|
||||
Size = GiB * GiBs,
|
||||
End = Base + Size,
|
||||
KernelReserveBase = Base + 0x60000,
|
||||
SlabHeapBase = KernelReserveBase + 0x85000,
|
||||
|
||||
@@ -65,7 +65,11 @@ struct Memory::Impl {
|
||||
return {};
|
||||
}
|
||||
|
||||
return system.DeviceMemory().GetPointer(paddr) + vaddr;
|
||||
u8* test = system.DeviceMemory().GetPointer(paddr);
|
||||
// LOG_CRITICAL(Debug, "{:016X} {:016X} {:016X} va {:016X} {:016X}",
|
||||
// (size_t)system.DeviceMemory().buffer.BackingBasePointer(),
|
||||
// (size_t)test, paddr, vaddr, (size_t)(test + vaddr));
|
||||
return test + vaddr;
|
||||
}
|
||||
|
||||
u8 Read8(const VAddr addr) {
|
||||
@@ -240,6 +244,42 @@ struct Memory::Impl {
|
||||
ReadBlockImpl<true>(*system.CurrentProcess(), src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
/**
 * Walks [src_addr, src_addr + size) in the given process with WalkBlock and, instead of
 * copying bytes, records where each visited chunk lives.
 *
 * For every chunk one entry is written at result.tail:
 *  - backing_offset: host pointer of the chunk minus the device memory backing base pointer,
 *    or DramMemoryMap::Size when the chunk is unmapped (an error is logged in that case).
 *  - copy_amount: the chunk length reported by WalkBlock, truncated to u32.
 * After each entry result.tail is decremented. The process is aborted when the entry just
 * written was result.data[0], because the cursor cannot move any further down.
 */
void ReadBlockPointersUnsafe(const Kernel::KProcess& process, const VAddr src_addr,
                             ReadPointers& result, const std::size_t size) {
    const auto first_slot = &result.data[0];
    const auto backing_base = system.DeviceMemory().buffer.BackingBasePointer();
    auto& cursor = result.tail;

    // Chunk has no mapping: log it and store the out-of-range marker offset.
    const auto on_unmapped = [src_addr, size, &cursor](const std::size_t copy_amount,
                                                       const VAddr current_vaddr) {
        LOG_ERROR(HW_Memory,
                  "Unmapped ReadBlockPointers @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                  current_vaddr, src_addr, size);
        cursor->backing_offset = DramMemoryMap::Size;
    };

    // Chunk is plain memory: store its distance from the backing base.
    const auto on_memory = [&cursor, backing_base](const std::size_t copy_amount,
                                                   const u8* const src_ptr) {
        cursor->backing_offset = src_ptr - backing_base;
    };

    // Chunk is rasterizer-cached memory: handled exactly like plain memory, without flushing.
    const auto on_rasterizer = [&cursor, backing_base](const VAddr current_vaddr,
                                                       const std::size_t copy_amount,
                                                       const u8* const host_ptr) {
        cursor->backing_offset = host_ptr - backing_base;
    };

    // Runs after each chunk: finish the entry and step the cursor towards data[0].
    const auto on_advance = [&cursor, first_slot](const std::size_t copy_amount) {
        cursor->copy_amount = static_cast<u32>(copy_amount);
        if (cursor == first_slot) {
            LOG_CRITICAL(Debug, "Trying to read too much???");
            abort();
        }
        --cursor;
    };

    WalkBlock(process, src_addr, size, on_unmapped, on_memory, on_rasterizer, on_advance);
}
|
||||
|
||||
void ReadBlockPointersUnsafe(const VAddr src_addr, ReadPointers& result,
|
||||
const std::size_t size) {
|
||||
ReadBlockPointersUnsafe(*system.CurrentProcess(), src_addr, result, size);
|
||||
}
|
||||
|
||||
template <bool UNSAFE>
|
||||
void WriteBlockImpl(const Kernel::KProcess& process, const VAddr dest_addr,
|
||||
const void* src_buffer, const std::size_t size) {
|
||||
@@ -668,6 +708,11 @@ void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std:
|
||||
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
|
||||
}
|
||||
|
||||
/// Public wrapper: hands the request to the implementation object unchanged.
void Memory::ReadBlockPointersUnsafe(const VAddr src_addr, ReadPointers& result,
                                     const std::size_t size) {
    return impl->ReadBlockPointersUnsafe(src_addr, result, size);
}
|
||||
|
||||
void Memory::WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
|
||||
std::size_t size) {
|
||||
impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
|
||||
@@ -691,4 +736,8 @@ void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
|
||||
impl->RasterizerMarkRegionCached(vaddr, size, cached);
|
||||
}
|
||||
|
||||
/// Exposes the device memory object owned by the system this Memory instance belongs to.
Core::DeviceMemory& Memory::GetDeviceMemory() {
    Core::DeviceMemory& device_memory = system.DeviceMemory();
    return device_memory;
}
|
||||
|
||||
} // namespace Core::Memory
|
||||
|
||||
@@ -14,8 +14,9 @@ struct PageTable;
|
||||
}
|
||||
|
||||
namespace Core {
|
||||
class DeviceMemory;
|
||||
class System;
|
||||
}
|
||||
} // namespace Core
|
||||
|
||||
namespace Kernel {
|
||||
class PhysicalMemory;
|
||||
@@ -41,6 +42,17 @@ enum : VAddr {
|
||||
DEFAULT_STACK_SIZE = 0x100000,
|
||||
};
|
||||
|
||||
constexpr u32 MAX_READ_POINTERS = 100000;
|
||||
|
||||
struct ReadPointers {
|
||||
struct ReadPointer {
|
||||
u32 copy_amount;
|
||||
u64 backing_offset;
|
||||
};
|
||||
std::array<ReadPointer, MAX_READ_POINTERS> data;
|
||||
ReadPointer* tail;
|
||||
};
|
||||
|
||||
/// Central class that handles all memory operations and state.
|
||||
class Memory {
|
||||
public:
|
||||
@@ -348,6 +360,8 @@ public:
|
||||
*/
|
||||
void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
|
||||
|
||||
void ReadBlockPointersUnsafe(VAddr src_addr, ReadPointers& result, std::size_t size);
|
||||
|
||||
/**
|
||||
* Writes a range of bytes into a given process' address space at the specified
|
||||
* virtual address.
|
||||
@@ -435,6 +449,8 @@ public:
|
||||
*/
|
||||
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
|
||||
|
||||
Core::DeviceMemory& GetDeviceMemory();
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
|
||||
|
||||
@@ -132,6 +132,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/vk_fence_manager.h
|
||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||
renderer_vulkan/vk_graphics_pipeline.h
|
||||
renderer_vulkan/vk_host_memory.cpp
|
||||
renderer_vulkan/vk_host_memory.h
|
||||
renderer_vulkan/vk_master_semaphore.cpp
|
||||
renderer_vulkan/vk_master_semaphore.h
|
||||
renderer_vulkan/vk_pipeline_cache.cpp
|
||||
|
||||
@@ -16,6 +16,7 @@ set(SHADER_FILES
|
||||
vulkan_present.vert
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_uint8.comp
|
||||
vulkan_unswizzle.comp
|
||||
)
|
||||
|
||||
find_program(GLSLANGVALIDATOR "glslangValidator")
|
||||
|
||||
100
src/video_core/host_shaders/vulkan_unswizzle.comp
Normal file
100
src/video_core/host_shaders/vulkan_unswizzle.comp
Normal file
@@ -0,0 +1,100 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
|
||||
layout (local_size_x = 64) in;
|
||||
|
||||
//layout (constant_id = 0) const int BYTES_PER_PIXEL = 1;
|
||||
|
||||
layout(binding = 0, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
|
||||
layout(binding = 0, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
|
||||
layout(binding = 0, std430) readonly buffer InputBufferU32 { uint u32data[]; };
|
||||
layout(binding = 0, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
|
||||
layout(binding = 0, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };
|
||||
|
||||
layout(binding = 1) writeonly uniform image2DArray output_image;
|
||||
|
||||
layout (push_constant) uniform constants {
|
||||
uint size;
|
||||
uint ptr;
|
||||
uint so_far;
|
||||
uint bytes_per_pixel;
|
||||
uint pitch;
|
||||
uint height;
|
||||
uint depth;
|
||||
uint block_height;
|
||||
uint block_depth;
|
||||
uint gobs_in_x;
|
||||
uint dcl2;
|
||||
};
|
||||
|
||||
const uint GiB = 0x40000000U;
|
||||
|
||||
// Loads the texel stored at byte position `offset` of the input buffer and widens it to a
// uvec4. The view of the buffer that is used depends on the bytes_per_pixel push constant.
// Offsets at or past the 1 GiB mark, and unsupported bytes_per_pixel values, yield uvec4(0).
uvec4 ReadTexel(uint offset) {
    if (offset >= GiB) {
        return uvec4(0);
    }
    if (bytes_per_pixel == 1) {
        return uvec4(u8data[offset], 0, 0, 0);
    }
    if (bytes_per_pixel == 2) {
        return uvec4(u16data[offset / 2], 0, 0, 0);
    }
    if (bytes_per_pixel == 4) {
        // The 32-bit word is split into its four bytes, most significant byte first.
        const uint word = u32data[offset / 4];
        return uvec4((word >> 24) & 0xffu, (word >> 16) & 0xffu, (word >> 8) & 0xffu,
                     word & 0xffu);
    }
    if (bytes_per_pixel == 8) {
        return uvec4(u64data[offset / 8], 0, 0);
    }
    if (bytes_per_pixel == 16) {
        return u128data[offset / 16];
    }
    return uvec4(0);
}
|
||||
|
||||
// One invocation per swizzled byte offset: derives the (x, y, z) destination coordinate from
// the offset and the block parameters in the push constants, then stores the texel read at
// ptr + offset into output_image. Only z == 0 is written for now.
void main() {
    const uint invocation = gl_GlobalInvocationID.x;
    if (invocation >= size) {
        return;
    }
    const uint swizzled_offset = invocation + so_far;

    // The low 9 bits select a position inside a 512-byte unit; the bits are de-interleaved
    // into an x part (6 bits) and a y part (3 bits).
    const uint entry = swizzled_offset & 511U;
    const uint x_entry = ((entry >> 3) & 32U) | ((entry >> 1) & 16U) | (entry & 15U);
    const uint y_table = ((entry >> 5) & 6U) | ((entry >> 4) & 1U);

    // The remaining bits index the unit: first within the block (y, then z), then across
    // blocks.
    const uint unit_index = swizzled_offset >> 9;
    const uint height_mask = (1U << block_height) - 1;
    const uint depth_mask = (1U << block_depth) - 1;
    const uint set_y = (unit_index & height_mask) << 3;
    const uint set_z = (unit_index >> block_height) & depth_mask;

    const uint block_index = unit_index >> (block_height + block_depth);
    const uint slice_size = dcl2 * gobs_in_x;
    const uint slice = block_index / slice_size;
    const uint in_slice = block_index % slice_size;

    const uint x = ((in_slice % gobs_in_x) << 6) + x_entry;
    const uint y = ((in_slice / gobs_in_x) << (block_height + 3)) + set_y + y_table;
    const uint z = (slice << block_depth) + set_z;

    // Offsets that land in the padding outside the image are dropped.
    if (x >= pitch || y >= height || z >= depth) {
        return;
    }
    if (z != 0) {
        return; // TODO
    }

    const uvec4 texel = ReadTexel(ptr + swizzled_offset);
    imageStore(output_image, ivec3(x, y, z), vec4(texel)/255);
}
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "core/hle/kernel/k_page_table.h"
|
||||
#include "core/hle/kernel/k_process.h"
|
||||
#include "core/memory.h"
|
||||
@@ -312,6 +313,37 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Page-by-page variant of a block read that records chunk locations in `result` instead of
/// copying data. Pages with a CPU address are delegated to
/// Core::Memory::Memory::ReadBlockPointersUnsafe; pages without one get a single entry whose
/// backing_offset is Core::DramMemoryMap::Size. Aborts when the entry written for an unmapped
/// page was result.data[0].
void MemoryManager::ReadBlockPointersUnsafe(GPUVAddr gpu_src_addr,
                                            Core::Memory::ReadPointers& result,
                                            const std::size_t size) const {
    std::size_t bytes_left{size};
    std::size_t page_index{gpu_src_addr >> page_bits};
    std::size_t offset_in_page{gpu_src_addr & page_mask};

    // Only the first page can start at a non-zero offset; every later page starts at 0.
    for (; bytes_left > 0; ++page_index, offset_in_page = 0) {
        const std::size_t copy_amount{
            std::min(static_cast<std::size_t>(page_size) - offset_in_page, bytes_left)};

        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
        if (page_addr) {
            const auto src_addr{*page_addr + offset_in_page};
            system.Memory().ReadBlockPointersUnsafe(src_addr, result, copy_amount);
        } else {
            auto& cursor = result.tail;
            cursor->backing_offset = Core::DramMemoryMap::Size;
            cursor->copy_amount = static_cast<u32>(copy_amount);
            if (cursor == &result.data[0]) {
                LOG_CRITICAL(Debug, "Trying to read too much???");
                abort();
            }
            --cursor;
        }

        bytes_left -= copy_amount;
    }
}
|
||||
|
||||
void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
|
||||
std::size_t remaining_size{size};
|
||||
std::size_t page_index{gpu_dest_addr >> page_bits};
|
||||
|
||||
@@ -16,6 +16,9 @@ class RasterizerInterface;
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
namespace Memory {
|
||||
struct ReadPointers;
|
||||
}
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
@@ -111,6 +114,8 @@ public:
|
||||
* being flushed.
|
||||
*/
|
||||
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void ReadBlockPointersUnsafe(GPUVAddr gpu_src_addr, Core::Memory::ReadPointers& result,
|
||||
std::size_t size) const;
|
||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||
|
||||
/**
|
||||
|
||||
@@ -671,8 +671,11 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
|
||||
|
||||
Image::~Image() = default;
|
||||
|
||||
void Image::UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
void Image::UploadMemory(const ImageBufferMap& map, Tegra::MemoryManager& gpu_memory,
|
||||
std::array<u8, VideoCommon::MAX_GUEST_SIZE>& scratch) {
|
||||
const std::span<u8> mapped_span = map.mapped_span;
|
||||
const auto copies =
|
||||
VideoCommon::UnswizzleImage(gpu_memory, gpu_addr, info, scratch, mapped_span);
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
|
||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
|
||||
|
||||
|
||||
@@ -151,8 +151,8 @@ public:
|
||||
Image(Image&&) = default;
|
||||
Image& operator=(Image&&) = default;
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
void UploadMemory(const ImageBufferMap& map, Tegra::MemoryManager& gpu_memory,
|
||||
std::array<u8, VideoCommon::MAX_GUEST_SIZE>& scratch);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)),
|
||||
host_memory(cpu_memory.GetDeviceMemory(), device),
|
||||
memory_allocator(device, false),
|
||||
state_tracker(gpu),
|
||||
scheduler(device, state_tracker),
|
||||
@@ -115,7 +116,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
|
||||
memory_allocator, state_tracker, scheduler) {
|
||||
memory_allocator, state_tracker, scheduler, host_memory) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_host_memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
@@ -70,6 +71,7 @@ private:
|
||||
VKScreenInfo screen_info;
|
||||
|
||||
Device device;
|
||||
VulkanHostMemory host_memory;
|
||||
MemoryAllocator memory_allocator;
|
||||
StateTracker state_tracker;
|
||||
VKScheduler scheduler;
|
||||
|
||||
@@ -155,73 +155,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
||||
|
||||
if (!use_accelerated) {
|
||||
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
|
||||
|
||||
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
|
||||
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
|
||||
const size_t size_bytes = GetSizeInBytes(framebuffer);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset = {.x = 0, .y = 0, .z = 0},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = framebuffer.width,
|
||||
.height = framebuffer.height,
|
||||
.depth = 1,
|
||||
},
|
||||
};
|
||||
scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
|
||||
const VkImage image = *raw_images[image_index];
|
||||
const VkImageMemoryBarrier base_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = 0,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
};
|
||||
VkImageMemoryBarrier read_barrier = base_barrier;
|
||||
read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
|
||||
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VkImageMemoryBarrier write_barrier = base_barrier;
|
||||
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
read_barrier);
|
||||
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
||||
});
|
||||
// This seems unused
|
||||
}
|
||||
scheduler.Record(
|
||||
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
|
||||
|
||||
@@ -2,18 +2,15 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/astc_decoder_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_unswizzle_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
@@ -22,20 +19,16 @@
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/astc.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Tegra::Texture::SWIZZLE_TABLE;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
|
||||
constexpr size_t ASTC_NUM_BINDINGS = 2;
|
||||
constexpr u32 BUFFER_TO_IMAGE_BINDING_INPUT_BUFFER = 0;
|
||||
constexpr u32 BUFFER_TO_IMAGE_BINDING_OUTPUT_IMAGE = 1;
|
||||
constexpr size_t BUFFER_TO_IMAGE_NUM_BINDINGS = 2;
|
||||
|
||||
template <size_t size>
|
||||
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
||||
@@ -71,24 +64,25 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
||||
.score = 2,
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
constexpr std::array<VkDescriptorSetLayoutBinding, BUFFER_TO_IMAGE_NUM_BINDINGS>
|
||||
BUFFER_TO_IMAGE_DESCRIPTOR_SET_BINDINGS{{
|
||||
{
|
||||
.binding = BUFFER_TO_IMAGE_BINDING_INPUT_BUFFER,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
{
|
||||
.binding = BUFFER_TO_IMAGE_BINDING_OUTPUT_IMAGE,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
},
|
||||
}};
|
||||
|
||||
constexpr DescriptorBankInfo ASTC_BANK_INFO{
|
||||
constexpr DescriptorBankInfo BUFFER_TO_IMAGE_BANK_INFO{
|
||||
.uniform_buffers = 0,
|
||||
.storage_buffers = 1,
|
||||
.texture_buffers = 0,
|
||||
@@ -107,22 +101,22 @@ constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMP
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
};
|
||||
|
||||
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
|
||||
constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, BUFFER_TO_IMAGE_NUM_BINDINGS>
|
||||
BUFFER_TO_IMAGE_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
||||
.dstBinding = BUFFER_TO_IMAGE_BINDING_INPUT_BUFFER,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||
.offset = BUFFER_TO_IMAGE_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
{
|
||||
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
|
||||
.dstBinding = BUFFER_TO_IMAGE_BINDING_OUTPUT_IMAGE,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
|
||||
.offset = BUFFER_TO_IMAGE_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
},
|
||||
}};
|
||||
@@ -308,14 +302,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
||||
|
||||
ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_)
|
||||
: ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
|
||||
ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: ComputePass(device_, descriptor_pool_, BUFFER_TO_IMAGE_DESCRIPTOR_SET_BINDINGS,
|
||||
BUFFER_TO_IMAGE_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, BUFFER_TO_IMAGE_BANK_INFO,
|
||||
COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
|
||||
scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||
|
||||
@@ -415,4 +406,130 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
/// Builds the compute pass around the VULKAN_UNSWIZZLE_COMP_SPV shader. It uses the shared
/// buffer-to-image descriptor layout and a push constant range sized for
/// VideoCommon::UnswizzlePushConstants, and keeps references to the collaborators passed in.
UnswizzlePass::UnswizzlePass(const Device& device_, VKScheduler& scheduler_,
                             DescriptorPool& descriptor_pool_,
                             VKUpdateDescriptorQueue& update_descriptor_queue_,
                             VulkanHostMemory& vulkan_host_memory_)
    : ComputePass(device_, descriptor_pool_, BUFFER_TO_IMAGE_DESCRIPTOR_SET_BINDINGS,
                  BUFFER_TO_IMAGE_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY,
                  BUFFER_TO_IMAGE_BANK_INFO,
                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(VideoCommon::UnswizzlePushConstants)>,
                  VULKAN_UNSWIZZLE_COMP_SPV),
      scheduler{scheduler_},
      update_descriptor_queue{update_descriptor_queue_},
      vulkan_host_memory{vulkan_host_memory_} {}

UnswizzlePass::~UnswizzlePass() = default;
|
||||
|
||||
namespace {
|
||||
static constexpr VkAccessFlags UNSWIZZLE_WRITE_ACCESS_FLAGS =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
static constexpr VkAccessFlags UNSWIZZLE_READ_ACCESS_FLAGS =
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
|
||||
} // namespace
|
||||
|
||||
/// Starts an unswizzle sequence for `image`.
///
/// Leaves any active render pass, records a barrier that moves every mip level and array layer
/// of the image to VK_IMAGE_LAYOUT_GENERAL for compute shader writes, binds the compute
/// pipeline, and resets the page/level/layer bookkeeping.
///
/// @param image Image that the following Assemble calls will write to.
void UnswizzlePass::Begin(Image& image) {
    scheduler.RequestOutsideRenderPassOperationContext();
    const bool is_initialized = image.ExchangeInitialization();
    scheduler.Record([vk_pipeline = *pipeline, vk_image = image.Handle(),
                      aspect_mask = image.AspectMask(), is_initialized](vk::CommandBuffer cmdbuf) {
        const VkImageMemoryBarrier image_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            // Previous writes only need to be waited on when the image has been used before.
            .srcAccessMask = is_initialized ? UNSWIZZLE_WRITE_ACCESS_FLAGS : 0,
            .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
            // NOTE(review): UNDEFINED lets the driver discard existing contents even when
            // is_initialized is true; confirm that every texel is rewritten by the dispatches.
            .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = vk_image,
            .subresourceRange{
                .aspectMask = aspect_mask,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
    });
    // Reset all three trackers together; last_layer used to be left untouched here, so it was
    // indeterminate on first use and stale on later sequences.
    last_page = 0;
    last_level = 0;
    last_layer = 0;
}
|
||||
|
||||
/// Records one unswizzle dispatch that reads `size` bytes starting at backing offset `ptr`.
///
/// `ptr` is split into a GiB-sized page index and an offset inside that page; both, together
/// with `size` and `so_far`, are written into `unswizzle` (the caller's object is modified).
/// The page is bound through VulkanHostMemory::BindPage and the storage view selected by
/// (level, layer, aspect) is bound as the output image. One workgroup is dispatched per
/// 64 bytes of `size`, rounded up.
///
/// Aborts when the in-page offset plus `size` reaches the GiB boundary. A page index at or
/// above DramMemoryMap::GiBs is redirected to page 0 with the offset set to GiB.
void UnswizzlePass::Assemble(Image& image, VideoCommon::UnswizzlePushConstants& unswizzle, u64 ptr,
                             u32 size, u32 so_far, s32 level, s32 layer, bool aspect) {
    u32 page = static_cast<u32>(ptr / Core::DramMemoryMap::GiB);
    unswizzle.ptr = ptr % Core::DramMemoryMap::GiB;
    unswizzle.size = size;
    unswizzle.so_far = so_far;

    const bool reaches_page_end = unswizzle.ptr + unswizzle.size >= Core::DramMemoryMap::GiB;
    if (reaches_page_end) {
        LOG_CRITICAL(Debug, "swizzle page align");
        abort();
    }

    const bool page_out_of_range = page >= Core::DramMemoryMap::GiBs;
    if (page_out_of_range) {
        page = 0;
        unswizzle.ptr = Core::DramMemoryMap::GiB;
    }

    // Descriptor payload: first the input page, then the output image view.
    update_descriptor_queue.Acquire();
    vulkan_host_memory.BindPage(update_descriptor_queue, page);
    update_descriptor_queue.AddImage(image.StorageImageView(level, layer, aspect));

    last_page = page;
    last_level = level;
    last_layer = layer;

    const void* const descriptor_data{update_descriptor_queue.UpdateData()};
    const u32 num_dispatches_x = Common::DivCeil(unswizzle.size, 64U);

    // The push constants are copied into the closure because the recorded command may run
    // after this function has returned.
    scheduler.Record([this, num_dispatches_x, push_constants = unswizzle,
                      descriptor_data](vk::CommandBuffer cmdbuf) {
        const VkDescriptorSet set = descriptor_allocator.Commit();
        device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
        cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
        cmdbuf.Dispatch(num_dispatches_x, 1, 1);
    });
}
|
||||
|
||||
/// Ends an unswizzle sequence: records a barrier over all mip levels and array layers of
/// `image` that makes the compute shader writes visible to the read and write accesses listed
/// in the UNSWIZZLE_*_ACCESS_FLAGS constants. The layout stays VK_IMAGE_LAYOUT_GENERAL.
void UnswizzlePass::Finish(Image& image) {
    const VkImage vk_image = image.Handle();
    const auto aspect_mask = image.AspectMask();
    scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) {
        const VkImageSubresourceRange whole_image{
            .aspectMask = aspect_mask,
            .baseMipLevel = 0,
            .levelCount = VK_REMAINING_MIP_LEVELS,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        };
        const VkImageMemoryBarrier image_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
            .dstAccessMask = UNSWIZZLE_READ_ACCESS_FLAGS | UNSWIZZLE_WRITE_ACCESS_FLAGS,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = vk_image,
            .subresourceRange = whole_image,
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
    });
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -10,12 +10,14 @@
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_host_memory.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
struct SwizzleParameters;
|
||||
}
|
||||
struct UnswizzlePushConstants;
|
||||
} // namespace VideoCommon
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
@@ -87,9 +89,7 @@ class ASTCDecoderPass final : public ComputePass {
|
||||
public:
|
||||
explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
StagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
MemoryAllocator& memory_allocator_);
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_);
|
||||
~ASTCDecoderPass();
|
||||
|
||||
void Assemble(Image& image, const StagingBufferRef& map,
|
||||
@@ -97,9 +97,31 @@ public:
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
MemoryAllocator& memory_allocator;
|
||||
};
|
||||
|
||||
class UnswizzlePass final : public ComputePass {
|
||||
public:
|
||||
explicit UnswizzlePass(const Device& device_, VKScheduler& scheduler_,
|
||||
DescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VulkanHostMemory& vulkan_host_memory_);
|
||||
~UnswizzlePass();
|
||||
|
||||
void Begin(Image& image);
|
||||
|
||||
void Assemble(Image& image, VideoCommon::UnswizzlePushConstants& swizzle, u64 ptr, u32 size,
|
||||
u32 so_far, s32 level, s32 layer, bool aspect);
|
||||
|
||||
void Finish(Image& image);
|
||||
|
||||
private:
|
||||
VKScheduler& scheduler;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
VulkanHostMemory& vulkan_host_memory;
|
||||
u32 last_page;
|
||||
s32 last_level;
|
||||
s32 last_layer;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -458,6 +458,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
||||
}
|
||||
|
||||
void GraphicsPipeline::ConfigureDraw() {
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
|
||||
|
||||
@@ -469,7 +470,6 @@ void GraphicsPipeline::ConfigureDraw() {
|
||||
});
|
||||
}
|
||||
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
|
||||
if (bind_pipeline) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
|
||||
100
src/video_core/renderer_vulkan/vk_host_memory.cpp
Normal file
100
src/video_core/renderer_vulkan/vk_host_memory.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Originally from
|
||||
// https://github.com/google/vulkan_test_applications/blob/74e3a9790fb38303cd1646bbc098173fbb9200fa/application_sandbox/external_memory_host/main.cpp
|
||||
// Copyright 2020 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_host_memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
const VkBufferCreateInfo BUFFER_CREATE_INFO = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = Core::DramMemoryMap::GiB,
|
||||
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Imports the emulated DRAM backing allocation into Vulkan.
///
/// The backing memory is split into GiB-sized pages. For each page a buffer is created, the
/// matching slice of host memory is imported as a VkDeviceMemory through
/// VK_EXT_external_memory_host, and the buffer is bound to it at offset zero.
/// Any unmet requirement (size, alignment, memory type) is logged and aborts the process.
VulkanHostMemory::VulkanHostMemory(Core::DeviceMemory& memory, Device& device) {
    // A buffer that will be bound to imported memory must declare the external handle type at
    // creation time, so chain it onto a local copy of the shared creation parameters.
    const VkExternalMemoryBufferCreateInfo external_buffer_info{
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
    };
    VkBufferCreateInfo buffer_create_info = BUFFER_CREATE_INFO;
    buffer_create_info.pNext = &external_buffer_info;

    VkImportMemoryHostPointerInfoEXT import_memory_info{
        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
        .pNext = nullptr,
        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        .pHostPointer = nullptr,
    };
    VkMemoryAllocateInfo allocate_info{
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &import_memory_info,
        .allocationSize = Core::DramMemoryMap::GiB,
        .memoryTypeIndex = 0,
    };
    const auto& logical = device.GetLogical();
    const auto memory_properties = device.GetPhysical().GetMemoryProperties();
    // Start of the slice imported by the current page; advanced by one GiB per iteration.
    auto host = memory.buffer.BackingBasePointer();
    for (auto& page : pages) {
        page.second = logical.CreateBuffer(buffer_create_info);
        auto requirements = logical.GetBufferMemoryRequirements(*page.second, nullptr);
        if (requirements.size != Core::DramMemoryMap::GiB) {
            LOG_CRITICAL(Render_Vulkan, "Unexpected required size {}", requirements.size);
            abort();
        }
        if (requirements.alignment > 4096) {
            LOG_CRITICAL(Render_Vulkan, "Unexpected required alignment {}", requirements.alignment);
            abort();
        }
        u32 host_pointer_memory_type_bits = logical.GetMemoryHostPointerProperties(host);
        import_memory_info.pHostPointer = host;
        // Only memory types acceptable to both the buffer and the host pointer can be used.
        u32 memory_type_bits = requirements.memoryTypeBits & host_pointer_memory_type_bits;
        if (!memory_type_bits) {
            LOG_CRITICAL(
                Render_Vulkan,
                "Buffer memory bits({}) are not compatible with host pointer memory type bits ({})",
                requirements.memoryTypeBits, host_pointer_memory_type_bits);
            abort();
        }
        allocate_info.memoryTypeIndex =
            FindMemoryTypeIndex(memory_properties, memory_type_bits, false);
        page.first = logical.AllocateMemory(allocate_info);
        page.second.BindMemory(*page.first, 0);
        host += Core::DramMemoryMap::GiB;
    }
}
|
||||
|
||||
/// Queues the buffer covering one GiB page of host memory as the next descriptor.
/// @param update_descriptor_queue Queue that receives the buffer binding.
/// @param page Index of the GiB page; must be below Core::DramMemoryMap::GiBs.
/// An out-of-range page is logged and aborts the process.
void VulkanHostMemory::BindPage(VKUpdateDescriptorQueue& update_descriptor_queue, u32 page) {
    if (page >= Core::DramMemoryMap::GiBs) {
        LOG_CRITICAL(Render_Vulkan, "Invalid host memory page {}", page);
        abort();
    }
    // The allocation is not mapped here: the former Map/Unmap round trip over the whole page
    // produced an unused pointer and only added work to every bind.
    update_descriptor_queue.AddBuffer(*pages[page].second, 0, Core::DramMemoryMap::GiB);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
23
src/video_core/renderer_vulkan/vk_host_memory.h
Normal file
23
src/video_core/renderer_vulkan/vk_host_memory.h
Normal file
@@ -0,0 +1,23 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once

#include <array>
#include <utility>

#include "core/device_memory.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VulkanHostMemory {
|
||||
public:
|
||||
explicit VulkanHostMemory(Core::DeviceMemory& memory, Device& device);
|
||||
|
||||
void BindPage(VKUpdateDescriptorQueue& update_descriptor_queue, u32 page);
|
||||
|
||||
private:
|
||||
std::array<std::pair<vk::DeviceMemory, vk::Buffer>, Core::DramMemoryMap::GiBs> pages;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
@@ -125,7 +126,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
StateTracker& state_tracker_, VKScheduler& scheduler_)
|
||||
StateTracker& state_tracker_, VKScheduler& scheduler_,
|
||||
VulkanHostMemory& host_memory_)
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
|
||||
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
|
||||
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
|
||||
@@ -133,12 +135,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
|
||||
update_descriptor_queue(device, scheduler),
|
||||
blit_image(device, scheduler, state_tracker, descriptor_pool),
|
||||
astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
|
||||
memory_allocator),
|
||||
astc_decoder_pass(device, scheduler, descriptor_pool, update_descriptor_queue),
|
||||
unswizzle_pass(device, scheduler, descriptor_pool, update_descriptor_queue, host_memory_),
|
||||
render_pass_cache(device), texture_cache_runtime{device, scheduler,
|
||||
memory_allocator, staging_pool,
|
||||
blit_image, astc_decoder_pass,
|
||||
render_pass_cache},
|
||||
unswizzle_pass,
|
||||
render_pass_cache, {}},
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
update_descriptor_queue, descriptor_pool),
|
||||
@@ -155,6 +158,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||
|
||||
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
// auto t1 = std::chrono::high_resolution_clock::now();
|
||||
MICROPROFILE_SCOPE(Vulkan_Drawing);
|
||||
|
||||
SCOPE_EXIT({ gpu.TickWork(); });
|
||||
@@ -168,9 +172,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
pipeline->Configure(is_indexed);
|
||||
// auto t2 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
BeginTransformFeedback();
|
||||
|
||||
// auto t3 = std::chrono::high_resolution_clock::now();
|
||||
UpdateDynamicStates();
|
||||
|
||||
const auto& regs{maxwell3d.regs};
|
||||
@@ -187,6 +193,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||
}
|
||||
});
|
||||
EndTransformFeedback();
|
||||
// auto t4 = std::chrono::high_resolution_clock::now();
|
||||
// auto count1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
|
||||
// auto count2 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
|
||||
// auto count3 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3).count();
|
||||
// auto count4 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t1).count();
|
||||
// if (count4 > 1) {
|
||||
// LOG_CRITICAL(Debug, "{} {} {}", count1, count2, count3);
|
||||
// }
|
||||
}
|
||||
|
||||
void RasterizerVulkan::Clear() {
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_host_memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
|
||||
@@ -67,7 +68,7 @@ public:
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
VKScreenInfo& screen_info_, const Device& device_,
|
||||
MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
|
||||
VKScheduler& scheduler_);
|
||||
VKScheduler& scheduler_, VulkanHostMemory& host_memory_);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void Draw(bool is_indexed, bool is_instanced) override;
|
||||
@@ -154,6 +155,7 @@ private:
|
||||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
BlitImageHelper blit_image;
|
||||
ASTCDecoderPass astc_decoder_pass;
|
||||
UnswizzlePass unswizzle_pass;
|
||||
RenderPassCache render_pass_cache;
|
||||
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
|
||||
@@ -61,6 +61,11 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
size_t Region(size_t iterator) noexcept {
|
||||
return iterator / REGION_SIZE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
bool try_device_local) {
|
||||
std::optional<u32> type;
|
||||
@@ -80,11 +85,6 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
size_t Region(size_t iterator) noexcept {
|
||||
return iterator / REGION_SIZE;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
VKScheduler& scheduler_)
|
||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
|
||||
|
||||
@@ -23,6 +23,9 @@ struct StagingBufferRef {
|
||||
std::span<u8> mapped_span;
|
||||
};
|
||||
|
||||
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
|
||||
bool try_device_local);
|
||||
|
||||
class StagingBufferPool {
|
||||
public:
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
@@ -21,6 +23,7 @@
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/texture_cache/formatter.h"
|
||||
#include "video_core/texture_cache/samples_helper.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
@@ -106,9 +109,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
UNREACHABLE_MSG("Invalid surface type");
|
||||
}
|
||||
}
|
||||
if (info.storage) {
|
||||
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
}
|
||||
// if (info.storage) {
|
||||
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
// }
|
||||
return usage;
|
||||
}
|
||||
|
||||
@@ -416,59 +419,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
|
||||
}
|
||||
}
|
||||
|
||||
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
|
||||
VkImageAspectFlags aspect_mask, bool is_initialized,
|
||||
std::span<const VkBufferImageCopy> copies) {
|
||||
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
|
||||
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
|
||||
const VkImageMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = WRITE_ACCESS_FLAGS,
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const VkImageMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
read_barrier);
|
||||
cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
|
||||
// TODO: Move this to another API
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
|
||||
write_barrier);
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
|
||||
const VkImageSubresourceLayers& dst_layers,
|
||||
const VkImageSubresourceLayers& src_layers) {
|
||||
@@ -625,7 +575,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
|
||||
return;
|
||||
}
|
||||
}
|
||||
ASSERT(src.format == dst.format);
|
||||
// ASSERT(src.format == dst.format);
|
||||
ASSERT(!(is_dst_msaa && !is_src_msaa));
|
||||
ASSERT(operation == Fermi2D::Operation::SrcCopy);
|
||||
|
||||
@@ -844,7 +794,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
|
||||
image(MakeImage(runtime.device, info)),
|
||||
commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
|
||||
aspect_mask(ImageAspectMask(info.format)) {
|
||||
aspect_mask(ImageAspectMask(info.format)), unswizzle_pass{&runtime.unswizzle_pass},
|
||||
read_pointers{&runtime.read_pointers} {
|
||||
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
||||
if (Settings::values.accelerate_astc.GetValue()) {
|
||||
flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
|
||||
@@ -860,49 +811,197 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
|
||||
.pNext = nullptr,
|
||||
.usage = VK_IMAGE_USAGE_STORAGE_BIT,
|
||||
};
|
||||
const auto& device = runtime.device.GetLogical();
|
||||
VkImageViewCreateInfo create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = &storage_image_view_usage_create_info,
|
||||
.flags = 0,
|
||||
.image = *image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
|
||||
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
.components{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = ~0U,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const u32 levels = static_cast<u32>(info.resources.levels);
|
||||
const u32 layers = static_cast<u32>(info.resources.layers);
|
||||
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
|
||||
const auto& device = runtime.device.GetLogical();
|
||||
storage_image_views.reserve(info.resources.levels);
|
||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||
storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = &storage_image_view_usage_create_info,
|
||||
.flags = 0,
|
||||
.image = *image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
|
||||
.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
|
||||
.components{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = static_cast<u32>(level),
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
}));
|
||||
storage_image_views.reserve(levels);
|
||||
for (u32 level = 0; level < levels; ++level) {
|
||||
create_info.subresourceRange.baseMipLevel = level;
|
||||
storage_image_views.push_back(device.CreateImageView(create_info));
|
||||
}
|
||||
LOG_CRITICAL(Debug, "astc");
|
||||
abort();
|
||||
} else {
|
||||
switch (info.type) {
|
||||
case ImageType::e2D:
|
||||
case ImageType::e3D:
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Debug, "info.type {}", info.type);
|
||||
// abort();
|
||||
return;
|
||||
}
|
||||
create_info.subresourceRange.layerCount = 1;
|
||||
const bool depth_stencil_aspect =
|
||||
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
storage_image_views.reserve(levels * layers * (1 + depth_stencil_aspect));
|
||||
for (u32 level = 0; level < levels; ++level) {
|
||||
create_info.subresourceRange.baseMipLevel = level;
|
||||
for (u32 layer = 0; layer < layers; ++layer) {
|
||||
create_info.subresourceRange.baseArrayLayer = layer;
|
||||
if (depth_stencil_aspect) {
|
||||
create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
storage_image_views.push_back(device.CreateImageView(create_info));
|
||||
create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
}
|
||||
storage_image_views.push_back(device.CreateImageView(create_info));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Image::~Image() = default;
|
||||
|
||||
void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||
// TODO: Move this to another API
|
||||
scheduler->RequestOutsideRenderPassOperationContext();
|
||||
std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
|
||||
const VkBuffer src_buffer = map.buffer;
|
||||
const VkImage vk_image = *image;
|
||||
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
|
||||
const bool is_initialized = std::exchange(initialized, true);
|
||||
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
|
||||
vk_copies](vk::CommandBuffer cmdbuf) {
|
||||
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
|
||||
});
|
||||
// static int uploadmemorycount = 0;
|
||||
|
||||
void Image::UploadMemory(const StagingBufferRef&, Tegra::MemoryManager& gpu_memory,
|
||||
std::array<u8, VideoCommon::MAX_GUEST_SIZE>&) {
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
// const int debug_id = uploadmemorycount++;
|
||||
// LOG_CRITICAL(Debug, "UploadMemory Starting {}", debug_id);
|
||||
unswizzle_pass->Begin(*this);
|
||||
// LOG_CRITICAL(Debug, "UploadMemory Begun {}", debug_id);
|
||||
using namespace VideoCommon;
|
||||
const size_t actual_guest_size_bytes = CalculateGuestSizeInBytes(info);
|
||||
// if (actual_guest_size_bytes >= VideoCommon::MAX_GUEST_SIZE) {
|
||||
// LOG_CRITICAL(Debug, "guest_size {}", actual_guest_size_bytes);
|
||||
// abort();
|
||||
// }
|
||||
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
|
||||
const Extent3D size = info.size;
|
||||
if (info.type == ImageType::Linear) {
|
||||
// abort(); // TODO
|
||||
LOG_CRITICAL(Debug, "Linear???");
|
||||
return;
|
||||
}
|
||||
read_pointers->tail = &read_pointers->data.back();
|
||||
// LOG_CRITICAL(Debug, "actually {}", actual_guest_size_bytes);
|
||||
const auto t2 = std::chrono::high_resolution_clock::now();
|
||||
gpu_memory.ReadBlockPointersUnsafe(gpu_addr, *read_pointers, actual_guest_size_bytes);
|
||||
const auto t3 = std::chrono::high_resolution_clock::now();
|
||||
const LevelInfo level_info = MakeLevelInfo(info);
|
||||
const s32 num_levels = info.resources.levels;
|
||||
const Extent2D tile_size = DefaultBlockSize(info.format);
|
||||
const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
|
||||
const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
|
||||
const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels);
|
||||
const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
|
||||
info.tile_width_spacing);
|
||||
const bool depth_stencil_aspect =
|
||||
aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
UnswizzlePushConstants unswizzle{};
|
||||
const auto guest_end = read_pointers->tail;
|
||||
const auto advance_tail = [guest_end](Core::Memory::ReadPointers::ReadPointer*& tail,
|
||||
u32& offset, u32 size_, auto action) {
|
||||
if (tail == guest_end) {
|
||||
LOG_CRITICAL(Debug, "tail guest already");
|
||||
return;
|
||||
}
|
||||
while (size_ && size_ + offset >= tail->copy_amount) {
|
||||
const auto ptr_size = tail->copy_amount - offset;
|
||||
if (!ptr_size) {
|
||||
LOG_CRITICAL(Debug, "ptr???size {} {} {}", size_, offset, tail->copy_amount);
|
||||
abort();
|
||||
}
|
||||
action(tail->backing_offset + offset, ptr_size);
|
||||
size_ -= ptr_size;
|
||||
offset = 0;
|
||||
--tail;
|
||||
if (tail == guest_end) {
|
||||
// LOG_CRITICAL(Debug, "tail guest end");
|
||||
++tail;
|
||||
// abort();
|
||||
offset = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (size_) {
|
||||
action(tail->backing_offset + offset, size_);
|
||||
}
|
||||
offset += size_;
|
||||
};
|
||||
auto guest_tail = &read_pointers->data.back();
|
||||
u32 guest_offset = 0;
|
||||
for (s32 level = 0; level < num_levels; ++level) {
|
||||
// if (const auto levelsz = level_sizes[level]; true) {
|
||||
// LOG_CRITICAL(Debug, "levelsz {} layersz {}", levelsz, layer_stride);
|
||||
// }
|
||||
const Extent3D level_size = AdjustMipSize(size, level);
|
||||
const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
|
||||
const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
|
||||
const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
|
||||
Tegra::Texture::CalculateUnswizzle(unswizzle, 1U << bpp_log2, num_tiles.width,
|
||||
num_tiles.height, num_tiles.depth, block.height,
|
||||
block.depth, stride_alignment);
|
||||
auto guest_layer_tail = guest_tail;
|
||||
u32 guest_layer_offset = 0;
|
||||
const auto level_advance_size = level_sizes[level];
|
||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||
const auto assemble = [this, &unswizzle, level, layer, guest_layer_tail,
|
||||
guest_layer_offset, level_advance_size,
|
||||
advance_tail](bool aspect) {
|
||||
auto assemble_tail = guest_layer_tail;
|
||||
auto assemble_offset = guest_layer_offset;
|
||||
u32 so_far = 0;
|
||||
advance_tail(
|
||||
assemble_tail, assemble_offset, level_advance_size,
|
||||
[this, &unswizzle, level, layer, aspect, &so_far](u64 ptr, u32 ptr_size) {
|
||||
// LOG_CRITICAL(Debug, "ptr_size
|
||||
// {}", ptr_size);
|
||||
unswizzle_pass->Assemble(*this, unswizzle, ptr, ptr_size, so_far, level,
|
||||
layer, aspect);
|
||||
so_far += ptr_size;
|
||||
});
|
||||
// LOG_CRITICAL(Debug, "total so_far {}", so_far);
|
||||
};
|
||||
assemble(false);
|
||||
if (depth_stencil_aspect) {
|
||||
assemble(true);
|
||||
}
|
||||
advance_tail(guest_layer_tail, guest_layer_offset, layer_stride, [](u64, u32) {});
|
||||
}
|
||||
advance_tail(guest_tail, guest_offset, level_advance_size, [](u64, u32) {});
|
||||
}
|
||||
// LOG_CRITICAL(Debug, "UploadMemory Finishing {}", debug_id);
|
||||
const auto t4 = std::chrono::high_resolution_clock::now();
|
||||
unswizzle_pass->Finish(*this);
|
||||
const auto t5 = std::chrono::high_resolution_clock::now();
|
||||
// const auto count0 = std::chrono::duration_cast<std::chrono::microseconds>(t5 - t1).count();
|
||||
const auto count1 = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
|
||||
const auto count2 = std::chrono::duration_cast<std::chrono::microseconds>(t3 - t2).count();
|
||||
const auto count3 = std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count();
|
||||
const auto count4 = std::chrono::duration_cast<std::chrono::microseconds>(t5 - t4).count();
|
||||
// if (count0 > 1) {
|
||||
LOG_CRITICAL(Debug, "{} {} {} {} bpp {}", count1, count2, count3, count4, 1U << bpp_log2);
|
||||
// }
|
||||
// LOG_CRITICAL(Debug, "UploadMemory Done {}", debug_id);
|
||||
// sleep(1);
|
||||
// if (debug_id == 23) {
|
||||
// __asm__("int3");
|
||||
// abort();
|
||||
// }
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <compare>
|
||||
#include <span>
|
||||
|
||||
#include "core/memory.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
@@ -29,6 +30,7 @@ class ImageView;
|
||||
class Framebuffer;
|
||||
class RenderPassCache;
|
||||
class StagingBufferPool;
|
||||
class UnswizzlePass;
|
||||
class VKScheduler;
|
||||
|
||||
struct TextureCacheRuntime {
|
||||
@@ -38,7 +40,9 @@ struct TextureCacheRuntime {
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
BlitImageHelper& blit_image_helper;
|
||||
ASTCDecoderPass& astc_decoder_pass;
|
||||
UnswizzlePass& unswizzle_pass;
|
||||
RenderPassCache& render_pass_cache;
|
||||
Core::Memory::ReadPointers read_pointers;
|
||||
|
||||
void Finish();
|
||||
|
||||
@@ -90,8 +94,8 @@ public:
|
||||
Image(Image&&) = default;
|
||||
Image& operator=(Image&&) = default;
|
||||
|
||||
void UploadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
void UploadMemory(const StagingBufferRef& map, Tegra::MemoryManager& gpu_memory,
|
||||
std::array<u8, VideoCommon::MAX_GUEST_SIZE>& scratch);
|
||||
|
||||
void DownloadMemory(const StagingBufferRef& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
@@ -104,8 +108,15 @@ public:
|
||||
return aspect_mask;
|
||||
}
|
||||
|
||||
[[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept {
|
||||
return *storage_image_views[level];
|
||||
/// Returns the storage image view of one subresource.
/// @param level  Mip level.
/// @param layer  Array layer.
/// @param aspect Selects the second (stencil) view of a combined depth-stencil image.
/// The constructor stores per-layer views level-major and, for depth-stencil images, pushes
/// the depth view immediately followed by the stencil view of the same subresource; the index
/// below mirrors that order. An out-of-range request is logged and aborts the process.
[[nodiscard]] VkImageView StorageImageView(s32 level, s32 layer = 0,
                                           bool aspect = false) const noexcept {
    const bool has_stencil_views =
        aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
    const u32 views_per_subresource = has_stencil_views ? 2U : 1U;
    const u32 subresource = static_cast<u32>(level * info.resources.layers + layer);
    const u32 idx = subresource * views_per_subresource + (aspect ? 1U : 0U);
    // A second-aspect view exists only for combined depth-stencil images.
    if ((aspect && !has_stencil_views) || idx >= storage_image_views.size()) {
        LOG_CRITICAL(Debug, "{} {} {}", idx, storage_image_views.size(), aspect);
        abort();
    }
    return *storage_image_views[idx];
}
|
||||
|
||||
/// Returns true when the image is already initialized and mark it as initialized
|
||||
@@ -121,6 +132,8 @@ private:
|
||||
std::vector<vk::ImageView> storage_image_views;
|
||||
VkImageAspectFlags aspect_mask = 0;
|
||||
bool initialized = false;
|
||||
UnswizzlePass* unswizzle_pass;
|
||||
Core::Memory::ReadPointers* read_pointers;
|
||||
};
|
||||
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
|
||||
@@ -77,7 +77,7 @@ private:
|
||||
|
||||
DescriptorUpdateEntry* payload_cursor = nullptr;
|
||||
const DescriptorUpdateEntry* upload_start = nullptr;
|
||||
std::array<DescriptorUpdateEntry, 0x10000> payload;
|
||||
std::array<DescriptorUpdateEntry, 0x100000> payload;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
@@ -272,6 +272,7 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids,
|
||||
std::span<const u32> indices,
|
||||
std::span<ImageViewId> image_view_ids) {
|
||||
// auto t1 = std::chrono::high_resolution_clock::now();
|
||||
ASSERT(indices.size() <= image_view_ids.size());
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
@@ -279,24 +280,40 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
||||
return VisitImageView(table, cached_image_view_ids, index);
|
||||
});
|
||||
} while (has_deleted_images);
|
||||
// auto t2 = std::chrono::high_resolution_clock::now();
|
||||
// auto count1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
|
||||
// if (count1 > 1) {
|
||||
// LOG_CRITICAL(Debug, "{}", count1);
|
||||
// }
|
||||
}
|
||||
|
||||
// Resolves the image view for descriptor slot `index` of `table` and prepares it for use.
//
// - Returns NULL_IMAGE_VIEW_ID (after a debug log) when `index` is greater than table.Limit().
// - Otherwise reads the descriptor; when the table reports it as new, the cached id in
//   `cached_image_view_ids[index]` is replaced by the result of FindImageView().
// - A non-null id is passed to PrepareImageView(id, false, false) before being returned.
//
// NOTE(review): the bound check uses `>`, so index == table.Limit() is accepted and is used to
// index `cached_image_view_ids` -- confirm that Limit() is an inclusive maximum.
// The commented-out chrono lines are leftover timing instrumentation.
template <class P>
|
||||
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids,
|
||||
u32 index) {
|
||||
// auto t1 = std::chrono::high_resolution_clock::now();
|
||||
if (index > table.Limit()) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = table.Read(index);
|
||||
// Reference into the caller's cache so a refreshed id is stored in place.
ImageViewId& image_view_id = cached_image_view_ids[index];
|
||||
// auto t2 = std::chrono::high_resolution_clock::now();
|
||||
if (is_new) {
|
||||
image_view_id = FindImageView(descriptor);
|
||||
}
|
||||
// auto t3 = std::chrono::high_resolution_clock::now();
|
||||
if (image_view_id != NULL_IMAGE_VIEW_ID) {
|
||||
PrepareImageView(image_view_id, false, false);
|
||||
}
|
||||
// auto t4 = std::chrono::high_resolution_clock::now();
|
||||
// auto count1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
|
||||
// auto count2 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
|
||||
// auto count3 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3).count();
|
||||
// auto count4 = std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t1).count();
|
||||
// if (count4 > 1) {
|
||||
// LOG_CRITICAL(Debug, "{} {} {} {}", count1, count2, count3, count4);
|
||||
// }
|
||||
return image_view_id;
|
||||
}
|
||||
|
||||
@@ -539,29 +556,26 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
|
||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||
return;
|
||||
}
|
||||
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
||||
UploadImageContents(image, staging);
|
||||
UploadImageContents(image);
|
||||
runtime.InsertUploadMemoryBarrier();
|
||||
}
|
||||
|
||||
template <class P>
|
||||
template <typename StagingBuffer>
|
||||
void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
|
||||
const std::span<u8> mapped_span = staging.mapped_span;
|
||||
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||
|
||||
void TextureCache<P>::UploadImageContents(Image& image) {
|
||||
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
||||
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
const auto uploads = FullUploadSwizzles(image.info);
|
||||
runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
// gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(),
|
||||
// mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info);
|
||||
// runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
abort();
|
||||
} else if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
|
||||
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
// std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
|
||||
// auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
// ConvertImage(unswizzled_data, image.info, mapped_span, copies);
|
||||
// image.UploadMemory(staging, copies);
|
||||
abort();
|
||||
} else {
|
||||
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
image.UploadMemory(staging, copies);
|
||||
image.UploadMemory(staging, gpu_memory, unswizzle_scratch);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1378,8 +1392,14 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||
TrackImage(image, image_id);
|
||||
}
|
||||
} else {
|
||||
// auto t1 = std::chrono::high_resolution_clock::now();
|
||||
RefreshContents(image, image_id);
|
||||
SynchronizeAliases(image_id);
|
||||
// auto t2 = std::chrono::high_resolution_clock::now();
|
||||
// auto count1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 -
|
||||
// t1).count(); if (count1 > 1) {
|
||||
// LOG_CRITICAL(Debug, "{}", count1);
|
||||
// }
|
||||
}
|
||||
if (is_modification) {
|
||||
MarkModification(image);
|
||||
|
||||
@@ -217,9 +217,8 @@ private:
|
||||
/// Refresh the contents (pixel data) of an image
|
||||
void RefreshContents(Image& image, ImageId image_id);
|
||||
|
||||
/// Upload data from guest to an image
|
||||
template <typename StagingBuffer>
|
||||
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
|
||||
/// Upload data from guest to an image
|
||||
void UploadImageContents(Image& image);
|
||||
|
||||
/// Find or create an image view from a guest descriptor
|
||||
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
|
||||
@@ -328,6 +327,7 @@ private:
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
std::array<u8, MAX_GUEST_SIZE> unswizzle_scratch;
|
||||
|
||||
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
|
||||
@@ -145,4 +145,18 @@ struct SwizzleParameters {
|
||||
s32 level;
|
||||
};
|
||||
|
||||
// Plain aggregate of eleven u32 values describing one unswizzle operation.
// NOTE(review): judging by the name, this is presumably uploaded as push constants to the GPU
// unswizzle pass -- confirm against the code that fills it, and keep the field order in sync
// with whatever consumes it.
// NOTE(review): bytes_per_pixel, pitch, height, depth, block_height, block_depth, gobs_in_x and
// dcl2 share their names with parameters/locals of the CPU-side UnswizzleImpl; they presumably
// carry the same quantities. The meaning of size, ptr and so_far is not established by any code
// visible here.
struct UnswizzlePushConstants {
|
||||
u32 size;
|
||||
u32 ptr;
|
||||
u32 so_far;
|
||||
u32 bytes_per_pixel;
|
||||
u32 pitch;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 block_height;
|
||||
u32 block_depth;
|
||||
u32 gobs_in_x;
|
||||
u32 dcl2;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -52,8 +52,6 @@
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
namespace {
|
||||
|
||||
using Tegra::Texture::GOB_SIZE;
|
||||
using Tegra::Texture::GOB_SIZE_SHIFT;
|
||||
using Tegra::Texture::GOB_SIZE_X;
|
||||
@@ -80,15 +78,7 @@ using VideoCore::Surface::SurfaceType;
|
||||
|
||||
constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
|
||||
|
||||
struct LevelInfo {
|
||||
Extent3D size;
|
||||
Extent3D block;
|
||||
Extent2D tile_size;
|
||||
u32 bpp_log2;
|
||||
u32 tile_width_spacing;
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
|
||||
[[nodiscard]] u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
|
||||
if (shift == 0) {
|
||||
return 0;
|
||||
}
|
||||
@@ -104,11 +94,11 @@ struct LevelInfo {
|
||||
return shift;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
|
||||
[[nodiscard]] u32 AdjustMipSize(u32 size, u32 level) {
|
||||
return std::max<u32>(size >> level, 1);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
|
||||
[[nodiscard]] Extent3D AdjustMipSize(Extent3D size, s32 level) {
|
||||
return Extent3D{
|
||||
.width = AdjustMipSize(size.width, level),
|
||||
.height = AdjustMipSize(size.height, level),
|
||||
@@ -126,7 +116,7 @@ struct LevelInfo {
|
||||
}
|
||||
|
||||
template <u32 GOB_EXTENT>
|
||||
[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
|
||||
[[nodiscard]] u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
|
||||
do {
|
||||
while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
|
||||
--block_size;
|
||||
@@ -135,8 +125,7 @@ template <u32 GOB_EXTENT>
|
||||
return block_size;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
|
||||
u32 level) {
|
||||
[[nodiscard]] Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, u32 level) {
|
||||
return {
|
||||
.width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
|
||||
.height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
|
||||
@@ -144,7 +133,7 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
|
||||
[[nodiscard]] Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
|
||||
return {
|
||||
.width = Common::DivCeil(size.width, tile_size.width),
|
||||
.height = Common::DivCeil(size.height, tile_size.height),
|
||||
@@ -152,28 +141,28 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
|
||||
[[nodiscard]] u32 BytesPerBlockLog2(u32 bytes_per_block) {
|
||||
return std::countl_zero(bytes_per_block) ^ 0x1F;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
|
||||
[[nodiscard]] u32 BytesPerBlockLog2(PixelFormat format) {
|
||||
return BytesPerBlockLog2(BytesPerBlock(format));
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
|
||||
[[nodiscard]] u32 NumBlocks(Extent3D size, Extent2D tile_size) {
|
||||
const Extent3D num_blocks = AdjustTileSize(size, tile_size);
|
||||
return num_blocks.width * num_blocks.height * num_blocks.depth;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
|
||||
[[nodiscard]] u32 AdjustSize(u32 size, u32 level, u32 block_size) {
|
||||
return Common::DivCeil(AdjustMipSize(size, level), block_size);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
|
||||
[[nodiscard]] Extent2D DefaultBlockSize(PixelFormat format) {
|
||||
return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
|
||||
[[nodiscard]] Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
|
||||
return Extent3D{
|
||||
.width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
|
||||
.height = AdjustSize(info.size.height, level, info.tile_size.height),
|
||||
@@ -181,7 +170,7 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
|
||||
[[nodiscard]] Extent3D TileShift(const LevelInfo& info, u32 level) {
|
||||
const Extent3D blocks = NumLevelBlocks(info, level);
|
||||
return Extent3D{
|
||||
.width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
|
||||
@@ -190,21 +179,19 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
|
||||
[[nodiscard]] Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
|
||||
return Extent2D{
|
||||
.width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
|
||||
.height = GOB_SIZE_Y_SHIFT + block_height,
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
|
||||
u32 block_depth) {
|
||||
[[nodiscard]] bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, u32 block_depth) {
|
||||
return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
|
||||
num_tiles.depth < (1U << block_depth);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
|
||||
u32 bpp_log2) {
|
||||
[[nodiscard]] u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, u32 bpp_log2) {
|
||||
if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
|
||||
return GOB_SIZE_X_SHIFT - bpp_log2;
|
||||
} else {
|
||||
@@ -212,13 +199,13 @@ template <u32 GOB_EXTENT>
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
|
||||
u32 tile_width_spacing) {
|
||||
[[nodiscard]] u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
|
||||
u32 tile_width_spacing) {
|
||||
const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
|
||||
return StrideAlignment(num_tiles, block, gob, bpp_log2);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
|
||||
[[nodiscard]] Extent2D NumGobs(const LevelInfo& info, u32 level) {
|
||||
const Extent3D blocks = NumLevelBlocks(info, level);
|
||||
const Extent2D gobs{
|
||||
.width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
|
||||
@@ -233,7 +220,7 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
|
||||
[[nodiscard]] Extent3D LevelTiles(const LevelInfo& info, u32 level) {
|
||||
const Extent3D blocks = NumLevelBlocks(info, level);
|
||||
const Extent3D tile_shift = TileShift(info, level);
|
||||
const Extent2D gobs = NumGobs(info, level);
|
||||
@@ -244,7 +231,7 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
|
||||
[[nodiscard]] u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
|
||||
const Extent3D tile_shift = TileShift(info, level);
|
||||
const Extent3D tiles = LevelTiles(info, level);
|
||||
const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
|
||||
@@ -252,7 +239,7 @@ template <u32 GOB_EXTENT>
|
||||
return num_tiles << shift;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr LevelArray CalculateLevelSizes(const LevelInfo& info, u32 num_levels) {
|
||||
[[nodiscard]] LevelArray CalculateLevelSizes(const LevelInfo& info, u32 num_levels) {
|
||||
ASSERT(num_levels <= MAX_MIP_LEVELS);
|
||||
LevelArray sizes{};
|
||||
for (u32 level = 0; level < num_levels; ++level) {
|
||||
@@ -265,8 +252,8 @@ template <u32 GOB_EXTENT>
|
||||
return std::reduce(sizes.begin(), sizes.begin() + num_levels, 0U);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing) {
|
||||
[[nodiscard]] LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing) {
|
||||
const u32 bytes_per_block = BytesPerBlock(format);
|
||||
return {
|
||||
.size =
|
||||
@@ -282,12 +269,12 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
|
||||
[[nodiscard]] LevelInfo MakeLevelInfo(const ImageInfo& info) {
|
||||
return MakeLevelInfo(info.format, info.size, info.block, info.tile_width_spacing);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing, u32 level) {
|
||||
[[nodiscard]] u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing, u32 level) {
|
||||
const LevelInfo info = MakeLevelInfo(format, size, block, tile_width_spacing);
|
||||
u32 offset = 0;
|
||||
for (u32 current_level = 0; current_level < level; ++current_level) {
|
||||
@@ -296,8 +283,8 @@ template <u32 GOB_EXTENT>
|
||||
return offset;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
|
||||
u32 tile_size_y, u32 tile_width_spacing) {
|
||||
[[nodiscard]] u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, u32 tile_size_y,
|
||||
u32 tile_width_spacing) {
|
||||
// https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
|
||||
if (tile_width_spacing > 0) {
|
||||
const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
|
||||
@@ -483,7 +470,7 @@ template <u32 GOB_EXTENT>
|
||||
};
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
|
||||
[[nodiscard]] u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
|
||||
u32 num_blocks = 0;
|
||||
for (s32 level = 0; level < info.resources.levels; ++level) {
|
||||
const Extent3D mip_size = AdjustMipSize(info.size, level);
|
||||
@@ -574,8 +561,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
|
||||
ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
|
||||
if (info.type == ImageType::Buffer) {
|
||||
return info.size.width * BytesPerBlock(info.format);
|
||||
@@ -783,8 +768,15 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
|
||||
}
|
||||
|
||||
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageInfo& info, std::span<u8> output) {
|
||||
const ImageInfo& info,
|
||||
std::array<u8, MAX_GUEST_SIZE>& scratch,
|
||||
std::span<u8> output) {
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
|
||||
if (guest_size_bytes >= MAX_GUEST_SIZE) {
|
||||
LOG_CRITICAL(Debug, "guest_size {}", guest_size_bytes);
|
||||
abort();
|
||||
}
|
||||
const u32 bpp_log2 = BytesPerBlockLog2(info.format);
|
||||
const Extent3D size = info.size;
|
||||
|
||||
@@ -807,9 +799,12 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
|
||||
.image_extent = size,
|
||||
}};
|
||||
}
|
||||
const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
|
||||
const std::span<const u8> input(input_data.get(), guest_size_bytes);
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, scratch.data(), guest_size_bytes);
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
const std::span<const u8> input(scratch.data(), guest_size_bytes);
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const LevelInfo level_info = MakeLevelInfo(info);
|
||||
const s32 num_layers = info.resources.layers;
|
||||
@@ -850,13 +845,27 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
|
||||
for (s32 layer = 0; layer < info.resources.layers; ++layer) {
|
||||
const std::span<u8> dst = output.subspan(host_offset);
|
||||
const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
|
||||
UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth, stride_alignment);
|
||||
const std::span<const u8> src_limit =
|
||||
src.first(std::min(src.size(), static_cast<size_t>(level_sizes[level])));
|
||||
if (1) {
|
||||
UnswizzleTexture(dst, src_limit, 1U << bpp_log2, num_tiles.width, num_tiles.height,
|
||||
num_tiles.depth, block.height, block.depth, stride_alignment);
|
||||
}
|
||||
guest_layer_offset += layer_stride;
|
||||
host_offset += host_bytes_per_layer;
|
||||
}
|
||||
guest_offset += level_sizes[level];
|
||||
}
|
||||
auto t6 = std::chrono::high_resolution_clock::now();
|
||||
auto count1 = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
|
||||
auto count2 = std::chrono::duration_cast<std::chrono::microseconds>(t3 - t2).count();
|
||||
auto count3 = std::chrono::duration_cast<std::chrono::microseconds>(t4 - t3).count();
|
||||
auto count4 = std::chrono::duration_cast<std::chrono::microseconds>(t5 - t4).count();
|
||||
auto count5 = std::chrono::duration_cast<std::chrono::microseconds>(t6 - t5).count();
|
||||
auto count0 = std::chrono::duration_cast<std::chrono::microseconds>(t4 - t1).count();
|
||||
if (count0 > 1) {
|
||||
LOG_CRITICAL(Debug, "{} {} {} {} {}", count1, count2, count3, count4, count5);
|
||||
}
|
||||
return copies;
|
||||
}
|
||||
|
||||
@@ -1171,52 +1180,4 @@ u32 MapSizeBytes(const ImageBase& image) {
|
||||
}
|
||||
}
|
||||
|
||||
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
|
||||
0x7f8000);
|
||||
static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
|
||||
|
||||
static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
|
||||
0x2afc00);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::ASTC_2D_12X12_UNORM, {8192, 4096, 1}, {0, 2, 0}, 0,
|
||||
12) == 0x50d200);
|
||||
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 0) ==
|
||||
0);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 1) ==
|
||||
0x400000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 2) ==
|
||||
0x500000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 3) ==
|
||||
0x540000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 4) ==
|
||||
0x550000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 5) ==
|
||||
0x554000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 6) ==
|
||||
0x555000);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 7) ==
|
||||
0x555400);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 8) ==
|
||||
0x555600);
|
||||
static_assert(CalculateLevelOffset(PixelFormat::A8B8G8R8_UNORM, {1024, 1024, 1}, {0, 4, 0}, 0, 9) ==
|
||||
0x555800);
|
||||
|
||||
constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
|
||||
u32 tile_width_spacing, u32 level) {
|
||||
const Extent3D size{width, height, 1};
|
||||
const Extent3D block{0, block_height, 0};
|
||||
const u32 offset = CalculateLevelOffset(format, size, block, tile_width_spacing, level);
|
||||
return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
|
||||
}
|
||||
|
||||
static_assert(ValidateLayerSize(PixelFormat::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) ==
|
||||
0x50d800);
|
||||
static_assert(ValidateLayerSize(PixelFormat::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
|
||||
static_assert(ValidateLayerSize(PixelFormat::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
|
||||
|
||||
static_assert(ValidateLayerSize(PixelFormat::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
|
||||
"Tile width spacing is not working");
|
||||
static_assert(ValidateLayerSize(PixelFormat::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
|
||||
"Compressed tile width spacing is not working");
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -28,6 +28,77 @@ struct OverlapResult {
|
||||
SubresourceExtent resources;
|
||||
};
|
||||
|
||||
// Maximum guest image size in bytes (0x4000000 = 64 MiB); this ought to be enough for anybody
|
||||
constexpr size_t MAX_GUEST_SIZE = 0x4000000;
|
||||
|
||||
// Per-image layout parameters consumed by the mip-level size helpers declared below
// (NumLevelBlocks, TileShift, NumGobs, LevelTiles, CalculateLevelSize, ...).
struct LevelInfo {
|
||||
// Image extent; NumLevelBlocks divides it by tile_size (rounding up) for each level.
Extent3D size;
|
||||
// Block dimensions; TileShift passes these on as shift counts, so presumably log2 values.
Extent3D block;
|
||||
// Divisor applied to `size` by NumLevelBlocks.
Extent2D tile_size;
|
||||
// Shift applied to the per-level width by NumLevelBlocks (log2 of the bytes per block).
u32 bpp_log2;
|
||||
// NOTE(review): forwarded from ImageInfo by MakeLevelInfo; exact semantics not visible here.
u32 tile_width_spacing;
|
||||
};
|
||||
|
||||
[[nodiscard]] u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension);
|
||||
[[nodiscard]] u32 AdjustMipSize(u32 size, u32 level);
|
||||
[[nodiscard]] Extent3D AdjustMipSize(Extent3D size, s32 level);
|
||||
[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples);
|
||||
template <u32 GOB_EXTENT>
|
||||
[[nodiscard]] u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level);
|
||||
[[nodiscard]] Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, u32 level);
|
||||
[[nodiscard]] Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size);
|
||||
[[nodiscard]] u32 BytesPerBlockLog2(u32 bytes_per_block);
|
||||
[[nodiscard]] u32 BytesPerBlockLog2(PixelFormat format);
|
||||
[[nodiscard]] u32 NumBlocks(Extent3D size, Extent2D tile_size);
|
||||
[[nodiscard]] u32 AdjustSize(u32 size, u32 level, u32 block_size);
|
||||
[[nodiscard]] Extent2D DefaultBlockSize(PixelFormat format);
|
||||
[[nodiscard]] Extent3D NumLevelBlocks(const LevelInfo& info, u32 level);
|
||||
[[nodiscard]] Extent3D TileShift(const LevelInfo& info, u32 level);
|
||||
[[nodiscard]] Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing);
|
||||
[[nodiscard]] bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, u32 block_depth);
|
||||
[[nodiscard]] u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, u32 bpp_log2);
|
||||
[[nodiscard]] u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
|
||||
u32 tile_width_spacing);
|
||||
[[nodiscard]] Extent2D NumGobs(const LevelInfo& info, u32 level);
|
||||
[[nodiscard]] Extent3D LevelTiles(const LevelInfo& info, u32 level);
|
||||
[[nodiscard]] u32 CalculateLevelSize(const LevelInfo& info, u32 level);
|
||||
[[nodiscard]] LevelArray CalculateLevelSizes(const LevelInfo& info, u32 num_levels);
|
||||
[[nodiscard]] u32 CalculateLevelBytes(const LevelArray& sizes, u32 num_levels);
|
||||
[[nodiscard]] LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing);
|
||||
[[nodiscard]] LevelInfo MakeLevelInfo(const ImageInfo& info);
|
||||
[[nodiscard]] u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
|
||||
u32 tile_width_spacing, u32 level);
|
||||
[[nodiscard]] u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, u32 tile_size_y,
|
||||
u32 tile_width_spacing);
|
||||
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
|
||||
const ImageBase& overlap,
|
||||
bool strict_size);
|
||||
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
|
||||
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size);
|
||||
[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
|
||||
const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size);
|
||||
[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
|
||||
GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr,
|
||||
const ImageBase& overlap,
|
||||
bool strict_size);
|
||||
[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
|
||||
GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr,
|
||||
const ImageBase& overlap,
|
||||
bool strict_size);
|
||||
[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info);
|
||||
[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level);
|
||||
[[nodiscard]] u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept;
|
||||
[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept;
|
||||
void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageInfo& info, const BufferImageCopy& copy,
|
||||
std::span<const u8> memory);
|
||||
void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
const ImageInfo& info, const BufferImageCopy& copy,
|
||||
std::span<const u8> input);
|
||||
|
||||
[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
|
||||
|
||||
[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
|
||||
@@ -61,6 +132,7 @@ struct OverlapResult {
|
||||
|
||||
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||
GPUVAddr gpu_addr, const ImageInfo& info,
|
||||
std::array<u8, MAX_GUEST_SIZE>& scratch,
|
||||
std::span<u8> output);
|
||||
|
||||
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||
|
||||
@@ -18,15 +18,113 @@
|
||||
|
||||
namespace Tegra::Texture {
|
||||
namespace {
|
||||
// Maps a byte offset inside swizzled data to the matching byte offset in the linear image.
// This is the inverse of the forward offset computation done in UnswizzleImpl.
//
// The low 9 bits of `swizzled_offset` are decoded into an x byte position (6 bits) and a y row
// (3 bits). The remaining high bits select the row/slice within a block (via the block_height /
// block_depth masks and shifts) and, after shifting by `lesser_x_shift`, the block index, which
// is split by `lesser_slice_size` and `gobs_in_x` into x/y/z block coordinates.
//
// Returns z * pitch_height + y * pitch + x, or ~0U when the decoded coordinate lies outside
// pitch/height/depth or the resulting offset is not below `output_size`.
//
// NOTE(review): divides by `lesser_slice_size` and `gobs_in_x`; both must be non-zero.
// NOTE(review): all arithmetic is u32, so z * pitch_height can wrap for very large images.
[[maybe_unused]] u32 CalcUnswiz(u32 swizzled_offset, u32 block_height_mask, u32 block_height,
|
||||
u32 block_depth_mask, u32 lesser_x_shift, u32 lesser_slice_size,
|
||||
u32 gobs_in_x, u32 block_depth, u32 pitch, u32 height, u32 depth,
|
||||
u32 pitch_height, size_t output_size) {
|
||||
// Position inside the 512-byte unit addressed by the low 9 bits.
const u32 entry = swizzled_offset & 0b111111111;
|
||||
// y bits: entry bits 7..6 -> y bits 2..1, entry bit 4 -> y bit 0.
const u32 y_table = ((entry >> 5) & 6) | ((entry >> 4) & 1);
|
||||
// x bits: entry bit 8 -> x bit 5, entry bit 5 -> x bit 4, entry bits 3..0 -> x bits 3..0.
const u32 x_entry = ((entry >> 3) & 32) | ((entry >> 1) & 16) | (entry & 15);
|
||||
// Index of the 512-byte unit.
const u32 base_swizzled_offset = swizzled_offset >> 9;
|
||||
const u32 set_y = (base_swizzled_offset & block_height_mask) << 3;
|
||||
const u32 set_z = (base_swizzled_offset >> block_height) & block_depth_mask;
|
||||
// Block index once the in-block row/slice bits are removed.
const u32 inner_swizzled = base_swizzled_offset >> lesser_x_shift;
|
||||
const u32 sli = inner_swizzled / lesser_slice_size;
|
||||
const u32 gb = inner_swizzled % lesser_slice_size;
|
||||
const u32 x_inner = (gb % gobs_in_x) << 6;
|
||||
const u32 y_inner = (gb / gobs_in_x) << (block_height + 3);
|
||||
const u32 z_inner = sli << block_depth;
|
||||
const u32 x = x_inner + x_entry;
|
||||
const u32 y = y_inner + set_y + y_table;
|
||||
const u32 z = z_inner + set_z;
|
||||
// Offsets that decode to padding outside the image have no linear counterpart.
if (x >= pitch || y >= height || z >= depth) {
|
||||
return ~0U;
|
||||
}
|
||||
const u32 z_pitch_height = z * pitch_height;
|
||||
const u32 z_pitch_height_y_pitch = z_pitch_height + y * pitch;
|
||||
const u32 unswizzled_offset = z_pitch_height_y_pitch + x;
|
||||
if (unswizzled_offset >= output_size) {
|
||||
return ~0U;
|
||||
}
|
||||
return unswizzled_offset;
|
||||
}
|
||||
|
||||
// Copies texel data from swizzled `input` into linear `output`, BYTES_PER_PIXEL bytes at a time.
//
// The loop is driven by the linear (output) offset: each offset is split into x (byte within a
// row of `pitch` bytes), y and z, the matching swizzled offset is computed, and the element is
// copied when that offset lies inside `input`.
//
// Every iteration also runs the inverse mapping (CalcUnswiz) as a consistency check; a mismatch
// logs the full parameter set and calls abort().
//
// NOTE(review): CalcUnswiz is evaluated before the `swizzled_offset >= input.size()` test, so it
// also runs for elements that end up skipped.
// NOTE(review): x and y are remainders modulo pitch/height, so in the bounds test below only the
// `z >= depth` part can ever be true.
// NOTE(review): divides by `pitch` and `height`; a zero width or height would divide by zero.
// NOTE(review): block_size and slice_size are marked [[maybe_unused]] but are used in the loop.
template <u32 BYTES_PER_PIXEL>
|
||||
void UnswizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height,
|
||||
u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
|
||||
// Bytes per row of the linear image.
const u32 pitch = width * BYTES_PER_PIXEL;
|
||||
// Row size in bytes after aligning the width via Common::AlignUpLog2.
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
|
||||
const u32 gobs_in_x = Common::DivCeilLog2(stride, 6U);
|
||||
const u32 lesser_x_shift = block_height + block_depth;
|
||||
// 9 accounts for the 512-byte unit addressed by the low bits of a swizzled offset.
const u32 x_shift = 9 + lesser_x_shift;
|
||||
[[maybe_unused]] const u32 block_size = gobs_in_x << x_shift;
|
||||
const u32 dcl2 = Common::DivCeilLog2(height, block_height + 3);
|
||||
const u32 lesser_slice_size = dcl2 * gobs_in_x;
|
||||
[[maybe_unused]] const u32 slice_size = lesser_slice_size << x_shift;
|
||||
const u32 block_height_mask = (1U << block_height) - 1;
|
||||
const u32 block_depth_mask = (1U << block_depth) - 1;
|
||||
[[maybe_unused]] const u32 pitch_height = pitch * height;
|
||||
// Disabled alternative: iterate over the swizzled input and scatter through CalcUnswiz.
// for (u32 swizzled_offset = 0; swizzled_offset < input.size();
|
||||
// swizzled_offset += BYTES_PER_PIXEL) {
|
||||
// u32 unswizzled_offset =
|
||||
// CalcUnswiz(swizzled_offset, block_height_mask, block_height, block_depth_mask,
|
||||
// lesser_x_shift, lesser_slice_size, gobs_in_x, block_depth, pitch,
|
||||
// height, depth, pitch_height, output.size());
|
||||
// if (!~unswizzled_offset) {
|
||||
// continue;
|
||||
// }
|
||||
// u8* const dst = &output[unswizzled_offset];
|
||||
// const u8* const src = &input[swizzled_offset];
|
||||
// std::memcpy(dst, src, BYTES_PER_PIXEL);
|
||||
// }
|
||||
for (u32 unswizzled_offset = 0; unswizzled_offset < output.size();
|
||||
unswizzled_offset += BYTES_PER_PIXEL) {
|
||||
// Split the linear byte offset into row index (then z/y) and byte-in-row x.
const u32 unswizzled_offset_pitch = unswizzled_offset / pitch;
|
||||
const u32 z = unswizzled_offset_pitch / height;
|
||||
const u32 y = unswizzled_offset_pitch % height;
|
||||
const u32 x = unswizzled_offset % pitch;
|
||||
const u32 offset_z =
|
||||
(z >> block_depth) * slice_size + ((z & block_depth_mask) << (9 + block_height));
|
||||
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
|
||||
const u32 offset_y =
|
||||
(block_y >> block_height) * block_size + ((block_y & block_height_mask) << 9);
|
||||
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
|
||||
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
|
||||
// Low 9 bits: interleave the low bits of y and x (the mapping CalcUnswiz decodes).
const u32 table = ((y & 6) << 5) | ((y & 1) << 4);
|
||||
const u32 entry = ((x & 32) << 3) | ((x & 16) << 1) | (x & 15) | table;
|
||||
const u32 swizzled_offset = base_swizzled_offset | entry;
|
||||
// Round-trip through the inverse mapping; compared against unswizzled_offset below.
u32 other = CalcUnswiz(swizzled_offset, block_height_mask, block_height, block_depth_mask,
|
||||
lesser_x_shift, lesser_slice_size, gobs_in_x, block_depth, pitch,
|
||||
height, depth, pitch_height, output.size());
|
||||
// Source bytes beyond the provided input are left untouched in the output.
if (swizzled_offset >= input.size()) {
|
||||
continue;
|
||||
}
|
||||
if (x >= pitch || y >= height || z >= depth) {
|
||||
// if (~other) {
|
||||
// LOG_CRITICAL(Debug, "E2 {} != {}", unswizzled_offset, other);
|
||||
// abort();
|
||||
// }
|
||||
continue;
|
||||
}
|
||||
// if (z != 0) {
|
||||
// continue;
|
||||
// }
|
||||
// Forward and inverse mappings disagree: dump all parameters and terminate.
if (other != unswizzled_offset) {
|
||||
LOG_CRITICAL(Debug, "E3 {} != {} | {} {} {} {} {} {} {} {} {} {} {} {} | {} {} {}",
|
||||
unswizzled_offset, other, swizzled_offset, block_height_mask, block_height,
|
||||
block_depth_mask, lesser_x_shift, lesser_slice_size, gobs_in_x,
|
||||
block_depth, pitch, height, depth, pitch_height, x, y, z);
|
||||
abort();
|
||||
}
|
||||
u8* const dst = &output[unswizzled_offset];
|
||||
const u8* const src = &input[swizzled_offset];
|
||||
std::memcpy(dst, src, BYTES_PER_PIXEL);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
|
||||
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
|
||||
u32 block_height, u32 block_depth, u32 stride_alignment) {
|
||||
// The origin of the transformation can be configured here, leave it as zero as the current API
|
||||
// doesn't expose it.
|
||||
static constexpr u32 origin_x = 0;
|
||||
static constexpr u32 origin_y = 0;
|
||||
static constexpr u32 origin_z = 0;
|
||||
|
||||
// We can configure here a custom pitch
|
||||
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
|
||||
const u32 pitch = width * BYTES_PER_PIXEL;
|
||||
@@ -42,32 +140,34 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
|
||||
const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
|
||||
|
||||
for (u32 slice = 0; slice < depth; ++slice) {
|
||||
const u32 z = slice + origin_z;
|
||||
const u32 z = slice;
|
||||
const u32 offset_z = (z >> block_depth) * slice_size +
|
||||
((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
|
||||
const u32 slice_pitch_height = slice * pitch * height;
|
||||
for (u32 line = 0; line < height; ++line) {
|
||||
const u32 y = line + origin_y;
|
||||
const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
|
||||
const u32 y = line;
|
||||
const u32 table = ((y & 6) << 5) | ((y & 1) << 4);
|
||||
|
||||
const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
|
||||
const u32 offset_y = (block_y >> block_height) * block_size +
|
||||
((block_y & block_height_mask) << GOB_SIZE_SHIFT);
|
||||
const u32 line_pitch = line * pitch;
|
||||
|
||||
for (u32 column = 0; column < width; ++column) {
|
||||
const u32 x = (column + origin_x) * BYTES_PER_PIXEL;
|
||||
const u32 x = column * BYTES_PER_PIXEL;
|
||||
const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
|
||||
|
||||
const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
|
||||
const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
|
||||
const u32 entry = ((x & 32) << 3) | ((x & 16) << 1) | (x & 15) | table;
|
||||
const u32 swizzled_offset = base_swizzled_offset | entry;
|
||||
|
||||
const u32 unswizzled_offset =
|
||||
slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
|
||||
const u32 unswizzled_offset = slice_pitch_height + line_pitch + x;
|
||||
|
||||
if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset);
|
||||
offset >= input.size()) {
|
||||
// TODO(Rodrigo): This is an out of bounds access that should never happen. To
|
||||
// avoid crashing the emulator, break.
|
||||
ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size());
|
||||
// ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size());
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -84,36 +184,61 @@ template <bool TO_LINEAR>
|
||||
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
||||
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
|
||||
switch (bytes_per_pixel) {
|
||||
case 1:
|
||||
return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height,
|
||||
#define BPP_CASE(x) \
|
||||
case x: \
|
||||
return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \
|
||||
block_depth, stride_alignment);
|
||||
case 2:
|
||||
return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 3:
|
||||
return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 4:
|
||||
return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 6:
|
||||
return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 8:
|
||||
return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 12:
|
||||
return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
case 16:
|
||||
return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height,
|
||||
block_depth, stride_alignment);
|
||||
BPP_CASE(1)
|
||||
BPP_CASE(2)
|
||||
BPP_CASE(3)
|
||||
BPP_CASE(4)
|
||||
BPP_CASE(6)
|
||||
BPP_CASE(8)
|
||||
BPP_CASE(12)
|
||||
BPP_CASE(16)
|
||||
#undef BPP_CASE
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
/// Runtime-to-compile-time dispatcher: selects the UnswizzleImpl instantiation whose
/// BYTES_PER_PIXEL template argument equals bytes_per_pixel and forwards all other arguments
/// unchanged. Any unsupported value reaches UNREACHABLE_MSG.
[[maybe_unused]] void Unswizzle(std::span<u8> output, std::span<const u8> input,
                                u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                                u32 block_height, u32 block_depth, u32 stride_alignment) {
    switch (bytes_per_pixel) {
    case 1:
        return UnswizzleImpl<1>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 2:
        return UnswizzleImpl<2>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 3:
        return UnswizzleImpl<3>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 4:
        return UnswizzleImpl<4>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 6:
        return UnswizzleImpl<6>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 8:
        return UnswizzleImpl<8>(output, input, width, height, depth, block_height, block_depth,
                                stride_alignment);
    case 12:
        return UnswizzleImpl<12>(output, input, width, height, depth, block_height, block_depth,
                                 stride_alignment);
    case 16:
        return UnswizzleImpl<16>(output, input, width, height, depth, block_height, block_depth,
                                 stride_alignment);
    default:
        UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Fills `result` with the parameters describing how to unswizzle an image of the given
/// dimensions. The derived values use the same formulas as the CPU path (UnswizzleImpl) in
/// this file.
///
/// @param result           Structure receiving the computed values.
/// @param bytes_per_pixel  Size in bytes of one texel.
/// @param width            Row width in texels.
/// @param height           Number of rows per slice.
/// @param depth            Number of slices.
/// @param block_height     Log2 block height, stored as-is and used to derive dcl2.
/// @param block_depth      Log2 block depth, stored as-is.
/// @param stride_alignment Log2 alignment passed to Common::AlignUpLog2 for the row width.
void CalculateUnswizzle(VideoCommon::UnswizzlePushConstants& result, u32 bytes_per_pixel, u32 width,
                        u32 height, u32 depth, u32 block_height, u32 block_depth,
                        u32 stride_alignment) {
    const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
    result.bytes_per_pixel = bytes_per_pixel;
    result.pitch = width * bytes_per_pixel;
    result.height = height;
    result.depth = depth;
    result.block_height = block_height;
    result.block_depth = block_depth;
    result.gobs_in_x = Common::DivCeilLog2(stride, 6U);
    // dcl2 is derived from the image height, exactly as UnswizzleImpl computes it; it was
    // previously a duplicate of gobs_in_x, which is derived from the stride.
    result.dcl2 = Common::DivCeilLog2(height, block_height + 3);
}
|
||||
|
||||
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
|
||||
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
|
||||
u32 stride_alignment) {
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <span>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Texture {
|
||||
@@ -40,6 +41,10 @@ constexpr SwizzleTable MakeSwizzleTable() {
|
||||
}
|
||||
constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable();
|
||||
|
||||
void CalculateUnswizzle(VideoCommon::UnswizzlePushConstants& result, u32 bytes_per_pixel, u32 width,
|
||||
u32 height, u32 depth, u32 block_height, u32 block_depth,
|
||||
u32 stride_alignment);
|
||||
|
||||
/// Unswizzles a block linear texture into linear memory.
|
||||
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
|
||||
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/device_memory.h"
|
||||
#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
@@ -42,6 +43,7 @@ enum class NvidiaArchitecture {
|
||||
|
||||
constexpr std::array REQUIRED_EXTENSIONS{
|
||||
VK_KHR_MAINTENANCE1_EXTENSION_NAME,
|
||||
VK_KHR_MAINTENANCE3_EXTENSION_NAME,
|
||||
VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
|
||||
VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
|
||||
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
|
||||
@@ -63,6 +65,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
|
||||
#endif
|
||||
#ifdef __unix__
|
||||
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
|
||||
VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -754,6 +757,7 @@ void Device::CheckSuitability(bool requires_swapchain) const {
|
||||
const VkPhysicalDeviceLimits& limits{properties.limits};
|
||||
const std::array limits_report{
|
||||
LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
|
||||
LimitTuple{134217728, limits.maxStorageBufferRange, "maxUniformBufferRange"},
|
||||
LimitTuple{16, limits.maxViewports, "maxViewports"},
|
||||
LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
|
||||
LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
|
||||
@@ -900,6 +904,27 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
||||
|
||||
VkPhysicalDeviceProperties2KHR physical_properties;
|
||||
physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
|
||||
{
|
||||
VkPhysicalDeviceMaintenance3Properties properties3{};
|
||||
properties3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES;
|
||||
properties3.pNext = nullptr;
|
||||
VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_properties{};
|
||||
host_properties.sType =
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
|
||||
host_properties.pNext = &properties3;
|
||||
physical_properties.pNext = &host_properties;
|
||||
physical.GetProperties2KHR(physical_properties);
|
||||
if (properties3.maxMemoryAllocationSize < Core::DramMemoryMap::GiB) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Not enough memory for Vulkan host memory {} < {}",
|
||||
properties3.maxMemoryAllocationSize, Core::DramMemoryMap::GiB);
|
||||
abort();
|
||||
}
|
||||
if (host_properties.minImportedHostPointerAlignment > 4096) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unexpected minImportedHostPointerAlignment {}",
|
||||
host_properties.minImportedHostPointerAlignment);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (has_khr_shader_float16_int8) {
|
||||
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
|
||||
|
||||
@@ -180,6 +180,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||
#ifdef _WIN32
|
||||
X(vkGetMemoryWin32HandleKHR);
|
||||
#endif
|
||||
X(vkGetMemoryHostPointerPropertiesEXT);
|
||||
X(vkGetQueryPoolResults);
|
||||
X(vkGetPipelineExecutablePropertiesKHR);
|
||||
X(vkGetPipelineExecutableStatisticsKHR);
|
||||
@@ -811,6 +812,17 @@ VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noe
|
||||
return requirements;
|
||||
}
|
||||
|
||||
/// Calls vkGetMemoryHostPointerPropertiesEXT for `ptr` with the host-allocation handle type
/// and returns the memoryTypeBits mask it reports.
///
/// @param ptr Host pointer to query.
/// @return The memoryTypeBits field written by the driver.
// NOTE(review): the call result goes through Check() inside a noexcept function - confirm
// how Check reports failures.
u32 Device::GetMemoryHostPointerProperties(const void* ptr) const noexcept {
    VkMemoryHostPointerPropertiesEXT host_pointer_info{};
    host_pointer_info.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT;
    host_pointer_info.pNext = nullptr;
    host_pointer_info.memoryTypeBits = 0;

    const auto query_result = dld->vkGetMemoryHostPointerPropertiesEXT(
        handle, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ptr, &host_pointer_info);
    Check(query_result);

    return host_pointer_info.memoryTypeBits;
}
|
||||
|
||||
std::vector<VkPipelineExecutablePropertiesKHR> Device::GetPipelineExecutablePropertiesKHR(
|
||||
VkPipeline pipeline) const {
|
||||
const VkPipelineInfoKHR info{
|
||||
|
||||
@@ -295,6 +295,7 @@ struct DeviceDispatch : InstanceDispatch {
|
||||
#ifdef _WIN32
|
||||
PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
|
||||
#endif
|
||||
PFN_vkGetMemoryHostPointerPropertiesEXT vkGetMemoryHostPointerPropertiesEXT{};
|
||||
PFN_vkGetPipelineExecutablePropertiesKHR vkGetPipelineExecutablePropertiesKHR{};
|
||||
PFN_vkGetPipelineExecutableStatisticsKHR vkGetPipelineExecutableStatisticsKHR{};
|
||||
PFN_vkGetQueryPoolResults vkGetQueryPoolResults{};
|
||||
@@ -881,6 +882,8 @@ public:
|
||||
|
||||
VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
|
||||
|
||||
u32 GetMemoryHostPointerProperties(const void* ptr) const noexcept;
|
||||
|
||||
std::vector<VkPipelineExecutablePropertiesKHR> GetPipelineExecutablePropertiesKHR(
|
||||
VkPipeline pipeline) const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user