Compare commits
1 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0bea13ed8 |
@@ -133,8 +133,8 @@ add_library(video_core STATIC
|
||||
renderer_opengl/gl_shader_util.h
|
||||
renderer_opengl/gl_state_tracker.cpp
|
||||
renderer_opengl/gl_state_tracker.h
|
||||
renderer_opengl/gl_staging_buffer_pool.cpp
|
||||
renderer_opengl/gl_staging_buffer_pool.h
|
||||
renderer_opengl/gl_stream_buffer.cpp
|
||||
renderer_opengl/gl_stream_buffer.h
|
||||
renderer_opengl/gl_texture_cache.cpp
|
||||
renderer_opengl/gl_texture_cache.h
|
||||
renderer_opengl/gl_texture_cache_base.cpp
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/control/channel_state_cache.inc"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
@@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12
|
||||
MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
|
||||
MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
|
||||
|
||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>;
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -64,17 +64,22 @@ void BufferCache<P>::RunGarbageCollector() {
|
||||
template <class P>
|
||||
void BufferCache<P>::TickFrame() {
|
||||
// Calculate hits and shots and move hit bits to the right
|
||||
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
|
||||
uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
|
||||
uniform_cache_shots.begin() + 1);
|
||||
uniform_cache_hits[0] = 0;
|
||||
uniform_cache_shots[0] = 0;
|
||||
|
||||
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
|
||||
channel_state->uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(),
|
||||
channel_state->uniform_cache_shots.end());
|
||||
std::copy_n(channel_state->uniform_cache_hits.begin(),
|
||||
channel_state->uniform_cache_hits.size() - 1,
|
||||
channel_state->uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(channel_state->uniform_cache_shots.begin(),
|
||||
channel_state->uniform_cache_shots.size() - 1,
|
||||
channel_state->uniform_cache_shots.begin() + 1);
|
||||
channel_state->uniform_cache_hits[0] = 0;
|
||||
channel_state->uniform_cache_shots[0] = 0;
|
||||
|
||||
const bool skip_preferred = hits * 256 < shots * 251;
|
||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
|
||||
// If we can obtain the memory info, use it instead of the estimate.
|
||||
if (runtime.CanReportMemoryUsage()) {
|
||||
@@ -164,10 +169,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
||||
BufferId buffer_a;
|
||||
BufferId buffer_b;
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
|
||||
buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
auto& src_buffer = slot_buffers[buffer_a];
|
||||
auto& dest_buffer = slot_buffers[buffer_b];
|
||||
SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
|
||||
@@ -272,30 +277,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
|
||||
.size = size,
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
uniform_buffers[stage][index] = binding;
|
||||
channel_state->uniform_buffers[stage][index] = binding;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
|
||||
uniform_buffers[stage][index] = NULL_BINDING;
|
||||
channel_state->uniform_buffers[stage][index] = NULL_BINDING;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
|
||||
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
DoUpdateGraphicsBuffers(is_indexed);
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeBuffers() {
|
||||
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
DoUpdateComputeBuffers();
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -338,98 +343,102 @@ template <class P>
|
||||
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
|
||||
const UniformBufferSizes* sizes) {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
if (enabled_uniform_buffer_masks != mask) {
|
||||
if (channel_state->enabled_uniform_buffer_masks != mask) {
|
||||
if constexpr (IS_OPENGL) {
|
||||
fast_bound_uniform_buffers.fill(0);
|
||||
channel_state->fast_bound_uniform_buffers.fill(0);
|
||||
}
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
uniform_buffer_binding_sizes.fill({});
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->uniform_buffer_binding_sizes.fill({});
|
||||
}
|
||||
}
|
||||
enabled_uniform_buffer_masks = mask;
|
||||
uniform_buffer_sizes = sizes;
|
||||
channel_state->enabled_uniform_buffer_masks = mask;
|
||||
channel_state->uniform_buffer_sizes = sizes;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
|
||||
const ComputeUniformBufferSizes* sizes) {
|
||||
enabled_compute_uniform_buffer_mask = mask;
|
||||
compute_uniform_buffer_sizes = sizes;
|
||||
channel_state->enabled_compute_uniform_buffer_mask = mask;
|
||||
channel_state->compute_uniform_buffer_sizes = sizes;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
|
||||
enabled_storage_buffers[stage] = 0;
|
||||
written_storage_buffers[stage] = 0;
|
||||
channel_state->enabled_storage_buffers[stage] = 0;
|
||||
channel_state->written_storage_buffers[stage] = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
|
||||
u32 cbuf_offset, bool is_written) {
|
||||
enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& cbufs = maxwell3d->state.shader_stages[stage];
|
||||
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
|
||||
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
channel_state->storage_buffers[stage][ssbo_index] =
|
||||
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
|
||||
enabled_texture_buffers[stage] = 0;
|
||||
written_texture_buffers[stage] = 0;
|
||||
image_texture_buffers[stage] = 0;
|
||||
channel_state->enabled_texture_buffers[stage] = 0;
|
||||
channel_state->written_texture_buffers[stage] = 0;
|
||||
channel_state->image_texture_buffers[stage] = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
|
||||
u32 size, PixelFormat format, bool is_written,
|
||||
bool is_image) {
|
||||
enabled_texture_buffers[stage] |= 1U << tbo_index;
|
||||
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
|
||||
channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index;
|
||||
channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
|
||||
channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
channel_state->texture_buffers[stage][tbo_index] =
|
||||
GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindComputeStorageBuffers() {
|
||||
enabled_compute_storage_buffers = 0;
|
||||
written_compute_storage_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
channel_state->enabled_compute_storage_buffers = 0;
|
||||
channel_state->written_compute_storage_buffers = 0;
|
||||
channel_state->image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
|
||||
bool is_written) {
|
||||
enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
|
||||
|
||||
const auto& cbufs = launch_desc.const_buffer_config;
|
||||
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
|
||||
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
channel_state->compute_storage_buffers[ssbo_index] =
|
||||
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindComputeTextureBuffers() {
|
||||
enabled_compute_texture_buffers = 0;
|
||||
written_compute_texture_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
channel_state->enabled_compute_texture_buffers = 0;
|
||||
channel_state->written_compute_texture_buffers = 0;
|
||||
channel_state->image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format, bool is_written, bool is_image) {
|
||||
enabled_compute_texture_buffers |= 1U << tbo_index;
|
||||
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
|
||||
channel_state->enabled_compute_texture_buffers |= 1U << tbo_index;
|
||||
channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
|
||||
channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
channel_state->compute_texture_buffers[tbo_index] =
|
||||
GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -465,6 +474,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
|
||||
if (committed_ranges.empty()) {
|
||||
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
|
||||
|
||||
async_buffers.emplace_back(std::optional<Async_Buffer>{});
|
||||
}
|
||||
return;
|
||||
@@ -525,6 +535,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||
committed_ranges.clear();
|
||||
if (downloads.empty()) {
|
||||
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
|
||||
|
||||
async_buffers.emplace_back(std::optional<Async_Buffer>{});
|
||||
}
|
||||
return;
|
||||
@@ -670,13 +681,13 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostIndexBuffer() {
|
||||
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
||||
TouchBuffer(buffer, index_buffer.buffer_id);
|
||||
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
||||
const u32 size = index_buffer.size;
|
||||
Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
|
||||
TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
|
||||
const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
|
||||
const u32 size = channel_state->index_buffer.size;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
|
||||
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
if constexpr (USE_MEMORY_MAPS) {
|
||||
auto upload_staging = runtime.UploadStagingBuffer(size);
|
||||
std::array<BufferCopy, 1> copies{
|
||||
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
|
||||
@@ -687,7 +698,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
||||
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
|
||||
}
|
||||
} else {
|
||||
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
||||
SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
|
||||
}
|
||||
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const u32 new_offset =
|
||||
@@ -704,7 +715,7 @@ template <class P>
|
||||
void BufferCache<P>::BindHostVertexBuffers() {
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
const Binding& binding = vertex_buffers[index];
|
||||
const Binding& binding = channel_state->vertex_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
@@ -727,19 +738,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
bind_buffer(count_buffer_binding);
|
||||
bind_buffer(channel_state->count_buffer_binding);
|
||||
}
|
||||
bind_buffer(indirect_buffer_binding);
|
||||
bind_buffer(channel_state->indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
u32 dirty = ~0U;
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty = std::exchange(dirty_uniform_buffers[stage], 0);
|
||||
dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0);
|
||||
}
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
const bool needs_bind = ((dirty >> index) & 1) != 0;
|
||||
BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
@@ -751,13 +762,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
|
||||
bool needs_bind) {
|
||||
const Binding& binding = uniform_buffers[stage][index];
|
||||
const Binding& binding = channel_state->uniform_buffers[stage][index];
|
||||
const VAddr cpu_addr = binding.cpu_addr;
|
||||
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
|
||||
const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
||||
size <= uniform_buffer_skip_cache_size &&
|
||||
size <= channel_state->uniform_buffer_skip_cache_size &&
|
||||
!memory_tracker.IsRegionGpuModified(cpu_addr, size);
|
||||
if (use_fast_buffer) {
|
||||
if constexpr (IS_OPENGL) {
|
||||
@@ -765,11 +776,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
// Fast path for Nvidia
|
||||
const bool should_fast_bind =
|
||||
!HasFastUniformBufferBound(stage, binding_index) ||
|
||||
uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
if (should_fast_bind) {
|
||||
// We only have to bind when the currently bound buffer is not the fast version
|
||||
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
||||
}
|
||||
const auto span = ImmediateBufferWithData(cpu_addr, size);
|
||||
@@ -778,8 +789,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
}
|
||||
}
|
||||
if constexpr (IS_OPENGL) {
|
||||
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
}
|
||||
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
||||
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
||||
@@ -789,15 +800,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
// Classic cached path
|
||||
const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
|
||||
if (sync_cached) {
|
||||
++uniform_cache_hits[0];
|
||||
++channel_state->uniform_cache_hits[0];
|
||||
}
|
||||
++uniform_cache_shots[0];
|
||||
++channel_state->uniform_cache_shots[0];
|
||||
|
||||
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
||||
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
||||
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
}
|
||||
if (!needs_bind) {
|
||||
return;
|
||||
@@ -805,14 +816,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
const u32 offset = buffer.Offset(cpu_addr);
|
||||
if constexpr (IS_OPENGL) {
|
||||
// Fast buffer will be unbound
|
||||
fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
||||
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
||||
|
||||
// Mark the index as dirty if offset doesn't match
|
||||
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
||||
dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||
}
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
}
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
|
||||
@@ -824,15 +835,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const Binding& binding = storage_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const Binding& binding = channel_state->storage_buffers[stage][index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
|
||||
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
|
||||
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
|
||||
runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
|
||||
++binding_index;
|
||||
@@ -844,8 +855,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
|
||||
const TextureBufferBinding& binding = texture_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
|
||||
const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
@@ -853,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
@@ -870,7 +881,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
||||
return;
|
||||
}
|
||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||
const Binding& binding = transform_feedback_buffers[index];
|
||||
const Binding& binding = channel_state->transform_feedback_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
@@ -885,15 +896,16 @@ template <class P>
|
||||
void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
// Mark all uniform buffers as dirty
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
fast_bound_uniform_buffers.fill(0);
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->fast_bound_uniform_buffers.fill(0);
|
||||
}
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
const Binding& binding = compute_uniform_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
const Binding& binding = channel_state->compute_uniform_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
|
||||
const u32 size =
|
||||
std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
@@ -909,15 +921,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostComputeStorageBuffers() {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||
const Binding& binding = compute_storage_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
|
||||
const Binding& binding = channel_state->compute_storage_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
|
||||
const bool is_written =
|
||||
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
|
||||
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
|
||||
runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
|
||||
++binding_index;
|
||||
@@ -929,8 +942,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostComputeTextureBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
|
||||
const TextureBufferBinding& binding = compute_texture_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
|
||||
const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
@@ -938,7 +951,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
@@ -952,7 +965,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
||||
template <class P>
|
||||
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
if (is_indexed) {
|
||||
UpdateIndexBuffer();
|
||||
}
|
||||
@@ -966,7 +979,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
||||
if (current_draw_indirect) {
|
||||
UpdateDrawIndirect();
|
||||
}
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -997,7 +1010,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
||||
slot_buffers.erase(inline_buffer_id);
|
||||
inline_buffer_id = CreateBuffer(0, buffer_size);
|
||||
}
|
||||
index_buffer = Binding{
|
||||
channel_state->index_buffer = Binding{
|
||||
.cpu_addr = 0,
|
||||
.size = inline_index_size,
|
||||
.buffer_id = inline_buffer_id,
|
||||
@@ -1013,10 +1026,10 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
||||
(index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
|
||||
const u32 size = std::min(address_size, draw_size);
|
||||
if (size == 0 || !cpu_addr) {
|
||||
index_buffer = NULL_BINDING;
|
||||
channel_state->index_buffer = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
index_buffer = Binding{
|
||||
channel_state->index_buffer = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = FindBuffer(*cpu_addr, size),
|
||||
@@ -1049,13 +1062,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
||||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
u32 size = address_size; // TODO: Analyze stride and number of vertices
|
||||
if (array.enable == 0 || size == 0 || !cpu_addr) {
|
||||
vertex_buffers[index] = NULL_BINDING;
|
||||
channel_state->vertex_buffers[index] = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
|
||||
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
|
||||
}
|
||||
vertex_buffers[index] = Binding{
|
||||
channel_state->vertex_buffers[index] = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = FindBuffer(*cpu_addr, size),
|
||||
@@ -1077,23 +1090,24 @@ void BufferCache<P>::UpdateDrawIndirect() {
|
||||
};
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32),
|
||||
channel_state->count_buffer_binding);
|
||||
}
|
||||
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||
indirect_buffer_binding);
|
||||
channel_state->indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
Binding& binding = uniform_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
Binding& binding = channel_state->uniform_buffers[stage][index];
|
||||
if (binding.buffer_id) {
|
||||
// Already updated
|
||||
return;
|
||||
}
|
||||
// Mark as dirty
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty_uniform_buffers[stage] |= 1U << index;
|
||||
channel_state->dirty_uniform_buffers[stage] |= 1U << index;
|
||||
}
|
||||
// Resolve buffer
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
@@ -1102,10 +1116,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
|
||||
const u32 written_mask = written_storage_buffers[stage];
|
||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const u32 written_mask = channel_state->written_storage_buffers[stage];
|
||||
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
|
||||
// Resolve buffer
|
||||
Binding& binding = storage_buffers[stage][index];
|
||||
Binding& binding = channel_state->storage_buffers[stage][index];
|
||||
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
binding.buffer_id = buffer_id;
|
||||
// Mark buffer as written if needed
|
||||
@@ -1117,11 +1131,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
|
||||
Binding& binding = texture_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
|
||||
Binding& binding = channel_state->texture_buffers[stage][index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark buffer as written if needed
|
||||
if (((written_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
@@ -1144,11 +1158,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
|
||||
const u32 size = binding.size;
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (binding.enable == 0 || size == 0 || !cpu_addr) {
|
||||
transform_feedback_buffers[index] = NULL_BINDING;
|
||||
channel_state->transform_feedback_buffers[index] = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
||||
transform_feedback_buffers[index] = Binding{
|
||||
channel_state->transform_feedback_buffers[index] = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = buffer_id,
|
||||
@@ -1158,8 +1172,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeUniformBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
Binding& binding = compute_uniform_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
Binding& binding = channel_state->compute_uniform_buffers[index];
|
||||
binding = NULL_BINDING;
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
|
||||
@@ -1176,12 +1190,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeStorageBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
|
||||
// Resolve buffer
|
||||
Binding& binding = compute_storage_buffers[index];
|
||||
Binding& binding = channel_state->compute_storage_buffers[index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark as written if needed
|
||||
if (((written_compute_storage_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
@@ -1189,11 +1203,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeTextureBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
|
||||
Binding& binding = compute_texture_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
|
||||
Binding& binding = channel_state->compute_texture_buffers[index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark as written if needed
|
||||
if (((written_compute_texture_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
@@ -1444,7 +1458,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
|
||||
template <class P>
|
||||
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||
std::span<BufferCopy> copies) {
|
||||
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
if constexpr (USE_MEMORY_MAPS) {
|
||||
MappedUploadMemory(buffer, total_size_bytes, copies);
|
||||
} else {
|
||||
ImmediateUploadMemory(buffer, largest_copy, copies);
|
||||
@@ -1455,7 +1469,7 @@ template <class P>
|
||||
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
|
||||
[[maybe_unused]] u64 largest_copy,
|
||||
[[maybe_unused]] std::span<const BufferCopy> copies) {
|
||||
if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
if constexpr (!USE_MEMORY_MAPS) {
|
||||
std::span<u8> immediate_buffer;
|
||||
for (const BufferCopy& copy : copies) {
|
||||
std::span<const u8> upload_span;
|
||||
@@ -1514,7 +1528,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
||||
auto& buffer = slot_buffers[buffer_id];
|
||||
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
|
||||
|
||||
if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
|
||||
if constexpr (USE_MEMORY_MAPS) {
|
||||
auto upload_staging = runtime.UploadStagingBuffer(copy_size);
|
||||
std::array copies{BufferCopy{
|
||||
.src_offset = upload_staging.offset,
|
||||
@@ -1608,13 +1622,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
||||
const auto replace = [scalar_replace](std::span<Binding> bindings) {
|
||||
std::ranges::for_each(bindings, scalar_replace);
|
||||
};
|
||||
scalar_replace(index_buffer);
|
||||
replace(vertex_buffers);
|
||||
std::ranges::for_each(uniform_buffers, replace);
|
||||
std::ranges::for_each(storage_buffers, replace);
|
||||
replace(transform_feedback_buffers);
|
||||
replace(compute_uniform_buffers);
|
||||
replace(compute_storage_buffers);
|
||||
scalar_replace(channel_state->index_buffer);
|
||||
replace(channel_state->vertex_buffers);
|
||||
std::ranges::for_each(channel_state->uniform_buffers, replace);
|
||||
std::ranges::for_each(channel_state->storage_buffers, replace);
|
||||
replace(channel_state->transform_feedback_buffers);
|
||||
replace(channel_state->compute_uniform_buffers);
|
||||
replace(channel_state->compute_storage_buffers);
|
||||
|
||||
// Mark the whole buffer as CPU written to stop tracking CPU writes
|
||||
if (!do_not_mark) {
|
||||
@@ -1632,8 +1646,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
||||
template <class P>
|
||||
void BufferCache<P>::NotifyBufferDeletion() {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
uniform_buffer_binding_sizes.fill({});
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->uniform_buffer_binding_sizes.fill({});
|
||||
}
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
flags[Dirty::IndexBuffer] = true;
|
||||
@@ -1641,13 +1655,12 @@ void BufferCache<P>::NotifyBufferDeletion() {
|
||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
flags[Dirty::VertexBuffer0 + index] = true;
|
||||
}
|
||||
has_deleted_buffers = true;
|
||||
channel_state->has_deleted_buffers = true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
|
||||
u32 cbuf_index,
|
||||
bool is_written) const {
|
||||
Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
|
||||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const auto size = [&]() {
|
||||
const bool is_nvn_cbuf = cbuf_index == 0;
|
||||
@@ -1679,8 +1692,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
|
||||
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
|
||||
TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
TextureBufferBinding binding;
|
||||
if (!cpu_addr || size == 0) {
|
||||
@@ -1719,7 +1732,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
|
||||
template <class P>
|
||||
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
||||
if constexpr (IS_OPENGL) {
|
||||
return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
||||
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
||||
} else {
|
||||
// Only OpenGL has fast uniform buffers
|
||||
return false;
|
||||
@@ -1728,14 +1741,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||
auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||
auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 {
|
||||
MarkQuery = 3,
|
||||
};
|
||||
|
||||
template <typename P>
|
||||
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
struct Binding {
|
||||
VAddr cpu_addr{};
|
||||
u32 size{};
|
||||
BufferId buffer_id;
|
||||
};
|
||||
|
||||
struct TextureBufferBinding : Binding {
|
||||
PixelFormat format;
|
||||
};
|
||||
|
||||
static constexpr Binding NULL_BINDING{
|
||||
.cpu_addr = 0,
|
||||
.size = 0,
|
||||
.buffer_id = NULL_BUFFER_ID,
|
||||
};
|
||||
|
||||
class BufferCacheChannelInfo : public ChannelInfo {
|
||||
public:
|
||||
BufferCacheChannelInfo() = delete;
|
||||
BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {}
|
||||
BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete;
|
||||
BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete;
|
||||
|
||||
Binding index_buffer;
|
||||
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
|
||||
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
|
||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
|
||||
u32 enabled_compute_uniform_buffer_mask = 0;
|
||||
|
||||
const UniformBufferSizes* uniform_buffer_sizes{};
|
||||
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_storage_buffers{};
|
||||
u32 enabled_compute_storage_buffers = 0;
|
||||
u32 written_compute_storage_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
u32 enabled_compute_texture_buffers = 0;
|
||||
u32 written_compute_texture_buffers = 0;
|
||||
u32 image_compute_texture_buffers = 0;
|
||||
|
||||
std::array<u32, 16> uniform_cache_hits{};
|
||||
std::array<u32, 16> uniform_cache_shots{};
|
||||
|
||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||
|
||||
bool has_deleted_buffers = false;
|
||||
|
||||
std::array<u32, NUM_STAGES> dirty_uniform_buffers{};
|
||||
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
|
||||
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>
|
||||
uniform_buffer_binding_sizes{};
|
||||
};
|
||||
|
||||
template <class P>
|
||||
class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
|
||||
// Page size for caching purposes.
|
||||
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
|
||||
static constexpr u32 CACHING_PAGEBITS = 16;
|
||||
@@ -103,9 +173,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
||||
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
|
||||
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
|
||||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
|
||||
@@ -150,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
||||
using OverlapSection = boost::icl::inter_section<int>;
|
||||
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
|
||||
|
||||
struct Empty {};
|
||||
|
||||
struct OverlapResult {
|
||||
std::vector<BufferId> ids;
|
||||
VAddr begin;
|
||||
@@ -159,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
||||
bool has_stream_leap = false;
|
||||
};
|
||||
|
||||
struct Binding {
|
||||
VAddr cpu_addr{};
|
||||
u32 size{};
|
||||
BufferId buffer_id;
|
||||
};
|
||||
|
||||
struct TextureBufferBinding : Binding {
|
||||
PixelFormat format;
|
||||
};
|
||||
|
||||
static constexpr Binding NULL_BINDING{
|
||||
.cpu_addr = 0,
|
||||
.size = 0,
|
||||
.buffer_id = NULL_BUFFER_ID,
|
||||
};
|
||||
|
||||
public:
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
|
||||
|
||||
@@ -497,51 +544,6 @@ private:
|
||||
|
||||
u32 last_index_count = 0;
|
||||
|
||||
Binding index_buffer;
|
||||
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
|
||||
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
|
||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
|
||||
u32 enabled_compute_uniform_buffer_mask = 0;
|
||||
|
||||
const UniformBufferSizes* uniform_buffer_sizes{};
|
||||
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_storage_buffers{};
|
||||
u32 enabled_compute_storage_buffers = 0;
|
||||
u32 written_compute_storage_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
u32 enabled_compute_texture_buffers = 0;
|
||||
u32 written_compute_texture_buffers = 0;
|
||||
u32 image_compute_texture_buffers = 0;
|
||||
|
||||
std::array<u32, 16> uniform_cache_hits{};
|
||||
std::array<u32, 16> uniform_cache_shots{};
|
||||
|
||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||
|
||||
bool has_deleted_buffers = false;
|
||||
|
||||
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
|
||||
dirty_uniform_buffers{};
|
||||
std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
|
||||
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
|
||||
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
|
||||
uniform_buffer_binding_sizes{};
|
||||
|
||||
MemoryTracker memory_tracker;
|
||||
IntervalSet uncommitted_ranges;
|
||||
IntervalSet common_ranges;
|
||||
|
||||
@@ -106,10 +106,8 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
|
||||
return views.back().texture.handle;
|
||||
}
|
||||
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
|
||||
StagingBufferPool& staging_buffer_pool_)
|
||||
: device{device_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
||||
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
|
||||
use_assembly_shaders{device.UseAssemblyShaders()},
|
||||
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
|
||||
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
|
||||
@@ -119,7 +117,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
|
||||
for (auto& stage_uniforms : fast_uniforms) {
|
||||
for (OGLBuffer& buffer : stage_uniforms) {
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
|
||||
glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr,
|
||||
GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
@@ -142,14 +140,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
|
||||
}();
|
||||
}
|
||||
|
||||
StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.RequestUploadBuffer(size);
|
||||
}
|
||||
|
||||
StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.RequestDownloadBuffer(size);
|
||||
}
|
||||
|
||||
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
||||
if (device.CanReportMemoryUsage()) {
|
||||
return device_access_memory - device.GetCurrentDedicatedVideoMemory();
|
||||
@@ -157,47 +147,13 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
|
||||
return 2_GiB;
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
if (barrier) {
|
||||
PreCopyBarrier();
|
||||
}
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
|
||||
static_cast<GLintptr>(copy.dst_offset),
|
||||
static_cast<GLsizeiptr>(copy.size));
|
||||
}
|
||||
if (barrier) {
|
||||
PostCopyBarrier();
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
|
||||
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies) {
|
||||
CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PreCopyBarrier() {
|
||||
// TODO: finer grained barrier?
|
||||
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::PostCopyBarrier() {
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
glFinish();
|
||||
for (const VideoCommon::BufferCopy& copy : copies) {
|
||||
glCopyNamedBufferSubData(
|
||||
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
|
||||
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@@ -60,28 +60,11 @@ class BufferCacheRuntime {
|
||||
public:
|
||||
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
|
||||
|
||||
explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
|
||||
|
||||
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
|
||||
|
||||
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
|
||||
|
||||
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
|
||||
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
|
||||
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
|
||||
explicit BufferCacheRuntime(const Device& device_);
|
||||
|
||||
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void PreCopyBarrier();
|
||||
void PostCopyBarrier();
|
||||
void Finish();
|
||||
|
||||
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
|
||||
|
||||
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
|
||||
@@ -186,7 +169,6 @@ private:
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
|
||||
bool has_fast_buffer_sub_data = false;
|
||||
bool use_assembly_shaders = false;
|
||||
@@ -219,7 +201,7 @@ private:
|
||||
struct BufferCacheParams {
|
||||
using Runtime = OpenGL::BufferCacheRuntime;
|
||||
using Buffer = OpenGL::Buffer;
|
||||
using Async_Buffer = OpenGL::StagingBufferMap;
|
||||
using Async_Buffer = u32;
|
||||
using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
|
||||
|
||||
static constexpr bool IS_OPENGL = true;
|
||||
@@ -227,12 +209,9 @@ struct BufferCacheParams {
|
||||
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
|
||||
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
|
||||
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
static constexpr bool USE_MEMORY_MAPS = false;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
|
||||
|
||||
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
|
||||
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
@@ -59,9 +58,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
||||
StateTracker& state_tracker_)
|
||||
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
|
||||
program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
|
||||
texture_cache(texture_cache_runtime, *this),
|
||||
buffer_cache_runtime(device, staging_buffer_pool),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
|
||||
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
|
||||
state_tracker, gpu.ShaderNotify()),
|
||||
|
||||
@@ -230,7 +230,6 @@ private:
|
||||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
StagingBufferPool staging_buffer_pool;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
StagingBufferMap::~StagingBufferMap() {
|
||||
if (sync) {
|
||||
sync->Create();
|
||||
}
|
||||
}
|
||||
|
||||
StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
|
||||
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
|
||||
|
||||
StagingBuffers::~StagingBuffers() = default;
|
||||
|
||||
StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
|
||||
MICROPROFILE_SCOPE(OpenGL_BufferRequest);
|
||||
|
||||
const size_t index = RequestBuffer(requested_size);
|
||||
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
|
||||
sync_indices[index] = insert_fence ? ++current_sync_index : 0;
|
||||
return StagingBufferMap{
|
||||
.mapped_span = std::span(maps[index], requested_size),
|
||||
.sync = sync,
|
||||
.buffer = buffers[index].handle,
|
||||
};
|
||||
}
|
||||
|
||||
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
|
||||
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
|
||||
return *index;
|
||||
}
|
||||
|
||||
OGLBuffer& buffer = buffers.emplace_back();
|
||||
buffer.Create();
|
||||
const auto next_pow2_size = Common::NextPow2(requested_size);
|
||||
glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr,
|
||||
storage_flags | GL_MAP_PERSISTENT_BIT);
|
||||
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size,
|
||||
map_flags | GL_MAP_PERSISTENT_BIT)));
|
||||
syncs.emplace_back();
|
||||
sync_indices.emplace_back();
|
||||
sizes.push_back(next_pow2_size);
|
||||
|
||||
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
|
||||
maps.size() == sizes.size());
|
||||
|
||||
return buffers.size() - 1;
|
||||
}
|
||||
|
||||
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
|
||||
size_t known_unsignaled_index = current_sync_index + 1;
|
||||
size_t smallest_buffer = std::numeric_limits<size_t>::max();
|
||||
std::optional<size_t> found;
|
||||
const size_t num_buffers = sizes.size();
|
||||
for (size_t index = 0; index < num_buffers; ++index) {
|
||||
const size_t buffer_size = sizes[index];
|
||||
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
|
||||
continue;
|
||||
}
|
||||
if (syncs[index].handle != 0) {
|
||||
if (sync_indices[index] >= known_unsignaled_index) {
|
||||
// This fence is later than a fence that is known to not be signaled
|
||||
continue;
|
||||
}
|
||||
if (!syncs[index].IsSignaled()) {
|
||||
// Since this fence hasn't been signaled, it's safe to assume all later
|
||||
// fences haven't been signaled either
|
||||
known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]);
|
||||
continue;
|
||||
}
|
||||
syncs[index].Release();
|
||||
}
|
||||
smallest_buffer = buffer_size;
|
||||
found = index;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
StreamBuffer::StreamBuffer() {
|
||||
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
||||
buffer.Create();
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
|
||||
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
|
||||
mapped_pointer =
|
||||
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
|
||||
for (OGLSync& sync : fences) {
|
||||
sync.Create();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
|
||||
ASSERT(size < REGION_SIZE);
|
||||
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
|
||||
++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = iterator;
|
||||
|
||||
for (size_t region = Region(free_iterator) + 1,
|
||||
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
|
||||
region < region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
if (iterator + size >= free_iterator) {
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
if (iterator + size > STREAM_BUFFER_SIZE) {
|
||||
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return {std::span(mapped_pointer + offset, size), offset};
|
||||
}
|
||||
|
||||
StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
|
||||
return upload_buffers.RequestMap(size, true);
|
||||
}
|
||||
|
||||
StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
|
||||
return download_buffers.RequestMap(size, false);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
63
src/video_core/renderer_opengl/gl_stream_buffer.cpp
Normal file
63
src/video_core/renderer_opengl/gl_stream_buffer.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
StreamBuffer::StreamBuffer() {
|
||||
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
|
||||
buffer.Create();
|
||||
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
|
||||
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
|
||||
mapped_pointer =
|
||||
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
|
||||
for (OGLSync& sync : fences) {
|
||||
sync.Create();
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
|
||||
ASSERT(size < REGION_SIZE);
|
||||
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
|
||||
++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = iterator;
|
||||
|
||||
for (size_t region = Region(free_iterator) + 1,
|
||||
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
|
||||
region < region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
if (iterator + size >= free_iterator) {
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
if (iterator + size > STREAM_BUFFER_SIZE) {
|
||||
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
|
||||
fences[region].Create();
|
||||
}
|
||||
used_iterator = 0;
|
||||
iterator = 0;
|
||||
free_iterator = size;
|
||||
|
||||
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
|
||||
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
|
||||
fences[region].Release();
|
||||
}
|
||||
}
|
||||
const size_t offset = iterator;
|
||||
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
|
||||
return {std::span(mapped_pointer + offset, size), offset};
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -4,10 +4,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
@@ -19,35 +17,6 @@ namespace OpenGL {
|
||||
|
||||
using namespace Common::Literals;
|
||||
|
||||
struct StagingBufferMap {
|
||||
~StagingBufferMap();
|
||||
|
||||
std::span<u8> mapped_span;
|
||||
size_t offset = 0;
|
||||
OGLSync* sync;
|
||||
GLuint buffer;
|
||||
};
|
||||
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
~StagingBuffers();
|
||||
|
||||
StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
||||
|
||||
size_t RequestBuffer(size_t requested_size);
|
||||
|
||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||
|
||||
std::vector<OGLSync> syncs;
|
||||
std::vector<OGLBuffer> buffers;
|
||||
std::vector<u8*> maps;
|
||||
std::vector<size_t> sizes;
|
||||
std::vector<size_t> sync_indices;
|
||||
GLenum storage_flags;
|
||||
GLenum map_flags;
|
||||
size_t current_sync_index = 0;
|
||||
};
|
||||
|
||||
class StreamBuffer {
|
||||
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
|
||||
static constexpr size_t NUM_SYNCS = 16;
|
||||
@@ -79,17 +48,4 @@ private:
|
||||
std::array<OGLSync, NUM_SYNCS> fences;
|
||||
};
|
||||
|
||||
class StagingBufferPool {
|
||||
public:
|
||||
StagingBufferPool() = default;
|
||||
~StagingBufferPool() = default;
|
||||
|
||||
StagingBufferMap RequestUploadBuffer(size_t size);
|
||||
StagingBufferMap RequestDownloadBuffer(size_t size);
|
||||
|
||||
private:
|
||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
@@ -451,14 +451,19 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
|
||||
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
ImageBufferMap::~ImageBufferMap() {
|
||||
if (sync) {
|
||||
sync->Create();
|
||||
}
|
||||
}
|
||||
|
||||
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
|
||||
StateTracker& state_tracker_,
|
||||
StagingBufferPool& staging_buffer_pool_)
|
||||
: device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
util_shaders(program_manager), format_conversion_pass{util_shaders},
|
||||
resolution{Settings::values.resolution_info} {
|
||||
StateTracker& state_tracker_)
|
||||
: device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
|
||||
format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
|
||||
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
|
||||
for (size_t i = 0; i < TARGETS.size(); ++i) {
|
||||
const GLenum target = TARGETS[i];
|
||||
@@ -548,12 +553,12 @@ void TextureCacheRuntime::Finish() {
|
||||
glFinish();
|
||||
}
|
||||
|
||||
StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.RequestUploadBuffer(size);
|
||||
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
|
||||
return upload_buffers.RequestMap(size, true);
|
||||
}
|
||||
|
||||
StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return staging_buffer_pool.RequestDownloadBuffer(size);
|
||||
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
|
||||
return download_buffers.RequestMap(size, false);
|
||||
}
|
||||
|
||||
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
|
||||
@@ -638,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
|
||||
is_linear ? GL_LINEAR : GL_NEAREST);
|
||||
}
|
||||
|
||||
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
|
||||
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
switch (image.info.type) {
|
||||
case ImageType::e2D:
|
||||
@@ -680,6 +685,64 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
|
||||
return device.HasASTC();
|
||||
}
|
||||
|
||||
TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
|
||||
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
|
||||
|
||||
TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
|
||||
|
||||
ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
|
||||
bool insert_fence) {
|
||||
const size_t index = RequestBuffer(requested_size);
|
||||
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
|
||||
return ImageBufferMap{
|
||||
.mapped_span = std::span(maps[index], requested_size),
|
||||
.sync = sync,
|
||||
.buffer = buffers[index].handle,
|
||||
};
|
||||
}
|
||||
|
||||
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
|
||||
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
|
||||
return *index;
|
||||
}
|
||||
|
||||
OGLBuffer& buffer = buffers.emplace_back();
|
||||
buffer.Create();
|
||||
glNamedBufferStorage(buffer.handle, requested_size, nullptr,
|
||||
storage_flags | GL_MAP_PERSISTENT_BIT);
|
||||
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
|
||||
map_flags | GL_MAP_PERSISTENT_BIT)));
|
||||
|
||||
syncs.emplace_back();
|
||||
sizes.push_back(requested_size);
|
||||
|
||||
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
|
||||
maps.size() == sizes.size());
|
||||
|
||||
return buffers.size() - 1;
|
||||
}
|
||||
|
||||
std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
|
||||
size_t smallest_buffer = std::numeric_limits<size_t>::max();
|
||||
std::optional<size_t> found;
|
||||
const size_t num_buffers = sizes.size();
|
||||
for (size_t index = 0; index < num_buffers; ++index) {
|
||||
const size_t buffer_size = sizes[index];
|
||||
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
|
||||
continue;
|
||||
}
|
||||
if (syncs[index].handle != 0) {
|
||||
if (!syncs[index].IsSignaled()) {
|
||||
continue;
|
||||
}
|
||||
syncs[index].Release();
|
||||
}
|
||||
smallest_buffer = buffer_size;
|
||||
found = index;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
|
||||
VAddr cpu_addr_)
|
||||
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
|
||||
@@ -755,7 +818,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
|
||||
}
|
||||
}
|
||||
|
||||
void Image::UploadMemory(const StagingBufferMap& map,
|
||||
void Image::UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
UploadMemory(map.buffer, map.offset, copies);
|
||||
}
|
||||
@@ -802,7 +865,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
|
||||
}
|
||||
}
|
||||
|
||||
void Image::DownloadMemory(StagingBufferMap& map,
|
||||
void Image::DownloadMemory(ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies) {
|
||||
DownloadMemory(map.buffer, map.offset, copies);
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
#include "video_core/texture_cache/texture_cache_base.h"
|
||||
@@ -38,6 +37,15 @@ using VideoCommon::Region2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
using VideoCommon::SlotVector;
|
||||
|
||||
struct ImageBufferMap {
|
||||
~ImageBufferMap();
|
||||
|
||||
std::span<u8> mapped_span;
|
||||
size_t offset = 0;
|
||||
OGLSync* sync;
|
||||
GLuint buffer;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
GLenum compatibility_class;
|
||||
bool compatibility_by_size;
|
||||
@@ -66,15 +74,14 @@ class TextureCacheRuntime {
|
||||
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
|
||||
StateTracker& state_tracker,
|
||||
StagingBufferPool& staging_buffer_pool);
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheRuntime();
|
||||
|
||||
void Finish();
|
||||
|
||||
StagingBufferMap UploadStagingBuffer(size_t size);
|
||||
ImageBufferMap UploadStagingBuffer(size_t size);
|
||||
|
||||
StagingBufferMap DownloadStagingBuffer(size_t size);
|
||||
ImageBufferMap DownloadStagingBuffer(size_t size);
|
||||
|
||||
u64 GetDeviceLocalMemory() const {
|
||||
return device_access_memory;
|
||||
@@ -113,7 +120,7 @@ public:
|
||||
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void InsertUploadMemoryBarrier();
|
||||
@@ -142,16 +149,35 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
~StagingBuffers();
|
||||
|
||||
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
||||
|
||||
size_t RequestBuffer(size_t requested_size);
|
||||
|
||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||
|
||||
std::vector<OGLSync> syncs;
|
||||
std::vector<OGLBuffer> buffers;
|
||||
std::vector<u8*> maps;
|
||||
std::vector<size_t> sizes;
|
||||
GLenum storage_flags;
|
||||
GLenum map_flags;
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
StagingBufferPool& staging_buffer_pool;
|
||||
|
||||
UtilShaders util_shaders;
|
||||
FormatConversionPass format_conversion_pass;
|
||||
|
||||
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
|
||||
bool has_broken_texture_view_formats = false;
|
||||
|
||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
|
||||
|
||||
OGLTexture null_image_1d_array;
|
||||
OGLTexture null_image_cube_array;
|
||||
OGLTexture null_image_3d;
|
||||
@@ -187,7 +213,7 @@ public:
|
||||
void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const StagingBufferMap& map,
|
||||
void UploadMemory(const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
|
||||
@@ -196,8 +222,7 @@ public:
|
||||
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void DownloadMemory(StagingBufferMap& map,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint StorageHandle() noexcept;
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_util.h"
|
||||
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
@@ -64,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
|
||||
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
|
||||
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
@@ -112,7 +111,7 @@ void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
@@ -149,7 +148,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
|
||||
|
||||
@@ -190,7 +189,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
|
||||
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||
|
||||
@@ -16,23 +16,23 @@ namespace OpenGL {
|
||||
class Image;
|
||||
class ProgramManager;
|
||||
|
||||
struct StagingBufferMap;
|
||||
struct ImageBufferMap;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void ASTCDecode(Image& image, const StagingBufferMap& map,
|
||||
void ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void PitchUpload(Image& image, const StagingBufferMap& map,
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void CopyBC4(Image& dst_image, Image& src_image,
|
||||
|
||||
@@ -157,7 +157,6 @@ struct BufferCacheParams {
|
||||
static constexpr bool USE_MEMORY_MAPS = true;
|
||||
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
|
||||
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
|
||||
};
|
||||
|
||||
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
|
||||
|
||||
Reference in New Issue
Block a user