Fix
This commit is contained in:
@@ -52,6 +52,8 @@ void LogSettings() {
|
||||
log_setting("Renderer_FrameLimit", values.frame_limit.GetValue());
|
||||
log_setting("Renderer_UseDiskShaderCache", values.use_disk_shader_cache.GetValue());
|
||||
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
|
||||
log_setting("Renderer_UseGarbageCollect", values.use_garbage_collect.GetValue());
|
||||
log_setting("Renderer_GarbageCollectLevel", values.garbage_collect_level.GetValue());
|
||||
log_setting("Renderer_UseAsynchronousGpuEmulation",
|
||||
values.use_asynchronous_gpu_emulation.GetValue());
|
||||
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
|
||||
@@ -95,6 +97,24 @@ bool IsFastmemEnabled() {
|
||||
return values.cpuopt_fastmem;
|
||||
}
|
||||
return true;
|
||||
|
||||
bool UseGarbageCollect() {
|
||||
return values.use_garbage_collect.GetValue();
|
||||
}
|
||||
|
||||
std::chrono::minutes GarbageCollectTimer() {
|
||||
switch (values.garbage_collect_level.GetValue()) {
|
||||
case Settings::GCLevel::Aggressive:
|
||||
return std::chrono::minutes(1);
|
||||
case Settings::GCLevel::Normal:
|
||||
return std::chrono::minutes(4);
|
||||
case Settings::GCLevel::Relaxed:
|
||||
return std::chrono::minutes(10);
|
||||
}
|
||||
UNREACHABLE_MSG("Garbage collection set to unknown value!",
|
||||
static_cast<int>(values.garbage_collect_level.GetValue()));
|
||||
values.use_garbage_collect.SetValue(false);
|
||||
return {};
|
||||
}
|
||||
|
||||
float Volume() {
|
||||
@@ -133,6 +153,8 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||
values.frame_limit.SetGlobal(true);
|
||||
values.use_disk_shader_cache.SetGlobal(true);
|
||||
values.gpu_accuracy.SetGlobal(true);
|
||||
values.use_garbage_collect.SetGlobal(true);
|
||||
values.garbage_collect_level.SetGlobal(true);
|
||||
values.use_asynchronous_gpu_emulation.SetGlobal(true);
|
||||
values.use_nvdec_emulation.SetGlobal(true);
|
||||
values.use_vsync.SetGlobal(true);
|
||||
|
||||
@@ -34,6 +34,12 @@ enum class CPUAccuracy : u32 {
|
||||
DebugMode = 2,
|
||||
};
|
||||
|
||||
/// How eagerly the garbage collector expires unused resources.
/// The frontends cast these enumerators to and from plain integers when reading
/// and writing the configuration, so the numeric values are spelled out.
enum class GCLevel : u32 {
    Aggressive = 0, ///< Mapped to a 1 minute expiration by GarbageCollectTimer()
    Normal = 1,     ///< Mapped to a 4 minute expiration by GarbageCollectTimer()
    Relaxed = 2,    ///< Mapped to a 10 minute expiration by GarbageCollectTimer()
};
|
||||
|
||||
template <typename Type>
|
||||
class Setting final {
|
||||
public:
|
||||
@@ -145,6 +151,8 @@ struct Values {
|
||||
Setting<u16> frame_limit;
|
||||
Setting<bool> use_disk_shader_cache;
|
||||
Setting<GPUAccuracy> gpu_accuracy;
|
||||
Setting<bool> use_garbage_collect;
|
||||
Setting<GCLevel> garbage_collect_level;
|
||||
Setting<bool> use_asynchronous_gpu_emulation;
|
||||
Setting<bool> use_nvdec_emulation;
|
||||
Setting<bool> use_vsync;
|
||||
@@ -253,6 +261,9 @@ bool IsGPULevelHigh();
|
||||
|
||||
bool IsFastmemEnabled();
|
||||
|
||||
bool UseGarbageCollect();
|
||||
std::chrono::minutes GarbageCollectTimer();
|
||||
|
||||
float Volume();
|
||||
|
||||
std::string GetTimeZoneString();
|
||||
|
||||
@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
|
||||
return "Unknown";
|
||||
}
|
||||
|
||||
/// Converts a garbage collection level into the name reported in the telemetry field.
/// Any value that is not one of the known enumerators is reported as "Unknown".
static const char* TranslateGarbageCollectLevel(Settings::GCLevel backend) {
    const char* level_name = "Unknown";
    switch (backend) {
    case Settings::GCLevel::Aggressive:
        level_name = "Aggressive";
        break;
    case Settings::GCLevel::Normal:
        level_name = "Normal";
        break;
    case Settings::GCLevel::Relaxed:
        level_name = "Relaxed";
        break;
    }
    return level_name;
}
|
||||
|
||||
u64 GetTelemetryId() {
|
||||
u64 telemetry_id{};
|
||||
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
|
||||
@@ -226,6 +238,10 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
|
||||
Settings::values.use_disk_shader_cache.GetValue());
|
||||
AddField(field_type, "Renderer_GPUAccuracyLevel",
|
||||
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
|
||||
AddField(field_type, "Renderer_UseGarbageCollect",
|
||||
Settings::values.use_garbage_collect.GetValue());
|
||||
AddField(field_type, "Renderer_GarbageCollectLevel",
|
||||
TranslateGarbageCollectLevel(Settings::values.garbage_collect_level.GetValue()));
|
||||
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
||||
Settings::values.use_asynchronous_gpu_emulation.GetValue());
|
||||
AddField(field_type, "Renderer_UseNvdecEmulation",
|
||||
|
||||
@@ -635,6 +635,7 @@ void RasterizerVulkan::TickFrame() {
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
buffer_cache.TickFrame();
|
||||
}
|
||||
memory_allocator.TickFrame();
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
|
||||
@@ -62,7 +62,8 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
|
||||
unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
|
||||
converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
|
||||
cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
|
||||
mip_level_offsets{CalculateMipLevelOffsets(info)} {
|
||||
last_access_time{std::chrono::steady_clock::now()}, mip_level_offsets{
|
||||
CalculateMipLevelOffsets(info)} {
|
||||
if (info.type == ImageType::e3D) {
|
||||
slice_offsets = CalculateSliceOffsets(info);
|
||||
slice_subresources = CalculateSliceSubresources(info);
|
||||
|
||||
@@ -62,6 +62,7 @@ struct ImageBase {
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
std::chrono::time_point<std::chrono::steady_clock> last_access_time;
|
||||
|
||||
std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
|
||||
|
||||
|
||||
@@ -70,6 +70,14 @@ public:
|
||||
ResetStorageBit(id.index);
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
return stored_bitset.size() * 64;
|
||||
}
|
||||
|
||||
/// Returns the storage bit recorded for the slot at `id.index`.
/// NOTE(review): the garbage collection loops skip a slot when this returns false and
/// access the slot when it returns true, so a true result appears to mean the slot is
/// occupied rather than free - confirm the intended meaning of the name.
[[nodiscard]] bool IsIndexFree(SlotId id) noexcept {
    const bool storage_bit = ReadStorageBit(id.index);
    return storage_bit;
}
|
||||
|
||||
private:
|
||||
struct NonTrivialDummy {
|
||||
NonTrivialDummy() noexcept {}
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@@ -22,6 +23,7 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/compatible_formats.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
@@ -57,12 +59,23 @@ using VideoCore::Surface::PixelFormat;
|
||||
using VideoCore::Surface::PixelFormatFromDepthFormat;
|
||||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
using Clock = std::chrono::steady_clock;
|
||||
|
||||
template <class P>
|
||||
class TextureCache {
|
||||
/// Address shift for caching images into a hash table
|
||||
static constexpr u64 PAGE_BITS = 20;
|
||||
|
||||
/// Time between checking for expired images
|
||||
static constexpr std::chrono::seconds GC_TICK_TIME = std::chrono::seconds(10);
|
||||
|
||||
/// Number of past cache sizes to keep
|
||||
static constexpr size_t NUM_CACHE_HISTORY = 12;
|
||||
/// Lower bound, in MB, for the amount of image data the GC may remove per tick
|
||||
static constexpr size_t CACHE_REMOVAL_MIN_MB = 16;
|
||||
/// Upper bound, in MB, for the amount of image data the GC may remove per tick
|
||||
static constexpr size_t CACHE_REMOVAL_MAX_MB = 1024;
|
||||
|
||||
/// Enables debugging features to the texture cache
|
||||
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
|
||||
/// Implement blits as copies between framebuffers
|
||||
@@ -308,6 +321,12 @@ private:
|
||||
/// Returns true if the current clear parameters clear the whole image of a given image view
|
||||
[[nodiscard]] bool IsFullClear(ImageViewId id);
|
||||
|
||||
/// Tick the garbage collector to free up unused images
|
||||
void TickGC();
|
||||
|
||||
/// Update image expiration times from current framebuffers
|
||||
void UpdateFramebufferReferences();
|
||||
|
||||
Runtime& runtime;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
@@ -340,6 +359,8 @@ private:
|
||||
SlotVector<Sampler> slot_samplers;
|
||||
SlotVector<Framebuffer> slot_framebuffers;
|
||||
|
||||
std::unordered_set<ImageViewId> active_framebuffer_imageviews;
|
||||
|
||||
// TODO: This data structure is not optimal and it should be reworked
|
||||
std::vector<ImageId> uncommitted_downloads;
|
||||
std::queue<std::vector<ImageId>> committed_downloads;
|
||||
@@ -353,6 +374,19 @@ private:
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
|
||||
const bool GC_ENABLED;
|
||||
/// Time an image may go without being accessed before the GC removes it
|
||||
const std::chrono::minutes GC_EXPIRATION_TIME;
|
||||
|
||||
std::chrono::time_point<std::chrono::steady_clock> GC_TIMER;
|
||||
std::chrono::time_point<std::chrono::steady_clock> GC_FRAMEBUFF_TIMER;
|
||||
/// Number of completed GC passes; selects which entry of cache_size_history_mb is
/// overwritten next. Must start at zero: it is read before it is ever assigned.
size_t gc_ticks = 0;
|
||||
std::chrono::time_point<std::chrono::steady_clock> current_time;
|
||||
size_t current_cache_size_bytes = 0;
|
||||
size_t current_cache_removal_size_mb = CACHE_REMOVAL_MIN_MB;
|
||||
std::array<s64, NUM_CACHE_HISTORY> cache_size_history_mb;
|
||||
size_t current_cache_index = 0;
|
||||
};
|
||||
|
||||
template <class P>
|
||||
@@ -361,7 +395,10 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_},
|
||||
GC_ENABLED{Settings::UseGarbageCollect()},
|
||||
GC_EXPIRATION_TIME{Settings::GarbageCollectTimer()}, GC_TIMER{Clock::now()},
|
||||
GC_FRAMEBUFF_TIMER{GC_TIMER}, current_time{GC_TIMER} {
|
||||
// Configure null sampler
|
||||
TSCEntry sampler_descriptor{};
|
||||
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
|
||||
@@ -373,14 +410,23 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||
// This way the null resource becomes a compile time constant
|
||||
void(slot_image_views.insert(runtime, NullImageParams{}));
|
||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||
|
||||
// Fill the cache with the average to start with
|
||||
cache_size_history_mb.fill((CACHE_REMOVAL_MIN_MB + CACHE_REMOVAL_MAX_MB) / 2);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickFrame() {
|
||||
current_time = Clock::now();
|
||||
|
||||
// Tick sentenced resources in this order to ensure they are destroyed in the right order
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
|
||||
UpdateFramebufferReferences();
|
||||
TickGC();
|
||||
|
||||
++frame_tick;
|
||||
}
|
||||
|
||||
@@ -540,6 +586,17 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
|
||||
template <class P>
|
||||
FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
|
||||
const auto [pair, is_new] = framebuffers.try_emplace(key);
|
||||
|
||||
for (const auto& id : pair->first.color_buffer_ids) {
|
||||
if (!id) {
|
||||
break;
|
||||
}
|
||||
active_framebuffer_imageviews.insert(id);
|
||||
}
|
||||
if (pair->first.depth_buffer_id) {
|
||||
active_framebuffer_imageviews.insert(pair->first.depth_buffer_id);
|
||||
}
|
||||
|
||||
FramebufferId& framebuffer_id = pair->second;
|
||||
if (!is_new) {
|
||||
return framebuffer_id;
|
||||
@@ -609,7 +666,9 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image);
|
||||
}
|
||||
UnregisterImage(id);
|
||||
if (True(image.flags & ImageFlagBits::Registered)) {
|
||||
UnregisterImage(id);
|
||||
}
|
||||
DeleteImage(id);
|
||||
}
|
||||
}
|
||||
@@ -718,6 +777,7 @@ void TextureCache<P>::InvalidateColorBuffer(size_t index) {
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
|
||||
active_framebuffer_imageviews.erase(color_buffer_id);
|
||||
runtime.InvalidateColorBuffer(color_buffer, index);
|
||||
}
|
||||
|
||||
@@ -734,6 +794,7 @@ void TextureCache<P>::InvalidateDepthBuffer() {
|
||||
image.flags &= ~ImageFlagBits::CpuModified;
|
||||
image.flags &= ~ImageFlagBits::GpuModified;
|
||||
|
||||
active_framebuffer_imageviews.erase(depth_buffer_id);
|
||||
ImageView& depth_buffer = slot_image_views[depth_buffer_id];
|
||||
runtime.InvalidateDepthBuffer(depth_buffer);
|
||||
}
|
||||
@@ -999,6 +1060,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
});
|
||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||
Image& new_image = slot_images[new_image_id];
|
||||
current_cache_size_bytes += new_image.guest_size_bytes;
|
||||
|
||||
// TODO: Only upload what we need
|
||||
RefreshContents(new_image);
|
||||
@@ -1015,7 +1077,9 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(overlap);
|
||||
}
|
||||
UnregisterImage(overlap_id);
|
||||
if (True(overlap.flags & ImageFlagBits::Registered)) {
|
||||
UnregisterImage(overlap_id);
|
||||
}
|
||||
DeleteImage(overlap_id);
|
||||
}
|
||||
ImageBase& new_image_base = new_image;
|
||||
@@ -1317,6 +1381,7 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
|
||||
auto it = framebuffers.begin();
|
||||
while (it != framebuffers.end()) {
|
||||
if (it->first.Contains(removed_views)) {
|
||||
sentenced_framebuffers.Push(std::move(slot_framebuffers[it->second]));
|
||||
it = framebuffers.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
@@ -1372,6 +1437,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||
MarkModification(image);
|
||||
}
|
||||
image.frame_tick = frame_tick;
|
||||
image.last_access_time = current_time;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
@@ -1492,4 +1558,108 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
|
||||
scissor.max_y >= size.height;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickGC() {
|
||||
if (!GC_ENABLED || current_time - GC_TIMER < GC_TICK_TIME) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t num_removed = 0;
|
||||
size_t removed_mb = 0;
|
||||
bool gc_capped = false;
|
||||
for (u32 i = 0; i < slot_images.Size(); ++i) {
|
||||
const SlotId id{static_cast<u32>(current_cache_index)};
|
||||
++current_cache_index;
|
||||
current_cache_index %= slot_images.Size();
|
||||
|
||||
if (!slot_images.IsIndexFree(id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const Image& image = slot_images[id];
|
||||
if (image.last_access_time + GC_EXPIRATION_TIME >= current_time) {
|
||||
continue;
|
||||
}
|
||||
|
||||
removed_mb += image.guest_size_bytes / 1024 / 1024;
|
||||
current_cache_size_bytes -= image.guest_size_bytes;
|
||||
UnmapMemory(image.cpu_addr, image.guest_size_bytes);
|
||||
++num_removed;
|
||||
if (removed_mb > current_cache_removal_size_mb) {
|
||||
gc_capped = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_removed > 0) {
|
||||
LOG_INFO(HW_Memory, "Removed {} images ({}MB) from texture cache. GC removal cap: {}MB",
|
||||
num_removed, removed_mb, current_cache_removal_size_mb);
|
||||
}
|
||||
|
||||
size_t current_average_cache_size_mb =
|
||||
std::accumulate(cache_size_history_mb.begin(), cache_size_history_mb.end(), size_t{0});
|
||||
current_average_cache_size_mb /= NUM_CACHE_HISTORY * 2;
|
||||
f32 removal_sizef = static_cast<f32>(current_cache_removal_size_mb);
|
||||
f32 current_averagef = static_cast<f32>(current_average_cache_size_mb);
|
||||
|
||||
if (gc_capped) {
|
||||
// GC has capped out, so increase the cache removal
|
||||
// for the next tick, up to CACHE_REMOVAL_MAX_MB
|
||||
f32 ratio = std::max(0.0f, (1 + (current_averagef / removal_sizef)) / NUM_CACHE_HISTORY);
|
||||
current_cache_removal_size_mb += std::llround(removal_sizef * ratio);
|
||||
current_cache_removal_size_mb =
|
||||
std::min(CACHE_REMOVAL_MAX_MB, current_cache_removal_size_mb);
|
||||
if (current_cache_removal_size_mb == CACHE_REMOVAL_MAX_MB) {
|
||||
LOG_WARNING(HW_Memory, "Texture cache GC has maxed out at {}MB per tick!",
|
||||
current_cache_removal_size_mb);
|
||||
}
|
||||
} else {
|
||||
// GC removed less than the current cap, so decrease the cache removal
|
||||
// for the next tick, down to CACHE_REMOVAL_MIN_MB
|
||||
f32 ratio = std::max(0.0f, (1 / (removal_sizef / current_averagef)) / NUM_CACHE_HISTORY);
|
||||
current_cache_removal_size_mb -= std::llround(removal_sizef * ratio);
|
||||
// Can underflow
|
||||
if (current_cache_removal_size_mb > CACHE_REMOVAL_MAX_MB) {
|
||||
current_cache_removal_size_mb = 0;
|
||||
}
|
||||
current_cache_removal_size_mb =
|
||||
std::max(CACHE_REMOVAL_MIN_MB, current_cache_removal_size_mb);
|
||||
}
|
||||
|
||||
GC_TIMER = current_time;
|
||||
cache_size_history_mb[gc_ticks++ % NUM_CACHE_HISTORY] = current_cache_size_bytes / 1024 / 1024;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::UpdateFramebufferReferences() {
|
||||
if (!GC_ENABLED || current_time - GC_FRAMEBUFF_TIMER < GC_EXPIRATION_TIME / 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < slot_images.Size(); ++i) {
|
||||
const SlotId id{i};
|
||||
if (!slot_images.IsIndexFree(id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Image& image = slot_images[id];
|
||||
const bool in_use = std::ranges::any_of(image.image_view_ids, [this](ImageViewId& view) {
|
||||
return active_framebuffer_imageviews.contains(view);
|
||||
});
|
||||
|
||||
if (in_use) {
|
||||
// Update the times for the aliases and views for this image,
|
||||
// as they may not be directly referenced otherwise (i.e pause menus)
|
||||
image.last_access_time = current_time;
|
||||
for (auto& view : image.image_view_ids) {
|
||||
slot_images[slot_image_views[view].image_id].last_access_time = current_time;
|
||||
}
|
||||
for (const auto& alias : image.aliased_images) {
|
||||
slot_images[alias.id].last_access_time = current_time;
|
||||
}
|
||||
}
|
||||
}
|
||||
GC_FRAMEBUFF_TIMER = current_time;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
||||
@@ -146,6 +146,23 @@ public:
|
||||
return (flags & property_flags) == property_flags && (type_mask & shifted_memory_type) != 0;
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t GetCommitCount() const noexcept {
|
||||
return commits.size();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::chrono::time_point<std::chrono::steady_clock> GetLastCommitTime()
|
||||
const noexcept {
|
||||
return last_commit_time;
|
||||
}
|
||||
|
||||
void SetLastCommitTime(std::chrono::time_point<std::chrono::steady_clock> time) noexcept {
|
||||
last_commit_time = time;
|
||||
}
|
||||
|
||||
/// Returns the value of `allocation_size` for this allocation.
[[nodiscard]] u64 AllocationSize() const noexcept {
    const u64 size = allocation_size;
    return size;
}
|
||||
|
||||
private:
|
||||
[[nodiscard]] static constexpr u32 ShiftType(u32 type) {
|
||||
return 1U << type;
|
||||
@@ -177,6 +194,7 @@ private:
|
||||
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
|
||||
std::vector<Range> commits; ///< All commit ranges done from this allocation.
|
||||
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
|
||||
std::chrono::time_point<std::chrono::steady_clock> last_commit_time;
|
||||
#if defined(_WIN32) || defined(__unix__)
|
||||
u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
|
||||
#endif
|
||||
@@ -223,7 +241,8 @@ void MemoryCommit::Release() {
|
||||
|
||||
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
|
||||
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
|
||||
export_allocations{export_allocations_} {}
|
||||
export_allocations{export_allocations_}, GC_ENABLED{Settings::UseGarbageCollect()},
|
||||
GC_EXPIRATION_TIME{Settings::GarbageCollectTimer()}, GC_TIMER{Clock::now()} {}
|
||||
|
||||
MemoryAllocator::~MemoryAllocator() = default;
|
||||
|
||||
@@ -286,6 +305,7 @@ std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirement
|
||||
continue;
|
||||
}
|
||||
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
|
||||
allocation->SetLastCommitTime(GC_TIMER);
|
||||
return commit;
|
||||
}
|
||||
}
|
||||
@@ -326,6 +346,21 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
void MemoryAllocator::TickFrame() {
|
||||
const auto now{Clock::now()};
|
||||
if (!GC_ENABLED || now - GC_TIMER < GC_TICK_TIME) {
|
||||
return;
|
||||
}
|
||||
for (s64 x = static_cast<s64>(allocations.size() - 1); x > 0; --x) {
|
||||
const auto& allocation = allocations[x];
|
||||
if (allocation->GetCommitCount() == 0 &&
|
||||
allocation->GetLastCommitTime() + GC_EXPIRATION_TIME < now) {
|
||||
allocations.erase(allocations.begin() + x);
|
||||
}
|
||||
}
|
||||
GC_TIMER = now;
|
||||
}
|
||||
|
||||
bool IsHostVisible(MemoryUsage usage) noexcept {
|
||||
switch (usage) {
|
||||
case MemoryUsage::DeviceLocal:
|
||||
|
||||
@@ -4,13 +4,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
using Clock = std::chrono::steady_clock;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
@@ -69,6 +73,9 @@ private:
|
||||
/// Memory allocator container.
|
||||
/// Allocates and releases memory allocations on demand.
|
||||
class MemoryAllocator {
|
||||
/// Time between checking for expired allocations
|
||||
static constexpr auto GC_TICK_TIME = std::chrono::seconds(10);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Construct memory allocator
|
||||
@@ -100,6 +107,9 @@ public:
|
||||
/// Commits memory required by the image and binds it.
|
||||
MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
|
||||
|
||||
/// Tick the allocator to free memory
|
||||
void TickFrame();
|
||||
|
||||
private:
|
||||
/// Tries to allocate a chunk of memory.
|
||||
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
|
||||
@@ -118,6 +128,11 @@ private:
|
||||
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
|
||||
const bool export_allocations; ///< True when memory allocations have to be exported.
|
||||
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
|
||||
|
||||
const bool GC_ENABLED;
|
||||
/// Time after its last commit at which an allocation without commits is removed
|
||||
const std::chrono::minutes GC_EXPIRATION_TIME;
|
||||
std::chrono::time_point<std::chrono::steady_clock> GC_TIMER;
|
||||
};
|
||||
|
||||
/// Returns true when a memory usage is guaranteed to be host visible.
|
||||
|
||||
@@ -803,6 +803,11 @@ void Config::ReadRendererValues() {
|
||||
ReadSettingGlobal(Settings::values.use_disk_shader_cache,
|
||||
QStringLiteral("use_disk_shader_cache"), true);
|
||||
ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1);
|
||||
ReadSettingGlobal(Settings::values.use_garbage_collect, QStringLiteral("use_garbage_collect"),
|
||||
true);
|
||||
ReadSettingGlobal(Settings::values.garbage_collect_level,
|
||||
QStringLiteral("garbage_collect_level"),
|
||||
static_cast<int>(Settings::GCLevel::Normal));
|
||||
ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation,
|
||||
QStringLiteral("use_asynchronous_gpu_emulation"), true);
|
||||
ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"),
|
||||
@@ -1384,6 +1389,12 @@ void Config::SaveRendererValues() {
|
||||
WriteSettingGlobal(QStringLiteral("gpu_accuracy"),
|
||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)),
|
||||
Settings::values.gpu_accuracy.UsingGlobal(), 1);
|
||||
WriteSettingGlobal(QStringLiteral("use_garbage_collect"), Settings::values.use_garbage_collect,
|
||||
true);
|
||||
WriteSettingGlobal(QStringLiteral("garbage_collect_level"),
|
||||
static_cast<int>(Settings::values.garbage_collect_level.GetValue(global)),
|
||||
Settings::values.garbage_collect_level.UsingGlobal(),
|
||||
static_cast<int>(Settings::GCLevel::Normal));
|
||||
WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"),
|
||||
Settings::values.use_asynchronous_gpu_emulation, true);
|
||||
WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation,
|
||||
|
||||
@@ -135,3 +135,4 @@ private:
|
||||
Q_DECLARE_METATYPE(Settings::CPUAccuracy);
|
||||
Q_DECLARE_METATYPE(Settings::RendererBackend);
|
||||
Q_DECLARE_METATYPE(Settings::GPUAccuracy);
|
||||
Q_DECLARE_METATYPE(Settings::GCLevel);
|
||||
|
||||
@@ -80,7 +80,6 @@ void ConfigureGeneral::ResetDefaults() {
|
||||
void ConfigureGeneral::ApplyConfiguration() {
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core,
|
||||
use_multi_core);
|
||||
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
|
||||
UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked();
|
||||
|
||||
@@ -22,11 +22,14 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
|
||||
|
||||
void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
|
||||
ui->use_garbage_collect->setEnabled(runtime_lock);
|
||||
ui->garbage_collect_level->setEnabled(runtime_lock);
|
||||
ui->use_vsync->setEnabled(runtime_lock);
|
||||
ui->use_assembly_shaders->setEnabled(runtime_lock);
|
||||
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
|
||||
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
|
||||
|
||||
ui->use_garbage_collect->setChecked(Settings::values.use_garbage_collect.GetValue());
|
||||
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
|
||||
ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
|
||||
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
|
||||
@@ -35,10 +38,14 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
ui->gpu_accuracy->setCurrentIndex(
|
||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue()));
|
||||
ui->garbage_collect_level->setCurrentIndex(
|
||||
static_cast<int>(Settings::values.garbage_collect_level.GetValue()));
|
||||
ui->anisotropic_filtering_combobox->setCurrentIndex(
|
||||
Settings::values.max_anisotropy.GetValue());
|
||||
} else {
|
||||
ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
|
||||
ConfigurationShared::SetPerGameSetting(ui->garbage_collect_level,
|
||||
&Settings::values.garbage_collect_level);
|
||||
ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
|
||||
&Settings::values.max_anisotropy);
|
||||
ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
|
||||
@@ -53,7 +60,12 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||
const auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(
|
||||
ui->gpu_accuracy->currentIndex() -
|
||||
((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET));
|
||||
const auto gc_level = static_cast<Settings::GCLevel>(
|
||||
ui->garbage_collect_level->currentIndex() -
|
||||
((Settings::IsConfiguringGlobal()) ? 0 : ConfigurationShared::USE_GLOBAL_OFFSET));
|
||||
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_garbage_collect,
|
||||
ui->use_garbage_collect, use_garbage_collect);
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
|
||||
ui->anisotropic_filtering_combobox);
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
|
||||
@@ -70,6 +82,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||
if (Settings::values.gpu_accuracy.UsingGlobal()) {
|
||||
Settings::values.gpu_accuracy.SetValue(gpu_accuracy);
|
||||
}
|
||||
if (Settings::values.garbage_collect_level.UsingGlobal()) {
|
||||
Settings::values.garbage_collect_level.SetValue(gc_level);
|
||||
}
|
||||
} else {
|
||||
if (ui->gpu_accuracy->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
|
||||
Settings::values.gpu_accuracy.SetGlobal(true);
|
||||
@@ -77,6 +92,12 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||
Settings::values.gpu_accuracy.SetGlobal(false);
|
||||
Settings::values.gpu_accuracy.SetValue(gpu_accuracy);
|
||||
}
|
||||
if (ui->garbage_collect_level->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
|
||||
Settings::values.garbage_collect_level.SetGlobal(true);
|
||||
} else {
|
||||
Settings::values.garbage_collect_level.SetGlobal(false);
|
||||
Settings::values.garbage_collect_level.SetValue(gc_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,6 +117,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||
// Disable if not global (only happens during game)
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
|
||||
ui->use_garbage_collect->setEnabled(Settings::values.use_garbage_collect.UsingGlobal());
|
||||
ui->garbage_collect_level->setEnabled(Settings::values.garbage_collect_level.UsingGlobal());
|
||||
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
|
||||
ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal());
|
||||
ui->use_asynchronous_shaders->setEnabled(
|
||||
@@ -107,6 +130,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||
return;
|
||||
}
|
||||
|
||||
ConfigurationShared::SetColoredTristate(
|
||||
ui->use_garbage_collect, Settings::values.use_garbage_collect, use_garbage_collect);
|
||||
ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
|
||||
ConfigurationShared::SetColoredTristate(
|
||||
ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders);
|
||||
@@ -118,6 +143,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||
ConfigurationShared::SetColoredComboBox(
|
||||
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
||||
ConfigurationShared::SetColoredComboBox(
|
||||
ui->garbage_collect_level, ui->garbage_collect_level,
|
||||
static_cast<int>(Settings::values.garbage_collect_level.GetValue(true)));
|
||||
ConfigurationShared::SetColoredComboBox(
|
||||
ui->anisotropic_filtering_combobox, ui->af_label,
|
||||
static_cast<int>(Settings::values.max_anisotropy.GetValue(true)));
|
||||
|
||||
@@ -34,6 +34,7 @@ private:
|
||||
|
||||
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
|
||||
|
||||
ConfigurationShared::CheckState use_garbage_collect;
|
||||
ConfigurationShared::CheckState use_vsync;
|
||||
ConfigurationShared::CheckState use_assembly_shaders;
|
||||
ConfigurationShared::CheckState use_asynchronous_shaders;
|
||||
|
||||
@@ -66,6 +66,42 @@
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="garbage_collect_layout">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="use_garbage_collect">
|
||||
<property name="toolTip">
|
||||
<string>Enable periodic removal of textures from memory, reducing VRAM usage. Lower settings work more often, lowering VRAM usage but potentially causes hitching. Set to a shorter duration if you experience out of memory crashes.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Garbage Collector:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QComboBox" name="garbage_collect_level">
|
||||
<property name="currentIndex">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string notr="true">Aggressive</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string notr="true">Normal</string>
|
||||
</property>
|
||||
</item>
|
||||
<item>
|
||||
<property name="text">
|
||||
<string notr="true">Relaxed</string>
|
||||
</property>
|
||||
</item>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="use_vsync">
|
||||
<property name="toolTip">
|
||||
|
||||
@@ -439,6 +439,12 @@ void Config::ReadValues() {
|
||||
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false));
|
||||
const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1);
|
||||
Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level));
|
||||
Settings::values.use_garbage_collect.SetValue(
|
||||
sdl2_config->GetBoolean("Renderer", "use_garbage_collect", true));
|
||||
const int garbage_collect_level = sdl2_config->GetInteger(
|
||||
"Renderer", "garbage_collect_level", static_cast<int>(Settings::GCLevel::Normal));
|
||||
Settings::values.garbage_collect_level.SetValue(
|
||||
static_cast<Settings::GCLevel>(garbage_collect_level));
|
||||
Settings::values.use_asynchronous_gpu_emulation.SetValue(
|
||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true));
|
||||
Settings::values.use_vsync.SetValue(
|
||||
|
||||
@@ -210,6 +210,16 @@ use_disk_shader_cache =
|
||||
# 0 (Normal), 1 (High), 2 (Extreme)
|
||||
gpu_accuracy =
|
||||
|
||||
# Enable the garbage collector?
|
||||
# 0: Off, 1: On (default)
|
||||
use_garbage_collect =
|
||||
|
||||
# How quickly the garbage collector removes images from the texture cache
|
||||
# 0 (Aggressive): 1 minute
|
||||
# 1 (Normal): 4 minutes
|
||||
# 2 (Relaxed): 10 minutes
|
||||
garbage_collect_level =
|
||||
|
||||
# Whether to use asynchronous GPU emulation
|
||||
# 0 : Off (slow), 1 (default): On (fast)
|
||||
use_asynchronous_gpu_emulation =
|
||||
|
||||
Reference in New Issue
Block a user