Compare commits

...

47 Commits

Author SHA1 Message Date
yzct12345
d203b24da0 vulkan_device: Disable CollectToolingInfo 2021-08-11 07:26:29 +00:00
bunnei
e6b80c2cf8 Merge pull request #6776 from lat9nq/ranged-settings
settings: Implement settings ranges
2021-08-10 21:19:01 -07:00
Fernando S
6a082df427 Merge pull request #6820 from yzct12345/split-cache
texture_cache: Split out template definitions
2021-08-10 12:23:05 +02:00
Ameer J
9fbe188c01 Merge pull request #6837 from german77/no-pause-screenshot
main: Avoid stopping emulation when taking a screenshot
2021-08-09 23:49:48 -04:00
bunnei
7df790f1ae Merge pull request #6823 from yzct12345/memory-cleanup
memory: Clean up code
2021-08-09 17:09:56 -07:00
bunnei
3e3bd425c1 Merge pull request #6839 from ameerj/frame-cap-positon
configure_general: Swap positions of speed limit and frame limit options
2021-08-09 12:32:07 -07:00
Mai M
2da91ec75b Merge pull request #6844 from ameerj/vp9-empty-frame
vp9: Ensure the first frame is complete
2021-08-08 19:02:39 -04:00
bunnei
b9eee1c539 Merge pull request #6843 from FernandoS27/lives-in-a-pineapple-under-the-sea-2
yuzu-cmd/CMakeLists: Correct attribution for this function.
2021-08-08 11:31:47 -07:00
Fernando Sahmkow
23ca1eb82e yuzu-cmd/CMakeLists: Correct attribution for this function. 2021-08-08 20:24:53 +02:00
ameerj
fa22695705 vp9: Ensure the first frame is complete
Silences a runtime error due to the first frame missing the frame data, and being set to hidden despite being a key-frame.
2021-08-08 13:49:00 -04:00
yzct12345
c4eafcc861 texture_cache: Address ameerj's review 2021-08-08 11:02:51 +00:00
Fernando S
859deda3bb Merge pull request #6834 from K0bin/buffer-image-granularity
Respect Vulkan bufferImageGranularity
2021-08-08 11:57:40 +02:00
bunnei
b023413c98 Merge pull request #6698 from german77/SDL_QoL
input_common: Improve SDL joystick and hide toggle option
2021-08-08 02:44:42 -07:00
bunnei
00358e2098 Merge pull request #6817 from gidoly/patch-1
Add description to fast gpu time option
2021-08-08 01:11:47 -07:00
ameerj
8e0cc3e59a configure_general: Swap positions of speed limit and frame limit options 2021-08-08 01:00:40 -04:00
german77
48b6d41f1b input_common: Improve SDL joystick and hide toggle option 2021-08-07 23:11:23 -05:00
bunnei
63325cafbe Merge pull request #6827 from Morph1984/uuid-hash
common: uuid: Add hash function for UUID
2021-08-07 17:18:46 -07:00
german77
acce512ae8 main: Avoid stopping emulation when taking a screenshot 2021-08-07 15:45:29 -05:00
bunnei
bd0e1d3a25 Merge pull request #6830 from ameerj/nvdec-unimpld-codec
nvdec: Better logging for unimplemented codecs
2021-08-07 12:37:39 -07:00
Robin Kertels
bb29dcb7f2 vulkan_memory_allocator: Respect bufferImageGranularity 2021-08-07 15:28:05 +02:00
bunnei
456adb95ff Merge pull request #6795 from sankasan/cmd-remove-cursor-fullscreen
yuzu-cmd: hide mouse cursor when started fullscreen
2021-08-07 02:00:29 -07:00
bunnei
bd1a764827 Merge pull request #6815 from german77/ui_improvements
settings_ui: Add emulated joystick position dot to controller preview
2021-08-06 23:54:23 -07:00
bunnei
268b5764c7 Merge pull request #6791 from ameerj/astc-opt
astc_decoder: Various performance and memory optimizations
2021-08-06 21:45:24 -07:00
yzct12345
5f97f74a9a memory: Address lioncash's review 2021-08-07 03:03:21 +00:00
yzct12345
70cc4c0f46 memory: Dedup Read and Write and fix logging bugs 2021-08-07 01:32:06 +00:00
yzct12345
e80323b8b0 texture_cache: Address ameerj's review 2021-08-07 01:27:47 +00:00
Morph
d20c5ac720 common: uuid: Add hash function for UUID
Used when UUID is a key in an unordered_map. The hash is produced by XORing the high and low 64-bits of the UUID together.
2021-08-06 00:41:55 -04:00
yzct12345
e611f522c2 memory: Clean up CopyBlock too 2021-08-05 21:09:08 +00:00
gidoly
8ba551e1cd Update configure_graphics_advanced.ui
Add description to fast gpu time
2021-08-06 06:08:12 +09:00
yzct12345
02e98f6c93 texture_cache: Don't change copyright year 2021-08-05 20:52:12 +00:00
yzct12345
5566f3dbc0 texture_cache: Address ameerj's review 2021-08-05 20:46:24 +00:00
yzct12345
4edfa6ad8f memory: Address lioncash's review 2021-08-05 20:29:43 +00:00
yzct12345
6df9611059 memory: Clean up code 2021-08-05 20:11:14 +00:00
yzct12345
f9563c8f24 texture_cache: Split templates out 2021-08-05 13:52:30 +00:00
german77
d5bf597436 settings_ui: Use better colors for the light theme 2021-08-04 11:47:06 -05:00
german77
1fb158ce90 settings_ui: Add emulated joystick position dot to controller preview 2021-08-04 11:46:54 -05:00
san
3e26141483 yuzu-cmd: hide cursor when in fullscreen
Exposed the SDL_ShowCursor function to EmuWindow baseclass. When creating the window (GL or VK) in fullscreen it now automatically hides the cursor.
2021-08-01 21:46:13 +02:00
ameerj
c439fc9be9 astc_decoder: Reduce workgroup size
This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32.
2021-08-01 01:22:27 -04:00
ameerj
5ab8053511 astc_decoder: Compute offset swizzles in-shader
Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes.
2021-08-01 01:22:26 -04:00
ameerj
b2862e4772 astc_decoder: Make use of uvec4 for payload data 2021-07-31 22:28:04 -04:00
ameerj
a75d70fa90 astc_decoder: Simplify Select2DPartition 2021-07-31 21:36:26 -04:00
ameerj
5665d05547 astc_decoder: Optimize the use of EncodingData
This buffer was a list of EncodingData structures sorted by their bit length, with some duplication from the cpu decoder implementation.
We can take advantage of its sorted property to optimize its usage in the shader.

Thanks to wwylele for the optimization idea.
2021-07-31 21:36:26 -04:00
ameerj
15c0c213b1 astc.h: Move data to cpp implementation
Moves leftover values that are no longer used by the gpu decoder back to the cpp implementation.
2021-07-31 21:26:42 -04:00
lat9nq
3862511a9a settings: Use std::clamp where possible
Addresses PR review

Co-authored-by: PixelyIon <pixelyion@protonmail.com>
2021-07-31 17:20:12 -04:00
lat9nq
e9cf08c241 settings: Remove unnecessary std::move usages
Addresses review feedback.

Co-authored-by: Mai M. <mathew1800@gmail.com>
2021-07-30 18:44:50 -04:00
lat9nq
7737bdfd1a settings: Fix function virtualization
Fixes a theoretical scenario where a Setting is using the BasicSetting's
GetValue function. In practice this probably only happens on yuzu-cmd,
where there is no need for a Setting's additional features. Need to fix
regardless.
2021-07-30 13:33:35 -04:00
lat9nq
a1f19b61f8 settings: Implement setting ranges
Clamps the setting's values against the specified minimum and maximum
values.
2021-07-30 13:33:21 -04:00
44 changed files with 1168 additions and 1466 deletions

View File

@@ -51,11 +51,11 @@ QPushButton#GPUStatusBarButton:hover {
}
QPushButton#GPUStatusBarButton:checked {
color: #ff8040;
color: #b06020;
}
QPushButton#GPUStatusBarButton:!checked {
color: #40dd40;
color: #109010;
}
QPushButton#buttonRefreshDevices {

View File

@@ -4,6 +4,7 @@
#pragma once
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
@@ -81,7 +82,7 @@ public:
*
* @returns A reference to the setting
*/
[[nodiscard]] const Type& GetValue() const {
[[nodiscard]] virtual const Type& GetValue() const {
return global;
}
@@ -90,7 +91,7 @@ public:
*
* @param value The desired value
*/
void SetValue(const Type& value) {
virtual void SetValue(const Type& value) {
Type temp{value};
std::swap(global, temp);
}
@@ -120,7 +121,7 @@ public:
*
* @returns A reference to the setting
*/
const Type& operator=(const Type& value) {
virtual const Type& operator=(const Type& value) {
Type temp{value};
std::swap(global, temp);
return global;
@@ -131,7 +132,7 @@ public:
*
* @returns A reference to the setting
*/
explicit operator const Type&() const {
explicit virtual operator const Type&() const {
return global;
}
@@ -141,6 +142,51 @@ protected:
const std::string label{}; ///< The setting's label
};
/**
 * BasicRangedSetting is a BasicSetting whose stored value is kept inside a closed interval.
 * It is meant for quantifiable settings whose legal range is narrower than what the underlying
 * type can represent. The bounds are applied on every write made through SetValue or the
 * assignment operator.
 */
template <typename Type>
class BasicRangedSetting : virtual public BasicSetting<Type> {
public:
    /**
     * Builds the setting from its default value, its bounds, and its label.
     *
     * default_val is forwarded to BasicSetting as given; this constructor does not clamp it.
     *
     * @param default_val Initial value of the setting, and default value of the setting
     * @param min_val Lowest value the setting is allowed to hold
     * @param max_val Highest value the setting is allowed to hold
     * @param name Label for the setting
     */
    explicit BasicRangedSetting(const Type& default_val, const Type& min_val, const Type& max_val,
                                const std::string& name)
        : BasicSetting<Type>{default_val, name}, minimum{min_val}, maximum{max_val} {}
    ~BasicRangedSetting() = default;

    /**
     * Stores the given value after limiting it to [minimum, maximum].
     *
     * @param value The desired value
     */
    void SetValue(const Type& value) override {
        const Type& bounded = std::clamp(value, minimum, maximum);
        this->global = bounded;
    }

    /**
     * Assignment counterpart of SetValue: the value is limited to [minimum, maximum] before it
     * is stored.
     *
     * @param value The desired value
     * @returns A reference to the setting's value
     */
    const Type& operator=(const Type& value) override {
        const Type& bounded = std::clamp(value, minimum, maximum);
        this->global = bounded;
        return this->global;
    }

    const Type minimum; ///< Lower bound applied to written values
    const Type maximum; ///< Upper bound applied to written values
};
/**
* The Setting class is a slightly more complex version of the BasicSetting class. This adds a
* custom setting to switch to when a guest application specifically requires it. The effect is that
@@ -152,7 +198,7 @@ protected:
* Like the BasicSetting, this requires setting a default value and label to use.
*/
template <typename Type>
class Setting final : public BasicSetting<Type> {
class Setting : virtual public BasicSetting<Type> {
public:
/**
* Sets a default value, label, and setting value.
@@ -191,7 +237,13 @@ public:
*
* @returns The required value of the setting
*/
[[nodiscard]] const Type& GetValue(bool need_global = false) const {
[[nodiscard]] const Type& GetValue() const override {
if (use_global) {
return this->global;
}
return custom;
}
[[nodiscard]] const Type& GetValue(bool need_global) const {
if (use_global || need_global) {
return this->global;
}
@@ -203,7 +255,7 @@ public:
*
* @param value The new value
*/
void SetValue(const Type& value) {
void SetValue(const Type& value) override {
Type temp{value};
if (use_global) {
std::swap(this->global, temp);
@@ -219,7 +271,7 @@ public:
*
* @returns A reference to the current setting value
*/
const Type& operator=(const Type& value) {
const Type& operator=(const Type& value) override {
Type temp{value};
if (use_global) {
std::swap(this->global, temp);
@@ -234,18 +286,72 @@ public:
*
* @returns A reference to the current setting value
*/
explicit operator const Type&() const {
explicit operator const Type&() const override {
if (use_global) {
return this->global;
}
return custom;
}
private:
protected:
bool use_global{true}; ///< The setting's global state
Type custom{}; ///< The custom value of the setting
};
/**
 * RangedSetting is a Setting that implements a maximum and minimum value for its setting. Intended
 * for use with quantifiable settings. It combines the bounds of BasicRangedSetting with the
 * global/custom value selection of Setting.
 */
template <typename Type>
class RangedSetting final : public BasicRangedSetting<Type>, public Setting<Type> {
public:
    /**
     * Sets a default value, minimum value, maximum value, and label.
     *
     * BasicSetting is a virtual base of both parents, so it is initialized here directly by the
     * most-derived class.
     *
     * @param default_val Initial value of the setting, and default value of the setting
     * @param min_val Sets the minimum allowed value of the setting
     * @param max_val Sets the maximum allowed value of the setting
     * @param name Label for the setting
     */
    explicit RangedSetting(const Type& default_val, const Type& min_val, const Type& max_val,
                           const std::string& name)
        : BasicSetting<Type>{default_val, name},
          BasicRangedSetting<Type>{default_val, min_val, max_val, name}, Setting<Type>{default_val,
                                                                                       name} {}
    ~RangedSetting() = default;

    /**
     * Like BasicSetting's SetValue, except value is clamped to the range of the setting. Sets the
     * appropriate value depending on the global state.
     *
     * @param value The desired value
     */
    void SetValue(const Type& value) override {
        const Type temp = std::clamp(value, this->minimum, this->maximum);
        // Write only the slot that is currently active, matching operator= below. The custom
        // value must not be overwritten while the global value is in use.
        if (this->use_global) {
            this->global = temp;
        } else {
            this->custom = temp;
        }
    }

    /**
     * Like BasicSetting's assignment overload, except value is clamped to the range of the setting.
     * Uses the appropriate value depending on the global state.
     *
     * @param value The desired value
     * @returns A reference to the setting's value
     */
    const Type& operator=(const Type& value) override {
        const Type temp = std::clamp(value, this->minimum, this->maximum);
        if (this->use_global) {
            this->global = temp;
            return this->global;
        }
        this->custom = temp;
        return this->custom;
    }
};
/**
* The InputSetting class allows for getting a reference to either the global or custom members.
* This is required as we cannot easily modify the values of user-defined types within containers
@@ -289,13 +395,14 @@ struct Values {
BasicSetting<std::string> sink_id{"auto", "output_engine"};
BasicSetting<bool> audio_muted{false, "audio_muted"};
Setting<bool> enable_audio_stretching{true, "enable_audio_stretching"};
Setting<u8> volume{100, "volume"};
RangedSetting<u8> volume{100, 0, 100, "volume"};
// Core
Setting<bool> use_multi_core{true, "use_multi_core"};
// Cpu
Setting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, "cpu_accuracy"};
RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
CPUAccuracy::Unsafe, "cpu_accuracy"};
// TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
@@ -317,7 +424,8 @@ struct Values {
Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
// Renderer
Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
RangedSetting<RendererBackend> renderer_backend{
RendererBackend::OpenGL, RendererBackend::OpenGL, RendererBackend::Vulkan, "backend"};
BasicSetting<bool> renderer_debug{false, "debug"};
BasicSetting<bool> renderer_shader_feedback{false, "shader_feedback"};
BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
@@ -328,26 +436,28 @@ struct Values {
Setting<u16> resolution_factor{1, "resolution_factor"};
// *nix platforms may have issues with the borderless windowed fullscreen mode.
// Default to exclusive fullscreen on these platforms for now.
Setting<FullscreenMode> fullscreen_mode{
RangedSetting<FullscreenMode> fullscreen_mode{
#ifdef _WIN32
FullscreenMode::Borderless,
#else
FullscreenMode::Exclusive,
#endif
"fullscreen_mode"};
Setting<int> aspect_ratio{0, "aspect_ratio"};
Setting<int> max_anisotropy{0, "max_anisotropy"};
FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"};
RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"};
RangedSetting<int> max_anisotropy{0, 0, 4, "max_anisotropy"};
Setting<bool> use_speed_limit{true, "use_speed_limit"};
Setting<u16> speed_limit{100, "speed_limit"};
RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"};
Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"};
RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
GPUAccuracy::Extreme, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicSetting<u16> fps_cap{1000, "fps_cap"};
BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};
BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"};
Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"};
RangedSetting<ShaderBackend> shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL,
ShaderBackend::SPIRV, "shader_backend"};
Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
Setting<bool> use_caches_gc{false, "use_caches_gc"};
@@ -364,10 +474,10 @@ struct Values {
std::chrono::seconds custom_rtc_differential;
BasicSetting<s32> current_user{0, "current_user"};
Setting<s32> language_index{1, "language_index"};
Setting<s32> region_index{1, "region_index"};
Setting<s32> time_zone_index{0, "time_zone_index"};
Setting<s32> sound_index{1, "sound_index"};
RangedSetting<s32> language_index{1, 0, 16, "language_index"};
RangedSetting<s32> region_index{1, 0, 6, "region_index"};
RangedSetting<s32> time_zone_index{0, 0, 45, "time_zone_index"};
RangedSetting<s32> sound_index{1, 0, 2, "sound_index"};
// Controls
InputSetting<std::array<PlayerInput, 10>> players;
@@ -384,7 +494,7 @@ struct Values {
"udp_input_servers"};
BasicSetting<bool> mouse_panning{false, "mouse_panning"};
BasicSetting<u8> mouse_panning_sensitivity{10, "mouse_panning_sensitivity"};
BasicRangedSetting<u8> mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"};
BasicSetting<bool> mouse_enabled{false, "mouse_enabled"};
std::string mouse_device;
MouseButtonsRaw mouse_buttons;

View File

@@ -69,3 +69,14 @@ struct UUID {
static_assert(sizeof(UUID) == 16, "UUID is an invalid size!");
} // namespace Common
namespace std {
/// std::hash specialization for Common::UUID.
template <>
struct hash<Common::UUID> {
    /**
     * Produces the hash by XORing elements 1 and 0 of the UUID's `uuid` member.
     *
     * @param uuid The UUID to hash
     * @returns The XOR of the two elements, converted to size_t
     */
    size_t operator()(const Common::UUID& uuid) const noexcept {
        const auto& words = uuid.uuid;
        return words[1] ^ words[0];
    }
};
} // namespace std

View File

@@ -4,8 +4,6 @@
#include <algorithm>
#include <cstring>
#include <optional>
#include <utility>
#include "common/assert.h"
#include "common/atomic_ops.h"
@@ -14,12 +12,10 @@
#include "common/page_table.h"
#include "common/settings.h"
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/memory.h"
#include "video_core/gpu.h"
@@ -62,17 +58,7 @@ struct Memory::Impl {
}
}
bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
const auto& page_table = process.PageTable().PageTableImpl();
const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
}
bool IsValidVirtualAddress(VAddr vaddr) const {
return IsValidVirtualAddress(*system.CurrentProcess(), vaddr);
}
u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
[[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]};
if (!paddr) {
@@ -82,18 +68,6 @@ struct Memory::Impl {
return system.DeviceMemory().GetPointer(paddr) + vaddr;
}
u8* GetPointer(const VAddr vaddr) const {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return pointer + vaddr;
}
const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer);
if (type == Common::PageType::RasterizerCachedMemory) {
return GetPointerFromRasterizerCachedMemory(vaddr);
}
return nullptr;
}
u8 Read8(const VAddr addr) {
return Read<u8>(addr);
}
@@ -179,7 +153,7 @@ struct Memory::Impl {
std::string string;
string.reserve(max_length);
for (std::size_t i = 0; i < max_length; ++i) {
const char c = Read8(vaddr);
const char c = Read<s8>(vaddr);
if (c == '\0') {
break;
}
@@ -190,15 +164,14 @@ struct Memory::Impl {
return string;
}
void ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
void WalkBlock(const Kernel::KProcess& process, const VAddr addr, const std::size_t size,
auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
std::size_t page_index = addr >> PAGE_BITS;
std::size_t page_offset = addr & PAGE_MASK;
while (remaining_size > 0) {
while (remaining_size) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
@@ -206,22 +179,18 @@ struct Memory::Impl {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
on_unmapped(copy_amount, current_vaddr);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
u8* mem_ptr = pointer + page_offset + (page_index << PAGE_BITS);
on_memory(copy_amount, mem_ptr);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
system.GPU().FlushRegion(current_vaddr, copy_amount);
std::memcpy(dest_buffer, host_ptr, copy_amount);
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
on_rasterizer(current_vaddr, copy_amount, host_ptr);
break;
}
default:
@@ -230,248 +199,122 @@ struct Memory::Impl {
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
increment(copy_amount);
remaining_size -= copy_amount;
}
}
void ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
template <bool UNSAFE>
void ReadBlockImpl(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
WalkBlock(
process, src_addr, size,
[src_addr, size, &dest_buffer](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) {
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[&system = system, &dest_buffer](const VAddr current_vaddr,
const std::size_t copy_amount,
const u8* const host_ptr) {
if constexpr (!UNSAFE) {
system.GPU().FlushRegion(current_vaddr, copy_amount);
}
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
},
[&dest_buffer](const std::size_t copy_amount) {
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
});
}
void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
ReadBlockImpl<false>(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
ReadBlockImpl<true>(*system.CurrentProcess(), src_addr, dest_buffer, size);
}
void WriteBlock(const Kernel::KProcess& process, const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
template <bool UNSAFE>
void WriteBlockImpl(const Kernel::KProcess& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) {
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
},
[&system = system, &src_buffer](const VAddr current_vaddr,
const std::size_t copy_amount, u8* const host_ptr) {
if constexpr (!UNSAFE) {
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
}
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void WriteBlockUnsafe(const Kernel::KProcess& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
},
[&src_buffer](const std::size_t copy_amount) {
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
});
}
void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
WriteBlockImpl<false>(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
WriteBlockImpl<true>(*system.CurrentProcess(), dest_addr, src_buffer, size);
}
void ZeroBlock(const Kernel::KProcess& process, const VAddr dest_addr, const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> PAGE_BITS;
std::size_t page_offset = dest_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
},
[](const std::size_t copy_amount, u8* const dest_ptr) {
std::memset(dest_ptr, 0, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[&system = system](const VAddr current_vaddr, const std::size_t copy_amount,
u8* const host_ptr) {
system.GPU().InvalidateRegion(current_vaddr, copy_amount);
std::memset(host_ptr, 0, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
remaining_size -= copy_amount;
}
}
void ZeroBlock(const VAddr dest_addr, const std::size_t size) {
ZeroBlock(*system.CurrentProcess(), dest_addr, size);
},
[](const std::size_t copy_amount) {});
}
void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
const std::size_t size) {
const auto& page_table = process.PageTable().PageTableImpl();
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> PAGE_BITS;
std::size_t page_offset = src_addr & PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount =
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
WalkBlock(
process, dest_addr, size,
[this, &process, &dest_addr, &src_addr, size](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
ZeroBlock(process, dest_addr, copy_amount);
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(pointer);
const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
},
[this, &process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) {
WriteBlockImpl<false>(process, dest_addr, src_ptr, copy_amount);
},
[this, &system = system, &process, &dest_addr](
const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) {
system.GPU().FlushRegion(current_vaddr, copy_amount);
WriteBlock(process, dest_addr, host_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
}
void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount);
},
[&dest_addr, &src_addr](const std::size_t copy_amount) {
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
});
}
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
@@ -514,7 +357,7 @@ struct Memory::Impl {
} else {
// Switch page type to uncached if now uncached
switch (page_type) {
case Common::PageType::Unmapped:
case Common::PageType::Unmapped: // NOLINT(bugprone-branch-clone)
// It is not necessary for a process to have this region mapped into its address
// space, for example, a system module need not have a VRAM mapping.
break;
@@ -597,6 +440,44 @@ struct Memory::Impl {
}
}
/**
 * Translates a guest virtual address into a host pointer through the current page table.
 *
 * @param vaddr         Guest virtual address; only the low 48 bits are used.
 * @param on_unmapped   Callable taking no arguments, invoked when the address lies outside the
 *                      address space or its page is unmapped.
 * @param on_rasterizer Callable taking no arguments, invoked for rasterizer-cached pages after
 *                      the host pointer has been looked up.
 *
 * @returns The host pointer for the address, or nullptr if none could be resolved.
 */
[[nodiscard]] u8* GetPointerImpl(VAddr vaddr, auto on_unmapped, auto on_rasterizer) const {
    // AArch64 masks the upper 16 bits of all memory accesses
    vaddr &= 0xffffffffffffLL;

    const auto address_space_end = 1uLL << current_page_table->GetAddressSpaceBits();
    if (vaddr >= address_space_end) {
        on_unmapped();
        return nullptr;
    }

    // Fast path: keep this block free of any extra logic
    const uintptr_t page_entry = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
    u8* const page_base = Common::PageTable::PageInfo::ExtractPointer(page_entry);
    if (page_base != nullptr) {
        // The stored pointer is indexed with the full address, not the in-page offset
        return page_base + vaddr;
    }

    // Slow path: the entry carries no pointer, so dispatch on its page type
    switch (Common::PageTable::PageInfo::ExtractType(page_entry)) {
    case Common::PageType::RasterizerCachedMemory: {
        u8* const cached_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
        on_rasterizer();
        return cached_ptr;
    }
    case Common::PageType::Memory:
        ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
        break;
    case Common::PageType::Unmapped:
        on_unmapped();
        break;
    default:
        UNREACHABLE();
    }
    return nullptr;
}
/**
 * Resolves a guest virtual address to a host pointer via GetPointerImpl.
 * An unmapped address is logged as an error; rasterizer-cached pages need no extra handling.
 */
[[nodiscard]] u8* GetPointer(const VAddr vaddr) const {
    const auto log_unmapped = [vaddr]() {
        LOG_ERROR(HW_Memory, "Unmapped GetPointer @ 0x{:016X}", vaddr);
    };
    const auto ignore_rasterizer = []() {};
    return GetPointerImpl(vaddr, log_unmapped, ignore_rasterizer);
}
/**
* Reads a particular data type out of memory at the given virtual address.
*
@@ -610,39 +491,17 @@ struct Memory::Impl {
*/
template <typename T>
T Read(VAddr vaddr) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
T result = 0;
const u8* const ptr = GetPointerImpl(
vaddr,
[vaddr]() {
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, vaddr);
},
[&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); });
if (ptr) {
std::memcpy(&result, ptr, sizeof(T));
}
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
T value;
std::memcpy(&value, &pointer[vaddr], sizeof(T));
return value;
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().FlushRegion(vaddr, sizeof(T));
T value;
std::memcpy(&value, host_ptr, sizeof(T));
return value;
}
default:
UNREACHABLE();
}
return {};
return result;
}
/**
@@ -656,110 +515,46 @@ struct Memory::Impl {
*/
template <typename T>
void Write(VAddr vaddr, const T data) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return;
}
// Avoid adding any extra logic to this fast-path block
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
std::memcpy(&pointer[vaddr], &data, sizeof(T));
return;
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(T));
std::memcpy(host_ptr, &data, sizeof(T));
break;
}
default:
UNREACHABLE();
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
vaddr, static_cast<u64>(data));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
}
template <typename T>
bool WriteExclusive(VAddr vaddr, const T data, const T expected) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return true;
}
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]);
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, vaddr, static_cast<u64>(data));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return true;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(T));
auto* pointer = reinterpret_cast<volatile T*>(&host_ptr);
return Common::AtomicCompareAndSwap(pointer, data, expected);
}
default:
UNREACHABLE();
}
return true;
}
bool WriteExclusive128(VAddr vaddr, const u128 data, const u128 expected) {
// AARCH64 masks the upper 16 bit of all memory accesses
vaddr &= 0xffffffffffffLL;
if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data[0]), vaddr);
return true;
}
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]);
u8* const ptr = GetPointerImpl(
vaddr,
[vaddr, data]() {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
vaddr, static_cast<u64>(data[1]), static_cast<u64>(data[0]));
},
[&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
return true;
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case Common::PageType::RasterizerCachedMemory: {
u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
system.GPU().InvalidateRegion(vaddr, sizeof(u128));
auto* pointer = reinterpret_cast<volatile u64*>(&host_ptr);
return Common::AtomicCompareAndSwap(pointer, data, expected);
}
default:
UNREACHABLE();
}
return true;
}
@@ -789,12 +584,11 @@ void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
impl->UnmapRegion(page_table, base, size);
}
// Process-specific overload: forwards the check unchanged to Impl::IsValidVirtualAddress.
bool Memory::IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
return impl->IsValidVirtualAddress(process, vaddr);
}
bool Memory::IsValidVirtualAddress(const VAddr vaddr) const {
return impl->IsValidVirtualAddress(vaddr);
const Kernel::KProcess& process = *system.CurrentProcess();
const auto& page_table = process.PageTable().PageTableImpl();
const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
}
u8* Memory::GetPointer(VAddr vaddr) {
@@ -863,64 +657,38 @@ std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {
void Memory::ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
impl->ReadBlock(process, src_addr, dest_buffer, size);
impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
}
// Overload without an explicit process: forwards all arguments unchanged to Impl::ReadBlock.
void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlock(src_addr, dest_buffer, size);
}
// Process-specific overload: forwards all arguments unchanged to Impl::ReadBlockUnsafe.
// Per the header documentation, the unsafe variant does not trigger GPU flushing.
void Memory::ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr,
void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
}
// Overload without an explicit process: forwards all arguments unchanged to
// Impl::ReadBlockUnsafe.
void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
}
void Memory::WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size) {
impl->WriteBlock(process, dest_addr, src_buffer, size);
impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
}
// Overload without an explicit process: forwards all arguments unchanged to Impl::WriteBlock.
void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
impl->WriteBlock(dest_addr, src_buffer, size);
}
// Process-specific overload: forwards all arguments unchanged to Impl::WriteBlockUnsafe.
// Per the header documentation, the unsafe variant does not invalidate GPU memory.
void Memory::WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr,
const void* src_buffer, std::size_t size) {
impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
}
// Overload without an explicit process: forwards all arguments unchanged to
// Impl::WriteBlockUnsafe.
void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
}
// Process-specific overload: forwards all arguments unchanged to Impl::ZeroBlock.
void Memory::ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(process, dest_addr, size);
}
// Overload without an explicit process: forwards all arguments unchanged to Impl::ZeroBlock.
void Memory::ZeroBlock(VAddr dest_addr, std::size_t size) {
impl->ZeroBlock(dest_addr, size);
}
// Process-specific overload: forwards all arguments unchanged to Impl::CopyBlock.
void Memory::CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
const std::size_t size) {
impl->CopyBlock(process, dest_addr, src_addr, size);
}
// Overload without an explicit process: forwards all arguments unchanged to Impl::CopyBlock.
void Memory::CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
impl->CopyBlock(dest_addr, src_addr, size);
}
// Forwards the cached/uncached marking request for [vaddr, vaddr + size) unchanged to
// Impl::RasterizerMarkRegionCached.
void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
impl->RasterizerMarkRegionCached(vaddr, size, cached);
}
/// Reports whether vaddr lies in the half-open range [KERNEL_REGION_VADDR, KERNEL_REGION_END).
bool IsKernelVirtualAddress(const VAddr vaddr) {
    const bool at_or_above_start = vaddr >= KERNEL_REGION_VADDR;
    const bool below_end = vaddr < KERNEL_REGION_END;
    return at_or_above_start && below_end;
}
} // namespace Core::Memory

View File

@@ -39,11 +39,6 @@ enum : VAddr {
/// Application stack
DEFAULT_STACK_SIZE = 0x100000,
/// Kernel Virtual Address Range
KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
KERNEL_REGION_SIZE = 0x7FFFE00000,
KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
};
/// Central class that handles all memory operations and state.
@@ -56,7 +51,7 @@ public:
Memory& operator=(const Memory&) = delete;
Memory(Memory&&) = default;
Memory& operator=(Memory&&) = default;
Memory& operator=(Memory&&) = delete;
/**
* Resets the state of the Memory system.
@@ -90,17 +85,6 @@ public:
*/
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
/**
* Checks whether or not the supplied address is a valid virtual
* address for the given process.
*
* @param process The emulated process to check the address against.
* @param vaddr The virtual address to check the validity of.
*
* @returns True if the given virtual address is valid, false otherwise.
*/
bool IsValidVirtualAddress(const Kernel::KProcess& process, VAddr vaddr) const;
/**
* Checks whether or not the supplied address is a valid virtual
* address for the current process.
@@ -109,7 +93,7 @@ public:
*
* @returns True if the given virtual address is valid, false otherwise.
*/
bool IsValidVirtualAddress(VAddr vaddr) const;
[[nodiscard]] bool IsValidVirtualAddress(VAddr vaddr) const;
/**
* Gets a pointer to the given address.
@@ -134,7 +118,7 @@ public:
* @returns The pointer to the given address, if the address is valid.
* If the address is not valid, nullptr will be returned.
*/
const u8* GetPointer(VAddr vaddr) const;
[[nodiscard]] const u8* GetPointer(VAddr vaddr) const;
template <typename T>
const T* GetPointer(VAddr vaddr) const {
@@ -327,27 +311,6 @@ public:
void ReadBlock(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from a specified process' address space.
* This unsafe version does not trigger GPU flushing.
*
* @param process The process to read the data from.
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* process' address space.
*/
void ReadBlockUnsafe(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
*
@@ -408,26 +371,6 @@ public:
void WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
* This unsafe version does not invalidate GPU Memory.
*
* @param process The process to write data into the address space of.
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
*/
void WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
@@ -467,29 +410,6 @@ public:
*/
void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Fills the specified address range within a process' address space with zeroes.
*
* @param process The process that will have a portion of its memory zeroed out.
* @param dest_addr The starting virtual address of the range to zero out.
* @param size The size of the address range to zero out, in bytes.
*
* @post The range [dest_addr, size) within the process' address space is
* filled with zeroes.
*/
void ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
/**
* Fills the specified address range within the current process' address space with zeroes.
*
* @param dest_addr The starting virtual address of the range to zero out.
* @param size The size of the address range to zero out, in bytes.
*
* @post The range [dest_addr, size) within the current process' address space is
* filled with zeroes.
*/
void ZeroBlock(VAddr dest_addr, std::size_t size);
/**
* Copies data within a process' address space to another location within the
* same address space.
@@ -505,19 +425,6 @@ public:
void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
std::size_t size);
/**
* Copies data within the current process' address space to another location within the
* same address space.
*
* @param dest_addr The destination virtual address to begin copying the data into.
* @param src_addr The source virtual address to begin copying the data from.
* @param size The size of the data to copy, in bytes.
*
* @post The range [dest_addr, size) within the current process' address space
* contains the same data within the range [src_addr, size).
*/
void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
/**
* Marks each page within the specified address range as cached or uncached.
*
@@ -535,7 +442,4 @@ private:
std::unique_ptr<Impl> impl;
};
/// Determines if the given VAddr is a kernel address
bool IsKernelVirtualAddress(VAddr vaddr);
} // namespace Core::Memory

View File

@@ -304,10 +304,10 @@ std::vector<std::unique_ptr<Polling::DevicePoller>> InputSubsystem::GetPollers([
}
std::string GenerateKeyboardParam(int key_code) {
Common::ParamPackage param{
{"engine", "keyboard"},
{"code", std::to_string(key_code)},
};
Common::ParamPackage param;
param.Set("engine", "keyboard");
param.Set("code", key_code);
param.Set("toggle", false);
return param.Serialize();
}

View File

@@ -57,6 +57,7 @@ Common::ParamPackage MouseButtonFactory::GetNextInput() const {
if (pad.button != MouseInput::MouseButton::Undefined) {
params.Set("engine", "mouse");
params.Set("button", static_cast<u16>(pad.button));
params.Set("toggle", false);
return params;
}
}

View File

@@ -82,6 +82,12 @@ public:
state.buttons.insert_or_assign(button, value);
}
void PreSetButton(int button) {
if (!state.buttons.contains(button)) {
SetButton(button, false);
}
}
void SetMotion(SDL_ControllerSensorEvent event) {
constexpr float gravity_constant = 9.80665f;
std::lock_guard lock{mutex};
@@ -155,9 +161,16 @@ public:
state.axes.insert_or_assign(axis, value);
}
float GetAxis(int axis, float range) const {
void PreSetAxis(int axis) {
if (!state.axes.contains(axis)) {
SetAxis(axis, 0);
}
}
float GetAxis(int axis, float range, float offset) const {
std::lock_guard lock{mutex};
return static_cast<float>(state.axes.at(axis)) / (32767.0f * range);
const float value = static_cast<float>(state.axes.at(axis)) / 32767.0f;
return (value + offset) / range;
}
bool RumblePlay(u16 amp_low, u16 amp_high) {
@@ -174,9 +187,10 @@ public:
return false;
}
std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range) const {
float x = GetAxis(axis_x, range);
float y = GetAxis(axis_y, range);
std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range, float offset_x,
float offset_y) const {
float x = GetAxis(axis_x, range, offset_x);
float y = GetAxis(axis_y, range, offset_y);
y = -y; // 3DS uses an y-axis inverse from SDL
// Make sure the coordinates are in the unit circle,
@@ -483,7 +497,7 @@ public:
trigger_if_greater(trigger_if_greater_) {}
bool GetStatus() const override {
const float axis_value = joystick->GetAxis(axis, 1.0f);
const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
if (trigger_if_greater) {
return axis_value > threshold;
}
@@ -500,12 +514,14 @@ private:
class SDLAnalog final : public Input::AnalogDevice {
public:
explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
bool invert_x_, bool invert_y_, float deadzone_, float range_)
bool invert_x_, bool invert_y_, float deadzone_, float range_,
float offset_x_, float offset_y_)
: joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
invert_y(invert_y_), deadzone(deadzone_), range(range_), offset_x(offset_x_),
offset_y(offset_y_) {}
std::tuple<float, float> GetStatus() const override {
auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range, offset_x, offset_y);
const float r = std::sqrt((x * x) + (y * y));
if (invert_x) {
x = -x;
@@ -522,8 +538,8 @@ public:
}
std::tuple<float, float> GetRawStatus() const override {
const float x = joystick->GetAxis(axis_x, range);
const float y = joystick->GetAxis(axis_y, range);
const float x = joystick->GetAxis(axis_x, range, offset_x);
const float y = joystick->GetAxis(axis_y, range, offset_y);
return {x, -y};
}
@@ -555,6 +571,8 @@ private:
const bool invert_y;
const float deadzone;
const float range;
const float offset_x;
const float offset_y;
};
class SDLVibration final : public Input::VibrationDevice {
@@ -621,7 +639,7 @@ public:
trigger_if_greater(trigger_if_greater_) {}
Input::MotionStatus GetStatus() const override {
const float axis_value = joystick->GetAxis(axis, 1.0f);
const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
bool trigger = axis_value < threshold;
if (trigger_if_greater) {
trigger = axis_value > threshold;
@@ -720,13 +738,13 @@ public:
LOG_ERROR(Input, "Unknown direction {}", direction_name);
}
// This is necessary so accessing GetAxis with axis won't crash
joystick->SetAxis(axis, 0);
joystick->PreSetAxis(axis);
return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
}
const int button = params.Get("button", 0);
// This is necessary so accessing GetButton with button won't crash
joystick->SetButton(button, false);
joystick->PreSetButton(button);
return std::make_unique<SDLButton>(joystick, button, toggle);
}
@@ -757,13 +775,15 @@ public:
const std::string invert_y_value = params.Get("invert_y", "+");
const bool invert_x = invert_x_value == "-";
const bool invert_y = invert_y_value == "-";
const float offset_x = params.Get("offset_x", 0.0f);
const float offset_y = params.Get("offset_y", 0.0f);
auto joystick = state.GetSDLJoystickByGUID(guid, port);
// This is necessary so accessing GetAxis with axis_x and axis_y won't crash
joystick->SetAxis(axis_x, 0);
joystick->SetAxis(axis_y, 0);
joystick->PreSetAxis(axis_x);
joystick->PreSetAxis(axis_y);
return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
range);
range, offset_x, offset_y);
}
private:
@@ -844,13 +864,13 @@ public:
LOG_ERROR(Input, "Unknown direction {}", direction_name);
}
// This is necessary so accessing GetAxis with axis won't crash
joystick->SetAxis(axis, 0);
joystick->PreSetAxis(axis);
return std::make_unique<SDLAxisMotion>(joystick, axis, threshold, trigger_if_greater);
}
const int button = params.Get("button", 0);
// This is necessary so accessing GetButton with button won't crash
joystick->SetButton(button, false);
joystick->PreSetButton(button);
return std::make_unique<SDLButtonMotion>(joystick, button);
}
@@ -995,6 +1015,7 @@ Common::ParamPackage BuildButtonParamPackageForButton(int port, std::string guid
params.Set("port", port);
params.Set("guid", std::move(guid));
params.Set("button", button);
params.Set("toggle", false);
return params;
}
@@ -1134,13 +1155,15 @@ Common::ParamPackage BuildParamPackageForBinding(int port, const std::string& gu
}
Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& guid, int axis_x,
int axis_y) {
int axis_y, float offset_x, float offset_y) {
Common::ParamPackage params;
params.Set("engine", "sdl");
params.Set("port", port);
params.Set("guid", guid);
params.Set("axis_x", axis_x);
params.Set("axis_y", axis_y);
params.Set("offset_x", offset_x);
params.Set("offset_y", offset_y);
params.Set("invert_x", "+");
params.Set("invert_y", "+");
return params;
@@ -1342,24 +1365,39 @@ AnalogMapping SDLState::GetAnalogMappingForDevice(const Common::ParamPackage& pa
const auto& binding_left_y =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_LEFTY);
if (params.Has("guid2")) {
joystick2->PreSetAxis(binding_left_x.value.axis);
joystick2->PreSetAxis(binding_left_y.value.axis);
const auto left_offset_x = -joystick2->GetAxis(binding_left_x.value.axis, 1.0f, 0);
const auto left_offset_y = -joystick2->GetAxis(binding_left_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(
Settings::NativeAnalog::LStick,
BuildParamPackageForAnalog(joystick2->GetPort(), joystick2->GetGUID(),
binding_left_x.value.axis, binding_left_y.value.axis));
binding_left_x.value.axis, binding_left_y.value.axis,
left_offset_x, left_offset_y));
} else {
joystick->PreSetAxis(binding_left_x.value.axis);
joystick->PreSetAxis(binding_left_y.value.axis);
const auto left_offset_x = -joystick->GetAxis(binding_left_x.value.axis, 1.0f, 0);
const auto left_offset_y = -joystick->GetAxis(binding_left_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(
Settings::NativeAnalog::LStick,
BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
binding_left_x.value.axis, binding_left_y.value.axis));
binding_left_x.value.axis, binding_left_y.value.axis,
left_offset_x, left_offset_y));
}
const auto& binding_right_x =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTX);
const auto& binding_right_y =
SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTY);
joystick->PreSetAxis(binding_right_x.value.axis);
joystick->PreSetAxis(binding_right_y.value.axis);
const auto right_offset_x = -joystick->GetAxis(binding_right_x.value.axis, 1.0f, 0);
const auto right_offset_y = -joystick->GetAxis(binding_right_y.value.axis, 1.0f, 0);
mapping.insert_or_assign(Settings::NativeAnalog::RStick,
BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
binding_right_x.value.axis,
binding_right_y.value.axis));
binding_right_y.value.axis, right_offset_x,
right_offset_y));
return mapping;
}
@@ -1563,8 +1601,9 @@ public:
}
if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) {
// Set offset to zero since the joystick is not on center
auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
first_axis, axis);
first_axis, axis, 0, 0);
first_axis = -1;
return params;
}

View File

@@ -97,6 +97,7 @@ add_library(video_core STATIC
renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp
renderer_opengl/gl_query_cache.cpp
renderer_opengl/gl_query_cache.h
renderer_opengl/maxwell_to_gl.h
@@ -155,6 +156,7 @@ add_library(video_core STATIC
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp
@@ -186,6 +188,7 @@ add_library(video_core STATIC
texture_cache/samples_helper.h
texture_cache/slot_vector.h
texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
texture_cache/types.h
texture_cache/util.cpp
texture_cache/util.h

View File

@@ -397,14 +397,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
next_frame = std::move(temp);
} else {
next_frame.info = current_frame.info;
next_frame.bit_stream = std::move(current_frame.bit_stream);
next_frame.bit_stream = current_frame.bit_stream;
}
return current_frame;
}
std::vector<u8> VP9::ComposeCompressedHeader() {
VpxRangeEncoder writer{};
const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame;
if (!current_frame_info.lossless) {
if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
writer.Write(3, 2);

View File

@@ -176,7 +176,7 @@ struct PictureInfo {
.frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
.error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
.last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
.show_frame = false,
.show_frame = true,
.ref_frame_sign_bias = ref_frame_sign_bias,
.base_q_index = base_q_index,
.y_dc_delta_q = y_dc_delta_q,

View File

@@ -10,33 +10,27 @@
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_ENC_BUFFER 1
#define BINDING_SWIZZLE_BUFFER 2
#define BINDING_OUTPUT_IMAGE 3
#define BINDING_OUTPUT_IMAGE 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout(location = n) uniform
#define BINDING_SWIZZLE_BUFFER 0
#define BINDING_INPUT_BUFFER 1
#define BINDING_ENC_BUFFER 2
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 0
#endif
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
BEGIN_PUSH_CONSTANTS
UNIFORM(1) uvec2 block_dims;
UNIFORM(2) uint bytes_per_block_log2;
UNIFORM(3) uint layer_stride;
UNIFORM(4) uint block_size;
UNIFORM(5) uint x_shift;
UNIFORM(6) uint block_height;
UNIFORM(7) uint block_height_mask;
UNIFORM(2) uint layer_stride;
UNIFORM(3) uint block_size;
UNIFORM(4) uint x_shift;
UNIFORM(5) uint block_height;
UNIFORM(6) uint block_height_mask;
END_PUSH_CONSTANTS
struct EncodingData {
@@ -55,45 +49,35 @@ struct TexelWeightParams {
bool void_extent_hdr;
};
// Swizzle data
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
uint swizzle_table[];
};
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
uint astc_data[];
};
// ASTC Encodings data
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
EncodingData encoding_values[];
uvec4 astc_data[];
};
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
const uint GOB_SIZE_X = 64;
const uint GOB_SIZE_Y = 8;
const uint GOB_SIZE_Z = 1;
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
const uint GOB_SIZE_X_SHIFT = 6;
const uint GOB_SIZE_Y_SHIFT = 3;
const uint GOB_SIZE_Z_SHIFT = 0;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
const int BLOCK_SIZE_IN_BYTES = 16;
const int BLOCK_INFO_ERROR = 0;
const int BLOCK_INFO_VOID_EXTENT_HDR = 1;
const int BLOCK_INFO_VOID_EXTENT_LDR = 2;
const int BLOCK_INFO_NORMAL = 3;
const uint BYTES_PER_BLOCK_LOG2 = 4;
const int JUST_BITS = 0;
const int QUINT = 1;
const int TRIT = 2;
// ASTC Encodings data, sorted in ascending order based on their BitLength value
// (see GetBitLength() function)
EncodingData encoding_values[22] = EncodingData[](
EncodingData(JUST_BITS, 0, 0, 0), EncodingData(JUST_BITS, 1, 0, 0), EncodingData(TRIT, 0, 0, 0),
EncodingData(JUST_BITS, 2, 0, 0), EncodingData(QUINT, 0, 0, 0), EncodingData(TRIT, 1, 0, 0),
EncodingData(JUST_BITS, 3, 0, 0), EncodingData(QUINT, 1, 0, 0), EncodingData(TRIT, 2, 0, 0),
EncodingData(JUST_BITS, 4, 0, 0), EncodingData(QUINT, 2, 0, 0), EncodingData(TRIT, 3, 0, 0),
EncodingData(JUST_BITS, 5, 0, 0), EncodingData(QUINT, 3, 0, 0), EncodingData(TRIT, 4, 0, 0),
EncodingData(JUST_BITS, 6, 0, 0), EncodingData(QUINT, 4, 0, 0), EncodingData(TRIT, 5, 0, 0),
EncodingData(JUST_BITS, 7, 0, 0), EncodingData(QUINT, 5, 0, 0), EncodingData(TRIT, 6, 0, 0),
EncodingData(JUST_BITS, 8, 0, 0)
);
// The following constants are expanded variants of the Replicate()
// function calls corresponding to the following arguments:
// value: index into the generated table
@@ -135,44 +119,37 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
// Input ASTC texture globals
uint current_index = 0;
int bitsread = 0;
uint total_bitsread = 0;
uint local_buff[16];
int total_bitsread = 0;
uvec4 local_buff;
// Color data globals
uint color_endpoint_data[16];
uvec4 color_endpoint_data;
int color_bitsread = 0;
uint total_color_bitsread = 0;
int color_index = 0;
// Four values, two endpoints, four maximum partitions
uint color_values[32];
int colvals_index = 0;
// Weight data globals
uint texel_weight_data[16];
uvec4 texel_weight_data;
int texel_bitsread = 0;
uint total_texel_bitsread = 0;
int texel_index = 0;
bool texel_flag = false;
// Global "vectors" to be pushed into when decoding
EncodingData result_vector[100];
EncodingData result_vector[144];
int result_index = 0;
EncodingData texel_vector[100];
EncodingData texel_vector[144];
int texel_vector_index = 0;
uint unquantized_texel_weights[2][144];
uint SwizzleOffset(uvec2 pos) {
pos = pos & SWIZZLE_MASK;
return swizzle_table[pos.y * 64 + pos.x];
}
uint ReadTexel(uint offset) {
// extract the 8-bit value from the 32-bit packed data.
return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8);
uint x = pos.x;
uint y = pos.y;
return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
(y % 2) * 16 + (x % 16);
}
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
@@ -278,14 +255,10 @@ uint Hash52(uint p) {
return p;
}
uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) {
if (partition_count == 1) {
return 0;
}
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) {
if (small_block) {
x <<= 1;
y <<= 1;
z <<= 1;
}
seed += (partition_count - 1) * 1024;
@@ -299,10 +272,6 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
uint seed6 = uint((rnum >> 20) & 0xF);
uint seed7 = uint((rnum >> 24) & 0xF);
uint seed8 = uint((rnum >> 28) & 0xF);
uint seed9 = uint((rnum >> 18) & 0xF);
uint seed10 = uint((rnum >> 22) & 0xF);
uint seed11 = uint((rnum >> 26) & 0xF);
uint seed12 = uint(((rnum >> 30) | (rnum << 2)) & 0xF);
seed1 = (seed1 * seed1);
seed2 = (seed2 * seed2);
@@ -312,12 +281,8 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
seed6 = (seed6 * seed6);
seed7 = (seed7 * seed7);
seed8 = (seed8 * seed8);
seed9 = (seed9 * seed9);
seed10 = (seed10 * seed10);
seed11 = (seed11 * seed11);
seed12 = (seed12 * seed12);
int sh1, sh2, sh3;
uint sh1, sh2;
if ((seed & 1) > 0) {
sh1 = (seed & 2) > 0 ? 4 : 5;
sh2 = (partition_count == 3) ? 6 : 5;
@@ -325,25 +290,19 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
sh1 = (partition_count == 3) ? 6 : 5;
sh2 = (seed & 2) > 0 ? 4 : 5;
}
sh3 = (seed & 0x10) > 0 ? sh1 : sh2;
seed1 >>= sh1;
seed2 >>= sh2;
seed3 >>= sh1;
seed4 >>= sh2;
seed5 >>= sh1;
seed6 >>= sh2;
seed7 >>= sh1;
seed8 >>= sh2;
seed1 = (seed1 >> sh1);
seed2 = (seed2 >> sh2);
seed3 = (seed3 >> sh1);
seed4 = (seed4 >> sh2);
seed5 = (seed5 >> sh1);
seed6 = (seed6 >> sh2);
seed7 = (seed7 >> sh1);
seed8 = (seed8 >> sh2);
seed9 = (seed9 >> sh3);
seed10 = (seed10 >> sh3);
seed11 = (seed11 >> sh3);
seed12 = (seed12 >> sh3);
uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
uint a = seed1 * x + seed2 * y + (rnum >> 14);
uint b = seed3 * x + seed4 * y + (rnum >> 10);
uint c = seed5 * x + seed6 * y + (rnum >> 6);
uint d = seed7 * x + seed8 * y + (rnum >> 2);
a &= 0x3F;
b &= 0x3F;
@@ -368,58 +327,37 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo
}
}
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) {
return SelectPartition(seed, x, y, 0, partition_count, small_block);
}
uint ReadBit() {
if (current_index >= local_buff.length()) {
uint ExtractBits(uvec4 payload, int offset, int bits) {
if (bits <= 0) {
return 0;
}
uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1);
++bitsread;
++total_bitsread;
if (bitsread == 8) {
++current_index;
bitsread = 0;
int last_offset = offset + bits - 1;
int shifted_offset = offset >> 5;
if ((last_offset >> 5) == shifted_offset) {
return bitfieldExtract(payload[shifted_offset], offset & 31, bits);
}
return bit;
int first_bits = 32 - (offset & 31);
int result_first = int(bitfieldExtract(payload[shifted_offset], offset & 31, first_bits));
int result_second = int(bitfieldExtract(payload[shifted_offset + 1], 0, bits - first_bits));
return result_first | (result_second << first_bits);
}
uint StreamBits(uint num_bits) {
uint ret = 0;
for (uint i = 0; i < num_bits; i++) {
ret |= ((ReadBit() & 1) << i);
}
int int_bits = int(num_bits);
uint ret = ExtractBits(local_buff, total_bitsread, int_bits);
total_bitsread += int_bits;
return ret;
}
uint ReadColorBit() {
uint bit = 0;
if (texel_flag) {
bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1);
++texel_bitsread;
++total_texel_bitsread;
if (texel_bitsread == 8) {
++texel_index;
texel_bitsread = 0;
}
} else {
bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1);
++color_bitsread;
++total_color_bitsread;
if (color_bitsread == 8) {
++color_index;
color_bitsread = 0;
}
}
return bit;
}
uint StreamColorBits(uint num_bits) {
uint ret = 0;
for (uint i = 0; i < num_bits; i++) {
ret |= ((ReadColorBit() & 1) << i);
int int_bits = int(num_bits);
if (texel_flag) {
ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
texel_bitsread += int_bits;
} else {
ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
color_bitsread += int_bits;
}
return ret;
}
@@ -596,22 +534,16 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
for (uint i = 0; i < num_partitions; i++) {
num_values += ((modes[i] >> 2) + 1) << 1;
}
int range = 256;
while (--range > 0) {
EncodingData val = encoding_values[range];
// Find the largest encoding that's within color_data_bits
// TODO(ameerj): profile with binary search
int range = 0;
while (++range < encoding_values.length()) {
uint bit_length = GetBitLength(num_values, range);
if (bit_length <= color_data_bits) {
while (--range > 0) {
EncodingData newval = encoding_values[range];
if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) {
break;
}
}
++range;
if (bit_length > color_data_bits) {
break;
}
}
DecodeIntegerSequence(range, num_values);
DecodeIntegerSequence(range - 1, num_values);
uint out_index = 0;
for (int itr = 0; itr < result_index; ++itr) {
if (out_index >= num_values) {
@@ -1028,7 +960,7 @@ int FindLayout(uint mode) {
return 5;
}
TexelWeightParams DecodeBlockInfo(uint block_index) {
TexelWeightParams DecodeBlockInfo() {
TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false);
uint mode = StreamBits(11);
if ((mode & 0x1ff) == 0x1fc) {
@@ -1110,10 +1042,10 @@ TexelWeightParams DecodeBlockInfo(uint block_index) {
}
weight_index -= 2;
if ((mode_layout != 9) && ((mode & 0x200) != 0)) {
const int max_weights[6] = int[6](9, 11, 15, 19, 23, 31);
const int max_weights[6] = int[6](7, 8, 9, 10, 11, 12);
params.max_weight = max_weights[weight_index];
} else {
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 7);
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 6);
params.max_weight = max_weights[weight_index];
}
return params;
@@ -1144,8 +1076,8 @@ void FillVoidExtentLDR(ivec3 coord) {
}
}
void DecompressBlock(ivec3 coord, uint block_index) {
TexelWeightParams params = DecodeBlockInfo(block_index);
void DecompressBlock(ivec3 coord) {
TexelWeightParams params = DecodeBlockInfo();
if (params.error_state) {
FillError(coord);
return;
@@ -1212,7 +1144,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
// Read color data...
uint color_data_bits = remaining_bits;
while (remaining_bits > 0) {
int nb = int(min(remaining_bits, 8U));
int nb = int(min(remaining_bits, 32U));
uint b = StreamBits(nb);
color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb));
++ced_pointer;
@@ -1254,25 +1186,20 @@ void DecompressBlock(ivec3 coord, uint block_index) {
ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
}
for (uint i = 0; i < 16; i++) {
texel_weight_data[i] = local_buff[i];
}
for (uint i = 0; i < 8; i++) {
#define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16
uint a = REVERSE_BYTE(texel_weight_data[i]);
uint b = REVERSE_BYTE(texel_weight_data[15 - i]);
#undef REVERSE_BYTE
texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8));
texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8));
}
texel_weight_data = local_buff;
texel_weight_data = bitfieldReverse(texel_weight_data).wzyx;
uint clear_byte_start =
(GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
texel_weight_data[clear_byte_start - 1] =
texel_weight_data[clear_byte_start - 1] &
uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) &
uint(
((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
for (uint i = 0; i < 16 - clear_byte_start; i++) {
texel_weight_data[clear_byte_start + i] = 0U;
uint vec_index = (clear_byte_start - 1) >> 2;
texel_weight_data[vec_index] =
bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
for (uint i = clear_byte_start; i < 16; ++i) {
uint idx = i >> 2;
texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8);
}
texel_flag = true; // use texel "vector" and bit stream in integer decoding
DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
@@ -1281,8 +1208,11 @@ void DecompressBlock(ivec3 coord, uint block_index) {
for (uint j = 0; j < block_dims.y; j++) {
for (uint i = 0; i < block_dims.x; i++) {
uint local_partition = Select2DPartition(partition_index, i, j, num_partitions,
uint local_partition = 0;
if (num_partitions > 1) {
local_partition = Select2DPartition(partition_index, i, j, num_partitions,
(block_dims.y * block_dims.x) < 32);
}
vec4 p;
uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]);
uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]);
@@ -1303,7 +1233,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
void main() {
uvec3 pos = gl_GlobalInvocationID;
pos.x <<= bytes_per_block_log2;
pos.x <<= BYTES_PER_BLOCK_LOG2;
// Read as soon as possible due to its latency
const uint swizzle = SwizzleOffset(pos.xy);
@@ -1321,13 +1251,8 @@ void main() {
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
return;
}
uint block_index =
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
current_index = 0;
bitsread = 0;
for (int i = 0; i < 16; i++) {
local_buff[i] = ReadTexel(offset + i);
}
DecompressBlock(coord, block_index);
local_buff = astc_data[offset / 16];
DecompressBlock(coord);
}

View File

@@ -15,7 +15,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]

View File

@@ -32,7 +32,7 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL {

View File

@@ -18,10 +18,8 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
namespace {

View File

@@ -12,7 +12,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL {

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
// Explicit instantiation of the texture cache template for the OpenGL backend parameters,
// so the template definitions pulled in by texture_cache.h above are emitted from this
// translation unit.
template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
}

View File

@@ -60,19 +60,14 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
swizzle_table_buffer.Create();
astc_buffer.Create();
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
0);
}
UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_ENC_BUFFER = 2;
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
const Extent2D tile_size{
@@ -80,34 +75,32 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(1, tile_size.width, tile_size.height);
// Ensure buffer data is valid before dispatching
glFlush();
for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
glUniform1ui(2, params.bytes_per_block_log2);
glUniform1ui(3, params.layer_stride);
glUniform1ui(4, params.block_size);
glUniform1ui(5, params.x_shift);
glUniform1ui(6, params.block_height);
glUniform1ui(7, params.block_height_mask);
glUniform1ui(2, params.layer_stride);
glUniform1ui(3, params.block_size);
glUniform1ui(4, params.x_shift);
glUniform1ui(5, params.block_height);
glUniform1ui(6, params.block_height_mask);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
// ASTC texture data
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
}

View File

@@ -62,7 +62,6 @@ private:
ProgramManager& program_manager;
OGLBuffer swizzle_table_buffer;
OGLBuffer astc_buffer;
OGLProgram astc_decoder_program;
OGLProgram block_linear_unswizzle_2d_program;

View File

@@ -30,16 +30,12 @@
namespace Vulkan {
using Tegra::Texture::SWIZZLE_TABLE;
using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
using namespace Tegra::Texture::ASTC;
namespace {
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
constexpr size_t ASTC_NUM_BINDINGS = 4;
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
constexpr size_t ASTC_NUM_BINDINGS = 2;
template <size_t size>
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
@@ -75,7 +71,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
.score = 2,
};
constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
{
.binding = ASTC_BINDING_INPUT_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
@@ -83,20 +79,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_ENC_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
{
.binding = ASTC_BINDING_OUTPUT_IMAGE,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -108,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
constexpr DescriptorBankInfo ASTC_BANK_INFO{
.uniform_buffers = 0,
.storage_buffers = 3,
.storage_buffers = 1,
.texture_buffers = 0,
.image_buffers = 0,
.textures = 0,
.images = 1,
.score = 4,
.score = 2,
};
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
@@ -135,22 +117,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_ENC_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry),
.stride = sizeof(DescriptorUpdateEntry),
},
{
.dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
.dstArrayElement = 0,
@@ -163,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
struct AstcPushConstants {
std::array<u32, 2> blocks_dims;
u32 bytes_per_block_log2;
u32 layer_stride;
u32 block_size;
u32 x_shift;
@@ -354,46 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
ASTCDecoderPass::~ASTCDecoderPass() = default;
void ASTCDecoderPass::MakeDataBuffer() {
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = TOTAL_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
sizeof(ASTC_ENCODINGS_VALUES));
// Tack on the swizzle table at the end of the buffer
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
sizeof(SWIZZLE_TABLE));
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
static constexpr VkMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
};
const VkBufferCopy copy{
.srcOffset = offset,
.dstOffset = 0,
.size = TOTAL_BUFFER_SIZE,
};
cmdbuf.CopyBuffer(src, dst, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, write_barrier);
});
}
void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {
using namespace VideoCommon::Accelerated;
@@ -402,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
scheduler.RequestOutsideRenderPassOperationContext();
if (!data_buffer) {
MakeDataBuffer();
}
const VkPipeline vk_pipeline = *pipeline;
const VkImageAspectFlags aspect_mask = image.AspectMask();
const VkImage vk_image = image.Handle();
@@ -436,16 +358,13 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
});
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
sizeof(SWIZZLE_TABLE));
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
@@ -453,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
ASSERT(params.bytes_per_block_log2 == 4);
scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
params, descriptor_data](vk::CommandBuffer cmdbuf) {
const AstcPushConstants uniforms{
.blocks_dims = block_dims,
.bytes_per_block_log2 = params.bytes_per_block_log2,
.layer_stride = params.layer_stride,
.block_size = params.block_size,
.x_shift = params.x_shift,

View File

@@ -96,15 +96,10 @@ public:
std::span<const VideoCommon::SwizzleParameters> swizzles);
private:
void MakeDataBuffer();
VKScheduler& scheduler;
StagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
MemoryAllocator& memory_allocator;
vk::Buffer data_buffer;
MemoryCommit data_buffer_commit;
};
} // namespace Vulkan

View File

@@ -32,7 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

View File

@@ -19,6 +19,8 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

View File

@@ -9,7 +9,7 @@
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

View File

@@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
// Explicit instantiation of the texture cache template for the Vulkan backend parameters,
// so the template definitions pulled in by texture_cache.h above are emitted from this
// translation unit.
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
}

View File

@@ -6,7 +6,7 @@
#include "common/assert.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
@@ -14,6 +14,8 @@ namespace VideoCommon {
namespace {
using Tegra::Texture::TextureType;
constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {

View File

@@ -4,48 +4,11 @@
#pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace VideoCommon {
@@ -61,352 +24,6 @@ using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
/// Groups a destination and a source image id together with a pixel format for each of them
struct BlitImages {
ImageId dst_id; ///< Id of the destination image
ImageId src_id; ///< Id of the source image
PixelFormat dst_format; ///< Pixel format associated with the destination
PixelFormat src_format; ///< Pixel format associated with the source
};
/// Hash functor that uses the value itself, converted to size_t, as its hash
template <typename T>
struct IdentityHash {
    /// Returns value converted to size_t without any mixing
    [[nodiscard]] size_t operator()(T value) const noexcept {
        const auto hash = static_cast<size_t>(value);
        return hash;
    }
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateColorBuffer(size_t index);
/// Invalidate the contents of the depth buffer
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateDepthBuffer();
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
std::mutex mutex;
private:
/// Iterate over all page indices in a range
/// @param addr Start address of the range
/// @param size Size of the range in bytes
/// @param func Callable invoked with each page index (address >> PAGE_BITS), in ascending
///             order. When it returns bool, a true result stops the iteration early.
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. The invoke_result trait class itself
    // is never bool, so comparing against it would silently disable the early-exit path.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    // The page holding the last byte of the range is included in the iteration
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Iterate over all page indices touched by the GPU address range starting at addr
/// @param addr First address of the range
/// @param size Size of the range in bytes; the last visited page is the one holding addr + size - 1
/// @param func Callable invoked with each page index (address >> PAGE_BITS).
///             When it returns bool, returning true stops the iteration early.
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. Comparing the invoke_result trait
    // struct itself against bool is always false, which silently disabled the early exit.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
RenderTargets render_targets;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
u64 modification_tick = 0;
u64 frame_tick = 0;
typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P>
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -820,40 +437,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
}
}
/// Invalidate the contents of the color buffer bound at the given index.
/// The render target slot is refreshed first, then the backing image has its CPU and GPU
/// modification flags cleared before the runtime is notified.
template <class P>
void TextureCache<P>::InvalidateColorBuffer(size_t index) {
    // The slot is updated in place with whatever view is currently found for this index
    ImageViewId& view_id = render_targets.color_buffer_ids[index];
    view_id = FindColorBuffer(index, false);
    if (view_id) {
        // The old contents are being discarded, so pending modifications no longer matter
        ImageView& view = slot_image_views[view_id];
        Image& target = slot_images[view.image_id];
        target.flags &= ~ImageFlagBits::CpuModified;
        target.flags &= ~ImageFlagBits::GpuModified;
        runtime.InvalidateColorBuffer(view, index);
    } else {
        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
    }
}
/// Invalidate the contents of the depth buffer.
/// The render target slot is refreshed first, then the backing image has its CPU and GPU
/// modification flags cleared before the runtime is notified.
template <class P>
void TextureCache<P>::InvalidateDepthBuffer() {
    // The slot is updated in place with whatever depth view is currently found
    ImageViewId& view_id = render_targets.depth_buffer_id;
    view_id = FindDepthBuffer(false);
    if (view_id) {
        // The old contents are being discarded, so pending modifications no longer matter
        ImageView& view = slot_image_views[view_id];
        ImageBase& target = slot_images[view.image_id];
        target.flags &= ~ImageFlagBits::CpuModified;
        target.flags &= ~ImageFlagBits::GpuModified;
        runtime.InvalidateDepthBuffer(view);
    } else {
        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
    }
}
template <class P>
typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
// TODO: Properly implement this

View File

@@ -0,0 +1,385 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <mutex>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;
/// Texture cache parameterized by a policy type P.
/// P provides the Runtime, Image, ImageAlloc, ImageView, Sampler and Framebuffer types as well
/// as the compile-time feature flags mirrored in the constants below.
template <class P>
class TextureCache {
    /// Address shift for caching images into a hash table
    static constexpr u64 PAGE_BITS = 20;

    /// Enables debugging features to the texture cache
    static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
    /// Implement blits as copies between framebuffers
    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
    /// True when some copies have to be emulated
    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
    /// True when the API can provide info about the memory of the device.
    static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;

    /// Image view ID for null descriptors
    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
    /// Sampler ID for bugged sampler ids
    static constexpr SamplerId NULL_SAMPLER_ID{0};

    static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
    static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;

    using Runtime = typename P::Runtime;
    using Image = typename P::Image;
    using ImageAlloc = typename P::ImageAlloc;
    using ImageView = typename P::ImageView;
    using Sampler = typename P::Sampler;
    using Framebuffer = typename P::Framebuffer;

    /// Source and destination images (and their formats) taking part in a blit
    struct BlitImages {
        ImageId dst_id;
        ImageId src_id;
        PixelFormat dst_format;
        PixelFormat src_format;
    };

    /// Hash functor that returns the value itself converted to size_t
    template <typename T>
    struct IdentityHash {
        [[nodiscard]] size_t operator()(T value) const noexcept {
            return static_cast<size_t>(value);
        }
    };

public:
    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);

    /// Notify the cache that a new frame has been queued
    void TickFrame();

    /// Return a constant reference to the given image view id
    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;

    /// Return a reference to the given image view id
    [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;

    /// Mark an image as modified from the GPU
    void MarkModification(ImageId id) noexcept;

    /// Fill image_view_ids with the graphics images in indices
    void FillGraphicsImageViews(std::span<const u32> indices,
                                std::span<ImageViewId> image_view_ids);

    /// Fill image_view_ids with the compute images in indices
    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);

    /// Get the sampler from the graphics descriptor table in the specified index
    Sampler* GetGraphicsSampler(u32 index);

    /// Get the sampler from the compute descriptor table in the specified index
    Sampler* GetComputeSampler(u32 index);

    /// Refresh the state for graphics image view and sampler descriptors
    void SynchronizeGraphicsDescriptors();

    /// Refresh the state for compute image view and sampler descriptors
    void SynchronizeComputeDescriptors();

    /// Update bound render targets and upload memory if necessary
    /// @param is_clear True when the render targets are being used for clears
    void UpdateRenderTargets(bool is_clear);

    /// Find a framebuffer with the currently bound render targets
    /// UpdateRenderTargets should be called before this
    Framebuffer* GetFramebuffer();

    /// Mark images in a range as modified from the CPU
    void WriteMemory(VAddr cpu_addr, size_t size);

    /// Download contents of host images to guest memory in a region
    void DownloadMemory(VAddr cpu_addr, size_t size);

    /// Remove images in a region
    void UnmapMemory(VAddr cpu_addr, size_t size);

    /// Remove images in a region
    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);

    /// Blit an image with the given parameters
    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                   const Tegra::Engines::Fermi2D::Surface& src,
                   const Tegra::Engines::Fermi2D::Config& copy);

    /// Try to find a cached image view in the given CPU address
    [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);

    /// Return true when there are uncommitted images to be downloaded
    [[nodiscard]] bool HasUncommittedFlushes() const noexcept;

    /// Return true when the caller should wait for async downloads
    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;

    /// Commit asynchronous downloads
    void CommitAsyncFlushes();

    /// Pop asynchronous downloads
    void PopAsyncFlushes();

    /// Return true when a CPU region is modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);

    std::mutex mutex;

private:
    /// Iterate over all page indices touched by the CPU address range starting at addr
    /// @param func Callable invoked with each page index (address >> PAGE_BITS).
    ///             When it returns bool, returning true stops the iteration early.
    template <typename Func>
    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
        // invoke_result_t names the callable's return type. Comparing the invoke_result trait
        // struct itself against bool is always false, which silently disabled the early exit.
        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
            if constexpr (RETURNS_BOOL) {
                if (func(page)) {
                    break;
                }
            } else {
                func(page);
            }
        }
    }

    /// Iterate over all page indices touched by the GPU address range starting at addr
    /// @param func Callable invoked with each page index (address >> PAGE_BITS).
    ///             When it returns bool, returning true stops the iteration early.
    template <typename Func>
    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
        // See ForEachCPUPage: the _t alias is required for the bool detection to work
        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
            if constexpr (RETURNS_BOOL) {
                if (func(page)) {
                    break;
                }
            } else {
                func(page);
            }
        }
    }

    /// Runs the Garbage Collector.
    void RunGarbageCollector();

    /// Fills image_view_ids in the image views in indices
    void FillImageViews(DescriptorTable<TICEntry>& table,
                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
                        std::span<ImageViewId> image_view_ids);

    /// Find or create an image view in the guest descriptor table
    ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
                               std::span<ImageViewId> cached_image_view_ids, u32 index);

    /// Find or create a framebuffer with the given render target parameters
    FramebufferId GetFramebufferId(const RenderTargets& key);

    /// Refresh the contents (pixel data) of an image
    void RefreshContents(Image& image, ImageId image_id);

    /// Upload data from guest to an image
    template <typename StagingBuffer>
    void UploadImageContents(Image& image, StagingBuffer& staging_buffer);

    /// Find or create an image view from a guest descriptor
    [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);

    /// Create a new image view from a guest descriptor
    [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);

    /// Find or create an image from the given parameters
    [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                            RelaxedOptions options = RelaxedOptions{});

    /// Find an image from the given parameters
    [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options);

    /// Create an image from the given parameters
    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options);

    /// Create a new image and join perfectly matching existing images
    /// Remove joined images from the cache
    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);

    /// Return a blit image pair from the given guest blit parameters
    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
                                           const Tegra::Engines::Fermi2D::Surface& src);

    /// Find or create a sampler from a guest descriptor sampler
    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);

    /// Find or create an image view for the given color buffer index
    [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);

    /// Find or create an image view for the depth buffer
    [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);

    /// Find or create a view for a render target with the given image parameters
    [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
                                                   bool is_clear);

    /// Iterates over all the images in a region calling func
    template <typename Func>
    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);

    template <typename Func>
    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);

    template <typename Func>
    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);

    /// Iterates over all the images in a region calling func
    template <typename Func>
    void ForEachSparseSegment(ImageBase& image, Func&& func);

    /// Find or create an image view in the given image with the passed parameters
    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);

    /// Register image in the page table
    void RegisterImage(ImageId image);

    /// Unregister image from the page table
    void UnregisterImage(ImageId image);

    /// Track CPU reads and writes for image
    void TrackImage(ImageBase& image, ImageId image_id);

    /// Stop tracking CPU reads and writes for image
    void UntrackImage(ImageBase& image, ImageId image_id);

    /// Delete image from the cache
    void DeleteImage(ImageId image);

    /// Remove image views references from the cache
    void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);

    /// Remove framebuffers using the given image views from the cache
    void RemoveFramebuffers(std::span<const ImageViewId> removed_views);

    /// Mark an image as modified from the GPU
    void MarkModification(ImageBase& image) noexcept;

    /// Synchronize image aliases, copying data if needed
    void SynchronizeAliases(ImageId image_id);

    /// Prepare an image to be used
    void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);

    /// Prepare an image view to be used
    void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);

    /// Execute copies from one image to the other, even if they are incompatible
    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);

    /// Bind an image view as render target, downloading resources preemptively if needed
    void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);

    /// Create a render target from a given image and image view parameters
    [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
        ImageId, const ImageViewInfo& view_info);

    /// Returns true if the current clear parameters clear the whole image of a given image view
    [[nodiscard]] bool IsFullClear(ImageViewId id);

    Runtime& runtime;
    VideoCore::RasterizerInterface& rasterizer;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
    Tegra::MemoryManager& gpu_memory;

    // The descriptor tables are initialized from gpu_memory, so they must stay declared after it
    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
    std::vector<SamplerId> graphics_sampler_ids;
    std::vector<ImageViewId> graphics_image_view_ids;

    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
    std::vector<SamplerId> compute_sampler_ids;
    std::vector<ImageViewId> compute_image_view_ids;

    RenderTargets render_targets;

    std::unordered_map<TICEntry, ImageViewId> image_views;
    std::unordered_map<TSCEntry, SamplerId> samplers;
    std::unordered_map<RenderTargets, FramebufferId> framebuffers;

    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;

    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;

    VAddr virtual_invalid_space{};

    bool has_deleted_images = false;
    u64 total_used_memory = 0;
    u64 minimum_memory;
    u64 expected_memory;
    u64 critical_memory;

    SlotVector<Image> slot_images;
    SlotVector<ImageMapView> slot_map_views;
    SlotVector<ImageView> slot_image_views;
    SlotVector<ImageAlloc> slot_image_allocs;
    SlotVector<Sampler> slot_samplers;
    SlotVector<Framebuffer> slot_framebuffers;

    // TODO: This data structure is not optimal and it should be reworked
    std::vector<ImageId> uncommitted_downloads;
    std::queue<std::vector<ImageId>> committed_downloads;

    static constexpr size_t TICKS_TO_DESTROY = 6;
    DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
    DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
    DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;

    std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;

    u64 modification_tick = 0;
    u64 frame_tick = 0;
    typename SlotVector<Image>::Iterator deletion_iterator;
};
} // namespace VideoCommon

View File

@@ -151,6 +151,76 @@ private:
const IntType& m_Bits;
};
enum class IntegerEncoding { JustBits, Quint, Trit };
/// A value of an integer sequence together with the way it is encoded:
/// the encoding kind, the number of plain bits, and the decoded bit / trit / quint parts.
struct IntegerEncodedValue {
    constexpr IntegerEncodedValue() = default;

    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
        : encoding{encoding_}, num_bits{num_bits_} {}

    /// True when both values use the same encoding kind and the same number of bits
    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
        if (encoding != other.encoding) {
            return false;
        }
        return num_bits == other.num_bits;
    }

    // Returns the number of bits required to encode num_vals values.
    u32 GetBitLength(u32 num_vals) const {
        const u32 plain_bits = num_bits * num_vals;
        switch (encoding) {
        case IntegerEncoding::Trit:
            // ceil(num_vals * 8 / 5) additional bits
            return plain_bits + (num_vals * 8 + 4) / 5;
        case IntegerEncoding::Quint:
            // ceil(num_vals * 7 / 3) additional bits
            return plain_bits + (num_vals * 7 + 2) / 3;
        default:
            return plain_bits;
        }
    }

    IntegerEncoding encoding{};
    u32 num_bits = 0;
    u32 bit_value = 0;
    // A value carries either a quint or a trit part, so both share storage
    union {
        u32 quint_value = 0;
        u32 trit_value;
    };
};
// Returns the encoding to use for values that can take no more than mav_value.
// Starting at mav_value and counting down, the first candidate whose successor is
// 2^n, 3 * 2^n or 5 * 2^n selects plain bits, trits or quints respectively.
// Falls back to a zero-bit JustBits encoding when mav_value reaches 0.
static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
    for (; mav_value > 0; --mav_value) {
        const u32 range = mav_value + 1;
        // mav_value == 2^n - 1: every set bit of mav_value is a plain bit
        if ((range & (range - 1)) == 0) {
            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
        }
        // mav_value == 3 * 2^n - 1: one trit plus n plain bits
        if (range % 3 == 0) {
            const u32 third = range / 3;
            if ((third & (third - 1)) == 0) {
                return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(third - 1));
            }
        }
        // mav_value == 5 * 2^n - 1: one quint plus n plain bits
        if (range % 5 == 0) {
            const u32 fifth = range / 5;
            if ((fifth & (fifth - 1)) == 0) {
                return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(fifth - 1));
            }
        }
        // Not representable as a bounded integer sequence, try the next smaller value
    }
    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
}
static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
std::array<IntegerEncodedValue, 256> encodings{};
for (std::size_t i = 0; i < encodings.size(); ++i) {
encodings[i] = CreateEncoding(static_cast<u32>(i));
}
return encodings;
}
static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
namespace Tegra::Texture::ASTC {
using IntegerEncodedVector = boost::container::static_vector<
IntegerEncodedValue, 256,
@@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
return params;
}
static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
u32 blockHeight) {
// Don't actually care about the void extent, just read the bits...
for (s32 i = 0; i < 4; ++i) {
strm.ReadBits<13>();
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
// is the same as [(num_bits - 1):0] and repeats all the way down.
template <typename IntType>
static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
if (num_bits == 0 || to_bit == 0) {
return 0;
}
// Decode the RGBA components and renormalize them to the range [0, 255]
u16 r = static_cast<u16>(strm.ReadBits<16>());
u16 g = static_cast<u16>(strm.ReadBits<16>());
u16 b = static_cast<u16>(strm.ReadBits<16>());
u16 a = static_cast<u16>(strm.ReadBits<16>());
u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
(static_cast<u32>(a) & 0xFF00) << 16;
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = rgba;
const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
IntType res = v;
u32 reslen = num_bits;
while (reslen < to_bit) {
u32 comp = 0;
if (num_bits > to_bit - reslen) {
u32 newshift = to_bit - reslen;
comp = num_bits - newshift;
num_bits = newshift;
}
res = static_cast<IntType>(res << num_bits);
res = static_cast<IntType>(res | (v >> comp));
reslen += num_bits;
}
return res;
}
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
for (u32 j = 0; j < blockHeight; j++) {
for (u32 i = 0; i < blockWidth; i++) {
outBuf[j * blockWidth + i] = 0xFFFF00FF;
}
/// Number of distinct values representable with num_bits bits, i.e. 2^num_bits
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    constexpr std::size_t one = 1;
    return one << num_bits;
}
/// Builds a lookup table with 2^num_bits entries where entry v holds
/// Replicate(v, num_bits, to_bit)
template <typename IntType, u32 num_bits, u32 to_bit>
static constexpr auto MakeReplicateTable() {
    std::array<IntType, NumReplicateEntries(num_bits)> table{};
    // The bound is converted to IntType, the same type as the index it is compared against
    const IntType entry_count = static_cast<IntType>(std::size(table));
    IntType value = 0;
    while (value < entry_count) {
        table[value] = Replicate(value, num_bits, to_bit);
        ++value;
    }
    return table;
}
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
/// to the runtime implementation
static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
#undef READ_INT_VALUES
}
/// Fills a blockWidth x blockHeight block of outBuf with the single color read from strm.
/// Four 13-bit fields are consumed and ignored, then four 16-bit channels are read in
/// R, G, B, A order and reduced to their high byte.
static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
                              u32 blockHeight) {
    // The void extent itself is irrelevant here, the reads only advance the stream
    constexpr s32 NUM_EXTENT_FIELDS = 4;
    for (s32 field = 0; field < NUM_EXTENT_FIELDS; ++field) {
        strm.ReadBits<13>();
    }

    // Read order matters: the channels are stored as R, G, B, A
    const u16 red = static_cast<u16>(strm.ReadBits<16>());
    const u16 green = static_cast<u16>(strm.ReadBits<16>());
    const u16 blue = static_cast<u16>(strm.ReadBits<16>());
    const u16 alpha = static_cast<u16>(strm.ReadBits<16>());

    // Keep the high byte of every channel and pack them with red in the lowest byte
    const u32 red_part = static_cast<u32>(red) >> 8;
    const u32 green_part = static_cast<u32>(green) & 0xFF00;
    const u32 blue_part = (static_cast<u32>(blue) & 0xFF00) << 8;
    const u32 alpha_part = (static_cast<u32>(alpha) & 0xFF00) << 16;
    const u32 packed = red_part | green_part | blue_part | alpha_part;

    for (u32 row = 0; row < blockHeight; ++row) {
        const u32 row_start = row * blockWidth;
        for (u32 column = 0; column < blockWidth; ++column) {
            outBuf[row_start + column] = packed;
        }
    }
}
/// Fills a blockWidth x blockHeight block of outBuf with the fixed value 0xFFFF00FF
static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
    constexpr u32 ERROR_COLOR = 0xFFFF00FF;
    u32 row = 0;
    while (row < blockHeight) {
        const u32 row_start = row * blockWidth;
        for (u32 column = 0; column < blockWidth; ++column) {
            outBuf[row_start + column] = ERROR_COLOR;
        }
        ++row;
    }
}
static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
InputBitStream strm(inBuf);

View File

@@ -9,117 +9,6 @@
namespace Tegra::Texture::ASTC {
enum class IntegerEncoding { JustBits, Quint, Trit };
/// A value of an integer sequence together with the way it is encoded:
/// the encoding kind, the number of plain bits, and the decoded bit / trit / quint parts.
struct IntegerEncodedValue {
    constexpr IntegerEncodedValue() = default;

    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
        : encoding{encoding_}, num_bits{num_bits_} {}

    /// True when both values use the same encoding kind and the same number of bits
    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
        if (encoding != other.encoding) {
            return false;
        }
        return num_bits == other.num_bits;
    }

    // Returns the number of bits required to encode num_vals values.
    u32 GetBitLength(u32 num_vals) const {
        const u32 plain_bits = num_bits * num_vals;
        switch (encoding) {
        case IntegerEncoding::Trit:
            // ceil(num_vals * 8 / 5) additional bits
            return plain_bits + (num_vals * 8 + 4) / 5;
        case IntegerEncoding::Quint:
            // ceil(num_vals * 7 / 3) additional bits
            return plain_bits + (num_vals * 7 + 2) / 3;
        default:
            return plain_bits;
        }
    }

    IntegerEncoding encoding{};
    u32 num_bits = 0;
    u32 bit_value = 0;
    // A value carries either a quint or a trit part, so both share storage
    union {
        u32 quint_value = 0;
        u32 trit_value;
    };
};
// Returns the encoding to use for values that can take no more than mav_value.
// Starting at mav_value and counting down, the first candidate whose successor is
// 2^n, 3 * 2^n or 5 * 2^n selects plain bits, trits or quints respectively.
// Falls back to a zero-bit JustBits encoding when mav_value reaches 0.
constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
    for (; mav_value > 0; --mav_value) {
        const u32 range = mav_value + 1;
        // mav_value == 2^n - 1: every set bit of mav_value is a plain bit
        if ((range & (range - 1)) == 0) {
            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
        }
        // mav_value == 3 * 2^n - 1: one trit plus n plain bits
        if (range % 3 == 0) {
            const u32 third = range / 3;
            if ((third & (third - 1)) == 0) {
                return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(third - 1));
            }
        }
        // mav_value == 5 * 2^n - 1: one quint plus n plain bits
        if (range % 5 == 0) {
            const u32 fifth = range / 5;
            if ((fifth & (fifth - 1)) == 0) {
                return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(fifth - 1));
            }
        }
        // Not representable as a bounded integer sequence, try the next smaller value
    }
    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
}
constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
std::array<IntegerEncodedValue, 256> encodings{};
for (std::size_t i = 0; i < encodings.size(); ++i) {
encodings[i] = CreateEncoding(static_cast<u32>(i));
}
return encodings;
}
constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
// Replicates the low num_bits bits of val until the result is to_bit bits wide.
// The pattern is repeated from the most significant end downwards; when fewer than
// num_bits bits remain to be filled, only the top bits of the pattern are used.
// Returns 0 when either num_bits or to_bit is 0.
template <typename IntType>
constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
if (num_bits == 0 || to_bit == 0) {
return 0;
}
// v is the pattern: val masked down to its low num_bits bits
const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
IntType res = v;
// reslen tracks how many bits of res are filled so far
u32 reslen = num_bits;
while (reslen < to_bit) {
// comp is the number of low pattern bits dropped when only part of the pattern still fits
u32 comp = 0;
if (num_bits > to_bit - reslen) {
u32 newshift = to_bit - reslen;
comp = num_bits - newshift;
// From here on num_bits is the shortened chunk width; this ends the loop after this pass
num_bits = newshift;
}
// Make room for the next chunk and append the (possibly truncated) pattern
res = static_cast<IntType>(res << num_bits);
res = static_cast<IntType>(res | (v >> comp));
reslen += num_bits;
}
return res;
}
// Number of distinct values representable with num_bits bits, i.e. 2^num_bits.
constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    constexpr std::size_t one = 1;
    return one << num_bits;
}
// Builds a table with NumReplicateEntries(num_bits) entries where entry i holds
// Replicate(i, num_bits, to_bit).
template <typename IntType, u32 num_bits, u32 to_bit>
constexpr auto MakeReplicateTable() {
    std::array<IntType, NumReplicateEntries(num_bits)> table{};
    // Index with std::size_t rather than IntType: when IntType cannot hold the table size
    // (e.g. an 8-bit type with num_bits == 8) the bound would be truncated to 0 and the
    // table would be left unfilled.
    for (std::size_t index = 0; index < table.size(); ++index) {
        table[index] = Replicate(static_cast<IntType>(index), num_bits, to_bit);
    }
    return table;
}
// Precomputed Replicate() results for widening 6-, 7- and 8-bit values to 8 bits.
// Each table is indexed by the input value.
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
// Declaration only; the definition is not part of this section.
// Reads from the read-only span `data` and writes through the `output` span.
// NOTE(review): presumably decodes ASTC blocks of block_width x block_height texels for a
// width x height x depth image - confirm units and output format against the definition.
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);

View File

@@ -1161,7 +1161,8 @@ void Device::CollectPhysicalMemoryInfo() {
}
void Device::CollectToolingInfo() {
if (!ext_tooling_info) {
if (!ext_tooling_info || true) {
// Disabled to work around https://github.com/yuzu-emu/yuzu/issues/6835
return;
}
const auto vkGetPhysicalDeviceToolPropertiesEXT =

View File

@@ -228,7 +228,9 @@ void MemoryCommit::Release() {
// Caches the physical device memory properties and its bufferImageGranularity limit.
// The stale `export_allocations{export_allocations_} {}` line is dropped: it initialized
// export_allocations a second time and closed the constructor before
// buffer_image_granularity was initialized.
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
      export_allocations{export_allocations_},
      buffer_image_granularity{
          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
MemoryAllocator::~MemoryAllocator() = default;
@@ -258,7 +260,9 @@ MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage
}
// Commits memory for an image and binds the image to it.
// The requested size is rounded up to buffer_image_granularity before committing.
// The stale commit made from the unaligned requirements is dropped: it redeclared
// `commit` and bypassed the alignment.
MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
    VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image);
    // NOTE(review): presumably keeps a following buffer allocation out of the image's
    // granularity page - confirm against the Vulkan bufferImageGranularity rules.
    requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity);
    auto commit = Commit(requirements, usage);
    image.BindMemory(commit.Memory(), commit.Offset());
    return commit;
}

View File

@@ -123,6 +123,8 @@ private:
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
const bool export_allocations; ///< True when memory allocations have to be exported.
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
};
/// Returns true when a memory usage is guaranteed to be host visible.

View File

@@ -24,6 +24,36 @@
<layout class="QHBoxLayout" name="GeneralHorizontalLayout">
<item>
<layout class="QVBoxLayout" name="GeneralVerticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLabel" name="fps_cap_label">
<property name="text">
<string>Framerate Cap</string>
</property>
<property name="toolTip">
<string>Requires the use of the FPS Limiter Toggle hotkey to take effect.</string>
</property>
</widget>
</item>
<item>
<widget class="QSpinBox" name="fps_cap">
<property name="suffix">
<string>x</string>
</property>
<property name="minimum">
<number>1</number>
</property>
<property name="maximum">
<number>1000</number>
</property>
<property name="value">
<number>500</number>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
@@ -51,36 +81,6 @@
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLabel" name="fps_cap_label">
<property name="text">
<string>Framerate Cap</string>
</property>
<property name="toolTip">
<string>Requires the use of the FPS Limiter Toggle hotkey to take effect.</string>
</property>
</widget>
</item>
<item>
<widget class="QSpinBox" name="fps_cap">
<property name="suffix">
<string>x</string>
</property>
<property name="minimum">
<number>1</number>
</property>
<property name="maximum">
<number>1000</number>
</property>
<property name="value">
<number>500</number>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="use_multi_core">
<property name="text">

View File

@@ -82,14 +82,17 @@
<string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
</property>
<property name="text">
<string>Use asynchronous shader building</string>
<string>Use asynchronous shader building (hack)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_fast_gpu_time">
<property name="toolTip">
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
<string>Use Fast GPU Time</string>
<string>Use Fast GPU Time (hack)</string>
</property>
</widget>
</item>

View File

@@ -309,11 +309,14 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
buttons_param[button_id].Clear();
button_map[button_id]->setText(tr("[not set]"));
});
context_menu.addAction(tr("Toggle button"), [&] {
const bool toggle_value = !buttons_param[button_id].Get("toggle", false);
buttons_param[button_id].Set("toggle", toggle_value);
button_map[button_id]->setText(ButtonToText(buttons_param[button_id]));
});
if (buttons_param[button_id].Has("toggle")) {
context_menu.addAction(tr("Toggle button"), [&] {
const bool toggle_value =
!buttons_param[button_id].Get("toggle", false);
buttons_param[button_id].Set("toggle", toggle_value);
button_map[button_id]->setText(ButtonToText(buttons_param[button_id]));
});
}
if (buttons_param[button_id].Has("threshold")) {
context_menu.addAction(tr("Set threshold"), [&] {
const int button_threshold = static_cast<int>(

View File

@@ -122,6 +122,7 @@ void PlayerControlPreview::UpdateColors() {
colors.slider_arrow = QColor(14, 15, 18);
colors.font2 = QColor(255, 255, 255);
colors.indicator = QColor(170, 238, 255);
colors.indicator2 = QColor(100, 255, 100);
colors.deadzone = QColor(204, 136, 136);
colors.slider_button = colors.button;
}
@@ -139,6 +140,7 @@ void PlayerControlPreview::UpdateColors() {
colors.slider_arrow = QColor(65, 68, 73);
colors.font2 = QColor(0, 0, 0);
colors.indicator = QColor(0, 0, 200);
colors.indicator2 = QColor(0, 150, 0);
colors.deadzone = QColor(170, 0, 0);
colors.slider_button = QColor(153, 149, 149);
}
@@ -317,8 +319,7 @@ void PlayerControlPreview::DrawLeftController(QPainter& p, const QPointF center)
using namespace Settings::NativeAnalog;
DrawJoystick(p, center + QPointF(9, -69) + (axis_values[LStick].value * 8), 1.8f,
button_values[Settings::NativeButton::LStick]);
DrawRawJoystick(p, center + QPointF(-140, 90), axis_values[LStick].raw_value,
axis_values[LStick].properties);
DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0));
}
using namespace Settings::NativeButton;
@@ -432,8 +433,7 @@ void PlayerControlPreview::DrawRightController(QPainter& p, const QPointF center
using namespace Settings::NativeAnalog;
DrawJoystick(p, center + QPointF(-9, 11) + (axis_values[RStick].value * 8), 1.8f,
button_values[Settings::NativeButton::RStick]);
DrawRawJoystick(p, center + QPointF(140, 90), axis_values[RStick].raw_value,
axis_values[RStick].properties);
DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90));
}
using namespace Settings::NativeButton;
@@ -547,8 +547,7 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
DrawJoystick(p, center + QPointF(-65, -65) + (l_stick.value * 7), 1.62f, l_button);
DrawJoystick(p, center + QPointF(65, 12) + (r_stick.value * 7), 1.62f, r_button);
DrawRawJoystick(p, center + QPointF(-180, 90), l_stick.raw_value, l_stick.properties);
DrawRawJoystick(p, center + QPointF(180, 90), r_stick.raw_value, r_stick.properties);
DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90));
}
using namespace Settings::NativeButton;
@@ -634,8 +633,7 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
DrawJoystick(p, center + QPointF(-171, -41) + (l_stick.value * 4), 1.0f, l_button);
DrawJoystick(p, center + QPointF(171, 8) + (r_stick.value * 4), 1.0f, r_button);
DrawRawJoystick(p, center + QPointF(-50, 0), l_stick.raw_value, l_stick.properties);
DrawRawJoystick(p, center + QPointF(50, 0), r_stick.raw_value, r_stick.properties);
DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0));
}
using namespace Settings::NativeButton;
@@ -728,10 +726,7 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center)
button_values[Settings::NativeButton::LStick]);
DrawProJoystick(p, center + QPointF(51, 0), axis_values[RStick].value, 11,
button_values[Settings::NativeButton::RStick]);
DrawRawJoystick(p, center + QPointF(-50, 105), axis_values[LStick].raw_value,
axis_values[LStick].properties);
DrawRawJoystick(p, center + QPointF(50, 105), axis_values[RStick].raw_value,
axis_values[RStick].properties);
DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105));
}
using namespace Settings::NativeButton;
@@ -821,10 +816,7 @@ void PlayerControlPreview::DrawGCController(QPainter& p, const QPointF center) {
p.setBrush(colors.font);
DrawSymbol(p, center + QPointF(61, 37) + (axis_values[RStick].value * 9.5f), Symbol::C,
1.0f);
DrawRawJoystick(p, center + QPointF(-198, -125), axis_values[LStick].raw_value,
axis_values[LStick].properties);
DrawRawJoystick(p, center + QPointF(198, -125), axis_values[RStick].raw_value,
axis_values[RStick].properties);
DrawRawJoystick(p, center + QPointF(-198, -125), center + QPointF(198, -125));
}
using namespace Settings::NativeButton;
@@ -2358,8 +2350,33 @@ void PlayerControlPreview::DrawGCJoystick(QPainter& p, const QPointF center, boo
DrawCircle(p, center, 7.5f);
}
void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, const QPointF value,
const Input::AnalogProperties& properties) {
// Draws the raw joystick previews: the right stick at center_right unless the controller
// is a left Joy-Con, then the left stick at center_left unless it is a right Joy-Con.
// For each stick the properties are drawn first, followed by one dot for raw_value
// (selected with colors.indicator) and one for value (selected with colors.indicator2).
void PlayerControlPreview::DrawRawJoystick(QPainter& p, QPointF center_left, QPointF center_right) {
    using namespace Settings::NativeAnalog;

    const auto draw_stick = [this, &p](const QPointF origin, const auto& stick) {
        DrawJoystickProperties(p, origin, stick.properties);

        p.setPen(colors.indicator);
        p.setBrush(colors.indicator);
        DrawJoystickDot(p, origin, stick.raw_value, stick.properties);

        p.setPen(colors.indicator2);
        p.setBrush(colors.indicator2);
        DrawJoystickDot(p, origin, stick.value, stick.properties);
    };

    const bool has_right_stick = controller_type != Settings::ControllerType::LeftJoycon;
    const bool has_left_stick = controller_type != Settings::ControllerType::RightJoycon;

    if (has_right_stick) {
        draw_stick(center_right, axis_values[RStick]);
    }
    if (has_left_stick) {
        draw_stick(center_left, axis_values[LStick]);
    }
}
void PlayerControlPreview::DrawJoystickProperties(QPainter& p, const QPointF center,
const Input::AnalogProperties& properties) {
constexpr float size = 45.0f;
const float range = size * properties.range;
const float deadzone = size * properties.deadzone;
@@ -2376,10 +2393,14 @@ void PlayerControlPreview::DrawRawJoystick(QPainter& p, const QPointF center, co
pen.setColor(colors.deadzone);
p.setPen(pen);
DrawCircle(p, center, deadzone);
}
// Draws the dot marking a stick position: `value` scaled by the stick range, relative to
// `center`. The pen and brush are left as the caller set them. DrawRawJoystick selects
// colors.indicator or colors.indicator2 before each call, and resetting them here would
// draw every dot in colors.indicator.
void PlayerControlPreview::DrawJoystickDot(QPainter& p, const QPointF center, const QPointF value,
                                           const Input::AnalogProperties& properties) {
    constexpr float size = 45.0f;
    const float range = size * properties.range;

    // Dot pointer
    DrawCircle(p, center + (value * range), 2);
}

View File

@@ -90,6 +90,7 @@ private:
QColor highlight2{};
QColor transparent{};
QColor indicator{};
QColor indicator2{};
QColor led_on{};
QColor led_off{};
QColor slider{};
@@ -139,7 +140,10 @@ private:
// Draw joystick functions
void DrawJoystick(QPainter& p, QPointF center, float size, bool pressed);
void DrawJoystickSideview(QPainter& p, QPointF center, float angle, float size, bool pressed);
void DrawRawJoystick(QPainter& p, QPointF center, QPointF value,
void DrawRawJoystick(QPainter& p, QPointF center_left, QPointF center_right);
void DrawJoystickProperties(QPainter& p, QPointF center,
const Input::AnalogProperties& properties);
void DrawJoystickDot(QPainter& p, QPointF center, QPointF value,
const Input::AnalogProperties& properties);
void DrawProJoystick(QPainter& p, QPointF center, QPointF offset, float scalar, bool pressed);
void DrawGCJoystick(QPainter& p, QPointF center, bool pressed);

View File

@@ -2814,8 +2814,6 @@ void GMainWindow::OnToggleFilterBar() {
}
void GMainWindow::OnCaptureScreenshot() {
OnPauseGame();
const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
const auto screenshot_path =
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir));
@@ -2827,23 +2825,22 @@ void GMainWindow::OnCaptureScreenshot() {
.arg(date);
if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
OnStartGame();
return;
}
#ifdef _WIN32
if (UISettings::values.enable_screenshot_save_as) {
OnPauseGame();
filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
tr("PNG Image (*.png)"));
OnStartGame();
if (filename.isEmpty()) {
OnStartGame();
return;
}
}
#endif
render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(),
filename);
OnStartGame();
}
// TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant

View File

@@ -1,5 +1,6 @@
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
# Credits to Samantas5855 and others for this function.
function(create_resource file output filename)
# Read hex data from file
file(READ ${file} filedata HEX)

View File

@@ -122,6 +122,10 @@ void EmuWindow_SDL2::OnResize() {
UpdateCurrentFramebufferLayout(width, height);
}
// Shows the mouse cursor when show_cursor is true and hides it otherwise.
void EmuWindow_SDL2::ShowCursor(bool show_cursor) {
    if (show_cursor) {
        SDL_ShowCursor(SDL_ENABLE);
        return;
    }
    SDL_ShowCursor(SDL_DISABLE);
}
void EmuWindow_SDL2::Fullscreen() {
switch (Settings::values.fullscreen_mode.GetValue()) {
case Settings::FullscreenMode::Exclusive:
@@ -228,6 +232,7 @@ void EmuWindow_SDL2::WaitEvent() {
}
}
// Credits to Samantas5855 and others for this function.
void EmuWindow_SDL2::SetWindowIcon() {
SDL_RWops* const yuzu_icon_stream = SDL_RWFromConstMem((void*)yuzu_icon, yuzu_icon_size);
if (yuzu_icon_stream == nullptr) {

View File

@@ -67,6 +67,9 @@ protected:
/// Called by WaitEvent when any event that may cause the window to be resized occurs
void OnResize();
/// Called when users want to hide the mouse cursor
void ShowCursor(bool show_cursor);
/// Called when user passes the fullscreen parameter flag
void Fullscreen();

View File

@@ -111,6 +111,7 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(InputCommon::InputSubsystem* input_subsyste
if (fullscreen) {
Fullscreen();
ShowCursor(false);
}
window_context = SDL_GL_CreateContext(render_window);

View File

@@ -45,6 +45,7 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(InputCommon::InputSubsystem* input_subsyste
if (fullscreen) {
Fullscreen();
ShowCursor(false);
}
switch (wm.subsystem) {