core: Move PageTable struct into Common.

Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
2019-03-16 22:05:40 -04:00 · 2019-03-16 21:59:45 -04:00 · 2019-03-16 21:59:30 -04:00 · 2019-03-16 21:58:59 -04:00 · 2019-03-16 00:43:29 -04:00 · 2019-03-16 00:43:09 -04:00
67 changed files with 1446 additions and 1132 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,12 +163,6 @@ else()
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
 endif()

-# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
-# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
-if (CMAKE_COMPILER_IS_GNUCC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
-endif()
-
 # Set file offset size to 64 bits.
 #
 # On modern Unixes, this is typically already the case. The lone exception is
@@ -185,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
 # System imported libraries
 # ======================

-find_package(Boost 1.63.0 QUIET)
+find_package(Boost 1.64.0 QUIET)
 if (NOT Boost_FOUND)
-    message(STATUS "Boost 1.63.0 or newer not found, falling back to externals")
+    message(STATUS "Boost 1.64.0 or newer not found, falling back to externals")

    set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
    set(Boost_NO_SYSTEM_PATHS OFF)
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -92,10 +92,14 @@ add_library(common STATIC
    logging/text_formatter.cpp
    logging/text_formatter.h
    math_util.h
+    memory_hook.cpp
+    memory_hook.h
    microprofile.cpp
    microprofile.h
    microprofileui.h
    misc.cpp
+    page_table.cpp
+    page_table.h
    param_package.cpp
    param_package.h
    quaternion.h
@@ -114,6 +118,8 @@ add_library(common STATIC
    threadsafe_queue.h
    timer.cpp
    timer.h
+    uint128.cpp
+    uint128.h
    vector_math.h
    web_result.h
 )
--- a/src/common/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include "core/memory_hook.h"
+#include "common/memory_hook.h"

-namespace Memory {
+namespace Common {

 MemoryHook::~MemoryHook() = default;

-} // namespace Memory
+} // namespace Common
--- a/src/common/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@

 #include "common/common_types.h"

-namespace Memory {
+namespace Common {

 /**
 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
 };

 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Memory
+} // namespace Common
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/page_table.h"
+
+namespace Common {
+
+PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+
+    pointers.resize(num_page_table_entries);
+    attributes.resize(num_page_table_entries);
+
+    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
+    // vector size is subsequently decreased (via resize), the vector might not automatically
+    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
+    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
+
+    pointers.shrink_to_fit();
+    attributes.shrink_to_fit();
+}
+
+} // namespace Common
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+
+enum class PageType : u8 {
+    /// Page is unmapped and should cause an access error.
+    Unmapped,
+    /// Page is mapped to regular memory. This is the only type you can get pointers to.
+    Memory,
+    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+    /// invalidation
+    RasterizerCachedMemory,
+    /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
+    Special,
+};
+
+struct SpecialRegion {
+    enum class Type {
+        DebugHook,
+        IODevice,
+    } type;
+
+    MemoryHookPointer handler;
+
+    bool operator<(const SpecialRegion& other) const {
+        return std::tie(type, handler) < std::tie(other.type, other.handler);
+    }
+
+    bool operator==(const SpecialRegion& other) const {
+        return std::tie(type, handler) == std::tie(other.type, other.handler);
+    }
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works.
+ */
+struct PageTable {
+    explicit PageTable(std::size_t page_size_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accommodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
+    /**
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
+     */
+    std::vector<u8*> pointers;
+
+    /**
+     * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector
+     * are of type `Special`.
+     */
+    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
+
+    /**
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * the corresponding entry in `pointers` MUST be set to null.
+     */
+    std::vector<PageType> attributes;
+
+    const std::size_t page_size_in_bits{};
+};
+
+} // namespace Common
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -0,0 +1,41 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+    u64 remainder = dividend[0] % divisor;
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder};
+    // We ignore dividend[1] / divisor as that overflows
+    const u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    if (remainder >= divisor) {
+        accum++;
+        remainder -= divisor;
+    }
+    return {accum, remainder};
+}
+
+} // namespace Common
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -0,0 +1,14 @@
+#pragma once
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// This function multiplies 2 u64 values and produces a u128 value.
+u128 Multiply64Into128(u64 a, u64 b);
+
+// This function divides a u128 by a u32 value and produces two u64 values:
+// the result of division (first) and the remainder (second)
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -437,8 +437,6 @@ add_library(core STATIC
    loader/xci.h
    memory.cpp
    memory.h
-    memory_hook.cpp
-    memory_hook.h
    memory_setup.h
    perf_stats.cpp
    perf_stats.h
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
        return std::max(parent.core_timing.GetDowncount(), 0);
    }
    u64 GetCNTPCT() override {
-        return parent.core_timing.GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
    }

    ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
    config.tpidr_el0 = &cb->tpidr_el0;
    config.dczid_el0 = 4;
    config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;

    // Unpredictable instructions
    config.define_unpredictable_behaviour = true;
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,7 +12,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"

-namespace Memory {
+namespace Common {
 struct PageTable;
 }

@@ -70,7 +70,7 @@ private:
    Timing::CoreTiming& core_timing;
    DynarmicExclusiveMonitor& exclusive_monitor;

-    Memory::PageTable* current_page_table = nullptr;
+    Common::PageTable* current_page_table = nullptr;
 };

 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"

 namespace Core::Timing {

@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
    return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }

+u64 CpuCyclesToClockCycles(u64 ticks) {
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
 } // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.

 inline s64 msToCycles(int ms) {
    // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
    return cycles * 1000 / BASE_CLOCK_RATE;
 }

+u64 CpuCyclesToClockCycles(u64 ticks);
+
 } // namespace Core::Timing
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -31,7 +31,7 @@ namespace {
 */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
    // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.VMManager().page_table);
+    Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);

    // Initialize new "main" thread
    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -96,7 +96,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
        auto* const thread_owner_process = current_thread->GetOwnerProcess();
        if (previous_process != thread_owner_process) {
            system.Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
+            Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
        }

        cpu_core.LoadContext(new_thread->GetContext());
@@ -199,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
    ASSERT(thread->GetPriority() < THREADPRIO_COUNT);

    // Yield this thread -- sleep for zero time and force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 }

 void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -215,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
    ASSERT(priority < THREADPRIO_COUNT);

    // Sleep for zero time to be able to force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);

    Thread* suggested_thread = nullptr;

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1284,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {

 /// Called when a thread exits
 static void ExitThread() {
-    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
+    auto& system = Core::System::GetInstance();

-    ExitCurrentThread();
-    Core::System::GetInstance().PrepareReschedule();
+    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->Stop();
+    system.CurrentScheduler().RemoveThread(current_thread);
+    system.PrepareReschedule();
 }

 /// Sleep the current thread
@@ -1300,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
        YieldAndWaitForLoadBalancing = -2,
    };

+    auto& system = Core::System::GetInstance();
+    auto& scheduler = system.CurrentScheduler();
+    auto* const current_thread = scheduler.GetCurrentThread();
+
    if (nanoseconds <= 0) {
-        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
        switch (static_cast<SleepType>(nanoseconds)) {
        case SleepType::YieldWithoutLoadBalancing:
-            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithoutLoadBalancing(current_thread);
            break;
        case SleepType::YieldWithLoadBalancing:
-            scheduler.YieldWithLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithLoadBalancing(current_thread);
            break;
        case SleepType::YieldAndWaitForLoadBalancing:
-            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+            scheduler.YieldAndWaitForLoadBalancing(current_thread);
            break;
        default:
            UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
        }
    } else {
-        // Sleep current thread and check for next thread to schedule
-        WaitCurrentThread_Sleep();
-
-        // Create an event to wake the thread up after the specified nanosecond delay has passed
-        GetCurrentThread()->WakeAfterDelay(nanoseconds);
+        current_thread->Sleep(nanoseconds);
    }

    // Reschedule all CPU cores
-    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
-        Core::System::GetInstance().CpuCore(i).PrepareReschedule();
+    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
+        system.CpuCore(i).PrepareReschedule();
+    }
 }

 /// Wait process wide key atomic
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -68,11 +68,6 @@ void Thread::Stop() {
    owner_process->FreeTLSSlot(tls_address);
 }

-void WaitCurrentThread_Sleep() {
-    Thread* thread = GetCurrentThread();
-    thread->SetStatus(ThreadStatus::WaitSleep);
-}
-
 void ExitCurrentThread() {
    Thread* thread = GetCurrentThread();
    thread->Stop();
@@ -391,6 +386,14 @@ void Thread::SetActivity(ThreadActivity value) {
    }
 }

+void Thread::Sleep(s64 nanoseconds) {
+    // Sleep current thread and check for next thread to schedule
+    SetStatus(ThreadStatus::WaitSleep);
+
+    // Create an event to wake the thread up after the specified nanosecond delay has passed
+    WakeAfterDelay(nanoseconds);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////

 /**
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -383,6 +383,9 @@ public:

    void SetActivity(ThreadActivity value);

+    /// Sleeps this thread for the given amount of nanoseconds.
+    void Sleep(s64 nanoseconds);
+
 private:
    explicit Thread(KernelCore& kernel);
    ~Thread() override;
@@ -460,14 +463,4 @@ private:
 */
 Thread* GetCurrentThread();

-/**
- * Waits the current thread on a sleep
- */
-void WaitCurrentThread_Sleep();
-
-/**
- * Stops the current thread and removes it from the thread_list
- */
-void ExitCurrentThread();
-
 } // namespace Kernel
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,13 +7,13 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_hook.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 #include "core/memory_setup.h"

 namespace Kernel {
@@ -177,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {

 ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
                                                   MemoryState state,
-                                                   Memory::MemoryHookPointer mmio_handler) {
+                                                   Common::MemoryHookPointer mmio_handler) {
    // This is the appropriately sized VMA that will turn into our allocation.
    CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
    VirtualMemoryArea& final_vma = vma_handle->second;
@@ -624,7 +624,7 @@ void VMManager::ClearPageTable() {
    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
    page_table.special_regions.clear();
    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 }

 VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+#include "common/page_table.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"

 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
    // Settings for type = MMIO
    /// Physical address of the register area this VMA maps to.
    PAddr paddr = 0;
-    Memory::MemoryHookPointer mmio_handler = nullptr;
+    Common::MemoryHookPointer mmio_handler = nullptr;

    /// Tests if this area can be merged to the right with `next`.
    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
     * @param mmio_handler The handler that will implement read and write for this MMIO region.
     */
    ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
-                                 Memory::MemoryHookPointer mmio_handler);
+                                 Common::MemoryHookPointer mmio_handler);

    /// Unmaps a range of addresses, splitting VMAs as necessary.
    ResultCode UnmapRange(VAddr target, u64 size);
@@ -509,7 +510,7 @@ public:

    /// Each VMManager has its own page table, which is set as the main one when the owning process
    /// is scheduled.
-    Memory::PageTable page_table;
+    Common::PageTable page_table{Memory::PAGE_BITS};

 private:
    using VMAIter = VMAMap::iterator;
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
@@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
    auto& gpu = system_instance.GPU();
    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
    ASSERT(cpu_addr);
-    gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);

    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/page_table.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -18,13 +19,14 @@
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
+#include "video_core/gpu.h"
 #include "video_core/renderer_base.h"

 namespace Memory {

-static PageTable* current_page_table = nullptr;
+static Common::PageTable* current_page_table = nullptr;

-void SetCurrentPageTable(PageTable* page_table) {
+void SetCurrentPageTable(Common::PageTable* page_table) {
    current_page_table = page_table;

    auto& system = Core::System::GetInstance();
@@ -36,39 +38,20 @@ void SetCurrentPageTable(PageTable* page_table) {
    }
 }

-PageTable* GetCurrentPageTable() {
+Common::PageTable* GetCurrentPageTable() {
    return current_page_table;
 }

-PageTable::PageTable() = default;
-
-PageTable::PageTable(std::size_t address_space_width_in_bits) {
-    Resize(address_space_width_in_bits);
-}
-
-PageTable::~PageTable() = default;
-
-void PageTable::Resize(std::size_t address_space_width_in_bits) {
-    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
-
-    pointers.resize(num_page_table_entries);
-    attributes.resize(num_page_table_entries);
-
-    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
-    // vector size is subsequently decreased (via resize), the vector might not automatically
-    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
-    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
-
-    pointers.shrink_to_fit();
-    attributes.shrink_to_fit();
-}
-
-static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
+static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
+                     Common::PageType type) {
    LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
              (base + size) * PAGE_SIZE);

-    RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
-                                 FlushMode::FlushAndInvalidate);
+    // During boot, current_page_table might not be set yet, in which case we need not flush
+    if (current_page_table) {
+        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+                                                                   size * PAGE_SIZE);
+    }

    VAddr end = base + size;
    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -88,41 +71,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
    }
 }

-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 }

-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);

    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

-void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);

    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
    page_table.special_regions.erase(interval);
 }

-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook) {
    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook) {
    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.subtract(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

 /**
@@ -171,19 +160,19 @@ T Read(const VAddr vaddr) {
        return value;
    }

-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
        LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
        return 0;
-    case PageType::Memory:
+    case Common::PageType::Memory:
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
        break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
        T value;
-        std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+        std::memcpy(&value, host_ptr, sizeof(T));
        return value;
    }
    default:
@@ -201,18 +190,19 @@ void Write(const VAddr vaddr, const T data) {
        return;
    }

-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
        LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                  static_cast<u32>(data), vaddr);
        return;
-    case PageType::Memory:
+    case Common::PageType::Memory:
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
        break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
-        std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+        std::memcpy(host_ptr, &data, sizeof(T));
        break;
    }
    default:
@@ -227,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
    if (page_pointer)
        return true;

-    if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
+    if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
        return true;

-    if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
+    if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
        return false;

    return false;
@@ -250,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
        return page_pointer + (vaddr & PAGE_MASK);
    }

-    if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
+    if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
+        Common::PageType::RasterizerCachedMemory) {
        return GetPointerFromVMA(vaddr);
    }

@@ -284,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {

    u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
    for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
-        PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];

        if (cached) {
            // Switch page type to cached if now cached
            switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                // It is not necessary for a process to have this region mapped into its address
                // space, for example, a system module need not have a VRAM mapping.
                break;
-            case PageType::Memory:
-                page_type = PageType::RasterizerCachedMemory;
+            case Common::PageType::Memory:
+                page_type = Common::PageType::RasterizerCachedMemory;
                current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                break;
-            case PageType::RasterizerCachedMemory:
+            case Common::PageType::RasterizerCachedMemory:
                // There can be more than one GPU region mapped per CPU region, so it's common that
                // this area is already marked as cached.
                break;
@@ -307,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
        } else {
            // Switch page type to uncached if now uncached
            switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                // It is not necessary for a process to have this region mapped into its address
                // space, for example, a system module need not have a VRAM mapping.
                break;
-            case PageType::Memory:
+            case Common::PageType::Memory:
                // There can be more than one GPU region mapped per CPU region, so it's common that
                // this area is already unmarked as cached.
                break;
-            case PageType::RasterizerCachedMemory: {
+            case Common::PageType::RasterizerCachedMemory: {
                u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                if (pointer == nullptr) {
                    // It's possible that this function has been called while updating the pagetable
                    // after unmapping a VMA. In that case the underlying VMA will no longer exist,
                    // and we should just leave the pagetable entry blank.
-                    page_type = PageType::Unmapped;
+                    page_type = Common::PageType::Unmapped;
                } else {
-                    page_type = PageType::Memory;
+                    page_type = Common::PageType::Memory;
                    current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
                }
                break;
@@ -335,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
    }
 }

-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
-    auto& system_instance = Core::System::GetInstance();
-
-    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
-    // null here
-    if (!system_instance.IsPoweredOn()) {
-        return;
-    }
-
-    const VAddr end = start + size;
-
-    const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
-        if (start >= region_end || end <= region_start) {
-            // No overlap with region
-            return;
-        }
-
-        const VAddr overlap_start = std::max(start, region_start);
-        const VAddr overlap_end = std::min(end, region_end);
-        const VAddr overlap_size = overlap_end - overlap_start;
-
-        auto& gpu = system_instance.GPU();
-        switch (mode) {
-        case FlushMode::Flush:
-            gpu.FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            gpu.InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
-        }
-    };
-
-    const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
-    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
-    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
 u8 Read8(const VAddr addr) {
    return Read<u8>(addr);
 }
@@ -406,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, src_addr, size);
            std::memset(dest_buffer, 0, copy_amount);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
            std::memcpy(dest_buffer, src_ptr, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
            break;
        }
        default:
@@ -470,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, dest_addr, size);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
            std::memcpy(dest_ptr, src_buffer, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
            break;
        }
        default:
@@ -516,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, dest_addr, size);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
            std::memset(dest_ptr, 0, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memset(host_ptr, 0, copy_amount);
            break;
        }
        default:
@@ -558,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, src_addr, size);
            ZeroBlock(process, dest_addr, copy_amount);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);
            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
            WriteBlock(process, dest_addr, src_ptr, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            WriteBlock(process, dest_addr, host_ptr, copy_amount);
            break;
        }
        default:
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -10,7 +10,10 @@
 #include <vector>
 #include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}

 namespace Kernel {
 class Process;
@@ -26,71 +29,6 @@ constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

-enum class PageType : u8 {
-    /// Page is unmapped and should cause an access error.
-    Unmapped,
-    /// Page is mapped to regular memory. This is the only type you can get pointers to.
-    Memory,
-    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
-    /// invalidation
-    RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
-};
-
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works.
- */
-struct PageTable {
-    explicit PageTable();
-    explicit PageTable(std::size_t address_space_width_in_bits);
-    ~PageTable();
-
-    /**
-     * Resizes the page table to be able to accomodate enough pages within
-     * a given address space.
-     *
-     * @param address_space_width_in_bits The address size width in bits.
-     */
-    void Resize(std::size_t address_space_width_in_bits);
-
-    /**
-     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
-     */
-    std::vector<u8*> pointers;
-
-    /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
-     * of type `Special`.
-     */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
-
-    /**
-     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
-     * the corresponding entry in `pointers` MUST be set to null.
-     */
-    std::vector<PageType> attributes;
-};
-
 /// Virtual user-space memory regions
 enum : VAddr {
    /// Read-only page containing kernel and system configuration values.
@@ -116,8 +54,8 @@ enum : VAddr {
 };

 /// Currently active page table
-void SetCurrentPageTable(PageTable* page_table);
-PageTable* GetCurrentPageTable();
+void SetCurrentPageTable(Common::PageTable* page_table);
+Common::PageTable* GetCurrentPageTable();

 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +99,4 @@ enum class FlushMode {
 */
 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);

-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
 } // namespace Memory
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
 #pragma once

 #include "common/common_types.h"
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}

 namespace Memory {

@@ -17,7 +21,7 @@ namespace Memory {
 * @param size The amount of bytes to map. Must be page-aligned.
 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
 */
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);

 /**
 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
 * @param size The amount of bytes to map. Must be page-aligned.
 * @param mmio_handler The handler that backs the mapping.
 */
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler);

-void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);

-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook);
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook);

 } // namespace Memory
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@

 #include <algorithm>

+#include "common/page_table.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -22,7 +23,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
    page_table->special_regions.clear();
    std::fill(page_table->attributes.begin(), page_table->attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);

    Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
    Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
 #include <vector>

 #include "common/common_types.h"
+#include "common/memory_hook.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/memory_hook.h"

-namespace Memory {
+namespace Common {
 struct PageTable;
 }

@@ -58,7 +58,7 @@ public:

 private:
    friend struct TestMemory;
-    struct TestMemory final : Memory::MemoryHook {
+    struct TestMemory final : Common::MemoryHook {
        explicit TestMemory(TestEnvironment* env_) : env(env_) {}
        TestEnvironment* env;

@@ -86,7 +86,7 @@ private:
    bool mutable_memory;
    std::shared_ptr<TestMemory> test_memory;
    std::vector<WriteRecord> write_records;
-    Memory::PageTable* page_table = nullptr;
+    Common::PageTable* page_table = nullptr;
    Kernel::KernelCore kernel;
 };

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -123,6 +123,8 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_sampler_cache.cpp
+        renderer_vulkan/vk_sampler_cache.h
        renderer_vulkan/vk_scheduler.cpp
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_stream_buffer.cpp
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
    }

    // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
    command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+                                  command_list_header.size * sizeof(u32));

    for (const CommandHeader& command_header : command_headers) {

--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -9,6 +9,7 @@
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"

 namespace Tegra::Engines {

@@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);

-    const GPUVAddr address = regs.dest.Address();
-    const auto dest_address =
-        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-    ASSERT_MSG(dest_address, "Invalid GPU address");
-
    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
-    // a dirty surface that will have to be written back to memory.
-    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
+    // We do this before actually writing the new data because the destination address might
+    // contain a dirty surface that will have to be written back to memory.
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write32(address, data);

-    Memory::Write32(*dest_address, data);
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    state.write_offset++;
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }

 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
    // VAddr before writing.
-    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
-    ASSERT_MSG(address, "Invalid GPU address");

    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
            // Write the current query sequence to the sequence address.
            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
            // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write32(sequence_address, sequence);
        } else {
            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
            // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
            query_result.value = result;
            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
            query_result.timestamp = system.CoreTiming().GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
        }
        dirty_flags.OnMemoryWrite();
        break;
@@ -393,10 +391,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);

-    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
-    ASSERT_MSG(address, "Invalid GPU address");
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+
+    u8* ptr{memory_manager.GetPointer(address)};
+    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
+    memory_manager.Write32(address, value);

-    Memory::Write32(*address, value);
    dirty_flags.OnMemoryWrite();

    // Increment the current buffer position.
@@ -404,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }

 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_base_address = regs.tic.TICAddress();
-
-    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
-    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};

    Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));

    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }

 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
-    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
-    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};

    Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
    return tsc_entry;
 }

@@ -455,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {

-        const auto address = memory_manager.GpuToCpuAddress(current_texture);
-        ASSERT_MSG(address, "Invalid GPU address");
-
-        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};

        Texture::FullTextureInfo tex_info{};
        // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

-    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-
-    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};

    Texture::FullTextureInfo tex_info{};
    tex_info.index = static_cast<u32>(offset);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -9,6 +9,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 #include "video_core/textures/decoders.h"

 namespace Tegra::Engines {
@@ -42,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
    const GPUVAddr source = regs.src_address.Address();
    const GPUVAddr dest = regs.dst_address.Address();

-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
    // TODO(Subv): Perform more research and implement all features of this engine.
    ASSERT(regs.exec.enable_swizzle == 0);
    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -69,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
        // y_count).
        if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
            return;
        }

@@ -78,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
        // rectangle. There is no need to manually flush/invalidate the regions because
        // CopyBlock does that for us.
        for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = *source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            const GPUVAddr source_line = source + line * regs.src_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
        }
        return;
    }
@@ -89,15 +85,18 @@ void MaxwellDMA::HandleCopy() {

    const std::size_t copy_size = regs.x_count * regs.y_count;

+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
+
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);

        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
    };

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -110,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
                           copy_size * src_bytes_per_pixel);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
-                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
+                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                  regs.src_params.pos_y);
    } else {
        ASSERT(regs.dst_params.size_z == 1);
@@ -124,7 +123,7 @@ void MaxwellDMA::HandleCopy() {

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
    }
 }

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
    const auto op =
        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
    if (op == GpuSemaphoreOperation::WriteLong) {
-        auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
        struct Block {
            u32 sequence;
            u32 zeros = 0;
@@ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
        // CoreTiming
        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
-        Memory::WriteBlock(*address, &block, sizeof(block));
+        memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
    } else {
-        const auto address =
-            memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-        const u32 word = Memory::Read32(*address);
+        const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
            (op == GpuSemaphoreOperation::AcquireGequal &&
             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
 }

 void GPU::ProcessSemaphoreRelease() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    Memory::Write32(*address, regs.semaphore_release);
+    memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
 }

 void GPU::ProcessSemaphoreAcquire() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    const u32 word = Memory::Read32(*address);
+    const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
    const auto value = regs.semaphore_acquire;
    if (word != value) {
        regs.acquire_active = true;
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -11,6 +11,11 @@
 #include "video_core/dma_pusher.h"
 #include "video_core/memory_manager.h"

+using CacheAddr = std::uintptr_t;
+inline CacheAddr ToCacheAddr(const void* host_ptr) {
+    return reinterpret_cast<CacheAddr>(host_ptr);
+}
+
 namespace Core {
 class System;
 }
@@ -209,13 +214,13 @@ public:
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;

    /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;

 private:
    void ProcessBindMethod(const MethodCall& method_call);
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
    gpu_thread.SwapBuffers(std::move(framebuffer));
 }

-void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
    gpu_thread.FlushRegion(addr, size);
 }

-void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
    gpu_thread.InvalidateRegion(addr, size);
 }

-void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
    gpu_thread.FlushAndInvalidateRegion(addr, size);
 }

--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -26,9 +26,9 @@ public:
    void PushGPUEntries(Tegra::CommandList&& entries) override;
    void SwapBuffers(
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;

 private:
    GPUThread::ThreadManager gpu_thread;
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
    renderer.SwapBuffers(std::move(framebuffer));
 }

-void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
    renderer.Rasterizer().FlushRegion(addr, size);
 }

-void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
    renderer.Rasterizer().InvalidateRegion(addr, size);
 }

-void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
 }

--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -21,9 +21,9 @@ public:
    void PushGPUEntries(Tegra::CommandList&& entries) override;
    void SwapBuffers(
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 };

 } // namespace VideoCommon
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,6 @@
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/frontend/scope_acquire_window_context.h"
-#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@

 namespace VideoCommon::GPUThread {

-/// Executes a single GPU thread command
-static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
-                           Tegra::DmaPusher& dma_pusher) {
-    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
-        dma_pusher.Push(std::move(submit_list->entries));
-        dma_pusher.DispatchCalls();
-    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
-        renderer.SwapBuffers(data->framebuffer);
-    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
-        renderer.Rasterizer().FlushRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
-    } else {
-        UNREACHABLE();
-    }
-}
-
 /// Runs the GPU thread
 static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
                      SynchState& state) {
-
    MicroProfileOnThreadCreate("GpuThread");

-    auto WaitForWakeup = [&]() {
-        std::unique_lock<std::mutex> lock{state.signal_mutex};
-        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
-    };
-
    // Wait for first GPU command before acquiring the window context
-    WaitForWakeup();
+    state.WaitForCommands();

    // If emulation was stopped during disk shader loading, abort before trying to acquire context
    if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p

    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};

+    CommandDataContainer next;
    while (state.is_running) {
-        if (!state.is_running) {
-            return;
+        state.WaitForCommands();
+        while (!state.queue.Empty()) {
+            state.queue.Pop(next);
+            if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
+                dma_pusher.Push(std::move(submit_list->entries));
+                dma_pusher.DispatchCalls();
+            } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
+                state.DecrementFramesCounter();
+                renderer.SwapBuffers(std::move(data->framebuffer));
+            } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
+                renderer.Rasterizer().FlushRegion(data->addr, data->size);
+            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
+                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+                return;
+            } else {
+                UNREACHABLE();
+            }
        }
-
-        {
-            // Thread has been woken up, so make the previous write queue the next read queue
-            std::lock_guard<std::mutex> lock{state.signal_mutex};
-            std::swap(state.push_queue, state.pop_queue);
-        }
-
-        // Execute all of the GPU commands
-        while (!state.pop_queue->empty()) {
-            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
-            state.pop_queue->pop();
-        }
-
-        state.UpdateIdleState();
-
-        // Signal that the GPU thread has finished processing commands
-        if (state.is_idle) {
-            state.idle_condition.notify_one();
-        }
-
-        // Wait for CPU thread to send more GPU commands
-        WaitForWakeup();
    }
 }

 ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
-                                                         std::ref(dma_pusher), std::ref(state)},
-      thread_id{thread.get_id()} {}
+                                                         std::ref(dma_pusher), std::ref(state)} {}

 ThreadManager::~ThreadManager() {
-    {
-        // Notify GPU thread that a shutdown is pending
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
-        state.is_running = false;
-    }
-
-    state.signal_condition.notify_one();
+    // Notify GPU thread that a shutdown is pending
+    PushCommand(EndProcessingCommand());
    thread.join();
 }

 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    PushCommand(SubmitListCommand(std::move(entries)), false, false);
+    PushCommand(SubmitListCommand(std::move(entries)));
 }

 void ThreadManager::SwapBuffers(
    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+    state.IncrementFramesCounter();
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+    state.WaitForFrames();
 }

-void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    // Block the CPU when using accurate emulation
-    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+    PushCommand(FlushRegionCommand(addr, size));
 }

-void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+    if (state.queue.Empty()) {
+        // It's quicker to invalidate a single region on the CPU if the queue is already empty
+        renderer.Rasterizer().InvalidateRegion(addr, size);
+    } else {
+        PushCommand(InvalidateRegionCommand(addr, size));
+    }
 }

-void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
    InvalidateRegion(addr, size);
 }

-void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
-    {
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
-
-        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
-            // Execute the command synchronously on the current thread
-            ExecuteCommand(&command_data, renderer, dma_pusher);
-            return;
-        }
-
-        // Push the command to the GPU thread
-        state.UpdateIdleState();
-        state.push_queue->emplace(command_data);
-    }
-
-    // Signal the GPU thread that commands are pending
-    state.signal_condition.notify_one();
-
-    if (wait_for_idle) {
-        // Wait for the GPU to be idle (all commands to be executed)
-        std::unique_lock<std::mutex> lock{state.idle_mutex};
-        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
-    }
+void ThreadManager::PushCommand(CommandData&& command_data) {
+    state.queue.Push(CommandDataContainer(std::move(command_data)));
+    state.SignalCommands();
 }

 } // namespace VideoCommon::GPUThread
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -13,6 +13,9 @@
 #include <thread>
 #include <variant>

+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
 namespace Tegra {
 struct FramebufferConfig;
 class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;

 namespace VideoCommon::GPUThread {

+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
        : framebuffer{std::move(framebuffer)} {}

-    std::optional<const Tegra::FramebufferConfig> framebuffer;
+    std::optional<Tegra::FramebufferConfig> framebuffer;
 };

 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}

-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };

 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}

-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };

 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
        : addr{addr}, size{size} {}

-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };

-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Element type of SynchState's command queue; owns a single CommandData.
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+
+    CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+
+    // Deliberately no user-defined assignment operator: assigning from a
+    // `const CommandDataContainer&` via std::move(t.data) yields a const rvalue and still copies.
+    // The implicit copy/move operations move whenever the source is an rvalue.
+
+    CommandData data;
+};

 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex frames_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable frames_condition;

-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    void IncrementFramesCounter() {
+        std::lock_guard<std::mutex> lock{frames_mutex};
+        ++queued_frame_count;
    }
+
+    /// Removes one frame from the queued count; wakes WaitForFrames() once the count hits zero.
+    void DecrementFramesCounter() {
+        const bool all_frames_done = [this] {
+            std::lock_guard<std::mutex> lock{frames_mutex};
+            return --queued_frame_count == 0;
+        }();
+        if (all_frames_done) {
+            // Notified after frames_mutex has been released by the lambda above.
+            frames_condition.notify_one();
+        }
+    }
+
+    /// Blocks the caller until queued_frame_count is zero; returns at once if it already is.
+    ///
+    /// The predicate overload of std::condition_variable::wait evaluates the predicate before it
+    /// ever blocks, so a separate locked pre-check (and a second acquisition of frames_mutex) is
+    /// not needed.
+    ///
+    /// Only the frame counter is observed: this waits for DecrementFramesCounter() to bring the
+    /// count back to zero, not for the command queue to drain.
+    void WaitForFrames() {
+        const auto no_frames_queued = [this] { return queued_frame_count == 0; };
+
+        std::unique_lock<std::mutex> lock{frames_mutex};
+        frames_condition.wait(lock, no_frames_queued);
+    }
+
+    /// Wakes a thread blocked in WaitForCommands() if the queue holds at least one command.
+    void SignalCommands() {
+        const bool has_pending = [this] {
+            std::lock_guard<std::mutex> lock{commands_mutex};
+            return !queue.Empty();
+        }();
+        if (has_pending) {
+            commands_condition.notify_one();
+        }
+    }
+
+    void WaitForCommands() {
+        std::unique_lock<std::mutex> lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
 };

 /// Class used to manage the GPU thread
@@ -105,22 +162,17 @@ public:
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(VAddr addr, u64 size);
+    void FlushRegion(CacheAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(VAddr addr, u64 size);
+    void InvalidateRegion(CacheAddr addr, u64 size);

    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);

 private:
    /// Pushes a command to be executed by the GPU thread
-    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
-    /// Returns true if this is called by the GPU thread
-    bool IsGpuThread() const {
-        return std::this_thread::get_id() == thread_id;
-    }
+    void PushCommand(CommandData&& command_data);

 private:
    SynchState state;
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,6 +5,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"

 namespace Tegra {
@@ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
    return base_addr + (gpu_addr & PAGE_MASK);
 }

-std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
-    std::vector<GPUVAddr> results;
-    for (const auto& region : mapped_regions) {
-        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
-            const u64 offset{cpu_addr - region.cpu_addr};
-            results.push_back(region.gpu_addr + offset);
-        }
-    }
-    return results;
+u8 MemoryManager::Read8(GPUVAddr addr) {
+    return Memory::Read8(*GpuToCpuAddress(addr));
+}
+
+u16 MemoryManager::Read16(GPUVAddr addr) {
+    return Memory::Read16(*GpuToCpuAddress(addr));
+}
+
+u32 MemoryManager::Read32(GPUVAddr addr) {
+    return Memory::Read32(*GpuToCpuAddress(addr));
+}
+
+u64 MemoryManager::Read64(GPUVAddr addr) {
+    return Memory::Read64(*GpuToCpuAddress(addr));
+}
+
+void MemoryManager::Write8(GPUVAddr addr, u8 data) {
+    Memory::Write8(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write16(GPUVAddr addr, u16 data) {
+    Memory::Write16(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write32(GPUVAddr addr, u32 data) {
+    Memory::Write32(*GpuToCpuAddress(addr), data);
+}
+
+void MemoryManager::Write64(GPUVAddr addr, u64 data) {
+    Memory::Write64(*GpuToCpuAddress(addr), data);
+}
+
+u8* MemoryManager::GetPointer(GPUVAddr addr) {
+    return Memory::GetPointer(*GpuToCpuAddress(addr));
+}
+
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
+    std::memcpy(dest_buffer, GetPointer(src_addr), size);
+}
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), src_buffer, size);
+}
+
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
+    std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
 }

 VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -27,12 +27,27 @@ public:
    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
-    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;

    static constexpr u64 PAGE_BITS = 16;
    static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

+    u8 Read8(GPUVAddr addr);
+    u16 Read16(GPUVAddr addr);
+    u32 Read32(GPUVAddr addr);
+    u64 Read64(GPUVAddr addr);
+
+    void Write8(GPUVAddr addr, u8 data);
+    void Write16(GPUVAddr addr, u16 data);
+    void Write32(GPUVAddr addr, u32 data);
+    void Write64(GPUVAddr addr, u64 data);
+
+    u8* GetPointer(GPUVAddr vaddr);
+    // Block transfers; every address is a GPU virtual address, as in the definitions.
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+
 private:
    enum class PageStatus : u64 {
        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "core/memory.h"
 #include "video_core/morton.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
 using Surface::GetBytesPerPixel;
 using Surface::PixelFormat;

-using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
+using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
 using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;

 template <bool morton_to_linear, PixelFormat format>
 static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
-                       u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
+                       u32 tile_width_spacing, u8* buffer, u8* addr) {
    constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);

    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
                                         stride, height, depth, block_height, block_depth,
                                         tile_width_spacing);
    } else {
-        Tegra::Texture::CopySwizzledData(
-            (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
-            depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
-            block_height, block_depth, tile_width_spacing);
+        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+                                         (height + tile_size_y - 1) / tile_size_y, depth,
+                                         bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
+                                         block_height, block_depth, tile_width_spacing);
    }
 }

 static constexpr ConversionArray morton_to_linear_fns = {
-    // clang-format off
-        MortonCopy<true, PixelFormat::ABGR8U>,
-        MortonCopy<true, PixelFormat::ABGR8S>,
-        MortonCopy<true, PixelFormat::ABGR8UI>,
-        MortonCopy<true, PixelFormat::B5G6R5U>,
-        MortonCopy<true, PixelFormat::A2B10G10R10U>,
-        MortonCopy<true, PixelFormat::A1B5G5R5U>,
-        MortonCopy<true, PixelFormat::R8U>,
-        MortonCopy<true, PixelFormat::R8UI>,
-        MortonCopy<true, PixelFormat::RGBA16F>,
-        MortonCopy<true, PixelFormat::RGBA16U>,
-        MortonCopy<true, PixelFormat::RGBA16UI>,
-        MortonCopy<true, PixelFormat::R11FG11FB10F>,
-        MortonCopy<true, PixelFormat::RGBA32UI>,
-        MortonCopy<true, PixelFormat::DXT1>,
-        MortonCopy<true, PixelFormat::DXT23>,
-        MortonCopy<true, PixelFormat::DXT45>,
-        MortonCopy<true, PixelFormat::DXN1>,
-        MortonCopy<true, PixelFormat::DXN2UNORM>,
-        MortonCopy<true, PixelFormat::DXN2SNORM>,
-        MortonCopy<true, PixelFormat::BC7U>,
-        MortonCopy<true, PixelFormat::BC6H_UF16>,
-        MortonCopy<true, PixelFormat::BC6H_SF16>,
-        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
-        MortonCopy<true, PixelFormat::BGRA8>,
-        MortonCopy<true, PixelFormat::RGBA32F>,
-        MortonCopy<true, PixelFormat::RG32F>,
-        MortonCopy<true, PixelFormat::R32F>,
-        MortonCopy<true, PixelFormat::R16F>,
-        MortonCopy<true, PixelFormat::R16U>,
-        MortonCopy<true, PixelFormat::R16S>,
-        MortonCopy<true, PixelFormat::R16UI>,
-        MortonCopy<true, PixelFormat::R16I>,
-        MortonCopy<true, PixelFormat::RG16>,
-        MortonCopy<true, PixelFormat::RG16F>,
-        MortonCopy<true, PixelFormat::RG16UI>,
-        MortonCopy<true, PixelFormat::RG16I>,
-        MortonCopy<true, PixelFormat::RG16S>,
-        MortonCopy<true, PixelFormat::RGB32F>,
-        MortonCopy<true, PixelFormat::RGBA8_SRGB>,
-        MortonCopy<true, PixelFormat::RG8U>,
-        MortonCopy<true, PixelFormat::RG8S>,
-        MortonCopy<true, PixelFormat::RG32UI>,
-        MortonCopy<true, PixelFormat::R32UI>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
-        MortonCopy<true, PixelFormat::BGRA8_SRGB>,
-        MortonCopy<true, PixelFormat::DXT1_SRGB>,
-        MortonCopy<true, PixelFormat::DXT23_SRGB>,
-        MortonCopy<true, PixelFormat::DXT45_SRGB>,
-        MortonCopy<true, PixelFormat::BC7U_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
-        MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
-        MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
-        MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
-        MortonCopy<true, PixelFormat::Z32F>,
-        MortonCopy<true, PixelFormat::Z16>,
-        MortonCopy<true, PixelFormat::Z24S8>,
-        MortonCopy<true, PixelFormat::S8Z24>,
-        MortonCopy<true, PixelFormat::Z32FS8>,
-    // clang-format on
+    MortonCopy<true, PixelFormat::ABGR8U>,
+    MortonCopy<true, PixelFormat::ABGR8S>,
+    MortonCopy<true, PixelFormat::ABGR8UI>,
+    MortonCopy<true, PixelFormat::B5G6R5U>,
+    MortonCopy<true, PixelFormat::A2B10G10R10U>,
+    MortonCopy<true, PixelFormat::A1B5G5R5U>,
+    MortonCopy<true, PixelFormat::R8U>,
+    MortonCopy<true, PixelFormat::R8UI>,
+    MortonCopy<true, PixelFormat::RGBA16F>,
+    MortonCopy<true, PixelFormat::RGBA16U>,
+    MortonCopy<true, PixelFormat::RGBA16UI>,
+    MortonCopy<true, PixelFormat::R11FG11FB10F>,
+    MortonCopy<true, PixelFormat::RGBA32UI>,
+    MortonCopy<true, PixelFormat::DXT1>,
+    MortonCopy<true, PixelFormat::DXT23>,
+    MortonCopy<true, PixelFormat::DXT45>,
+    MortonCopy<true, PixelFormat::DXN1>,
+    MortonCopy<true, PixelFormat::DXN2UNORM>,
+    MortonCopy<true, PixelFormat::DXN2SNORM>,
+    MortonCopy<true, PixelFormat::BC7U>,
+    MortonCopy<true, PixelFormat::BC6H_UF16>,
+    MortonCopy<true, PixelFormat::BC6H_SF16>,
+    MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+    MortonCopy<true, PixelFormat::BGRA8>,
+    MortonCopy<true, PixelFormat::RGBA32F>,
+    MortonCopy<true, PixelFormat::RG32F>,
+    MortonCopy<true, PixelFormat::R32F>,
+    MortonCopy<true, PixelFormat::R16F>,
+    MortonCopy<true, PixelFormat::R16U>,
+    MortonCopy<true, PixelFormat::R16S>,
+    MortonCopy<true, PixelFormat::R16UI>,
+    MortonCopy<true, PixelFormat::R16I>,
+    MortonCopy<true, PixelFormat::RG16>,
+    MortonCopy<true, PixelFormat::RG16F>,
+    MortonCopy<true, PixelFormat::RG16UI>,
+    MortonCopy<true, PixelFormat::RG16I>,
+    MortonCopy<true, PixelFormat::RG16S>,
+    MortonCopy<true, PixelFormat::RGB32F>,
+    MortonCopy<true, PixelFormat::RGBA8_SRGB>,
+    MortonCopy<true, PixelFormat::RG8U>,
+    MortonCopy<true, PixelFormat::RG8S>,
+    MortonCopy<true, PixelFormat::RG32UI>,
+    MortonCopy<true, PixelFormat::R32UI>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
+    MortonCopy<true, PixelFormat::BGRA8_SRGB>,
+    MortonCopy<true, PixelFormat::DXT1_SRGB>,
+    MortonCopy<true, PixelFormat::DXT23_SRGB>,
+    MortonCopy<true, PixelFormat::DXT45_SRGB>,
+    MortonCopy<true, PixelFormat::BC7U_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+    MortonCopy<true, PixelFormat::Z32F>,
+    MortonCopy<true, PixelFormat::Z16>,
+    MortonCopy<true, PixelFormat::Z24S8>,
+    MortonCopy<true, PixelFormat::S8Z24>,
+    MortonCopy<true, PixelFormat::Z32FS8>,
 };

 static constexpr ConversionArray linear_to_morton_fns = {
-    // clang-format off
-        MortonCopy<false, PixelFormat::ABGR8U>,
-        MortonCopy<false, PixelFormat::ABGR8S>,
-        MortonCopy<false, PixelFormat::ABGR8UI>,
-        MortonCopy<false, PixelFormat::B5G6R5U>,
-        MortonCopy<false, PixelFormat::A2B10G10R10U>,
-        MortonCopy<false, PixelFormat::A1B5G5R5U>,
-        MortonCopy<false, PixelFormat::R8U>,
-        MortonCopy<false, PixelFormat::R8UI>,
-        MortonCopy<false, PixelFormat::RGBA16F>,
-        MortonCopy<false, PixelFormat::RGBA16U>,
-        MortonCopy<false, PixelFormat::RGBA16UI>,
-        MortonCopy<false, PixelFormat::R11FG11FB10F>,
-        MortonCopy<false, PixelFormat::RGBA32UI>,
-        MortonCopy<false, PixelFormat::DXT1>,
-        MortonCopy<false, PixelFormat::DXT23>,
-        MortonCopy<false, PixelFormat::DXT45>,
-        MortonCopy<false, PixelFormat::DXN1>,
-        MortonCopy<false, PixelFormat::DXN2UNORM>,
-        MortonCopy<false, PixelFormat::DXN2SNORM>,
-        MortonCopy<false, PixelFormat::BC7U>,
-        MortonCopy<false, PixelFormat::BC6H_UF16>,
-        MortonCopy<false, PixelFormat::BC6H_SF16>,
-        // TODO(Subv): Swizzling ASTC formats are not supported
-        nullptr,
-        MortonCopy<false, PixelFormat::BGRA8>,
-        MortonCopy<false, PixelFormat::RGBA32F>,
-        MortonCopy<false, PixelFormat::RG32F>,
-        MortonCopy<false, PixelFormat::R32F>,
-        MortonCopy<false, PixelFormat::R16F>,
-        MortonCopy<false, PixelFormat::R16U>,
-        MortonCopy<false, PixelFormat::R16S>,
-        MortonCopy<false, PixelFormat::R16UI>,
-        MortonCopy<false, PixelFormat::R16I>,
-        MortonCopy<false, PixelFormat::RG16>,
-        MortonCopy<false, PixelFormat::RG16F>,
-        MortonCopy<false, PixelFormat::RG16UI>,
-        MortonCopy<false, PixelFormat::RG16I>,
-        MortonCopy<false, PixelFormat::RG16S>,
-        MortonCopy<false, PixelFormat::RGB32F>,
-        MortonCopy<false, PixelFormat::RGBA8_SRGB>,
-        MortonCopy<false, PixelFormat::RG8U>,
-        MortonCopy<false, PixelFormat::RG8S>,
-        MortonCopy<false, PixelFormat::RG32UI>,
-        MortonCopy<false, PixelFormat::R32UI>,
-        nullptr,
-        nullptr,
-        nullptr,
-        MortonCopy<false, PixelFormat::BGRA8_SRGB>,
-        MortonCopy<false, PixelFormat::DXT1_SRGB>,
-        MortonCopy<false, PixelFormat::DXT23_SRGB>,
-        MortonCopy<false, PixelFormat::DXT45_SRGB>,
-        MortonCopy<false, PixelFormat::BC7U_SRGB>,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        nullptr,
-        MortonCopy<false, PixelFormat::Z32F>,
-        MortonCopy<false, PixelFormat::Z16>,
-        MortonCopy<false, PixelFormat::Z24S8>,
-        MortonCopy<false, PixelFormat::S8Z24>,
-        MortonCopy<false, PixelFormat::Z32FS8>,
-    // clang-format on
+    MortonCopy<false, PixelFormat::ABGR8U>,
+    MortonCopy<false, PixelFormat::ABGR8S>,
+    MortonCopy<false, PixelFormat::ABGR8UI>,
+    MortonCopy<false, PixelFormat::B5G6R5U>,
+    MortonCopy<false, PixelFormat::A2B10G10R10U>,
+    MortonCopy<false, PixelFormat::A1B5G5R5U>,
+    MortonCopy<false, PixelFormat::R8U>,
+    MortonCopy<false, PixelFormat::R8UI>,
+    MortonCopy<false, PixelFormat::RGBA16F>,
+    MortonCopy<false, PixelFormat::RGBA16U>,
+    MortonCopy<false, PixelFormat::RGBA16UI>,
+    MortonCopy<false, PixelFormat::R11FG11FB10F>,
+    MortonCopy<false, PixelFormat::RGBA32UI>,
+    MortonCopy<false, PixelFormat::DXT1>,
+    MortonCopy<false, PixelFormat::DXT23>,
+    MortonCopy<false, PixelFormat::DXT45>,
+    MortonCopy<false, PixelFormat::DXN1>,
+    MortonCopy<false, PixelFormat::DXN2UNORM>,
+    MortonCopy<false, PixelFormat::DXN2SNORM>,
+    MortonCopy<false, PixelFormat::BC7U>,
+    MortonCopy<false, PixelFormat::BC6H_UF16>,
+    MortonCopy<false, PixelFormat::BC6H_SF16>,
+    // TODO(Subv): Swizzling ASTC formats are not supported
+    nullptr,
+    MortonCopy<false, PixelFormat::BGRA8>,
+    MortonCopy<false, PixelFormat::RGBA32F>,
+    MortonCopy<false, PixelFormat::RG32F>,
+    MortonCopy<false, PixelFormat::R32F>,
+    MortonCopy<false, PixelFormat::R16F>,
+    MortonCopy<false, PixelFormat::R16U>,
+    MortonCopy<false, PixelFormat::R16S>,
+    MortonCopy<false, PixelFormat::R16UI>,
+    MortonCopy<false, PixelFormat::R16I>,
+    MortonCopy<false, PixelFormat::RG16>,
+    MortonCopy<false, PixelFormat::RG16F>,
+    MortonCopy<false, PixelFormat::RG16UI>,
+    MortonCopy<false, PixelFormat::RG16I>,
+    MortonCopy<false, PixelFormat::RG16S>,
+    MortonCopy<false, PixelFormat::RGB32F>,
+    MortonCopy<false, PixelFormat::RGBA8_SRGB>,
+    MortonCopy<false, PixelFormat::RG8U>,
+    MortonCopy<false, PixelFormat::RG8S>,
+    MortonCopy<false, PixelFormat::RG32UI>,
+    MortonCopy<false, PixelFormat::R32UI>,
+    nullptr,
+    nullptr,
+    nullptr,
+    MortonCopy<false, PixelFormat::BGRA8_SRGB>,
+    MortonCopy<false, PixelFormat::DXT1_SRGB>,
+    MortonCopy<false, PixelFormat::DXT23_SRGB>,
+    MortonCopy<false, PixelFormat::DXT45_SRGB>,
+    MortonCopy<false, PixelFormat::BC7U_SRGB>,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    MortonCopy<false, PixelFormat::Z32F>,
+    MortonCopy<false, PixelFormat::Z16>,
+    MortonCopy<false, PixelFormat::Z24S8>,
+    MortonCopy<false, PixelFormat::S8Z24>,
+    MortonCopy<false, PixelFormat::Z32FS8>,
 };

 static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
    return morton_to_linear_fns[static_cast<std::size_t>(format)];
 }

-/// 8x8 Z-Order coordinate from 2D coordinates
-static u32 MortonInterleave(u32 x, u32 y) {
-    static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
-    static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
-    return xlut[x % 8] + ylut[y % 8];
-}
-
-/// Calculates the offset of the position of the pixel in Morton order
-static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
-    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
-    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
-    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
-    // texels are laid out in a 2x2 subtile like this:
-    // 2 3
-    // 0 1
-    //
-    // The full 8x8 tile has the texels arranged like this:
-    //
-    // 42 43 46 47 58 59 62 63
-    // 40 41 44 45 56 57 60 61
-    // 34 35 38 39 50 51 54 55
-    // 32 33 36 37 48 49 52 53
-    // 10 11 14 15 26 27 30 31
-    // 08 09 12 13 24 25 28 29
-    // 02 03 06 07 18 19 22 23
-    // 00 01 04 05 16 17 20 21
-    //
-    // This pattern is what's called Z-order curve, or Morton order.
-
-    const unsigned int block_height = 8;
-    const unsigned int coarse_x = x & ~7;
-
-    u32 i = MortonInterleave(x, y);
-
-    const unsigned int offset = coarse_x * block_height;
-
-    return (i + offset) * bytes_per_pixel;
-}
-
 static u32 MortonInterleave128(u32 x, u32 y) {
    // 128x128 Z-Order coordinate from 2D coordinates
    static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {

 void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, std::size_t buffer_size, VAddr addr) {
-
+                   u8* buffer, u8* addr) {
    GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
-                                     tile_width_spacing, buffer, buffer_size, addr);
+                                     tile_width_spacing, buffer, addr);
 }

-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
-                         u8* morton_data, u8* linear_data, bool morton_to_linear) {
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+                         u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
+    const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
    u8* data_ptrs[2];
    for (u32 y = 0; y < height; ++y) {
        for (u32 x = 0; x < width; ++x) {
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };

 void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
-                   u8* buffer, std::size_t buffer_size, VAddr addr);
+                   u8* buffer, u8* addr);

-void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
-                         u8* morton_data, u8* linear_data, bool morton_to_linear);
+void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
+                         u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);

 } // namespace VideoCore
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <mutex>
 #include <set>
 #include <unordered_map>

@@ -12,14 +13,26 @@

 #include "common/common_types.h"
 #include "core/settings.h"
+#include "video_core/gpu.h"
 #include "video_core/rasterizer_interface.h"

 class RasterizerCacheObject {
 public:
+    explicit RasterizerCacheObject(const u8* host_ptr)
+        : cache_addr{ToCacheAddr(host_ptr)}, host_ptr{host_ptr} {}
+
    virtual ~RasterizerCacheObject();

+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    const u8* GetHostPtr() const {
+        return host_ptr;
+    }
+
    /// Gets the address of the shader in guest memory, required for cache management
-    virtual VAddr GetAddr() const = 0;
+    virtual VAddr GetCpuAddr() const = 0;

    /// Gets the size of the shader in guest memory, required for cache management
    virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
    bool is_registered{};      ///< Whether the object is currently registered with the cache
    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+    CacheAddr cache_addr{};    ///< Cache address from host_ptr; not an emulated virtual address
+    const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
 };

 template <class T>
@@ -68,7 +83,9 @@ public:
    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}

    /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
        for (auto& object : objects) {
            FlushObject(object);
@@ -76,7 +93,9 @@ public:
    }

    /// Mark the specified region as being invalidated
-    void InvalidateRegion(VAddr addr, u64 size) {
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
        for (auto& object : objects) {
            if (!object->IsRegistered()) {
@@ -89,48 +108,60 @@ public:

    /// Invalidates everything in the cache
    void InvalidateAll() {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        while (interval_cache.begin() != interval_cache.end()) {
            Unregister(*interval_cache.begin()->second.begin());
        }
    }

 protected:
-    /// Tries to get an object from the cache with the specified address
-    T TryGet(VAddr addr) const {
+    /// Tries to get an object from the cache with the specified cache address
+    T TryGet(CacheAddr addr) const {
        const auto iter = map_cache.find(addr);
        if (iter != map_cache.end())
            return iter->second;
        return nullptr;
    }

+    /// Tries to get an object from the cache with the specified host pointer.
+    /// The pointer is converted with ToCacheAddr and looked up through the
+    /// CacheAddr overload, so both lookups share a single implementation.
+    T TryGet(const void* addr) const {
+        return TryGet(ToCacheAddr(addr));
+    }
+
    /// Register an object into the cache
    void Register(const T& object) {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        object->SetIsRegistered(true);
        interval_cache.add({GetInterval(object), ObjectSet{object}});
-        map_cache.insert({object->GetAddr(), object});
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
+        map_cache.insert({object->GetCacheAddr(), object});
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
    }

    /// Unregisters an object from the cache
    void Unregister(const T& object) {
-        object->SetIsRegistered(false);
-        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        if (Settings::values.use_accurate_gpu_emulation) {
-            FlushObject(object);
-        }
+        std::lock_guard<std::recursive_mutex> lock{mutex};

+        object->SetIsRegistered(false);
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(object->GetAddr());
+        map_cache.erase(object->GetCacheAddr());
    }

    /// Returns a ticks counter used for tracking when cached objects were last modified
    u64 GetModifiedTicks() {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        return ++modified_ticks;
    }

    /// Flushes the specified object, updating appropriate cache state as needed
    void FlushObject(const T& object) {
+        std::lock_guard<std::recursive_mutex> lock{mutex};
+
        if (!object->IsDirty()) {
            return;
        }
@@ -140,7 +171,7 @@ protected:

 private:
    /// Returns a list of cached objects from the specified memory region, ordered by access time
-    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+    std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
        if (size == 0) {
            return {};
        }
@@ -164,17 +195,18 @@ private:
    }

    using ObjectSet = std::set<T>;
-    using ObjectCache = std::unordered_map<VAddr, T>;
-    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+    using ObjectCache = std::unordered_map<CacheAddr, T>;
+    using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
    using ObjectInterval = typename IntervalCache::interval_type;

    static auto GetInterval(const T& object) {
-        return ObjectInterval::right_open(object->GetAddr(),
-                                          object->GetAddr() + object->GetSizeInBytes());
+        return ObjectInterval::right_open(object->GetCacheAddr(),
+                                          object->GetCacheAddr() + object->GetSizeInBytes());
    }

    ObjectCache map_cache;
    IntervalCache interval_cache; ///< Cache of objects
    u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
    VideoCore::RasterizerInterface& rasterizer;
+    std::recursive_mutex mutex;
 };
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -35,14 +35,14 @@ public:
    virtual void FlushAll() = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;

    /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;

    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -63,7 +63,7 @@ public:
    }

    /// Increase/decrease the number of object in pages touching the specified region
-    virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+    virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}

    /// Initialize disk cached resources for the game being emulated
    virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,24 +13,28 @@

 namespace OpenGL {

+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {}
+
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
    : RasterizerCache{rasterizer}, stream_buffer(size, true) {}

 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                      std::size_t alignment, bool cache) {
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    cache &= size >= 2048;

+    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
    if (cache) {
-        auto entry = TryGet(*cpu_addr);
+        auto entry = TryGet(host_ptr);
        if (entry) {
-            if (entry->size >= size && entry->alignment == alignment) {
-                return entry->offset;
+            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+                return entry->GetOffset();
            }
            Unregister(entry);
        }
@@ -39,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
    AlignBuffer(alignment);
    const GLintptr uploaded_offset = buffer_offset;

-    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    if (!host_ptr) {
+        return uploaded_offset;
+    }

+    std::memcpy(buffer_ptr, host_ptr, size);
    buffer_ptr += size;
    buffer_offset += size;

    if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>();
-        entry->offset = uploaded_offset;
-        entry->size = size;
-        entry->alignment = alignment;
-        entry->addr = *cpu_addr;
+        auto entry = std::make_shared<CachedBufferEntry>(
+            *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
        Register(entry);
    }

--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {

 class RasterizerOpenGL;

-struct CachedBufferEntry final : public RasterizerCacheObject {
-    VAddr GetAddr() const override {
-        return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
+                               std::size_t alignment, u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

+    std::size_t GetSize() const {
+        return size;
+    }
+
+    GLintptr GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
    // We do not have to flush this cache as things in it are never modified by us.
    void Flush() override {}

-    VAddr addr;
-    std::size_t size;
-    GLintptr offset;
-    std::size_t alignment;
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    GLintptr offset{};
+    std::size_t alignment{};
 };

 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +14,13 @@

 namespace OpenGL {

-CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
    buffer.Create();
    // Bind and unbind the buffer so it gets allocated by the driver
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
-    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+    LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
 }

 void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +35,10 @@ void CachedGlobalRegion::Reload(u32 size_) {

    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+    glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
 }

-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
    const auto search{reserve.find(addr)};
    if (search == reserve.end()) {
        return {};
@@ -46,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
    return search->second;
 }

-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
-    GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
+                                                              u8* host_ptr) {
+    GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
    if (!region) {
        // No reserved surface available, create a new one and reserve it
-        region = std::make_shared<CachedGlobalRegion>(addr, size);
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
+        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
        ReserveGlobalRegion(region);
    }
    region->Reload(size);
@@ -58,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
 }

 void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
-    reserve.insert_or_assign(region->GetAddr(), std::move(region));
+    reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
 }

 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {

    auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
-    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
-        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
-    ASSERT(cbuf_addr);
-
-    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
-    const auto size = Memory::Read32(*cbuf_addr + 8);
-    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
-    ASSERT(actual_addr);
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
+                    global_region.GetCbufOffset()};
+    const auto actual_addr{memory_manager.Read64(addr)};
+    const auto size{memory_manager.Read32(addr + 8)};

    // Look up global region in the cache based on address
-    GlobalRegion region = TryGet(*actual_addr);
+    const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
+    GlobalRegion region{TryGet(host_ptr)};

    if (!region) {
        // No global region found - create a new one
-        region = GetUncachedGlobalRegion(*actual_addr, size);
+        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
        Register(region);
    }

--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;

 class CachedGlobalRegion final : public RasterizerCacheObject {
 public:
-    explicit CachedGlobalRegion(VAddr addr, u32 size);
+    explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);

-    /// Gets the address of the shader in guest memory, required for cache management
-    VAddr GetAddr() const override {
-        return addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
    }

-    /// Gets the size of the shader in guest memory, required for cache management
    std::size_t GetSizeInBytes() const override {
        return size;
    }
@@ -53,9 +51,8 @@ public:
    }

 private:
-    VAddr addr{};
+    VAddr cpu_addr{};
    u32 size{};
-
    OGLBuffer buffer;
 };

@@ -68,11 +65,11 @@ public:
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

 private:
-    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
+    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
+    GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
    void ReserveGlobalRegion(GlobalRegion region);

-    std::unordered_map<VAddr, GlobalRegion> reserve;
+    std::unordered_map<CacheAddr, GlobalRegion> reserve;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
-    const u8* source{Memory::GetPointer(*cpu_addr)};
+    const u8* source{memory_manager.GetPointer(gpu_addr)};

    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
        for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
    return index_offset;
 }

-} // namespace OpenGL
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
    return boost::make_iterator_range(map.equal_range(interval));
 }

-void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
    const u64 page_start{addr >> Memory::PAGE_BITS};
    const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};

@@ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() {

 void RasterizerOpenGL::FlushAll() {}

-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    res_cache.FlushRegion(addr, size);
 }

-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    res_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
@@ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    buffer_cache.InvalidateRegion(addr, size);
 }

-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
    FlushRegion(addr, size);
    InvalidateRegion(addr, size);
 }
@@ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,

    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

-    const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
+    const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
    if (!surface) {
        return {};
    }
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -57,9 +57,9 @@ public:
    void DrawArrays() override;
    void Clear() override;
    void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                               const Common::Rectangle<u32>& src_rect,
@@ -67,7 +67,7 @@ public:
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
-    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
    void LoadDiskResources(const std::atomic_bool& stop_loading,
                           const VideoCore::DiskResourceLoadCallback& callback) override;

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -57,10 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {

 void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};

-    addr = cpu_addr ? *cpu_addr : 0;
    gpu_addr = gpu_addr_;
+    host_ptr = memory_manager.GetPointer(gpu_addr_);
    size_in_bytes = SizeInBytesRaw();

    if (IsPixelFormatASTC(pixel_format)) {
@@ -446,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
            MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                          params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                          params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
-                          gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
+                          gl_buffer.data() + offset_gl, params.host_ptr + offset);
            offset += layer_size;
            offset_gl += gl_size;
        }
@@ -455,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
        MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                      params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                      params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
-                      gl_buffer.data(), gl_buffer.size(), params.addr + offset);
+                      gl_buffer.data(), params.host_ptr + offset);
    }
 }

@@ -513,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
                              "reinterpretation but the texture is tiled.");
        }
        const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
-
+        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
-                        Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
+                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
    }

    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -563,8 +562,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
 }

 CachedSurface::CachedSurface(const SurfaceParams& params)
-    : params(params), gl_target(SurfaceTargetToGL(params.target)),
-      cached_size_in_bytes(params.size_in_bytes) {
+    : params{params}, gl_target{SurfaceTargetToGL(params.target)},
+      cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
    texture.Create(gl_target);

    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)

    ApplyTextureDefaults(texture.handle, params.max_mip_level);

-    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
+    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());

    // Clamp size to mapped GPU memory region
    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -616,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
        LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
        cached_size_in_bytes = max_size;
    }
+
+    cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() {
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
-            std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
-                        params.size_in_bytes_gl);
+            std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
        } else {
-            const u8* start = Memory::GetPointer(params.addr);
+            const u8* start{params.host_ptr};
            u8* write_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(write_to, start, copy_size);
@@ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() {
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
                                           params.height, params.depth, true, true);
-    const u8* const texture_src_data = Memory::GetPointer(params.addr);
-    ASSERT(texture_src_data);
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));
@@ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() {
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
-            std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
        } else {
-            u8* start = Memory::GetPointer(params.addr);
+            u8* start{params.host_ptr};
            const u8* read_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(start, read_to, copy_size);
@@ -927,12 +925,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
 }

 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
-    if (params.addr == 0 || params.height * params.width == 0) {
+    if (params.gpu_addr == 0 || params.height * params.width == 0) {
        return {};
    }

    // Look up surface in the cache based on address
-    Surface surface{TryGet(params.addr)};
+    Surface surface{TryGet(params.host_ptr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
            // Use the cached surface as-is unless it's not synced with memory
@@ -981,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                   const Surface& dst_surface) {
    const auto& init_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};
-    VAddr address = init_params.addr;
-    const std::size_t layer_size = dst_params.LayerMemorySize();
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    Tegra::GPUVAddr address{init_params.gpu_addr};
+    const std::size_t layer_size{dst_params.LayerMemorySize()};
    for (u32 layer = 0; layer < dst_params.depth; layer++) {
        for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
-            const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
-            const Surface& copy = TryGet(sub_address);
-            if (!copy)
+            const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
+            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
+            if (!copy) {
                continue;
+            }
            const auto& src_params{copy->GetSurfaceParams()};
            const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
            const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1163,7 +1163,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
    const auto& dst_params{dst_surface->GetSurfaceParams()};

    // Flush enough memory for both the source and destination surface
-    FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+    FlushRegion(ToCacheAddr(src_params.host_ptr),
+                std::max(src_params.MemorySize(), dst_params.MemorySize()));

    LoadSurface(dst_surface);
 }
@@ -1215,8 +1216,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    return new_surface;
 }

-Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
-    return TryGet(addr);
+Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
+    return TryGet(host_ptr);
 }

 void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1243,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
    return {};
 }

-static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
-    const std::size_t size = params.LayerMemorySize();
-    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
+                                           u32 mipmap) {
+    const std::size_t size{params.LayerMemorySize()};
+    Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
    for (u32 i = 0; i < params.depth; i++) {
        if (start == addr) {
            return {i};
@@ -1267,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
            src_params.height == dst_params.MipHeight(*level) &&
            src_params.block_height >= dst_params.MipBlockHeight(*level)) {
            const std::optional<u32> slot =
-                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+                TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
            if (slot.has_value()) {
                glCopyImageSubData(render_surface->Texture().handle,
                                   SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1285,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
 }

 static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
-    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
-    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
    if (bound2 > bound1)
        return true;
    const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1327,7 +1329,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
 void RasterizerCacheOpenGL::SignalPostDrawCall() {
    for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
        if (current_color_buffers[i] != nullptr) {
-            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            Surface intersect =
+                CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
            if (intersect != nullptr) {
                PartialReinterpretSurface(current_color_buffers[i], intersect);
                texception = true;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -296,7 +296,7 @@ struct SurfaceParams {
    bool is_array;
    bool srgb_conversion;
    // Parameters used for caching
-    VAddr addr;
+    u8* host_ptr;
    Tegra::GPUVAddr gpu_addr;
    std::size_t size_in_bytes;
    std::size_t size_in_bytes_gl;
@@ -345,10 +345,10 @@ class RasterizerOpenGL;

 class CachedSurface final : public RasterizerCacheObject {
 public:
-    CachedSurface(const SurfaceParams& params);
+    explicit CachedSurface(const SurfaceParams& params);

-    VAddr GetAddr() const override {
-        return params.addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
@@ -432,6 +432,7 @@ private:
    std::size_t memory_size;
    bool reinterpreted = false;
    bool must_reload = false;
+    VAddr cpu_addr{};
 };

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -449,7 +450,7 @@ public:
    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);

    /// Tries to find a framebuffer using on the provided CPU address
-    Surface TryFindFramebufferSurface(VAddr addr) const;
+    Surface TryFindFramebufferSurface(const u8* host_ptr) const;

    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +507,12 @@ private:
    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;

-    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

    static auto GetReinterpretInterval(const Surface& object) {
-        return SurfaceInterval::right_open(object->GetAddr() + 1,
-                                           object->GetAddr() + object->GetMemorySize() - 1);
+        return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
+                                           object->GetCacheAddr() + object->GetMemorySize() - 1);
    }

    // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
@@ -523,7 +524,7 @@ private:
        reinterpret_surface->MarkReinterpreted();
    }

-    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+    Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
        const SurfaceInterval interval{addr};
        for (auto& pair :
             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -32,19 +32,16 @@ struct UnspecializedShader {
 namespace {

 /// Gets the address for the specified shader stage program
-VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                                            shader_config.offset);
-    ASSERT_MSG(address, "Invalid GPU address");
-    return *address;
+Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }

 /// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(const u8* host_ptr) {
    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
+    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
    return program_code;
 }

@@ -214,12 +211,13 @@ std::set<GLenum> GetSupportedFormats() {

 } // namespace

-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                           ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
-                           ProgramCode&& program_code, ProgramCode&& program_code_b)
-    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
-      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
+    : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
+      program_type{program_type}, disk_cache{disk_cache},
+      precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {

    const std::size_t code_size = CalculateProgramSize(program_code);
    const std::size_t code_size_b =
@@ -243,12 +241,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
    disk_cache.SaveRaw(raw);
 }

-CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                           ShaderDiskCacheOpenGL& disk_cache,
+CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                           const PrecompiledPrograms& precompiled_programs,
-                           GLShader::ProgramResult result)
-    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
-      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+                           GLShader::ProgramResult result, u8* host_ptr)
+    : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
+      disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
+                                                                              host_ptr} {

    code = std::move(result.first);
    entries = result.second;
@@ -271,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
                disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
            }

-            LabelGLObject(GL_PROGRAM, program->handle, addr);
+            LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
        }

        handle = program->handle;
@@ -323,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
        disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
    }

-    LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+    LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);

    return target_program->handle;
 };
@@ -486,29 +485,32 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
        return last_shaders[static_cast<u32>(program)];
    }

-    const VAddr program_addr{GetShaderAddress(program)};
+    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
+    const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};

    // Look up shader in the cache based on address
-    Shader shader{TryGet(program_addr)};
+    const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+    Shader shader{TryGet(host_ptr)};

    if (!shader) {
        // No shader found - create a new one
-        ProgramCode program_code = GetShaderCode(program_addr);
+        ProgramCode program_code{GetShaderCode(host_ptr)};
        ProgramCode program_code_b;
        if (program == Maxwell::ShaderProgram::VertexA) {
-            program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+            program_code_b = GetShaderCode(
+                memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
        }
        const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-
+        const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
        const auto found = precompiled_shaders.find(unique_identifier);
        if (found != precompiled_shaders.end()) {
            shader =
-                std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
-                                               precompiled_programs, found->second);
+                std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+                                               precompiled_programs, found->second, host_ptr);
        } else {
            shader = std::make_shared<CachedShader>(
-                program_addr, unique_identifier, program, disk_cache, precompiled_programs,
-                std::move(program_code), std::move(program_code_b));
+                cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                std::move(program_code), std::move(program_code_b), host_ptr);
        }
        Register(shader);
    }
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;

 class CachedShader final : public RasterizerCacheObject {
 public:
-    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                          ShaderDiskCacheOpenGL& disk_cache,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                          const PrecompiledPrograms& precompiled_programs,
-                          ProgramCode&& program_code, ProgramCode&& program_code_b);
+                          ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);

-    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
-                          ShaderDiskCacheOpenGL& disk_cache,
+    explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
+                          Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
                          const PrecompiledPrograms& precompiled_programs,
-                          GLShader::ProgramResult result);
+                          GLShader::ProgramResult result, u8* host_ptr);

-    VAddr GetAddr() const override {
-        return addr;
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
@@ -91,7 +91,8 @@ private:

    ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;

-    VAddr addr{};
+    u8* host_ptr{};
+    VAddr cpu_addr{};
    u64 unique_identifier{};
    Maxwell::ShaderProgram program_type{};
    ShaderDiskCacheOpenGL& disk_cache;
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -164,12 +164,13 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
        // Reset the screen info's display texture to its own permanent texture
        screen_info.display_texture = screen_info.texture.resource.handle;

-        Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
-                                             Memory::FlushMode::Flush);
+        rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);

-        VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
-                                       Memory::GetPointer(framebuffer_addr),
-                                       gl_framebuffer_data.data(), true);
+        constexpr u32 linear_bpp = 4;
+        VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
+                                       framebuffer.width, framebuffer.height, bytes_per_pixel,
+                                       linear_bpp, Memory::GetPointer(framebuffer_addr),
+                                       gl_framebuffer_data.data());

        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -17,6 +17,11 @@

 namespace Vulkan {

+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
+                                     std::size_t alignment, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
+      alignment{alignment} {} // base class listed first: it is always initialized first
+
 VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
@@ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default;
 u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
                                bool cache) {
    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT(cpu_addr);
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    cache &= size >= 2048;

+    const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
    if (cache) {
-        if (auto entry = TryGet(*cpu_addr); entry) {
-            if (entry->size >= size && entry->alignment == alignment) {
-                return entry->offset;
+        auto entry = TryGet(host_ptr);
+        if (entry) {
+            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
+                return entry->GetOffset();
            }
            Unregister(entry);
        }
@@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
    AlignBuffer(alignment);
    const u64 uploaded_offset = buffer_offset;

-    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+    if (!host_ptr) {
+        return uploaded_offset;
+    }

+    std::memcpy(buffer_ptr, host_ptr, size);
    buffer_ptr += size;
    buffer_offset += size;

    if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>();
-        entry->offset = uploaded_offset;
-        entry->size = size;
-        entry->alignment = alignment;
-        entry->addr = *cpu_addr;
+        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
+                                                         alignment, host_ptr);
        Register(entry);
    }

--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -24,22 +24,39 @@ class VKFence;
 class VKMemoryManager;
 class VKStreamBuffer;

-struct CachedBufferEntry final : public RasterizerCacheObject {
-    VAddr GetAddr() const override {
-        return addr;
+class CachedBufferEntry final : public RasterizerCacheObject {
+public:
+    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
+                               u8* host_ptr);
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return size;
    }

+    std::size_t GetSize() const {
+        return size;
+    }
+
+    u64 GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetAlignment() const {
+        return alignment;
+    }
+
    // We do not have to flush this cache as things in it are never modified by us.
    void Flush() override {}

-    VAddr addr;
-    std::size_t size;
-    u64 offset;
-    std::size_t alignment;
+private:
+    VAddr cpu_addr{};
+    std::size_t size{};
+    u64 offset{};
+    std::size_t alignment{};
 };

 class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -0,0 +1,81 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <optional>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/cityhash.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
+    // TODO(Rodrigo): Manage integer border colors
+    if (color == std::array<float, 4>{0, 0, 0, 0}) {
+        return vk::BorderColor::eFloatTransparentBlack;
+    } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
+        return vk::BorderColor::eFloatOpaqueBlack;
+    } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
+        return vk::BorderColor::eFloatOpaqueWhite;
+    } else {
+        return {};
+    }
+}
+
+std::size_t SamplerCacheKey::Hash() const {
+    static_assert(sizeof(raw) % sizeof(u64) == 0);
+    const auto* const bytes = reinterpret_cast<const char*>(raw.data());
+    return static_cast<std::size_t>(Common::CityHash64(bytes, sizeof(raw))); // length in bytes
+}
+
+bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
+    return raw == rhs.raw;
+}
+
+VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
+
+VKSamplerCache::~VKSamplerCache() = default;
+
+vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
+    const SamplerCacheKey key{tsc};
+    auto entry = cache.find(key);
+    if (entry == cache.end()) {
+        entry = cache.emplace(key, CreateSampler(tsc)).first; // nothing cached if creation throws
+    }
+    return *entry->second;
+}
+
+UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
+    const float max_anisotropy = tsc.GetMaxAnisotropy();
+    const bool has_anisotropy = max_anisotropy > 1.0f;
+
+    const auto border_color = tsc.GetBorderColor();
+    const auto vk_border_color = TryConvertBorderColor(border_color);
+    UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
+                         border_color[0], border_color[1], border_color[2], border_color[3]);
+
+    constexpr bool unnormalized_coords = false;
+
+    const vk::SamplerCreateInfo sampler_ci(
+        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
+        MaxwellToVK::Sampler::Filter(tsc.min_filter),
+        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
+        max_anisotropy, tsc.depth_compare_enabled,
+        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
+        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
+        unnormalized_coords);
+
+    const auto& dld = device.GetDispatchLoader();
+    const auto dev = device.GetLogical();
+    return dev.createSamplerUnique(sampler_ci, nullptr, dld);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/textures/texture.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
+    std::size_t Hash() const;
+
+    bool operator==(const SamplerCacheKey& rhs) const;
+
+    bool operator!=(const SamplerCacheKey& rhs) const {
+        return !operator==(rhs);
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::SamplerCacheKey> {
+    std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class VKSamplerCache {
+public:
+    explicit VKSamplerCache(const VKDevice& device);
+    ~VKSamplerCache();
+
+    vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
+
+private:
+    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
+
+    const VKDevice& device;
+    std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
+};
+
+} // namespace Vulkan
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
 #include <cstring>
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
    }
 }

-void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                      u32 block_depth, u32 width_spacing) {
    CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
                     (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
-                     bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
-                     block_height, block_depth, width_spacing);
+                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
+                     width_spacing);
 }

-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
-                                 u32 block_height, u32 block_depth, u32 width_spacing) {
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth, u32 block_height,
+                                 u32 block_depth, u32 width_spacing) {
    std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
    UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
                     width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
 }

 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height) {
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
    const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
                                  gob_size_x};
    for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
            const u32 gob_address =
                gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
            const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
-            const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
-            const VAddr dest_addr = swizzled_data + swizzled_offset;
+            u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
+            u8* dest_addr = swizzled_data + swizzled_offset;

-            Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
+            std::memcpy(dest_addr, source_line, bytes_per_pixel);
        }
    }
 }

 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y) {
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y) {
    for (u32 line = 0; line < subrect_height; ++line) {
        const u32 y2 = line + offset_y;
        const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
            const u32 x2 = (x + offset_x) * bytes_per_pixel;
            const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
            const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
-            const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
-            const VAddr source_addr = swizzled_data + swizzled_offset;
+            u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
+            u8* source_addr = swizzled_data + swizzled_offset;

-            Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
+            std::memcpy(dest_line, source_addr, bytes_per_pixel);
        }
    }
 }
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
 }

 /// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                      u32 block_height = TICEntry::DefaultBlockHeight,
                      u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);

 /// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
-                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
+                                 u32 width, u32 height, u32 depth,
                                 u32 block_height = TICEntry::DefaultBlockHeight,
                                 u32 block_depth = TICEntry::DefaultBlockHeight,
                                 u32 width_spacing = 0);
@@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height

 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                    u32 block_height);
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);

 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
-                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
-                      u32 block_height, u32 offset_x, u32 offset_y);
+                      u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                      u32 offset_x, u32 offset_y);

 } // namespace Tegra::Texture
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -283,31 +283,36 @@ enum class TextureMipmapFilter : u32 {

 struct TSCEntry {
    union {
-        BitField<0, 3, WrapMode> wrap_u;
-        BitField<3, 3, WrapMode> wrap_v;
-        BitField<6, 3, WrapMode> wrap_p;
-        BitField<9, 1, u32> depth_compare_enabled;
-        BitField<10, 3, DepthCompareFunc> depth_compare_func;
-        BitField<13, 1, u32> srgb_conversion;
-        BitField<20, 3, u32> max_anisotropy;
+        struct {
+            union {
+                BitField<0, 3, WrapMode> wrap_u;
+                BitField<3, 3, WrapMode> wrap_v;
+                BitField<6, 3, WrapMode> wrap_p;
+                BitField<9, 1, u32> depth_compare_enabled;
+                BitField<10, 3, DepthCompareFunc> depth_compare_func;
+                BitField<13, 1, u32> srgb_conversion;
+                BitField<20, 3, u32> max_anisotropy;
+            };
+            union {
+                BitField<0, 2, TextureFilter> mag_filter;
+                BitField<4, 2, TextureFilter> min_filter;
+                BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+                BitField<9, 1, u32> cubemap_interface_filtering;
+                BitField<12, 13, u32> mip_lod_bias;
+            };
+            union {
+                BitField<0, 12, u32> min_lod_clamp;
+                BitField<12, 12, u32> max_lod_clamp;
+                BitField<24, 8, u32> srgb_border_color_r;
+            };
+            union {
+                BitField<12, 8, u32> srgb_border_color_g;
+                BitField<20, 8, u32> srgb_border_color_b;
+            };
+            std::array<f32, 4> border_color;
+        };
+        std::array<u8, 0x20> raw;
    };
-    union {
-        BitField<0, 2, TextureFilter> mag_filter;
-        BitField<4, 2, TextureFilter> min_filter;
-        BitField<6, 2, TextureMipmapFilter> mipmap_filter;
-        BitField<9, 1, u32> cubemap_interface_filtering;
-        BitField<12, 13, u32> mip_lod_bias;
-    };
-    union {
-        BitField<0, 12, u32> min_lod_clamp;
-        BitField<12, 12, u32> max_lod_clamp;
-        BitField<24, 8, u32> srgb_border_color_r;
-    };
-    union {
-        BitField<12, 8, u32> srgb_border_color_g;
-        BitField<20, 8, u32> srgb_border_color_b;
-    };
-    std::array<f32, 4> border_color;

    float GetMaxAnisotropy() const {
        return static_cast<float>(1U << max_anisotropy);
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -209,7 +209,7 @@ void Config::ReadPlayerValues() {
    for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
        auto& player = Settings::values.players[p];

-        player.connected = qt_config->value(QString("player_%1_connected").arg(p), false).toBool();
+        player.connected = ReadSetting(QString("player_%1_connected").arg(p), false).toBool();

        player.type = static_cast<Settings::ControllerType>(
            qt_config
@@ -269,7 +269,7 @@ void Config::ReadPlayerValues() {
 }

 void Config::ReadDebugValues() {
-    Settings::values.debug_pad_enabled = qt_config->value("debug_pad_enabled", false).toBool();
+    Settings::values.debug_pad_enabled = ReadSetting("debug_pad_enabled", false).toBool();
    for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
        std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
        Settings::values.debug_pad_buttons[i] =
@@ -298,7 +298,7 @@ void Config::ReadDebugValues() {
 }

 void Config::ReadKeyboardValues() {
-    Settings::values.keyboard_enabled = qt_config->value("keyboard_enabled", false).toBool();
+    Settings::values.keyboard_enabled = ReadSetting("keyboard_enabled", false).toBool();

    std::transform(default_keyboard_keys.begin(), default_keyboard_keys.end(),
                   Settings::values.keyboard_keys.begin(), InputCommon::GenerateKeyboardParam);
@@ -311,7 +311,7 @@ void Config::ReadKeyboardValues() {
 }

 void Config::ReadMouseValues() {
-    Settings::values.mouse_enabled = qt_config->value("mouse_enabled", false).toBool();
+    Settings::values.mouse_enabled = ReadSetting("mouse_enabled", false).toBool();

    for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
        std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
@@ -327,16 +327,14 @@ void Config::ReadMouseValues() {
 }

 void Config::ReadTouchscreenValues() {
-    Settings::values.touchscreen.enabled = qt_config->value("touchscreen_enabled", true).toBool();
+    Settings::values.touchscreen.enabled = ReadSetting("touchscreen_enabled", true).toBool();
    Settings::values.touchscreen.device =
-        qt_config->value("touchscreen_device", "engine:emu_window").toString().toStdString();
+        ReadSetting("touchscreen_device", "engine:emu_window").toString().toStdString();

-    Settings::values.touchscreen.finger = qt_config->value("touchscreen_finger", 0).toUInt();
-    Settings::values.touchscreen.rotation_angle = qt_config->value("touchscreen_angle", 0).toUInt();
-    Settings::values.touchscreen.diameter_x =
-        qt_config->value("touchscreen_diameter_x", 15).toUInt();
-    Settings::values.touchscreen.diameter_y =
-        qt_config->value("touchscreen_diameter_y", 15).toUInt();
+    Settings::values.touchscreen.finger = ReadSetting("touchscreen_finger", 0).toUInt();
+    Settings::values.touchscreen.rotation_angle = ReadSetting("touchscreen_angle", 0).toUInt();
+    Settings::values.touchscreen.diameter_x = ReadSetting("touchscreen_diameter_x", 15).toUInt();
+    Settings::values.touchscreen.diameter_y = ReadSetting("touchscreen_diameter_y", 15).toUInt();
    qt_config->endGroup();
 }

@@ -357,42 +355,41 @@ void Config::ReadValues() {
    ReadTouchscreenValues();

    Settings::values.motion_device =
-        qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
+        ReadSetting("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
            .toString()
            .toStdString();

    qt_config->beginGroup("Core");
-    Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
-    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
+    Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("Renderer");
-    Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
-    Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
-    Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
-    Settings::values.use_disk_shader_cache =
-        qt_config->value("use_disk_shader_cache", false).toBool();
+    Settings::values.resolution_factor = ReadSetting("resolution_factor", 1.0).toFloat();
+    Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool();
+    Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt();
+    Settings::values.use_disk_shader_cache = ReadSetting("use_disk_shader_cache", true).toBool();
    Settings::values.use_accurate_gpu_emulation =
-        qt_config->value("use_accurate_gpu_emulation", false).toBool();
+        ReadSetting("use_accurate_gpu_emulation", false).toBool();
    Settings::values.use_asynchronous_gpu_emulation =
-        qt_config->value("use_asynchronous_gpu_emulation", false).toBool();
+        ReadSetting("use_asynchronous_gpu_emulation", false).toBool();

-    Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat();
-    Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat();
-    Settings::values.bg_blue = qt_config->value("bg_blue", 0.0).toFloat();
+    Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
+    Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
+    Settings::values.bg_blue = ReadSetting("bg_blue", 0.0).toFloat();
    qt_config->endGroup();

    qt_config->beginGroup("Audio");
-    Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
+    Settings::values.sink_id = ReadSetting("output_engine", "auto").toString().toStdString();
    Settings::values.enable_audio_stretching =
-        qt_config->value("enable_audio_stretching", true).toBool();
+        ReadSetting("enable_audio_stretching", true).toBool();
    Settings::values.audio_device_id =
-        qt_config->value("output_device", "auto").toString().toStdString();
-    Settings::values.volume = qt_config->value("volume", 1).toFloat();
+        ReadSetting("output_device", "auto").toString().toStdString();
+    Settings::values.volume = ReadSetting("volume", 1).toFloat();
    qt_config->endGroup();

    qt_config->beginGroup("Data Storage");
-    Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool();
+    Settings::values.use_virtual_sd = ReadSetting("use_virtual_sd", true).toBool();
    FileUtil::GetUserPath(
        FileUtil::UserPath::NANDDir,
        qt_config
@@ -410,30 +407,30 @@ void Config::ReadValues() {
    qt_config->endGroup();

    qt_config->beginGroup("Core");
-    Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
-    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
+    Settings::values.use_cpu_jit = ReadSetting("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = ReadSetting("use_multi_core", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("System");
-    Settings::values.use_docked_mode = qt_config->value("use_docked_mode", false).toBool();
-    Settings::values.enable_nfc = qt_config->value("enable_nfc", true).toBool();
+    Settings::values.use_docked_mode = ReadSetting("use_docked_mode", false).toBool();
+    Settings::values.enable_nfc = ReadSetting("enable_nfc", true).toBool();

-    Settings::values.current_user = std::clamp<int>(qt_config->value("current_user", 0).toInt(), 0,
-                                                    Service::Account::MAX_USERS - 1);
+    Settings::values.current_user =
+        std::clamp<int>(ReadSetting("current_user", 0).toInt(), 0, Service::Account::MAX_USERS - 1);

-    Settings::values.language_index = qt_config->value("language_index", 1).toInt();
+    Settings::values.language_index = ReadSetting("language_index", 1).toInt();

-    const auto rng_seed_enabled = qt_config->value("rng_seed_enabled", false).toBool();
+    const auto rng_seed_enabled = ReadSetting("rng_seed_enabled", false).toBool();
    if (rng_seed_enabled) {
-        Settings::values.rng_seed = qt_config->value("rng_seed", 0).toULongLong();
+        Settings::values.rng_seed = ReadSetting("rng_seed", 0).toULongLong();
    } else {
        Settings::values.rng_seed = std::nullopt;
    }

-    const auto custom_rtc_enabled = qt_config->value("custom_rtc_enabled", false).toBool();
+    const auto custom_rtc_enabled = ReadSetting("custom_rtc_enabled", false).toBool();
    if (custom_rtc_enabled) {
        Settings::values.custom_rtc =
-            std::chrono::seconds(qt_config->value("custom_rtc", 0).toULongLong());
+            std::chrono::seconds(ReadSetting("custom_rtc", 0).toULongLong());
    } else {
        Settings::values.custom_rtc = std::nullopt;
    }
@@ -441,35 +438,35 @@ void Config::ReadValues() {
    qt_config->endGroup();

    qt_config->beginGroup("Miscellaneous");
-    Settings::values.log_filter = qt_config->value("log_filter", "*:Info").toString().toStdString();
-    Settings::values.use_dev_keys = qt_config->value("use_dev_keys", false).toBool();
+    Settings::values.log_filter = ReadSetting("log_filter", "*:Info").toString().toStdString();
+    Settings::values.use_dev_keys = ReadSetting("use_dev_keys", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("Debugging");
-    Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool();
-    Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt();
-    Settings::values.program_args = qt_config->value("program_args", "").toString().toStdString();
-    Settings::values.dump_exefs = qt_config->value("dump_exefs", false).toBool();
-    Settings::values.dump_nso = qt_config->value("dump_nso", false).toBool();
+    Settings::values.use_gdbstub = ReadSetting("use_gdbstub", false).toBool();
+    Settings::values.gdbstub_port = ReadSetting("gdbstub_port", 24689).toInt();
+    Settings::values.program_args = ReadSetting("program_args", "").toString().toStdString();
+    Settings::values.dump_exefs = ReadSetting("dump_exefs", false).toBool();
+    Settings::values.dump_nso = ReadSetting("dump_nso", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("WebService");
-    Settings::values.enable_telemetry = qt_config->value("enable_telemetry", true).toBool();
+    Settings::values.enable_telemetry = ReadSetting("enable_telemetry", true).toBool();
    Settings::values.web_api_url =
-        qt_config->value("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
-    Settings::values.yuzu_username = qt_config->value("yuzu_username").toString().toStdString();
-    Settings::values.yuzu_token = qt_config->value("yuzu_token").toString().toStdString();
+        ReadSetting("web_api_url", "https://api.yuzu-emu.org").toString().toStdString();
+    Settings::values.yuzu_username = ReadSetting("yuzu_username").toString().toStdString();
+    Settings::values.yuzu_token = ReadSetting("yuzu_token").toString().toStdString();
    qt_config->endGroup();

    const auto size = qt_config->beginReadArray("DisabledAddOns");
    for (int i = 0; i < size; ++i) {
        qt_config->setArrayIndex(i);
-        const auto title_id = qt_config->value("title_id", 0).toULongLong();
+        const auto title_id = ReadSetting("title_id", 0).toULongLong();
        std::vector<std::string> out;
        const auto d_size = qt_config->beginReadArray("disabled");
        for (int j = 0; j < d_size; ++j) {
            qt_config->setArrayIndex(j);
-            out.push_back(qt_config->value("d", "").toString().toStdString());
+            out.push_back(ReadSetting("d", "").toString().toStdString());
        }
        qt_config->endArray();
        Settings::values.disabled_addons.insert_or_assign(title_id, out);
@@ -477,41 +474,38 @@ void Config::ReadValues() {
    qt_config->endArray();

    qt_config->beginGroup("UI");
-    UISettings::values.theme = qt_config->value("theme", UISettings::themes[0].second).toString();
+    UISettings::values.theme = ReadSetting("theme", UISettings::themes[0].second).toString();
    UISettings::values.enable_discord_presence =
-        qt_config->value("enable_discord_presence", true).toBool();
+        ReadSetting("enable_discord_presence", true).toBool();
    UISettings::values.screenshot_resolution_factor =
-        static_cast<u16>(qt_config->value("screenshot_resolution_factor", 0).toUInt());
-    UISettings::values.select_user_on_boot =
-        qt_config->value("select_user_on_boot", false).toBool();
+        static_cast<u16>(ReadSetting("screenshot_resolution_factor", 0).toUInt());
+    UISettings::values.select_user_on_boot = ReadSetting("select_user_on_boot", false).toBool();

    qt_config->beginGroup("UIGameList");
-    UISettings::values.show_unknown = qt_config->value("show_unknown", true).toBool();
-    UISettings::values.show_add_ons = qt_config->value("show_add_ons", true).toBool();
-    UISettings::values.icon_size = qt_config->value("icon_size", 64).toUInt();
-    UISettings::values.row_1_text_id = qt_config->value("row_1_text_id", 3).toUInt();
-    UISettings::values.row_2_text_id = qt_config->value("row_2_text_id", 2).toUInt();
+    UISettings::values.show_unknown = ReadSetting("show_unknown", true).toBool();
+    UISettings::values.show_add_ons = ReadSetting("show_add_ons", true).toBool();
+    UISettings::values.icon_size = ReadSetting("icon_size", 64).toUInt();
+    UISettings::values.row_1_text_id = ReadSetting("row_1_text_id", 3).toUInt();
+    UISettings::values.row_2_text_id = ReadSetting("row_2_text_id", 2).toUInt();
    qt_config->endGroup();

    qt_config->beginGroup("UILayout");
-    UISettings::values.geometry = qt_config->value("geometry").toByteArray();
-    UISettings::values.state = qt_config->value("state").toByteArray();
-    UISettings::values.renderwindow_geometry =
-        qt_config->value("geometryRenderWindow").toByteArray();
-    UISettings::values.gamelist_header_state =
-        qt_config->value("gameListHeaderState").toByteArray();
+    UISettings::values.geometry = ReadSetting("geometry").toByteArray();
+    UISettings::values.state = ReadSetting("state").toByteArray();
+    UISettings::values.renderwindow_geometry = ReadSetting("geometryRenderWindow").toByteArray();
+    UISettings::values.gamelist_header_state = ReadSetting("gameListHeaderState").toByteArray();
    UISettings::values.microprofile_geometry =
-        qt_config->value("microProfileDialogGeometry").toByteArray();
+        ReadSetting("microProfileDialogGeometry").toByteArray();
    UISettings::values.microprofile_visible =
-        qt_config->value("microProfileDialogVisible", false).toBool();
+        ReadSetting("microProfileDialogVisible", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("Paths");
-    UISettings::values.roms_path = qt_config->value("romsPath").toString();
-    UISettings::values.symbols_path = qt_config->value("symbolsPath").toString();
-    UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString();
-    UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool();
-    UISettings::values.recent_files = qt_config->value("recentFiles").toStringList();
+    UISettings::values.roms_path = ReadSetting("romsPath").toString();
+    UISettings::values.symbols_path = ReadSetting("symbolsPath").toString();
+    UISettings::values.gamedir = ReadSetting("gameListRootDir", ".").toString();
+    UISettings::values.gamedir_deepscan = ReadSetting("gameListDeepScan", false).toBool();
+    UISettings::values.recent_files = ReadSetting("recentFiles").toStringList();
    qt_config->endGroup();

    qt_config->beginGroup("Shortcuts");
@@ -524,8 +518,8 @@ void Config::ReadValues() {
            qt_config->beginGroup(hotkey);
            UISettings::values.shortcuts.emplace_back(UISettings::Shortcut(
                group + "/" + hotkey,
-                UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(),
-                                               qt_config->value("Context").toInt())));
+                UISettings::ContextualShortcut(ReadSetting("KeySeq").toString(),
+                                               ReadSetting("Context").toInt())));
            qt_config->endGroup();
        }

@@ -533,16 +527,16 @@ void Config::ReadValues() {
    }
    qt_config->endGroup();

-    UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool();
-    UISettings::values.fullscreen = qt_config->value("fullscreen", false).toBool();
-    UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool();
-    UISettings::values.show_filter_bar = qt_config->value("showFilterBar", true).toBool();
-    UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool();
-    UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool();
-    UISettings::values.first_start = qt_config->value("firstStart", true).toBool();
-    UISettings::values.callout_flags = qt_config->value("calloutFlags", 0).toUInt();
-    UISettings::values.show_console = qt_config->value("showConsole", false).toBool();
-    UISettings::values.profile_index = qt_config->value("profileIndex", 0).toUInt();
+    UISettings::values.single_window_mode = ReadSetting("singleWindowMode", true).toBool();
+    UISettings::values.fullscreen = ReadSetting("fullscreen", false).toBool();
+    UISettings::values.display_titlebar = ReadSetting("displayTitleBars", true).toBool();
+    UISettings::values.show_filter_bar = ReadSetting("showFilterBar", true).toBool();
+    UISettings::values.show_status_bar = ReadSetting("showStatusBar", true).toBool();
+    UISettings::values.confirm_before_closing = ReadSetting("confirmClose", true).toBool();
+    UISettings::values.first_start = ReadSetting("firstStart", true).toBool();
+    UISettings::values.callout_flags = ReadSetting("calloutFlags", 0).toUInt();
+    UISettings::values.show_console = ReadSetting("showConsole", false).toBool();
+    UISettings::values.profile_index = ReadSetting("profileIndex", 0).toUInt();

    ApplyDefaultProfileIfInputInvalid();

@@ -553,62 +547,79 @@ void Config::SavePlayerValues() {
    for (std::size_t p = 0; p < Settings::values.players.size(); ++p) {
        const auto& player = Settings::values.players[p];

-        qt_config->setValue(QString("player_%1_connected").arg(p), player.connected);
-        qt_config->setValue(QString("player_%1_type").arg(p), static_cast<u8>(player.type));
+        WriteSetting(QString("player_%1_connected").arg(p), player.connected, false);
+        WriteSetting(QString("player_%1_type").arg(p), static_cast<u8>(player.type),
+                     static_cast<u8>(Settings::ControllerType::DualJoycon));

-        qt_config->setValue(QString("player_%1_body_color_left").arg(p), player.body_color_left);
-        qt_config->setValue(QString("player_%1_body_color_right").arg(p), player.body_color_right);
-        qt_config->setValue(QString("player_%1_button_color_left").arg(p),
-                            player.button_color_left);
-        qt_config->setValue(QString("player_%1_button_color_right").arg(p),
-                            player.button_color_right);
+        WriteSetting(QString("player_%1_body_color_left").arg(p), player.body_color_left,
+                     Settings::JOYCON_BODY_NEON_BLUE);
+        WriteSetting(QString("player_%1_body_color_right").arg(p), player.body_color_right,
+                     Settings::JOYCON_BODY_NEON_RED);
+        WriteSetting(QString("player_%1_button_color_left").arg(p), player.button_color_left,
+                     Settings::JOYCON_BUTTONS_NEON_BLUE);
+        WriteSetting(QString("player_%1_button_color_right").arg(p), player.button_color_right,
+                     Settings::JOYCON_BUTTONS_NEON_RED);

        for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-            qt_config->setValue(QString("player_%1_").arg(p) +
-                                    QString::fromStdString(Settings::NativeButton::mapping[i]),
-                                QString::fromStdString(player.buttons[i]));
+            std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
+            WriteSetting(QString("player_%1_").arg(p) +
+                             QString::fromStdString(Settings::NativeButton::mapping[i]),
+                         QString::fromStdString(player.buttons[i]),
+                         QString::fromStdString(default_param));
        }
        for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-            qt_config->setValue(QString("player_%1_").arg(p) +
-                                    QString::fromStdString(Settings::NativeAnalog::mapping[i]),
-                                QString::fromStdString(player.analogs[i]));
+            std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
+                default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
+                default_analogs[i][3], default_analogs[i][4], 0.5f);
+            WriteSetting(QString("player_%1_").arg(p) +
+                             QString::fromStdString(Settings::NativeAnalog::mapping[i]),
+                         QString::fromStdString(player.analogs[i]),
+                         QString::fromStdString(default_param));
        }
    }
 }

 void Config::SaveDebugValues() {
-    qt_config->setValue("debug_pad_enabled", Settings::values.debug_pad_enabled);
+    WriteSetting("debug_pad_enabled", Settings::values.debug_pad_enabled, false);
    for (int i = 0; i < Settings::NativeButton::NumButtons; ++i) {
-        qt_config->setValue(QString("debug_pad_") +
-                                QString::fromStdString(Settings::NativeButton::mapping[i]),
-                            QString::fromStdString(Settings::values.debug_pad_buttons[i]));
+        std::string default_param = InputCommon::GenerateKeyboardParam(default_buttons[i]);
+        WriteSetting(QString("debug_pad_") +
+                         QString::fromStdString(Settings::NativeButton::mapping[i]),
+                     QString::fromStdString(Settings::values.debug_pad_buttons[i]),
+                     QString::fromStdString(default_param));
    }
    for (int i = 0; i < Settings::NativeAnalog::NumAnalogs; ++i) {
-        qt_config->setValue(QString("debug_pad_") +
-                                QString::fromStdString(Settings::NativeAnalog::mapping[i]),
-                            QString::fromStdString(Settings::values.debug_pad_analogs[i]));
+        std::string default_param = InputCommon::GenerateAnalogParamFromKeys(
+            default_analogs[i][0], default_analogs[i][1], default_analogs[i][2],
+            default_analogs[i][3], default_analogs[i][4], 0.5f);
+        WriteSetting(QString("debug_pad_") +
+                         QString::fromStdString(Settings::NativeAnalog::mapping[i]),
+                     QString::fromStdString(Settings::values.debug_pad_analogs[i]),
+                     QString::fromStdString(default_param));
    }
 }

 void Config::SaveMouseValues() {
-    qt_config->setValue("mouse_enabled", Settings::values.mouse_enabled);
+    WriteSetting("mouse_enabled", Settings::values.mouse_enabled, false);

    for (int i = 0; i < Settings::NativeMouseButton::NumMouseButtons; ++i) {
-        qt_config->setValue(QString("mouse_") +
-                                QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
-                            QString::fromStdString(Settings::values.mouse_buttons[i]));
+        std::string default_param = InputCommon::GenerateKeyboardParam(default_mouse_buttons[i]);
+        WriteSetting(QString("mouse_") +
+                         QString::fromStdString(Settings::NativeMouseButton::mapping[i]),
+                     QString::fromStdString(Settings::values.mouse_buttons[i]),
+                     QString::fromStdString(default_param));
    }
 }

 void Config::SaveTouchscreenValues() {
-    qt_config->setValue("touchscreen_enabled", Settings::values.touchscreen.enabled);
-    qt_config->setValue("touchscreen_device",
-                        QString::fromStdString(Settings::values.touchscreen.device));
+    WriteSetting("touchscreen_enabled", Settings::values.touchscreen.enabled, true);
+    WriteSetting("touchscreen_device", QString::fromStdString(Settings::values.touchscreen.device),
+                 "engine:emu_window");

-    qt_config->setValue("touchscreen_finger", Settings::values.touchscreen.finger);
-    qt_config->setValue("touchscreen_angle", Settings::values.touchscreen.rotation_angle);
-    qt_config->setValue("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x);
-    qt_config->setValue("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y);
+    WriteSetting("touchscreen_finger", Settings::values.touchscreen.finger, 0);
+    WriteSetting("touchscreen_angle", Settings::values.touchscreen.rotation_angle, 0);
+    WriteSetting("touchscreen_diameter_x", Settings::values.touchscreen.diameter_x, 15);
+    WriteSetting("touchscreen_diameter_y", Settings::values.touchscreen.diameter_y, 15);
 }

 void Config::SaveValues() {
@@ -619,91 +630,96 @@ void Config::SaveValues() {
    SaveMouseValues();
    SaveTouchscreenValues();

-    qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device));
-    qt_config->setValue("keyboard_enabled", Settings::values.keyboard_enabled);
+    WriteSetting("motion_device", QString::fromStdString(Settings::values.motion_device),
+                 "engine:motion_emu,update_period:100,sensitivity:0.01");
+    WriteSetting("keyboard_enabled", Settings::values.keyboard_enabled, false);

    qt_config->endGroup();

    qt_config->beginGroup("Core");
-    qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
-    qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
+    WriteSetting("use_cpu_jit", Settings::values.use_cpu_jit, true);
+    WriteSetting("use_multi_core", Settings::values.use_multi_core, false);
    qt_config->endGroup();

    qt_config->beginGroup("Renderer");
-    qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
-    qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
-    qt_config->setValue("frame_limit", Settings::values.frame_limit);
-    qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
-    qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
-    qt_config->setValue("use_asynchronous_gpu_emulation",
-                        Settings::values.use_asynchronous_gpu_emulation);
+    WriteSetting("resolution_factor", (double)Settings::values.resolution_factor, 1.0);
+    WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true);
+    WriteSetting("frame_limit", Settings::values.frame_limit, 100);
+    WriteSetting("use_disk_shader_cache", Settings::values.use_disk_shader_cache, true);
+    WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
+    WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
+                 false);

    // Cast to double because Qt's written float values are not human-readable
-    qt_config->setValue("bg_red", (double)Settings::values.bg_red);
-    qt_config->setValue("bg_green", (double)Settings::values.bg_green);
-    qt_config->setValue("bg_blue", (double)Settings::values.bg_blue);
+    WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);
+    WriteSetting("bg_green", (double)Settings::values.bg_green, 0.0);
+    WriteSetting("bg_blue", (double)Settings::values.bg_blue, 0.0);
    qt_config->endGroup();

    qt_config->beginGroup("Audio");
-    qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
-    qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
-    qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
-    qt_config->setValue("volume", Settings::values.volume);
+    WriteSetting("output_engine", QString::fromStdString(Settings::values.sink_id), "auto");
+    WriteSetting("enable_audio_stretching", Settings::values.enable_audio_stretching, true);
+    WriteSetting("output_device", QString::fromStdString(Settings::values.audio_device_id), "auto");
+    WriteSetting("volume", Settings::values.volume, 1.0f);
    qt_config->endGroup();

    qt_config->beginGroup("Data Storage");
-    qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd);
-    qt_config->setValue("nand_directory",
-                        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
-    qt_config->setValue("sdmc_directory",
-                        QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
+    WriteSetting("use_virtual_sd", Settings::values.use_virtual_sd, true);
+    WriteSetting("nand_directory",
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)));
+    WriteSetting("sdmc_directory",
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
+                 QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)));
    qt_config->endGroup();

    qt_config->beginGroup("System");
-    qt_config->setValue("use_docked_mode", Settings::values.use_docked_mode);
-    qt_config->setValue("enable_nfc", Settings::values.enable_nfc);
-    qt_config->setValue("current_user", Settings::values.current_user);
-    qt_config->setValue("language_index", Settings::values.language_index);
+    WriteSetting("use_docked_mode", Settings::values.use_docked_mode, false);
+    WriteSetting("enable_nfc", Settings::values.enable_nfc, true);
+    WriteSetting("current_user", Settings::values.current_user, 0);
+    WriteSetting("language_index", Settings::values.language_index, 1);

-    qt_config->setValue("rng_seed_enabled", Settings::values.rng_seed.has_value());
-    qt_config->setValue("rng_seed", Settings::values.rng_seed.value_or(0));
+    WriteSetting("rng_seed_enabled", Settings::values.rng_seed.has_value(), false);
+    WriteSetting("rng_seed", Settings::values.rng_seed.value_or(0), 0);

-    qt_config->setValue("custom_rtc_enabled", Settings::values.custom_rtc.has_value());
-    qt_config->setValue("custom_rtc",
-                        QVariant::fromValue<long long>(
-                            Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()));
+    WriteSetting("custom_rtc_enabled", Settings::values.custom_rtc.has_value(), false);
+    WriteSetting("custom_rtc",
+                 QVariant::fromValue<long long>(
+                     Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
+                 0);

    qt_config->endGroup();

    qt_config->beginGroup("Miscellaneous");
-    qt_config->setValue("log_filter", QString::fromStdString(Settings::values.log_filter));
-    qt_config->setValue("use_dev_keys", Settings::values.use_dev_keys);
+    WriteSetting("log_filter", QString::fromStdString(Settings::values.log_filter), "*:Info");
+    WriteSetting("use_dev_keys", Settings::values.use_dev_keys, false);
    qt_config->endGroup();

    qt_config->beginGroup("Debugging");
-    qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub);
-    qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port);
-    qt_config->setValue("program_args", QString::fromStdString(Settings::values.program_args));
-    qt_config->setValue("dump_exefs", Settings::values.dump_exefs);
-    qt_config->setValue("dump_nso", Settings::values.dump_nso);
+    WriteSetting("use_gdbstub", Settings::values.use_gdbstub, false);
+    WriteSetting("gdbstub_port", Settings::values.gdbstub_port, 24689);
+    WriteSetting("program_args", QString::fromStdString(Settings::values.program_args), "");
+    WriteSetting("dump_exefs", Settings::values.dump_exefs, false);
+    WriteSetting("dump_nso", Settings::values.dump_nso, false);
    qt_config->endGroup();

    qt_config->beginGroup("WebService");
-    qt_config->setValue("enable_telemetry", Settings::values.enable_telemetry);
-    qt_config->setValue("web_api_url", QString::fromStdString(Settings::values.web_api_url));
-    qt_config->setValue("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
-    qt_config->setValue("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
+    WriteSetting("enable_telemetry", Settings::values.enable_telemetry, true);
+    WriteSetting("web_api_url", QString::fromStdString(Settings::values.web_api_url),
+                 "https://api.yuzu-emu.org");
+    WriteSetting("yuzu_username", QString::fromStdString(Settings::values.yuzu_username));
+    WriteSetting("yuzu_token", QString::fromStdString(Settings::values.yuzu_token));
    qt_config->endGroup();

    qt_config->beginWriteArray("DisabledAddOns");
    int i = 0;
    for (const auto& elem : Settings::values.disabled_addons) {
        qt_config->setArrayIndex(i);
-        qt_config->setValue("title_id", QVariant::fromValue<u64>(elem.first));
+        WriteSetting("title_id", QVariant::fromValue<u64>(elem.first), 0);
        qt_config->beginWriteArray("disabled");
        for (std::size_t j = 0; j < elem.second.size(); ++j) {
            qt_config->setArrayIndex(static_cast<int>(j));
-            qt_config->setValue("d", QString::fromStdString(elem.second[j]));
+            WriteSetting("d", QString::fromStdString(elem.second[j]), "");
        }
        qt_config->endArray();
        ++i;
@@ -711,60 +727,86 @@ void Config::SaveValues() {
    qt_config->endArray();

    qt_config->beginGroup("UI");
-    qt_config->setValue("theme", UISettings::values.theme);
-    qt_config->setValue("enable_discord_presence", UISettings::values.enable_discord_presence);
-    qt_config->setValue("screenshot_resolution_factor",
-                        UISettings::values.screenshot_resolution_factor);
-    qt_config->setValue("select_user_on_boot", UISettings::values.select_user_on_boot);
+    WriteSetting("theme", UISettings::values.theme, UISettings::themes[0].second);
+    WriteSetting("enable_discord_presence", UISettings::values.enable_discord_presence, true);
+    WriteSetting("screenshot_resolution_factor", UISettings::values.screenshot_resolution_factor,
+                 0);
+    WriteSetting("select_user_on_boot", UISettings::values.select_user_on_boot, false);

    qt_config->beginGroup("UIGameList");
-    qt_config->setValue("show_unknown", UISettings::values.show_unknown);
-    qt_config->setValue("show_add_ons", UISettings::values.show_add_ons);
-    qt_config->setValue("icon_size", UISettings::values.icon_size);
-    qt_config->setValue("row_1_text_id", UISettings::values.row_1_text_id);
-    qt_config->setValue("row_2_text_id", UISettings::values.row_2_text_id);
+    WriteSetting("show_unknown", UISettings::values.show_unknown, true);
+    WriteSetting("show_add_ons", UISettings::values.show_add_ons, true);
+    WriteSetting("icon_size", UISettings::values.icon_size, 64);
+    WriteSetting("row_1_text_id", UISettings::values.row_1_text_id, 3);
+    WriteSetting("row_2_text_id", UISettings::values.row_2_text_id, 2);
    qt_config->endGroup();

    qt_config->beginGroup("UILayout");
-    qt_config->setValue("geometry", UISettings::values.geometry);
-    qt_config->setValue("state", UISettings::values.state);
-    qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry);
-    qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state);
-    qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
-    qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible);
+    WriteSetting("geometry", UISettings::values.geometry);
+    WriteSetting("state", UISettings::values.state);
+    WriteSetting("geometryRenderWindow", UISettings::values.renderwindow_geometry);
+    WriteSetting("gameListHeaderState", UISettings::values.gamelist_header_state);
+    WriteSetting("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
+    WriteSetting("microProfileDialogVisible", UISettings::values.microprofile_visible, false);
    qt_config->endGroup();

    qt_config->beginGroup("Paths");
-    qt_config->setValue("romsPath", UISettings::values.roms_path);
-    qt_config->setValue("symbolsPath", UISettings::values.symbols_path);
-    qt_config->setValue("screenshotPath", UISettings::values.screenshot_path);
-    qt_config->setValue("gameListRootDir", UISettings::values.gamedir);
-    qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan);
-    qt_config->setValue("recentFiles", UISettings::values.recent_files);
+    WriteSetting("romsPath", UISettings::values.roms_path);
+    WriteSetting("symbolsPath", UISettings::values.symbols_path);
+    WriteSetting("screenshotPath", UISettings::values.screenshot_path);
+    WriteSetting("gameListRootDir", UISettings::values.gamedir, ".");
+    WriteSetting("gameListDeepScan", UISettings::values.gamedir_deepscan, false);
+    WriteSetting("recentFiles", UISettings::values.recent_files);
    qt_config->endGroup();

    qt_config->beginGroup("Shortcuts");
    for (auto shortcut : UISettings::values.shortcuts) {
-        qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first);
-        qt_config->setValue(shortcut.first + "/Context", shortcut.second.second);
+        WriteSetting(shortcut.first + "/KeySeq", shortcut.second.first);
+        WriteSetting(shortcut.first + "/Context", shortcut.second.second);
    }
    qt_config->endGroup();

-    qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode);
-    qt_config->setValue("fullscreen", UISettings::values.fullscreen);
-    qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar);
-    qt_config->setValue("showFilterBar", UISettings::values.show_filter_bar);
-    qt_config->setValue("showStatusBar", UISettings::values.show_status_bar);
-    qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing);
-    qt_config->setValue("firstStart", UISettings::values.first_start);
-    qt_config->setValue("calloutFlags", UISettings::values.callout_flags);
-    qt_config->setValue("showConsole", UISettings::values.show_console);
-    qt_config->setValue("profileIndex", UISettings::values.profile_index);
+    WriteSetting("singleWindowMode", UISettings::values.single_window_mode, true);
+    WriteSetting("fullscreen", UISettings::values.fullscreen, false);
+    WriteSetting("displayTitleBars", UISettings::values.display_titlebar, true);
+    WriteSetting("showFilterBar", UISettings::values.show_filter_bar, true);
+    WriteSetting("showStatusBar", UISettings::values.show_status_bar, true);
+    WriteSetting("confirmClose", UISettings::values.confirm_before_closing, true);
+    WriteSetting("firstStart", UISettings::values.first_start, true);
+    WriteSetting("calloutFlags", UISettings::values.callout_flags, 0);
+    WriteSetting("showConsole", UISettings::values.show_console, false);
+    WriteSetting("profileIndex", UISettings::values.profile_index, 0);
    qt_config->endGroup();
 }

+QVariant Config::ReadSetting(const QString& name) const {
+    return qt_config->value(name);
+}
+
+QVariant Config::ReadSetting(const QString& name, const QVariant& default_value) const {
+    QVariant result;
+    if (qt_config->value(name + "/default", false).toBool()) {
+        result = default_value;
+    } else {
+        result = qt_config->value(name, default_value);
+    }
+    return result;
+}
+
+void Config::WriteSetting(const QString& name, const QVariant& value) {
+    qt_config->setValue(name, value);
+}
+
+void Config::WriteSetting(const QString& name, const QVariant& value,
+                          const QVariant& default_value) {
+    qt_config->setValue(name + "/default", value == default_value);
+    qt_config->setValue(name, value);
+}
+
 void Config::Reload() {
    ReadValues();
+    // To apply default value changes
+    SaveValues();
    Settings::Apply();
 }

--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -42,6 +42,11 @@ private:
    void SaveMouseValues();
    void SaveTouchscreenValues();

+    QVariant ReadSetting(const QString& name) const;
+    QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
+    void WriteSetting(const QString& name, const QVariant& value);
+    void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);
+
    std::unique_ptr<QSettings> qt_config;
    std::string qt_config_loc;
 };
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
    // TODO: Implement a good way to visualize alpha components!

    QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
-    std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);

    // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
    // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
    auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
-        *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width,
-        surface_height, 1U);
+        gpu.MemoryManager().GetPointer(surface_address), 1, 1,
+        Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);

    auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
                                                      surface_width, surface_height);
Author	SHA1	Message	Date
bunnei	93da8e0abf	core: Move PageTable struct into Common.	2019-03-16 22:05:40 -04:00
bunnei	2392e146b0	Merge pull request #2244 from bunnei/gpu-mem-refactor video_core: Refactor to use MemoryManager interface for all memory access.	2019-03-16 21:59:45 -04:00
bunnei	bf41132aa9	Merge pull request #2243 from bunnei/mem-simplify-cache memory: Simplify rasterizer cache operations.	2019-03-16 21:59:30 -04:00
bunnei	059465d496	Merge pull request #2129 from FernandoS27/cntpct Correct CNTPCT from using CPU Cycles to using Clock Cycles	2019-03-16 21:58:59 -04:00
bunnei	29c242721a	Merge pull request #2241 from lioncash/compile-flags CMakeLists: Remove now-unnecessary GCC special-casing	2019-03-16 00:43:29 -04:00
bunnei	bdf2da4ee8	Merge pull request #2242 from lioncash/thread-fn kernel/thread: Remove WaitCurrentThread_Sleep() and ExitCurrentThread()	2019-03-16 00:43:09 -04:00
bunnei	10118c71e0	memory: Simplify rasterizer cache operations.	2019-03-16 00:41:08 -04:00
bunnei	574e89d924	video_core: Refactor to use MemoryManager interface for all memory access. # Conflicts: # src/video_core/engines/kepler_memory.cpp # src/video_core/engines/maxwell_3d.cpp # src/video_core/morton.cpp # src/video_core/morton.h # src/video_core/renderer_opengl/gl_global_cache.cpp # src/video_core/renderer_opengl/gl_global_cache.h # src/video_core/renderer_opengl/gl_rasterizer_cache.cpp	2019-03-16 00:38:48 -04:00
bunnei	47b622825c	Merge pull request #2237 from bunnei/cache-host-addr gpu: Use host address for caching instead of guest address.	2019-03-16 00:05:24 -04:00
Lioncash	51d7f6bffc	kernel/thread: Move thread exiting logic from ExitCurrentThread to svcExitThread Puts the operation on global state in the same places as the rest of the svc calls.	2019-03-15 23:58:37 -04:00
Lioncash	c892cf01fa	kernel/thread: Migrate WaitCurrentThread_Sleep into the Thread interface Rather than make a global accessor for this sort of thing. We can make it a part of the thread interface itself. This allows getting rid of a hidden global accessor in the kernel code.	2019-03-15 23:58:31 -04:00
bunnei	06ac6460d3	Merge pull request #2048 from FearlessTobi/port-3924 Port citra-emu/citra#3924: "citra_qt: Settings (configuration) rework"	2019-03-15 22:23:38 -04:00
Lioncash	e5b004e903	CMakeLists: Remove now-unnecessary GCC special-casing This issue has since been fixed in newer versions of Boost, so we don't need to worry about this anymore.	2019-03-15 20:49:58 -04:00
bunnei	2eaf6c41a4	gpu: Use host address for caching instead of guest address.	2019-03-14 22:34:42 -04:00
bunnei	84d3cdf7d7	Merge pull request #2233 from ReinUsesLisp/morton-cleanup video_core/morton: Miscellaneous changes	2019-03-14 21:23:12 -04:00
bunnei	6788ebffc8	Merge pull request #2229 from ReinUsesLisp/vk-sampler-cache vk_sampler_cache: Implement a sampler cache	2019-03-14 21:22:34 -04:00
ReinUsesLisp	ffe2e50458	video_core/morton: Use enum to describe MortonCopyPixels128 mode	2019-03-13 16:35:21 -03:00
ReinUsesLisp	6ed6129b4f	video_core/morton: Remove unused parameter in MortonSwizzle	2019-03-13 16:35:10 -03:00
ReinUsesLisp	9030a8259f	video_core/morton: Remove clang-format off when it's not needed	2019-03-13 16:16:45 -03:00
ReinUsesLisp	fdf76a25ab	video_core/morton: Remove unused functions	2019-03-13 16:15:54 -03:00
Mat M	a3734d7e31	vk_sampler_cache: Use operator== instead of memcmp Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-03-12 21:05:36 -03:00
ReinUsesLisp	aa59d77c3b	vk_sampler_cache: Implement a sampler cache	2019-03-12 20:20:57 -03:00
ReinUsesLisp	8ebeb9ade2	video_core/texture: Add a raw representation of TSCEntry	2019-03-12 16:56:29 -03:00
zhupengfei	39e895c5ff	citra_qt: Settings (configuration) rework	2019-03-07 16:55:50 +01:00
Fernando Sahmkow	a8d4927e29	Corrections, documenting and fixes.	2019-02-16 16:52:24 -04:00
Fernando Sahmkow	ecccfe0337	Use u128 on Clock Cycles calculation.	2019-02-15 22:57:16 -04:00
Fernando Sahmkow	3ea48e8ebe	Implement 128 bits Unsigned Integer Multiplication and Division.	2019-02-15 22:55:31 -04:00
Fernando Sahmkow	5b7ec71fb7	Correct CNTPCT to use Clock Cycles instead of Cpu Cycles.	2019-02-15 22:55:29 -04:00