kernel/thread: Remove BoostPriority()

This is a holdover from Citra that currently remains unused, so it can be removed from the Thread interface.
Merge pull request #2378 from lioncash/ro
2019-04-15 06:59:19 -04:00 · 2019-04-13 22:16:10 -04:00 · 2019-04-13 22:14:51 -04:00 · 2019-04-13 22:14:04 -04:00 · 2019-04-13 22:09:58 -04:00 · 2019-04-13 22:09:27 -04:00
141 changed files with 5338 additions and 1835 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -40,3 +40,9 @@
 [submodule "Vulkan-Headers"]
    path = externals/Vulkan-Headers
    url = https://github.com/KhronosGroup/Vulkan-Headers.git
+[submodule "externals/zstd"]
+    path = externals/zstd
+    url = https://github.com/facebook/zstd
+[submodule "sirit"]
+    path = externals/sirit
+    url = https://github.com/ReinUsesLisp/sirit
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -309,7 +309,7 @@ if (CLANG_FORMAT)
    set(CCOMMENT "Running clang format against all the .h and .cpp files in src/")
    if (WIN32)
        add_custom_target(clang-format
-            COMMAND powershell.exe -Command "Get-ChildItem ${SRCS}/* -Include *.cpp,*.h -Recurse | Foreach {${CLANG_FORMAT} -i $_.fullname}"
+            COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
            COMMENT ${CCOMMENT})
    elseif(MINGW)
        add_custom_target(clang-format
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
 add_library(unicorn-headers INTERFACE)
 target_include_directories(unicorn-headers INTERFACE ./unicorn/include)

+# Zstandard
+add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL)
+target_include_directories(libzstd_static INTERFACE ./zstd/lib)
+
 # SoundTouch
 add_subdirectory(soundtouch)

@@ -68,6 +72,11 @@ if (USE_DISCORD_PRESENCE)
    target_include_directories(discord-rpc INTERFACE ./discord-rpc/include)
 endif()

+# Sirit
+if (ENABLE_VULKAN)
+    add_subdirectory(sirit)
+endif()
+
 if (ENABLE_WEB_SERVICE)
    # LibreSSL
    set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
--- a/externals/sirit
+++ b/externals/sirit
--- a/externals/zstd
+++ b/externals/zstd
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -125,6 +125,8 @@ add_library(common STATIC
    uint128.h
    vector_math.h
    web_result.h
+    zstd_compression.cpp
+    zstd_compression.h
 )

 if(ARCHITECTURE_x86_64)
@@ -138,4 +140,4 @@ endif()
 create_target_directory_groups(common)

 target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
-target_link_libraries(common PRIVATE lz4_static)
+target_link_libraries(common PRIVATE lz4_static libzstd_static)
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -57,3 +57,21 @@ __declspec(noinline, noreturn)

 #define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!")
 #define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__)
+
+// If the assert is ignored, execute _b_. Note: _a_ is expanded twice (ASSERT and the if check).
+#define ASSERT_OR_EXECUTE(_a_, _b_)                                                                \
+    do {                                                                                           \
+        ASSERT(_a_);                                                                               \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
+
+// If the assert is ignored, execute _b_. Note: _a_ is expanded twice (ASSERT_MSG and the if).
+#define ASSERT_OR_EXECUTE_MSG(_a_, _b_, ...)                                                       \
+    do {                                                                                           \
+        ASSERT_MSG(_a_, __VA_ARGS__);                                                              \
+        if (!(_a_)) {                                                                              \
+            _b_                                                                                    \
+        }                                                                                          \
+    } while (0)
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -32,7 +32,7 @@ inline u32 CountLeadingZeroes32(u32 value) {
    return 32;
 }

-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
    unsigned long leading_zero = 0;

    if (_BitScanReverse64(&leading_zero, value) != 0) {
@@ -47,15 +47,15 @@ inline u32 CountLeadingZeroes32(u32 value) {
        return 32;
    }

-    return __builtin_clz(value);
+    return static_cast<u32>(__builtin_clz(value));
 }

-inline u64 CountLeadingZeroes64(u64 value) {
+inline u32 CountLeadingZeroes64(u64 value) {
    if (value == 0) {
        return 64;
    }

-    return __builtin_clzll(value);
+    return static_cast<u32>(__builtin_clzll(value));
 }
 #endif

@@ -70,7 +70,7 @@ inline u32 CountTrailingZeroes32(u32 value) {
    return 32;
 }

-inline u64 CountTrailingZeroes64(u64 value) {
+inline u32 CountTrailingZeroes64(u64 value) {
    unsigned long trailing_zero = 0;

    if (_BitScanForward64(&trailing_zero, value) != 0) {
@@ -85,15 +85,15 @@ inline u32 CountTrailingZeroes32(u32 value) {
        return 32;
    }

-    return __builtin_ctz(value);
+    return static_cast<u32>(__builtin_ctz(value));
 }

-inline u64 CountTrailingZeroes64(u64 value) {
+inline u32 CountTrailingZeroes64(u64 value) {
    if (value == 0) {
        return 64;
    }

-    return __builtin_ctzll(value);
+    return static_cast<u32>(__builtin_ctzll(value));
 }
 #endif

--- a/src/common/multi_level_queue.h
+++ b/src/common/multi_level_queue.h
@@ -72,7 +72,7 @@ public:
                u64 prios = mlq.used_priorities;
                prios &= ~((1ULL << (current_priority + 1)) - 1);
                if (prios == 0) {
-                    current_priority = mlq.depth();
+                    current_priority = static_cast<u32>(mlq.depth());
                } else {
                    current_priority = CountTrailingZeroes64(prios);
                    it = GetBeginItForPrio();
--- a/src/common/scope_exit.h
+++ b/src/common/scope_exit.h
@@ -20,7 +20,7 @@ struct ScopeExitHelper {

 template <typename Func>
 ScopeExitHelper<Func> ScopeExit(Func&& func) {
-    return ScopeExitHelper<Func>(std::move(func));
+    return ScopeExitHelper<Func>(std::forward<Func>(func));
 }
 } // namespace detail

--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -21,11 +21,6 @@

 #if defined(_MSC_VER)
 #include <cstdlib>
-#elif defined(__linux__)
-#include <byteswap.h>
-#elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) ||                     \
-    defined(__NetBSD__) || defined(__OpenBSD__)
-#include <sys/endian.h>
 #endif
 #include <cstring>
 #include "common/common_types.h"
@@ -62,86 +57,49 @@
 namespace Common {

 #ifdef _MSC_VER
-inline u16 swap16(u16 _data) {
-    return _byteswap_ushort(_data);
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return _byteswap_ushort(data);
 }
-inline u32 swap32(u32 _data) {
-    return _byteswap_ulong(_data);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return _byteswap_ulong(data);
 }
-inline u64 swap64(u64 _data) {
-    return _byteswap_uint64(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return _byteswap_uint64(data);
 }
-#elif defined(ARCHITECTURE_ARM) && (__ARM_ARCH >= 6)
-inline u16 swap16(u16 _data) {
-    u32 data = _data;
-    __asm__("rev16 %0, %1\n" : "=l"(data) : "l"(data));
-    return (u16)data;
-}
-inline u32 swap32(u32 _data) {
-    __asm__("rev %0, %1\n" : "=l"(_data) : "l"(_data));
-    return _data;
-}
-inline u64 swap64(u64 _data) {
-    return ((u64)swap32(_data) << 32) | swap32(_data >> 32);
-}
-#elif __linux__
-inline u16 swap16(u16 _data) {
-    return bswap_16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return bswap_32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return bswap_64(_data);
-}
-#elif __APPLE__
-inline __attribute__((always_inline)) u16 swap16(u16 _data) {
-    return (_data >> 8) | (_data << 8);
-}
-inline __attribute__((always_inline)) u32 swap32(u32 _data) {
-    return __builtin_bswap32(_data);
-}
-inline __attribute__((always_inline)) u64 swap64(u64 _data) {
-    return __builtin_bswap64(_data);
-}
-#elif defined(__Bitrig__) || defined(__OpenBSD__)
+#elif defined(__clang__) || defined(__GNUC__)
+#if defined(__Bitrig__) || defined(__OpenBSD__)
 // redefine swap16, swap32, swap64 as inline functions
 #undef swap16
 #undef swap32
 #undef swap64
-inline u16 swap16(u16 _data) {
-    return __swap16(_data);
+#endif
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
+    return __builtin_bswap16(data);
 }
-inline u32 swap32(u32 _data) {
-    return __swap32(_data);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return __builtin_bswap32(data);
 }
-inline u64 swap64(u64 _data) {
-    return __swap64(_data);
-}
-#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__)
-inline u16 swap16(u16 _data) {
-    return bswap16(_data);
-}
-inline u32 swap32(u32 _data) {
-    return bswap32(_data);
-}
-inline u64 swap64(u64 _data) {
-    return bswap64(_data);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return __builtin_bswap64(data);
 }
 #else
-// Slow generic implementation.
-inline u16 swap16(u16 data) {
+// Generic implementation.
+[[nodiscard]] inline u16 swap16(u16 data) noexcept {
    return (data >> 8) | (data << 8);
 }
-inline u32 swap32(u32 data) {
-    return (swap16(data) << 16) | swap16(data >> 16);
+[[nodiscard]] inline u32 swap32(u32 data) noexcept {
+    return ((data & 0xFF000000U) >> 24) | ((data & 0x00FF0000U) >> 8) |
+           ((data & 0x0000FF00U) << 8) | ((data & 0x000000FFU) << 24);
 }
-inline u64 swap64(u64 data) {
-    return ((u64)swap32(data) << 32) | swap32(data >> 32);
+[[nodiscard]] inline u64 swap64(u64 data) noexcept {
+    return ((data & 0xFF00000000000000ULL) >> 56) | ((data & 0x00FF000000000000ULL) >> 40) |
+           ((data & 0x0000FF0000000000ULL) >> 24) | ((data & 0x000000FF00000000ULL) >> 8) |
+           ((data & 0x00000000FF000000ULL) << 8) | ((data & 0x0000000000FF0000ULL) << 24) |
+           ((data & 0x000000000000FF00ULL) << 40) | ((data & 0x00000000000000FFULL) << 56);
 }
 #endif

-inline float swapf(float f) {
+[[nodiscard]] inline float swapf(float f) noexcept {
    static_assert(sizeof(u32) == sizeof(float), "float must be the same size as uint32_t.");

    u32 value;
@@ -153,7 +111,7 @@ inline float swapf(float f) {
    return f;
 }

-inline double swapd(double f) {
+[[nodiscard]] inline double swapd(double f) noexcept {
    static_assert(sizeof(u64) == sizeof(double), "double must be the same size as uint64_t.");

    u64 value;
--- a/src/common/zstd_compression.cpp
+++ b/src/common/zstd_compression.cpp
@@ -0,0 +1,59 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <zstd.h>
+
+#include "common/assert.h"
+#include "common/zstd_compression.h"
+
+namespace Common::Compression {
+
+// Compresses source_size bytes at source; yields an empty vector when Zstandard reports an error.
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
+    // Keep the requested level within [1, ZSTD_maxCLevel()].
+    compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
+
+    // Size the output with ZSTD_compressBound() first; it is shrunk to the real size afterwards.
+    const std::size_t max_compressed_size = ZSTD_compressBound(source_size);
+    std::vector<u8> compressed(max_compressed_size);
+
+    const std::size_t compressed_size =
+        ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level);
+
+    if (ZSTD_isError(compressed_size)) {
+        // Compression failed
+        return {};
+    }
+
+    compressed.resize(compressed_size);
+
+    return compressed;
+}
+
+// Compresses with ZSTD_CLEVEL_DEFAULT as the compression level.
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
+    return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT);
+}
+
+// Decompresses one Zstandard buffer; yields an empty vector when decompression fails.
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
+    // NOTE(review): the zstd manual lists ZSTD_getDecompressedSize() as obsolete in favor of
+    // ZSTD_getFrameContentSize(); consider migrating once its error sentinels are handled.
+    const std::size_t decompressed_size =
+        ZSTD_getDecompressedSize(compressed.data(), compressed.size());
+    std::vector<u8> decompressed(decompressed_size);
+
+    const std::size_t uncompressed_result_size = ZSTD_decompress(
+        decompressed.data(), decompressed.size(), compressed.data(), compressed.size());
+
+    // A size mismatch is treated as a failure, just like an explicit zstd error code.
+    if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) {
+        // Decompression failed
+        return {};
+    }
+    return decompressed;
+}
+
+} // namespace Common::Compression
--- a/src/common/zstd_compression.h
+++ b/src/common/zstd_compression.h
@@ -0,0 +1,44 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Common::Compression {
+
+/**
+ * Compresses a source memory region with Zstandard and returns the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ * @param compression_level the used compression level. Clamped to [1, ZSTD_maxCLevel()].
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
+
+/**
+ * Compresses a source memory region with Zstandard with the default compression level and returns
+ * the compressed data in a vector.
+ *
+ * @param source the uncompressed source memory region.
+ * @param source_size the size in bytes of the uncompressed source memory region.
+ *
+ * @return the compressed data, or an empty vector if compression failed.
+ */
+std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
+
+/**
+ * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
+ *
+ * @param compressed the compressed source memory region.
+ *
+ * @return the decompressed data, or an empty vector if decompression failed.
+ */
+std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
+
+} // namespace Common::Compression
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -14,6 +14,7 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/vm_manager.h"
@@ -99,7 +100,7 @@ public:
    }

    void CallSVC(u32 swi) override {
-        Kernel::CallSVC(swi);
+        Kernel::CallSVC(parent.system, swi);
    }

    void AddTicks(u64 ticks) override {
@@ -112,14 +113,14 @@ public:
        // Always execute at least one tick.
        amortized_ticks = std::max<u64>(amortized_ticks, 1);

-        parent.core_timing.AddTicks(amortized_ticks);
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
        num_interpreted_instructions = 0;
    }
    u64 GetTicksRemaining() override {
-        return std::max(parent.core_timing.GetDowncount(), 0);
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0);
    }
    u64 GetCNTPCT() override {
-        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
+        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
    }

    ARM_Dynarmic& parent;
@@ -129,7 +130,7 @@ public:
 };

 std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
-    auto* current_process = Core::CurrentProcess();
+    auto* current_process = system.Kernel().CurrentProcess();
    auto** const page_table = current_process->VMManager().page_table.pointers.data();

    Dynarmic::A64::UserConfig config;
@@ -163,7 +164,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)

 void ARM_Dynarmic::Run() {
    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
-    ASSERT(Memory::GetCurrentPageTable() == current_page_table);

    jit->Run();
 }
@@ -172,10 +172,10 @@ void ARM_Dynarmic::Step() {
    cb->InterpreterFallback(jit->GetPC(), 1);
 }

-ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
+ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
                           std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
-      core_index{core_index}, core_timing{core_timing},
+    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
+      core_index{core_index}, system{system},
      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
    ThreadContext ctx{};
    inner_unicorn.SaveContext(ctx);
@@ -278,7 +278,6 @@ void ARM_Dynarmic::ClearExclusiveState() {

 void ARM_Dynarmic::PageTableChanged() {
    jit = MakeJit();
-    current_page_table = Memory::GetCurrentPageTable();
 }

 DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,23 +12,15 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"

-namespace Common {
-struct PageTable;
-}
-
-namespace Core::Timing {
-class CoreTiming;
-}
-
 namespace Core {

 class ARM_Dynarmic_Callbacks;
 class DynarmicExclusiveMonitor;
+class System;

 class ARM_Dynarmic final : public ARM_Interface {
 public:
-    ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-                 std::size_t core_index);
+    ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
    ~ARM_Dynarmic() override;

    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -67,10 +59,8 @@ private:
    ARM_Unicorn inner_unicorn;

    std::size_t core_index;
-    Timing::CoreTiming& core_timing;
+    System& system;
    DynarmicExclusiveMonitor& exclusive_monitor;
-
-    Common::PageTable* current_page_table = nullptr;
 };

 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -10,7 +10,6 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/svc.h"
-#include "core/memory.h"

 namespace Core {

@@ -49,20 +48,6 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
    }
 }

-static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
-    u32 esr{};
-    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
-
-    auto ec = esr >> 26;
-    auto iss = esr & 0xFFFFFF;
-
-    switch (ec) {
-    case 0x15: // SVC
-        Kernel::CallSVC(iss);
-        break;
-    }
-}
-
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                               void* user_data) {
    ARM_Interface::ThreadContext ctx{};
@@ -72,7 +57,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
    return {};
 }

-ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
+ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
    CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));

    auto fpv = 3 << 20;
@@ -177,7 +162,7 @@ void ARM_Unicorn::Run() {
    if (GDBStub::IsServerEnabled()) {
        ExecuteInstructions(std::max(4000000, 0));
    } else {
-        ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
+        ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
    }
 }

@@ -190,14 +175,15 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
 void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
    MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
    CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
-    core_timing.AddTicks(num_instructions);
+    system.CoreTiming().AddTicks(num_instructions);
    if (GDBStub::IsServerEnabled()) {
-        if (last_bkpt_hit) {
+        if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
            uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
        }
+
        Kernel::Thread* thread = Kernel::GetCurrentThread();
        SaveContext(thread->GetContext());
-        if (last_bkpt_hit || GDBStub::GetCpuStepFlag()) {
+        if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
            last_bkpt_hit = false;
            GDBStub::Break();
            GDBStub::SendTrap(thread, 5);
@@ -272,4 +258,20 @@ void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
    last_bkpt_hit = true;
 }

+void ARM_Unicorn::InterruptHook(uc_engine* uc, u32 int_no, void* user_data) {
+    u32 esr{};
+    CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
+    // Split ESR: the upper 6 bits pick the case handled below, the lower 24 bits go to CallSVC.
+    const auto ec = esr >> 26;
+    const auto iss = esr & 0xFFFFFF;
+    // NOTE(review): assumes user_data is the ARM_Unicorn that registered this hook - confirm.
+    auto* const arm_instance = static_cast<ARM_Unicorn*>(user_data);
+
+    switch (ec) {
+    case 0x15: // SVC
+        Kernel::CallSVC(arm_instance->system, iss);
+        break;
+    }
+}
+
 } // namespace Core
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -9,15 +9,13 @@
 #include "core/arm/arm_interface.h"
 #include "core/gdbstub/gdbstub.h"

-namespace Core::Timing {
-class CoreTiming;
-}
-
 namespace Core {

+class System;
+
 class ARM_Unicorn final : public ARM_Interface {
 public:
-    explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
+    explicit ARM_Unicorn(System& system);
    ~ARM_Unicorn() override;

    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -47,10 +45,12 @@ public:
    void RecordBreak(GDBStub::BreakpointAddress bkpt);

 private:
+    static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
+
    uc_engine* uc{};
-    Timing::CoreTiming& core_timing;
+    System& system;
    GDBStub::BreakpointAddress last_bkpt{};
-    bool last_bkpt_hit;
+    bool last_bkpt_hit = false;
 };

 } // namespace Core
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -17,6 +17,7 @@
 #include "core/core_timing.h"
 #include "core/cpu_core_manager.h"
 #include "core/file_sys/mode.h"
+#include "core/file_sys/registered_cache.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
 #include "core/gdbstub/gdbstub.h"
@@ -108,6 +109,8 @@ struct System::Impl {
        // Create a default fs if one doesn't already exist.
        if (virtual_filesystem == nullptr)
            virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
+        if (content_provider == nullptr)
+            content_provider = std::make_unique<FileSys::ContentProviderUnion>();

        /// Create default implementations of applets if one is not provided.
        if (profile_selector == nullptr)
@@ -249,6 +252,8 @@ struct System::Impl {
    Kernel::KernelCore kernel;
    /// RealVfsFilesystem instance
    FileSys::VirtualFilesystem virtual_filesystem;
+    /// ContentProviderUnion instance
+    std::unique_ptr<FileSys::ContentProviderUnion> content_provider;
    /// AppLoader used to load the current executing application
    std::unique_ptr<Loader::AppLoader> app_loader;
    std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -488,6 +493,27 @@ const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
    return *impl->software_keyboard;
 }

+void System::SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider) {
+    impl->content_provider = std::move(provider); // Takes ownership, replacing the stored union.
+}
+
+FileSys::ContentProvider& System::GetContentProvider() {
+    return *impl->content_provider; // Unchecked dereference; the pointer must already be set.
+}
+
+const FileSys::ContentProvider& System::GetContentProvider() const {
+    return *impl->content_provider; // Unchecked dereference; the pointer must already be set.
+}
+
+void System::RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
+                                     FileSys::ContentProvider* provider) {
+    impl->content_provider->SetSlot(slot, provider); // Forwards to the stored union's SetSlot.
+}
+
+void System::ClearContentProvider(FileSys::ContentProviderUnionSlot slot) {
+    impl->content_provider->ClearSlot(slot); // Forwards to the stored union's ClearSlot.
+}
+
 void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) {
    impl->web_browser = std::move(applet);
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -21,6 +21,9 @@ class WebBrowserApplet;

 namespace FileSys {
 class CheatList;
+class ContentProvider;
+class ContentProviderUnion;
+enum class ContentProviderUnionSlot;
 class VfsFilesystem;
 } // namespace FileSys

@@ -270,6 +273,17 @@ public:
    Frontend::WebBrowserApplet& GetWebBrowser();
    const Frontend::WebBrowserApplet& GetWebBrowser() const;

+    void SetContentProvider(std::unique_ptr<FileSys::ContentProviderUnion> provider);
+
+    FileSys::ContentProvider& GetContentProvider();
+
+    const FileSys::ContentProvider& GetContentProvider() const;
+
+    void RegisterContentProvider(FileSys::ContentProviderUnionSlot slot,
+                                 FileSys::ContentProvider* provider);
+
+    void ClearContentProvider(FileSys::ContentProviderUnionSlot slot);
+
 private:
    System();

--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -55,13 +55,13 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
    : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
    if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
+        arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
 #else
-        arm_interface = std::make_unique<ARM_Unicorn>();
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
        LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
    } else {
-        arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
+        arm_interface = std::make_unique<ARM_Unicorn>(system);
    }

    scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -22,6 +22,7 @@
 #include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/crypto/aes_util.h"
 #include "core/crypto/key_manager.h"
 #include "core/crypto/partition_data_manager.h"
@@ -794,7 +795,7 @@ void KeyManager::DeriveBase() {

 void KeyManager::DeriveETicket(PartitionDataManager& data) {
    // ETicket keys
-    const auto es = Service::FileSystem::GetUnionContents().GetEntry(
+    const auto es = Core::System::GetInstance().GetContentProvider().GetEntry(
        0x0100000000000033, FileSys::ContentRecordType::Program);

    if (es == nullptr)
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
 #include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
 #include "core/file_sys/ips_layer.h"
@@ -69,7 +70,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
        }
    }

-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();

    const auto& disabled = Settings::values.disabled_addons[title_id];
    const auto update_disabled =
@@ -155,7 +156,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
    return out;
 }

-std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
+std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso, const std::string& name) const {
    if (nso.size() < sizeof(Loader::NSOHeader)) {
        return nso;
    }
@@ -171,18 +172,19 @@ std::vector<u8> PatchManager::PatchNSO(const std::vector<u8>& nso) const {
    const auto build_id = build_id_raw.substr(0, build_id_raw.find_last_not_of('0') + 1);

    if (Settings::values.dump_nso) {
-        LOG_INFO(Loader, "Dumping NSO for build_id={}, title_id={:016X}", build_id, title_id);
+        LOG_INFO(Loader, "Dumping NSO for name={}, build_id={}, title_id={:016X}", name, build_id,
+                 title_id);
        const auto dump_dir = Service::FileSystem::GetModificationDumpRoot(title_id);
        if (dump_dir != nullptr) {
            const auto nso_dir = GetOrCreateDirectoryRelative(dump_dir, "/nso");
-            const auto file = nso_dir->CreateFile(fmt::format("{}.nso", build_id));
+            const auto file = nso_dir->CreateFile(fmt::format("{}-{}.nso", name, build_id));

            file->Resize(nso.size());
            file->WriteBytes(nso);
        }
    }

-    LOG_INFO(Loader, "Patching NSO for build_id={}", build_id);
+    LOG_INFO(Loader, "Patching NSO for name={}, build_id={}", name, build_id);

    const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
    auto patch_dirs = load_dir->GetSubdirectories();
@@ -345,7 +347,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
    if (romfs == nullptr)
        return romfs;

-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();

    // Game Updates
    const auto update_tid = GetUpdateTitleID(title_id);
@@ -392,7 +394,7 @@ static bool IsDirValidAndNonEmpty(const VirtualDir& dir) {
 std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNames(
    VirtualFile update_raw) const {
    std::map<std::string, std::string, std::less<>> out;
-    const auto installed = Service::FileSystem::GetUnionContents();
+    const auto& installed = Core::System::GetInstance().GetContentProvider();
    const auto& disabled = Settings::values.disabled_addons[title_id];

    // Game Updates
@@ -466,10 +468,10 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam

    // DLC
    const auto dlc_entries = installed.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data);
-    std::vector<RegisteredCacheEntry> dlc_match;
+    std::vector<ContentProviderEntry> dlc_match;
    dlc_match.reserve(dlc_entries.size());
    std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match),
-                 [this, &installed](const RegisteredCacheEntry& entry) {
+                 [this, &installed](const ContentProviderEntry& entry) {
                     return (entry.title_id & DLC_BASE_TITLE_ID_MASK) == title_id &&
                            installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success;
                 });
@@ -492,7 +494,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
 }

 std::pair<std::unique_ptr<NACP>, VirtualFile> PatchManager::GetControlMetadata() const {
-    const auto installed{Service::FileSystem::GetUnionContents()};
+    const auto& installed = Core::System::GetInstance().GetContentProvider();

    const auto base_control_nca = installed.GetEntry(title_id, ContentRecordType::Control);
    if (base_control_nca == nullptr)
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -44,7 +44,7 @@ public:
    // Currently tracked NSO patches:
    // - IPS
    // - IPSwitch
-    std::vector<u8> PatchNSO(const std::vector<u8>& nso) const;
+    std::vector<u8> PatchNSO(const std::vector<u8>& nso, const std::string& name) const;

    // Checks to see if PatchNSO() will have any effect given the NSO's build ID.
    // Used to prevent expensive copies in NSO loader.
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -23,19 +23,19 @@ namespace FileSys {
 // The size of blocks to use when vfs raw copying into nand.
 constexpr size_t VFS_RC_LARGE_COPY_BLOCK = 0x400000;

-std::string RegisteredCacheEntry::DebugInfo() const {
+std::string ContentProviderEntry::DebugInfo() const {
    return fmt::format("title_id={:016X}, content_type={:02X}", title_id, static_cast<u8>(type));
 }

-bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
    return (lhs.title_id < rhs.title_id) || (lhs.title_id == rhs.title_id && lhs.type < rhs.type);
 }

-bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
    return std::tie(lhs.title_id, lhs.type) == std::tie(rhs.title_id, rhs.type);
 }

-bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs) {
+bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs) {
    return !operator==(lhs, rhs);
 }

@@ -84,7 +84,7 @@ static std::string GetCNMTName(TitleType type, u64 title_id) {
    return fmt::format("{}_{:016x}.cnmt", TITLE_TYPE_NAMES[index], title_id);
 }

-static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
+ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
    switch (type) {
    case NCAContentType::Program:
        // TODO(DarkLordZach): Differentiate between Program and Patch
@@ -104,6 +104,28 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
    }
 }

+ContentProvider::~ContentProvider() = default;
+
+bool ContentProvider::HasEntry(ContentProviderEntry entry) const {
+    return HasEntry(entry.title_id, entry.type);
+}
+
+VirtualFile ContentProvider::GetEntryUnparsed(ContentProviderEntry entry) const {
+    return GetEntryUnparsed(entry.title_id, entry.type);
+}
+
+VirtualFile ContentProvider::GetEntryRaw(ContentProviderEntry entry) const {
+    return GetEntryRaw(entry.title_id, entry.type);
+}
+
+std::unique_ptr<NCA> ContentProvider::GetEntry(ContentProviderEntry entry) const {
+    return GetEntry(entry.title_id, entry.type);
+}
+
+std::vector<ContentProviderEntry> ContentProvider::ListEntries() const {
+    return ListEntriesFilter(std::nullopt, std::nullopt, std::nullopt);
+}
+
 VirtualFile RegisteredCache::OpenFileOrDirectoryConcat(const VirtualDir& dir,
                                                       std::string_view path) const {
    const auto file = dir->GetFileRelative(path);
@@ -161,8 +183,8 @@ VirtualFile RegisteredCache::GetFileAtID(NcaID id) const {
    return file;
 }

-static std::optional<NcaID> CheckMapForContentRecord(
-    const boost::container::flat_map<u64, CNMT>& map, u64 title_id, ContentRecordType type) {
+static std::optional<NcaID> CheckMapForContentRecord(const std::map<u64, CNMT>& map, u64 title_id,
+                                                     ContentRecordType type) {
    if (map.find(title_id) == map.end())
        return {};

@@ -268,7 +290,7 @@ void RegisteredCache::Refresh() {
    AccumulateYuzuMeta();
 }

-RegisteredCache::RegisteredCache(VirtualDir dir_, RegisteredCacheParsingFunction parsing_function)
+RegisteredCache::RegisteredCache(VirtualDir dir_, ContentProviderParsingFunction parsing_function)
    : dir(std::move(dir_)), parser(std::move(parsing_function)) {
    Refresh();
 }
@@ -279,19 +301,11 @@ bool RegisteredCache::HasEntry(u64 title_id, ContentRecordType type) const {
    return GetEntryRaw(title_id, type) != nullptr;
 }

-bool RegisteredCache::HasEntry(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry) != nullptr;
-}
-
 VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
    const auto id = GetNcaIDFromMetadata(title_id, type);
    return id ? GetFileAtID(*id) : nullptr;
 }

-VirtualFile RegisteredCache::GetEntryUnparsed(RegisteredCacheEntry entry) const {
-    return GetEntryUnparsed(entry.title_id, entry.type);
-}
-
 std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
    const auto meta_iter = meta.find(title_id);
    if (meta_iter != meta.end())
@@ -309,10 +323,6 @@ VirtualFile RegisteredCache::GetEntryRaw(u64 title_id, ContentRecordType type) c
    return id ? parser(GetFileAtID(*id), *id) : nullptr;
 }

-VirtualFile RegisteredCache::GetEntryRaw(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry.title_id, entry.type);
-}
-
 std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType type) const {
    const auto raw = GetEntryRaw(title_id, type);
    if (raw == nullptr)
@@ -320,10 +330,6 @@ std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType t
    return std::make_unique<NCA>(raw, nullptr, 0, keys);
 }

-std::unique_ptr<NCA> RegisteredCache::GetEntry(RegisteredCacheEntry entry) const {
-    return GetEntry(entry.title_id, entry.type);
-}
-
 template <typename T>
 void RegisteredCache::IterateAllMetadata(
    std::vector<T>& out, std::function<T(const CNMT&, const ContentRecord&)> proc,
@@ -348,25 +354,14 @@ void RegisteredCache::IterateAllMetadata(
    }
 }

-std::vector<RegisteredCacheEntry> RegisteredCache::ListEntries() const {
-    std::vector<RegisteredCacheEntry> out;
-    IterateAllMetadata<RegisteredCacheEntry>(
-        out,
-        [](const CNMT& c, const ContentRecord& r) {
-            return RegisteredCacheEntry{c.GetTitleID(), r.type};
-        },
-        [](const CNMT& c, const ContentRecord& r) { return true; });
-    return out;
-}
-
-std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
+std::vector<ContentProviderEntry> RegisteredCache::ListEntriesFilter(
    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
    std::optional<u64> title_id) const {
-    std::vector<RegisteredCacheEntry> out;
-    IterateAllMetadata<RegisteredCacheEntry>(
+    std::vector<ContentProviderEntry> out;
+    IterateAllMetadata<ContentProviderEntry>(
        out,
        [](const CNMT& c, const ContentRecord& r) {
-            return RegisteredCacheEntry{c.GetTitleID(), r.type};
+            return ContentProviderEntry{c.GetTitleID(), r.type};
        },
        [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
            if (title_type && *title_type != c.GetType())
@@ -521,37 +516,56 @@ bool RegisteredCache::RawInstallYuzuMeta(const CNMT& cnmt) {
                        }) != yuzu_meta.end();
 }

-RegisteredCacheUnion::RegisteredCacheUnion(std::vector<RegisteredCache*> caches)
-    : caches(std::move(caches)) {}
+ContentProviderUnion::~ContentProviderUnion() = default;

-void RegisteredCacheUnion::Refresh() {
-    for (const auto& c : caches)
-        c->Refresh();
+void ContentProviderUnion::SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider) {
+    providers[slot] = provider;
 }

-bool RegisteredCacheUnion::HasEntry(u64 title_id, ContentRecordType type) const {
-    return std::any_of(caches.begin(), caches.end(), [title_id, type](const auto& cache) {
-        return cache->HasEntry(title_id, type);
-    });
+void ContentProviderUnion::ClearSlot(ContentProviderUnionSlot slot) {
+    providers[slot] = nullptr;
 }

-bool RegisteredCacheUnion::HasEntry(RegisteredCacheEntry entry) const {
-    return HasEntry(entry.title_id, entry.type);
+void ContentProviderUnion::Refresh() {
+    for (auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        provider.second->Refresh();
+    }
 }

-std::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryVersion(title_id);
-        if (res)
+bool ContentProviderUnion::HasEntry(u64 title_id, ContentRecordType type) const {
+    // The union has an entry as soon as any populated slot reports it. Unset (nullptr) slots
+    // are skipped, and std::any_of stops at the first provider that answers true, exactly as
+    // an explicit loop with an early return would.
+    const auto slot_has_entry = [title_id, type](const auto& slot) {
+        const ContentProvider* const provider = slot.second;
+        return provider != nullptr && provider->HasEntry(title_id, type);
+    };
+
+    return std::any_of(providers.begin(), providers.end(), slot_has_entry);
+}
+
+std::optional<u32> ContentProviderUnion::GetEntryVersion(u64 title_id) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        const auto res = provider.second->GetEntryVersion(title_id);
+        if (res != std::nullopt)
            return res;
    }

-    return {};
+    return std::nullopt;
 }

-VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryUnparsed(title_id, type);
+VirtualFile ContentProviderUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        const auto res = provider.second->GetEntryUnparsed(title_id, type);
        if (res != nullptr)
            return res;
    }
@@ -559,13 +573,12 @@ VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordTy
    return nullptr;
 }

-VirtualFile RegisteredCacheUnion::GetEntryUnparsed(RegisteredCacheEntry entry) const {
-    return GetEntryUnparsed(entry.title_id, entry.type);
-}
+VirtualFile ContentProviderUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;

-VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType type) const {
-    for (const auto& c : caches) {
-        const auto res = c->GetEntryRaw(title_id, type);
+        const auto res = provider.second->GetEntryRaw(title_id, type);
        if (res != nullptr)
            return res;
    }
@@ -573,30 +586,30 @@ VirtualFile RegisteredCacheUnion::GetEntryRaw(u64 title_id, ContentRecordType ty
    return nullptr;
 }

-VirtualFile RegisteredCacheUnion::GetEntryRaw(RegisteredCacheEntry entry) const {
-    return GetEntryRaw(entry.title_id, entry.type);
+std::unique_ptr<NCA> ContentProviderUnion::GetEntry(u64 title_id, ContentRecordType type) const {
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;
+
+        auto res = provider.second->GetEntry(title_id, type);
+        if (res != nullptr)
+            return res;
+    }
+
+    return nullptr;
 }

-std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(u64 title_id, ContentRecordType type) const {
-    const auto raw = GetEntryRaw(title_id, type);
-    if (raw == nullptr)
-        return nullptr;
-    return std::make_unique<NCA>(raw);
-}
+std::vector<ContentProviderEntry> ContentProviderUnion::ListEntriesFilter(
+    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+    std::optional<u64> title_id) const {
+    std::vector<ContentProviderEntry> out;

-std::unique_ptr<NCA> RegisteredCacheUnion::GetEntry(RegisteredCacheEntry entry) const {
-    return GetEntry(entry.title_id, entry.type);
-}
+    for (const auto& provider : providers) {
+        if (provider.second == nullptr)
+            continue;

-std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
-    std::vector<RegisteredCacheEntry> out;
-    for (const auto& c : caches) {
-        c->IterateAllMetadata<RegisteredCacheEntry>(
-            out,
-            [](const CNMT& c, const ContentRecord& r) {
-                return RegisteredCacheEntry{c.GetTitleID(), r.type};
-            },
-            [](const CNMT& c, const ContentRecord& r) { return true; });
+        const auto vec = provider.second->ListEntriesFilter(title_type, record_type, title_id);
+        std::copy(vec.begin(), vec.end(), std::back_inserter(out));
    }

    std::sort(out.begin(), out.end());
@@ -604,25 +617,87 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
    return out;
 }

-std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
+std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>>
+ContentProviderUnion::ListEntriesFilterOrigin(std::optional<ContentProviderUnionSlot> origin,
+                                              std::optional<TitleType> title_type,
+                                              std::optional<ContentRecordType> record_type,
+                                              std::optional<u64> title_id) const {
+    // Collects the filtered entries of every populated slot, tagging each entry with the slot
+    // it was found in. When origin is set, only that one slot is queried.
+    std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> tagged;
+
+    for (const auto& [slot, provider] : providers) {
+        const bool wrong_origin = origin.has_value() && *origin != slot;
+        if (provider == nullptr || wrong_origin)
+            continue;
+
+        for (const auto& entry : provider->ListEntriesFilter(title_type, record_type, title_id))
+            tagged.emplace_back(slot, entry);
+    }
+
+    // Order the (slot, entry) pairs and drop exact duplicates.
+    std::sort(tagged.begin(), tagged.end());
+    const auto duplicates_begin = std::unique(tagged.begin(), tagged.end());
+    tagged.erase(duplicates_begin, tagged.end());
+
+    return tagged;
+}
+
+ManualContentProvider::~ManualContentProvider() = default;
+
+void ManualContentProvider::AddEntry(TitleType title_type, ContentRecordType content_type,
+                                     u64 title_id, VirtualFile file) {
+    entries.insert_or_assign({title_type, content_type, title_id}, file);
+}
+
+void ManualContentProvider::ClearAllEntries() {
+    entries.clear();
+}
+
+void ManualContentProvider::Refresh() {}
+
+bool ManualContentProvider::HasEntry(u64 title_id, ContentRecordType type) const {
+    return GetEntryRaw(title_id, type) != nullptr;
+}
+
+std::optional<u32> ManualContentProvider::GetEntryVersion(u64 title_id) const {
+    return std::nullopt;
+}
+
+VirtualFile ManualContentProvider::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
+    return GetEntryRaw(title_id, type);
+}
+
+VirtualFile ManualContentProvider::GetEntryRaw(u64 title_id, ContentRecordType type) const {
+    // Keys are (title type, content type, title ID) tuples. The title type is not part of the
+    // query, so scan in key order and hand back the first file whose other two fields match.
+    for (const auto& [key, file] : entries) {
+        if (std::get<ContentRecordType>(key) == type && std::get<u64>(key) == title_id)
+            return file;
+    }
+
+    return nullptr;
+}
+
+std::unique_ptr<NCA> ManualContentProvider::GetEntry(u64 title_id, ContentRecordType type) const {
+    const auto res = GetEntryRaw(title_id, type);
+    if (res == nullptr)
+        return nullptr;
+    return std::make_unique<NCA>(res, nullptr, 0, keys);
+}
+
+std::vector<ContentProviderEntry> ManualContentProvider::ListEntriesFilter(
    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
    std::optional<u64> title_id) const {
-    std::vector<RegisteredCacheEntry> out;
-    for (const auto& c : caches) {
-        c->IterateAllMetadata<RegisteredCacheEntry>(
-            out,
-            [](const CNMT& c, const ContentRecord& r) {
-                return RegisteredCacheEntry{c.GetTitleID(), r.type};
-            },
-            [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
-                if (title_type && *title_type != c.GetType())
-                    return false;
-                if (record_type && *record_type != r.type)
-                    return false;
-                if (title_id && *title_id != c.GetTitleID())
-                    return false;
-                return true;
-            });
+    std::vector<ContentProviderEntry> out;
+
+    for (const auto& entry : entries) {
+        const auto [e_title_type, e_content_type, e_title_id] = entry.first;
+        if ((title_type == std::nullopt || e_title_type == *title_type) &&
+            (record_type == std::nullopt || e_content_type == *record_type) &&
+            (title_id == std::nullopt || e_title_id == *title_id)) {
+            out.emplace_back(ContentProviderEntry{e_title_id, e_content_type});
+        }
    }

    std::sort(out.begin(), out.end());
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -21,12 +21,13 @@ class NSP;
 class XCI;

 enum class ContentRecordType : u8;
+enum class NCAContentType : u8;
 enum class TitleType : u8;

 struct ContentRecord;

 using NcaID = std::array<u8, 0x10>;
-using RegisteredCacheParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
+using ContentProviderParsingFunction = std::function<VirtualFile(const VirtualFile&, const NcaID&)>;
 using VfsCopyFunction = std::function<bool(const VirtualFile&, const VirtualFile&, size_t)>;

 enum class InstallResult {
@@ -36,7 +37,7 @@ enum class InstallResult {
    ErrorMetaFailed,
 };

-struct RegisteredCacheEntry {
+struct ContentProviderEntry {
    u64 title_id;
    ContentRecordType type;

@@ -47,12 +48,46 @@ constexpr u64 GetUpdateTitleID(u64 base_title_id) {
    return base_title_id | 0x800;
 }

+ContentRecordType GetCRTypeFromNCAType(NCAContentType type);
+
 // boost flat_map requires operator< for O(log(n)) lookups.
-bool operator<(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
+bool operator<(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);

 // std unique requires operator== to identify duplicates.
-bool operator==(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
-bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs);
+bool operator==(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
+bool operator!=(const ContentProviderEntry& lhs, const ContentProviderEntry& rhs);
+
+class ContentProvider {
+public:
+    virtual ~ContentProvider();
+
+    virtual void Refresh() = 0;
+
+    virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
+    virtual bool HasEntry(ContentProviderEntry entry) const;
+
+    virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
+
+    virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
+    virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
+
+    virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
+    virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
+
+    virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
+    virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
+
+    virtual std::vector<ContentProviderEntry> ListEntries() const;
+
+    // If a parameter is not std::nullopt, it will be filtered for from all entries.
+    virtual std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
+        std::optional<u64> title_id = {}) const = 0;
+
+protected:
+    // A single instance of KeyManager to be used by GetEntry()
+    Core::Crypto::KeyManager keys;
+};

 /*
 * A class that catalogues NCAs in the registered directory structure.
@@ -67,39 +102,32 @@ bool operator!=(const RegisteredCacheEntry& lhs, const RegisteredCacheEntry& rhs
 * (This impl also supports substituting the nca dir for an nca file, as that's more convenient
 * when 4GB splitting can be ignored.)
 */
-class RegisteredCache {
-    friend class RegisteredCacheUnion;
-
+class RegisteredCache : public ContentProvider {
 public:
    // Parsing function defines the conversion from raw file to NCA. If there are other steps
    // besides creating the NCA from the file (e.g. NAX0 on SD Card), that should go in a custom
    // parsing function.
    explicit RegisteredCache(VirtualDir dir,
-                             RegisteredCacheParsingFunction parsing_function =
+                             ContentProviderParsingFunction parsing_function =
                                 [](const VirtualFile& file, const NcaID& id) { return file; });
-    ~RegisteredCache();
+    ~RegisteredCache() override;

-    void Refresh();
+    void Refresh() override;

-    bool HasEntry(u64 title_id, ContentRecordType type) const;
-    bool HasEntry(RegisteredCacheEntry entry) const;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;

-    std::optional<u32> GetEntryVersion(u64 title_id) const;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;

-    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;

-    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;

-    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const;
-    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;

-    std::vector<RegisteredCacheEntry> ListEntries() const;
    // If a parameter is not std::nullopt, it will be filtered for from all entries.
-    std::vector<RegisteredCacheEntry> ListEntriesFilter(
+    std::vector<ContentProviderEntry> ListEntriesFilter(
        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
-        std::optional<u64> title_id = {}) const;
+        std::optional<u64> title_id = {}) const override;

    // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure
    // there is a meta NCA and all of them are accessible.
@@ -131,46 +159,70 @@ private:
    bool RawInstallYuzuMeta(const CNMT& cnmt);

    VirtualDir dir;
-    RegisteredCacheParsingFunction parser;
-    Core::Crypto::KeyManager keys;
+    ContentProviderParsingFunction parser;

    // maps tid -> NcaID of meta
-    boost::container::flat_map<u64, NcaID> meta_id;
+    std::map<u64, NcaID> meta_id;
    // maps tid -> meta
-    boost::container::flat_map<u64, CNMT> meta;
+    std::map<u64, CNMT> meta;
    // maps tid -> meta for CNMT in yuzu_meta
-    boost::container::flat_map<u64, CNMT> yuzu_meta;
+    std::map<u64, CNMT> yuzu_meta;
 };

-// Combines multiple RegisteredCaches (i.e. SysNAND, UserNAND, SDMC) into one interface.
-class RegisteredCacheUnion {
+enum class ContentProviderUnionSlot {
+    SysNAND,        ///< System NAND
+    UserNAND,       ///< User NAND
+    SDMC,           ///< SD Card
+    FrontendManual, ///< Frontend-defined game list or similar
+};
+
+// Combines multiple ContentProvider(s) (i.e. SysNAND, UserNAND, SDMC) into one interface.
+class ContentProviderUnion : public ContentProvider {
 public:
-    explicit RegisteredCacheUnion(std::vector<RegisteredCache*> caches);
+    ~ContentProviderUnion() override;

-    void Refresh();
+    void SetSlot(ContentProviderUnionSlot slot, ContentProvider* provider);
+    void ClearSlot(ContentProviderUnionSlot slot);

-    bool HasEntry(u64 title_id, ContentRecordType type) const;
-    bool HasEntry(RegisteredCacheEntry entry) const;
+    void Refresh() override;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
+    std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+        std::optional<u64> title_id) const override;

-    std::optional<u32> GetEntryVersion(u64 title_id) const;
-
-    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
-
-    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetEntryRaw(RegisteredCacheEntry entry) const;
-
-    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const;
-    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;
-
-    std::vector<RegisteredCacheEntry> ListEntries() const;
-    // If a parameter is not std::nullopt, it will be filtered for from all entries.
-    std::vector<RegisteredCacheEntry> ListEntriesFilter(
+    std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> ListEntriesFilterOrigin(
+        std::optional<ContentProviderUnionSlot> origin = {},
        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
        std::optional<u64> title_id = {}) const;

 private:
-    std::vector<RegisteredCache*> caches;
+    std::map<ContentProviderUnionSlot, ContentProvider*> providers;
+};
+
+class ManualContentProvider : public ContentProvider {
+public:
+    ~ManualContentProvider() override;
+
+    void AddEntry(TitleType title_type, ContentRecordType content_type, u64 title_id,
+                  VirtualFile file);
+    void ClearAllEntries();
+
+    void Refresh() override;
+    bool HasEntry(u64 title_id, ContentRecordType type) const override;
+    std::optional<u32> GetEntryVersion(u64 title_id) const override;
+    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const override;
+    VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const override;
+    std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const override;
+    std::vector<ContentProviderEntry> ListEntriesFilter(
+        std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+        std::optional<u64> title_id) const override;
+
+private:
+    std::map<std::tuple<TitleType, ContentRecordType, u64>, VirtualFile> entries;
 };

 } // namespace FileSys
--- a/src/core/file_sys/romfs_factory.cpp
+++ b/src/core/file_sys/romfs_factory.cpp
@@ -48,7 +48,7 @@ ResultVal<VirtualFile> RomFSFactory::Open(u64 title_id, StorageId storage, Conte

    switch (storage) {
    case StorageId::None:
-        res = Service::FileSystem::GetUnionContents().GetEntry(title_id, type);
+        res = Core::System::GetInstance().GetContentProvider().GetEntry(title_id, type);
        break;
    case StorageId::NandSystem:
        res = Service::FileSystem::GetSystemNANDContents()->GetEntry(title_id, type);
--- a/src/core/file_sys/submission_package.cpp
+++ b/src/core/file_sys/submission_package.cpp
@@ -143,11 +143,12 @@ std::multimap<u64, std::shared_ptr<NCA>> NSP::GetNCAsByTitleID() const {
    return out;
 }

-std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> NSP::GetNCAs() const {
+std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>>
+NSP::GetNCAs() const {
    return ncas;
 }

-std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const {
+std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type, TitleType title_type) const {
    if (extracted)
        LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");

@@ -155,14 +156,14 @@ std::shared_ptr<NCA> NSP::GetNCA(u64 title_id, ContentRecordType type) const {
    if (title_id_iter == ncas.end())
        return nullptr;

-    const auto type_iter = title_id_iter->second.find(type);
+    const auto type_iter = title_id_iter->second.find({title_type, type});
    if (type_iter == title_id_iter->second.end())
        return nullptr;

    return type_iter->second;
 }

-VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type) const {
+VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type, TitleType title_type) const {
    if (extracted)
        LOG_WARNING(Service_FS, "called on an NSP that is of type extracted.");
-    const auto nca = GetNCA(title_id, type);
+    const auto nca = GetNCA(title_id, type, title_type);
@@ -240,7 +241,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
            const CNMT cnmt(inner_file);
            auto& ncas_title = ncas[cnmt.GetTitleID()];

-            ncas_title[ContentRecordType::Meta] = nca;
+            ncas_title[{cnmt.GetType(), ContentRecordType::Meta}] = nca;
            for (const auto& rec : cnmt.GetContentRecords()) {
                const auto id_string = Common::HexArrayToString(rec.nca_id, false);
                const auto next_file = pfs->GetFile(fmt::format("{}.nca", id_string));
@@ -258,7 +259,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {
                if (next_nca->GetStatus() == Loader::ResultStatus::Success ||
                    (next_nca->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS &&
                     (cnmt.GetTitleID() & 0x800) != 0)) {
-                    ncas_title[rec.type] = std::move(next_nca);
+                    ncas_title[{cnmt.GetType(), rec.type}] = std::move(next_nca);
                }
            }

--- a/src/core/file_sys/submission_package.h
+++ b/src/core/file_sys/submission_package.h
@@ -42,9 +42,12 @@ public:
    // Type 0 Only (Collection of NCAs + Certificate + Ticket + Meta XML)
    std::vector<std::shared_ptr<NCA>> GetNCAsCollapsed() const;
    std::multimap<u64, std::shared_ptr<NCA>> GetNCAsByTitleID() const;
-    std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> GetNCAs() const;
-    std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type) const;
-    VirtualFile GetNCAFile(u64 title_id, ContentRecordType type) const;
+    std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> GetNCAs()
+        const;
+    std::shared_ptr<NCA> GetNCA(u64 title_id, ContentRecordType type,
+                                TitleType title_type = TitleType::Application) const;
+    VirtualFile GetNCAFile(u64 title_id, ContentRecordType type,
+                           TitleType title_type = TitleType::Application) const;
    std::vector<Core::Crypto::Key128> GetTitlekey() const;

    std::vector<VirtualFile> GetFiles() const override;
@@ -67,7 +70,7 @@ private:

    std::shared_ptr<PartitionFilesystem> pfs;
    // Map title id -> {map type -> NCA}
-    std::map<u64, std::map<ContentRecordType, std::shared_ptr<NCA>>> ncas;
+    std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas;
    std::vector<VirtualFile> ticket_files;

    Core::Crypto::KeyManager keys;
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -12,6 +12,26 @@

 namespace Core::Frontend {

+/**
+ * Represents a graphics context that can be used for background computation or drawing. If the
+ * graphics backend doesn't require the context, then the implementation of these methods can be
+ * stubs.
+ */
+class GraphicsContext {
+public:
+    /// Virtual so that a context deleted through a GraphicsContext pointer is fully destroyed
+    virtual ~GraphicsContext() = default;
+
+    /// Makes the graphics context current for the caller thread
+    virtual void MakeCurrent() = 0;
+
+    /// Releases the context from the caller thread
+    virtual void DoneCurrent() = 0;
+
+    /// Swap buffers to display the next frame
+    virtual void SwapBuffers() = 0;
+};
+
 /**
 * Abstraction class used to provide an interface between emulation code and the frontend
 * (e.g. SDL, QGLWidget, GLFW, etc...).
@@ -30,7 +47,7 @@ namespace Core::Frontend {
 * - DO NOT TREAT THIS CLASS AS A GUI TOOLKIT ABSTRACTION LAYER. That's not what it is. Please
 *   re-read the upper points again and think about it if you don't see this.
 */
-class EmuWindow {
+class EmuWindow : public GraphicsContext {
 public:
    /// Data structure to store emuwindow configuration
    struct WindowConfig {
@@ -40,17 +57,21 @@ public:
        std::pair<unsigned, unsigned> min_client_area_size;
    };

-    /// Swap buffers to display the next frame
-    virtual void SwapBuffers() = 0;
-
    /// Polls window events
    virtual void PollEvents() = 0;

-    /// Makes the graphics context current for the caller thread
-    virtual void MakeCurrent() = 0;
-
-    /// Releases (dunno if this is the "right" word) the GLFW context from the caller thread
-    virtual void DoneCurrent() = 0;
+    /**
+     * Returns a GraphicsContext that the frontend provides that is shared with the emu window. This
+     * context can be used from other threads for background graphics computation. If the frontend
+     * is using a graphics backend that doesn't need anything specific to run on a different thread,
+     * then it can use a stubbed implementation for GraphicsContext.
+     *
+     * If the return value is null, then the core should assume that the frontend cannot provide a
+     * shared context.
+     */
+    virtual std::unique_ptr<GraphicsContext> CreateSharedContext() const {
+        return nullptr;
+    }

    /**
     * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -1030,7 +1030,7 @@ static void Step() {

 /// Tell the CPU if we hit a memory breakpoint.
 bool IsMemoryBreak() {
-    if (IsConnected()) {
+    if (!IsConnected()) {
        return false;
    }

--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -139,10 +139,8 @@ public:
            context->AddDomainObject(std::move(iface));
        } else {
            auto& kernel = Core::System::GetInstance().Kernel();
-            auto sessions =
+            auto [server, client] =
                Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
-            auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
-            auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
            iface->ClientConnected(server);
            context->AddMoveObject(std::move(client));
        }
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <tuple>
-
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
 #include "core/hle/kernel/errors.h"
@@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
    active_sessions++;

    // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
-    auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
+    auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);

    if (server_port->HasHLEHandler()) {
-        server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
+        server_port->GetHLEHandler()->ClientConnected(server);
    } else {
-        server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
+        server_port->AppendPendingSession(server);
    }

    // Wake the threads waiting on the ServerPort
    server_port->WakeupAllWaitingThreads();

-    return MakeResult(std::get<SharedPtr<ClientSession>>(sessions));
+    return MakeResult(client);
 }

 void ClientPort::ConnectionClosed() {
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -25,7 +25,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::ClientPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientPort;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -29,7 +29,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::ClientSession;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ClientSession;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/kernel/thread.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
+#include "core/memory.h"

 namespace Kernel {

@@ -181,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {

 void KernelCore::MakeCurrentProcess(Process* process) {
    impl->current_process = process;
+    Memory::SetCurrentPageTable(&process->VMManager().page_table);
 }

 Process* KernelCore::CurrentProcess() {
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -32,9 +32,6 @@ namespace {
 * @param priority The priority to give the main thread
 */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
-    // Setup page table so we can write to memory
-    Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
-
    // Initialize new "main" thread
    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
    auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
@@ -109,6 +106,8 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
    is_64bit_process = metadata.Is64BitProgram();

    vm_manager.Reset(metadata.GetAddressSpaceType());
+    // Ensure that the potentially resized page table is seen by CPU backends.
+    Memory::SetCurrentPageTable(&vm_manager.page_table);

    const auto& caps = metadata.GetKernelCapabilities();
    const auto capability_init_result =
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -85,7 +85,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::Process;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Process;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -31,7 +31,7 @@ public:
        return reset_type;
    }

-    static const HandleType HANDLE_TYPE = HandleType::ReadableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -41,7 +41,7 @@ public:
        return GetTypeName();
    }

-    static const HandleType HANDLE_TYPE = HandleType::ResourceLimit;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ResourceLimit;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -101,7 +101,6 @@ void Scheduler::SwitchContext(Thread* new_thread) {
        auto* const thread_owner_process = current_thread->GetOwnerProcess();
        if (previous_process != thread_owner_process) {
            system.Kernel().MakeCurrentProcess(thread_owner_process);
-            Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
        }

        cpu_core.LoadContext(new_thread->GetContext());
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }

-std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
-    KernelCore& kernel, u32 max_sessions, std::string name) {
-
+ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                                std::string name) {
    SharedPtr<ServerPort> server_port(new ServerPort(kernel));
    SharedPtr<ClientPort> client_port(new ClientPort(kernel));

@@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
    client_port->max_sessions = max_sessions;
    client_port->active_sessions = 0;

-    return std::make_tuple(std::move(server_port), std::move(client_port));
+    return std::make_pair(std::move(server_port), std::move(client_port));
 }

 } // namespace Kernel
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -6,7 +6,7 @@

 #include <memory>
 #include <string>
-#include <tuple>
+#include <utility>
 #include <vector>
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
@@ -23,6 +23,7 @@ class SessionRequestHandler;
 class ServerPort final : public WaitObject {
 public:
    using HLEHandler = std::shared_ptr<SessionRequestHandler>;
+    using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;

    /**
     * Creates a pair of ServerPort and an associated ClientPort.
@@ -32,8 +33,8 @@ public:
     * @param name Optional name of the ports
     * @return The created port tuple
     */
-    static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair(
-        KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort");
+    static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
+                                   std::string name = "UnknownPort");

    std::string GetTypeName() const override {
        return "ServerPort";
@@ -42,7 +43,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::ServerPort;
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerPort;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -28,11 +28,9 @@ ServerSession::~ServerSession() {
    // the emulated application.

    // Decrease the port's connection count.
-    if (parent->port)
+    if (parent->port) {
        parent->port->ConnectionClosed();
-
-    // TODO(Subv): Wake up all the ClientSession's waiting threads and set
-    // the SendSyncRequest result to 0xC920181A.
+    }

    parent->server = nullptr;
 }
@@ -74,9 +72,6 @@ void ServerSession::ClientDisconnected() {
        handler->ClientDisconnected(this);
    }

-    // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-    // their WaitSynchronization result to 0xC920181A.
-
    // Clean up the list of client threads with pending requests, they are unneeded now that the
    // client endpoint is closed.
    pending_requesting_threads.clear();
@@ -204,6 +199,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
    client_session->parent = parent;
    server_session->parent = parent;

-    return std::make_tuple(std::move(server_session), std::move(client_session));
+    return std::make_pair(std::move(server_session), std::move(client_session));
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -6,6 +6,7 @@

 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>

 #include "core/hle/kernel/object.h"
@@ -41,7 +42,11 @@ public:
        return "ServerSession";
    }

-    static const HandleType HANDLE_TYPE = HandleType::ServerSession;
+    std::string GetName() const override {
+        return name;
+    }
+
+    static constexpr HandleType HANDLE_TYPE = HandleType::ServerSession;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
@@ -54,7 +59,7 @@ public:
        return parent.get();
    }

-    using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
+    using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;

    /**
     * Creates a pair of ServerSession and an associated ClientSession.
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -76,7 +76,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::SharedMemory;
+    static constexpr HandleType HANDLE_TYPE = HandleType::SharedMemory;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -6,8 +6,12 @@

 #include "common/common_types.h"

+namespace Core {
+class System;
+}
+
 namespace Kernel {

-void CallSVC(u32 immediate);
+void CallSVC(Core::System& system, u32 immediate);

 } // namespace Kernel
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -11,278 +11,312 @@

 namespace Kernel {

-static inline u64 Param(int n) {
-    return Core::CurrentArmInterface().GetReg(n);
+static inline u64 Param(const Core::System& system, int n) {
+    return system.CurrentArmInterface().GetReg(n);
 }

 /**
 * HLE a function return from the current ARM userland process
- * @param res Result to return
+ * @param system System context
+ * @param result Result to return
 */
-static inline void FuncReturn(u64 res) {
-    Core::CurrentArmInterface().SetReg(0, res);
+static inline void FuncReturn(Core::System& system, u64 result) {
+    system.CurrentArmInterface().SetReg(0, result);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type ResultCode

-template <ResultCode func(u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0)).raw);
+template <ResultCode func(Core::System&, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0)).raw);
 }

-template <ResultCode func(u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0))).raw);
+template <ResultCode func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
 }

-template <ResultCode func(u32, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
 }

-template <ResultCode func(u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*)>
+void SvcWrap(Core::System& system) {
    u32 param = 0;
-    const u32 retval = func(&param).raw;
-    Core::CurrentArmInterface().SetReg(1, param);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param).raw;
+    system.CurrentArmInterface().SetReg(1, param);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u32*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u32*)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
    u32 param_2 = 0;
-    const u32 retval = func(&param_1, &param_2).raw;
+    const u32 retval = func(system, &param_1, &param_2).raw;

-    auto& arm_interface = Core::CurrentArmInterface();
+    auto& arm_interface = system.CurrentArmInterface();
    arm_interface.SetReg(1, param_1);
    arm_interface.SetReg(2, param_2);

-    FuncReturn(retval);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    const u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u32)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    const u32 retval = func(&param_1, Param(1), static_cast<u32>(Param(2))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u64*, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32)>
+void SvcWrap(Core::System& system) {
    u64 param_1 = 0;
-    const u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u64, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
 }

-template <ResultCode func(u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1))).raw);
+template <ResultCode func(Core::System&, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
 }

-template <ResultCode func(u64*, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64)>
+void SvcWrap(Core::System& system) {
    u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u64*, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u32, u32)>
+void SvcWrap(Core::System& system) {
    u64 param_1 = 0;
-    u32 retval = func(&param_1, static_cast<u32>(Param(1)), static_cast<u32>(Param(2))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw);
+template <ResultCode func(Core::System&, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
 }

-template <ResultCode func(u32, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw);
+template <ResultCode func(Core::System&, u32, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
+                            static_cast<u32>(Param(system, 1)), Param(system, 2))
+                           .raw);
 }

-template <ResultCode func(u32, u32*, u64*)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u32*, u64*)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
    u64 param_2 = 0;
-    ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2);
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    Core::CurrentArmInterface().SetReg(2, param_2);
-    FuncReturn(retval.raw);
+    const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    system.CurrentArmInterface().SetReg(2, param_2);
+    FuncReturn(system, retval.raw);
 }

-template <ResultCode func(u64, u64, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
+                           .raw);
+}
+
+template <ResultCode func(Core::System&, u64, u64, u32, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), Param(system, 3))
+                           .raw);
+}
+
+template <ResultCode func(Core::System&, u32, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)))
+                           .raw);
+}
+
+template <ResultCode func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
+}
+
+template <ResultCode func(Core::System&, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw);
+        system,
+        func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
 }

-template <ResultCode func(u64, u64, u32, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2)), Param(3)).raw);
+template <ResultCode func(Core::System&, u32, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
+                            Param(system, 2), static_cast<u32>(Param(system, 3)))
+                           .raw);
 }

-template <ResultCode func(u32, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw);
-}
-
-template <ResultCode func(u64, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), Param(2)).raw);
-}
-
-template <ResultCode func(u64, u64, u32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), static_cast<u32>(Param(2))).raw);
-}
-
-template <ResultCode func(u32, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
    FuncReturn(
-        func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw);
+        system,
+        func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
 }

-template <ResultCode func(u32, u64, u64)>
-void SvcWrap() {
-    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw);
-}
-
-template <ResultCode func(u32*, u64, u64, s64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    ResultCode retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3)));
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval.raw);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<s64>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u64, u64, u32, s64)>
-void SvcWrap() {
-    FuncReturn(
-        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw);
+template <ResultCode func(Core::System&, u64, u64, u32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }

-template <ResultCode func(u64*, u64, u64, u64)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
    u64 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3)).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval =
+        func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)),
-                      static_cast<s32>(Param(5)))
-                     .raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
+                            static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u32*, u64, u64, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(Handle*, u64, u32, u32)>
-void SvcWrap() {
+template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
+void SvcWrap(Core::System& system) {
    u32 param_1 = 0;
-    u32 retval =
-        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw;
-    Core::CurrentArmInterface().SetReg(1, param_1);
-    FuncReturn(retval);
+    const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
+                            static_cast<u32>(Param(system, 3)))
+                           .raw;
+
+    system.CurrentArmInterface().SetReg(1, param_1);
+    FuncReturn(system, retval);
 }

-template <ResultCode func(u64, u32, s32, s64)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s64>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
+                           .raw);
 }

-template <ResultCode func(u64, u32, s32, s32)>
-void SvcWrap() {
-    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
-                    static_cast<s32>(Param(3)))
-                   .raw);
+template <ResultCode func(Core::System&, u64, u32, s32, s32)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
+                            static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+                           .raw);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u32

-template <u32 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u32 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Function wrappers that return type u64

-template <u64 func()>
-void SvcWrap() {
-    FuncReturn(func());
+template <u64 func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system));
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
 /// Function wrappers that return type void

-template <void func()>
-void SvcWrap() {
-    func();
+template <void func(Core::System&)>
+void SvcWrap(Core::System& system) {
+    func(system);
 }

-template <void func(s64)>
-void SvcWrap() {
-    func(static_cast<s64>(Param(0)));
+template <void func(Core::System&, s64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<s64>(Param(system, 0)));
 }

-template <void func(u64, u64 len)>
-void SvcWrap() {
-    func(Param(0), Param(1));
+template <void func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1));
 }

-template <void func(u64, u64, u64)>
-void SvcWrap() {
-    func(Param(0), Param(1), Param(2));
+template <void func(Core::System&, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), Param(system, 1), Param(system, 2));
 }

-template <void func(u32, u64, u64)>
-void SvcWrap() {
-    func(static_cast<u32>(Param(0)), Param(1), Param(2));
+template <void func(Core::System&, u32, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
 }

 } // namespace Kernel
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -220,11 +220,6 @@ void Thread::SetPriority(u32 priority) {
    UpdatePriority();
 }

-void Thread::BoostPriority(u32 priority) {
-    scheduler->SetThreadPriority(this, priority);
-    current_priority = priority;
-}
-
 void Thread::SetWaitSynchronizationResult(ResultCode result) {
    context.cpu_registers[0] = result.raw;
 }
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -106,7 +106,7 @@ public:
        return "Thread";
    }

-    static const HandleType HANDLE_TYPE = HandleType::Thread;
+    static constexpr HandleType HANDLE_TYPE = HandleType::Thread;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
@@ -136,12 +136,6 @@ public:
     */
    void SetPriority(u32 priority);

-    /**
-     * Temporarily boosts the thread's priority until the next time it is scheduled
-     * @param priority The new priority
-     */
-    void BoostPriority(u32 priority);
-
    /// Adds a thread to the list of threads that are waiting for a lock held by this thread.
    void AddMutexWaiter(SharedPtr<Thread> thread);

--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -37,7 +37,7 @@ public:
        return name;
    }

-    static const HandleType HANDLE_TYPE = HandleType::WritableEvent;
+    static constexpr HandleType HANDLE_TYPE = HandleType::WritableEvent;
    HandleType GetHandleType() const override {
        return HANDLE_TYPE;
    }
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -86,7 +86,7 @@ static FileSys::VirtualFile GetManualRomFS() {
    if (loader.ReadManualRomFS(out) == Loader::ResultStatus::Success)
        return out;

-    const auto& installed{FileSystem::GetUnionContents()};
+    const auto& installed{Core::System::GetInstance().GetContentProvider()};
    const auto res = installed.GetEntry(Core::System::GetInstance().CurrentProcess()->GetTitleID(),
                                        FileSys::ContentRecordType::Manual);

--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -33,11 +33,11 @@ static bool CheckAOCTitleIDMatchesBase(u64 title_id, u64 base) {

 static std::vector<u64> AccumulateAOCTitleIDs() {
    std::vector<u64> add_on_content;
-    const auto rcu = FileSystem::GetUnionContents();
+    const auto& rcu = Core::System::GetInstance().GetContentProvider();
    const auto list =
        rcu.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data);
    std::transform(list.begin(), list.end(), std::back_inserter(add_on_content),
-                   [](const FileSys::RegisteredCacheEntry& rce) { return rce.title_id; });
+                   [](const FileSys::ContentProviderEntry& rce) { return rce.title_id; });
    add_on_content.erase(
        std::remove_if(
            add_on_content.begin(), add_on_content.end(),
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -391,11 +391,6 @@ void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
        save_data_factory->WriteSaveDataSize(type, title_id, user_id, new_value);
 }

-FileSys::RegisteredCacheUnion GetUnionContents() {
-    return FileSys::RegisteredCacheUnion{
-        {GetSystemNANDContents(), GetUserNANDContents(), GetSDMCContents()}};
-}
-
 FileSys::RegisteredCache* GetSystemNANDContents() {
    LOG_TRACE(Service_FS, "Opening System NAND Contents");

@@ -460,6 +455,10 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
    if (bis_factory == nullptr) {
        bis_factory =
            std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory);
+        Core::System::GetInstance().RegisterContentProvider(
+            FileSys::ContentProviderUnionSlot::SysNAND, bis_factory->GetSystemNANDContents());
+        Core::System::GetInstance().RegisterContentProvider(
+            FileSys::ContentProviderUnionSlot::UserNAND, bis_factory->GetUserNANDContents());
    }

    if (save_data_factory == nullptr) {
@@ -468,6 +467,8 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {

    if (sdmc_factory == nullptr) {
        sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory));
+        Core::System::GetInstance().RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
+                                                            sdmc_factory->GetSDMCContents());
    }
 }

--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -54,8 +54,6 @@ FileSys::SaveDataSize ReadSaveDataSize(FileSys::SaveDataType type, u64 title_id,
 void WriteSaveDataSize(FileSys::SaveDataType type, u64 title_id, u128 user_id,
                       FileSys::SaveDataSize new_value);

-FileSys::RegisteredCacheUnion GetUnionContents();
-
 FileSys::RegisteredCache* GetSystemNANDContents();
 FileSys::RegisteredCache* GetUserNANDContents();
 FileSys::RegisteredCache* GetSDMCContents();
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -115,11 +115,12 @@ private:

    void Read(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
+        const u64 option = rp.Pop<u64>();
        const s64 offset = rp.Pop<s64>();
        const s64 length = rp.Pop<s64>();

-        LOG_DEBUG(Service_FS, "called, offset=0x{:X}, length={}", offset, length);
+        LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset,
+                  length);

        // Error checking
        if (length < 0) {
@@ -148,11 +149,12 @@ private:

    void Write(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
+        const u64 option = rp.Pop<u64>();
        const s64 offset = rp.Pop<s64>();
        const s64 length = rp.Pop<s64>();

-        LOG_DEBUG(Service_FS, "called, offset=0x{:X}, length={}", offset, length);
+        LOG_DEBUG(Service_FS, "called, option={}, offset=0x{:X}, length={}", option, offset,
+                  length);

        // Error checking
        if (length < 0) {
@@ -250,10 +252,7 @@ private:
    u64 next_entry_index = 0;

    void Read(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-        const u64 unk = rp.Pop<u64>();
-
-        LOG_DEBUG(Service_FS, "called, unk=0x{:X}", unk);
+        LOG_DEBUG(Service_FS, "called.");

        // Calculate how many entries we can fit in the output buffer
        const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(FileSys::Entry);
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -93,12 +93,18 @@ public:
    }

    void LoadNrr(Kernel::HLERequestContext& ctx) {
+        struct Parameters {
+            u64_le process_id;
+            u64_le nrr_address;
+            u64_le nrr_size;
+        };
+
        IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr nrr_addr{rp.Pop<VAddr>()};
-        const u64 nrr_size{rp.Pop<u64>()};
-        LOG_DEBUG(Service_LDR, "called with nrr_addr={:016X}, nrr_size={:016X}", nrr_addr,
-                  nrr_size);
+        const auto [process_id, nrr_address, nrr_size] = rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR,
+                  "called with process_id={:016X}, nrr_address={:016X}, nrr_size={:016X}",
+                  process_id, nrr_address, nrr_size);

        if (!initialized) {
            LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
@@ -116,24 +122,26 @@ public:
        }

        // NRR Address does not fall on 0x1000 byte boundary
-        if (!Common::Is4KBAligned(nrr_addr)) {
-            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!", nrr_addr);
+        if (!Common::Is4KBAligned(nrr_address)) {
+            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!",
+                      nrr_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_ALIGNMENT);
            return;
        }

        // NRR Size is zero or causes overflow
-        if (nrr_addr + nrr_size <= nrr_addr || nrr_size == 0 || !Common::Is4KBAligned(nrr_size)) {
+        if (nrr_address + nrr_size <= nrr_address || nrr_size == 0 ||
+            !Common::Is4KBAligned(nrr_size)) {
            LOG_ERROR(Service_LDR, "NRR Size is invalid! (nrr_address={:016X}, nrr_size={:016X})",
-                      nrr_addr, nrr_size);
+                      nrr_address, nrr_size);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_SIZE);
            return;
        }
        // Read NRR data from memory
        std::vector<u8> nrr_data(nrr_size);
-        Memory::ReadBlock(nrr_addr, nrr_data.data(), nrr_size);
+        Memory::ReadBlock(nrr_address, nrr_data.data(), nrr_size);
        NRRHeader header;
        std::memcpy(&header, nrr_data.data(), sizeof(NRRHeader));

@@ -174,7 +182,7 @@ public:
            hashes.emplace_back(hash);
        }

-        nrr.insert_or_assign(nrr_addr, std::move(hashes));
+        nrr.insert_or_assign(nrr_address, std::move(hashes));

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -188,23 +196,30 @@ public:
            return;
        }

-        IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const auto nrr_addr{rp.Pop<VAddr>()};
-        LOG_DEBUG(Service_LDR, "called with nrr_addr={:016X}", nrr_addr);
+        struct Parameters {
+            u64_le process_id;
+            u64_le nrr_address;
+        };

-        if (!Common::Is4KBAligned(nrr_addr)) {
-            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!", nrr_addr);
+        IPC::RequestParser rp{ctx};
+        const auto [process_id, nrr_address] = rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR, "called with process_id={:016X}, nrr_addr={:016X}", process_id,
+                  nrr_address);
+
+        if (!Common::Is4KBAligned(nrr_address)) {
+            LOG_ERROR(Service_LDR, "NRR Address has invalid alignment (actual {:016X})!",
+                      nrr_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_ALIGNMENT);
            return;
        }

-        const auto iter = nrr.find(nrr_addr);
+        const auto iter = nrr.find(nrr_address);
        if (iter == nrr.end()) {
            LOG_ERROR(Service_LDR,
                      "Attempting to unload NRR which has not been loaded! (addr={:016X})",
-                      nrr_addr);
+                      nrr_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_NRR_ADDRESS);
            return;
@@ -216,16 +231,22 @@ public:
    }

    void LoadNro(Kernel::HLERequestContext& ctx) {
+        struct Parameters {
+            u64_le process_id;
+            u64_le image_address;
+            u64_le image_size;
+            u64_le bss_address;
+            u64_le bss_size;
+        };
+
        IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr nro_addr{rp.Pop<VAddr>()};
-        const u64 nro_size{rp.Pop<u64>()};
-        const VAddr bss_addr{rp.Pop<VAddr>()};
-        const u64 bss_size{rp.Pop<u64>()};
-        LOG_DEBUG(
-            Service_LDR,
-            "called with nro_addr={:016X}, nro_size={:016X}, bss_addr={:016X}, bss_size={:016X}",
-            nro_addr, nro_size, bss_addr, bss_size);
+        const auto [process_id, nro_address, nro_size, bss_address, bss_size] =
+            rp.PopRaw<Parameters>();
+
+        LOG_DEBUG(Service_LDR,
+                  "called with pid={:016X}, nro_addr={:016X}, nro_size={:016X}, bss_addr={:016X}, "
+                  "bss_size={:016X}",
+                  process_id, nro_address, nro_size, bss_address, bss_size);

        if (!initialized) {
            LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
@@ -243,8 +264,9 @@ public:
        }

        // NRO Address does not fall on 0x1000 byte boundary
-        if (!Common::Is4KBAligned(nro_addr)) {
-            LOG_ERROR(Service_LDR, "NRO Address has invalid alignment (actual {:016X})!", nro_addr);
+        if (!Common::Is4KBAligned(nro_address)) {
+            LOG_ERROR(Service_LDR, "NRO Address has invalid alignment (actual {:016X})!",
+                      nro_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_ALIGNMENT);
            return;
@@ -252,15 +274,15 @@ public:

        // NRO Size or BSS Size is zero or causes overflow
        const auto nro_size_valid =
-            nro_size != 0 && nro_addr + nro_size > nro_addr && Common::Is4KBAligned(nro_size);
-        const auto bss_size_valid =
-            nro_size + bss_size >= nro_size && (bss_size == 0 || bss_addr + bss_size > bss_addr);
+            nro_size != 0 && nro_address + nro_size > nro_address && Common::Is4KBAligned(nro_size);
+        const auto bss_size_valid = nro_size + bss_size >= nro_size &&
+                                    (bss_size == 0 || bss_address + bss_size > bss_address);

        if (!nro_size_valid || !bss_size_valid) {
            LOG_ERROR(Service_LDR,
                      "NRO Size or BSS Size is invalid! (nro_address={:016X}, nro_size={:016X}, "
                      "bss_address={:016X}, bss_size={:016X})",
-                      nro_addr, nro_size, bss_addr, bss_size);
+                      nro_address, nro_size, bss_address, bss_size);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_SIZE);
            return;
@@ -268,7 +290,7 @@ public:

        // Read NRO data from memory
        std::vector<u8> nro_data(nro_size);
-        Memory::ReadBlock(nro_addr, nro_data.data(), nro_size);
+        Memory::ReadBlock(nro_address, nro_data.data(), nro_size);

        SHA256Hash hash{};
        mbedtls_sha256(nro_data.data(), nro_data.size(), hash.data(), 0);
@@ -318,17 +340,18 @@ public:
            return;
        }

-        ASSERT(vm_manager
-                   .MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
-                   .IsSuccess());
-        ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
+        ASSERT(
+            vm_manager
+                .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
+                .IsSuccess());
+        ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());

        if (bss_size > 0) {
            ASSERT(vm_manager
-                       .MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
+                       .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
                                     Kernel::MemoryState::ModuleCode)
                       .IsSuccess());
-            ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
+            ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess());
        }

        vm_manager.ReprotectRange(*map_address, header.text_size,
@@ -348,13 +371,6 @@ public:
    }

    void UnloadNro(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-        rp.Skip(2, false);
-        const VAddr mapped_addr{rp.PopRaw<VAddr>()};
-        const VAddr heap_addr{rp.PopRaw<VAddr>()};
-        LOG_DEBUG(Service_LDR, "called with mapped_addr={:016X}, heap_addr={:016X}", mapped_addr,
-                  heap_addr);
-
        if (!initialized) {
            LOG_ERROR(Service_LDR, "LDR:RO not initialized before use!");
            IPC::ResponseBuilder rb{ctx, 2};
@@ -362,22 +378,30 @@ public:
            return;
        }

-        if (!Common::Is4KBAligned(mapped_addr) || !Common::Is4KBAligned(heap_addr)) {
-            LOG_ERROR(Service_LDR,
-                      "NRO/BSS Address has invalid alignment (actual nro_addr={:016X}, "
-                      "bss_addr={:016X})!",
-                      mapped_addr, heap_addr);
+        struct Parameters {
+            u64_le process_id;
+            u64_le nro_address;
+        };
+
+        IPC::RequestParser rp{ctx};
+        const auto [process_id, nro_address] = rp.PopRaw<Parameters>();
+        LOG_DEBUG(Service_LDR, "called with process_id={:016X}, nro_address=0x{:016X}", process_id,
+                  nro_address);
+
+        if (!Common::Is4KBAligned(nro_address)) {
+            LOG_ERROR(Service_LDR, "NRO address has invalid alignment (nro_address=0x{:016X})",
+                      nro_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_ALIGNMENT);
            return;
        }

-        const auto iter = nro.find(mapped_addr);
+        const auto iter = nro.find(nro_address);
        if (iter == nro.end()) {
            LOG_ERROR(Service_LDR,
-                      "The NRO attempting to unmap was not mapped or has an invalid address "
-                      "(actual {:016X})!",
-                      mapped_addr);
+                      "The NRO attempting to be unmapped was not mapped or has an invalid address "
+                      "(nro_address=0x{:016X})!",
+                      nro_address);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERROR_INVALID_NRO_ADDRESS);
            return;
@@ -386,10 +410,7 @@ public:
        auto& vm_manager = Core::CurrentProcess()->VMManager();
        const auto& nro_size = iter->second.size;

-        ASSERT(vm_manager
-                   .MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
-                   .IsSuccess());
-        ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());
+        ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess());

        Core::System::GetInstance().InvalidateCpuInstructionCaches();

@@ -459,11 +480,10 @@ private:
    std::map<VAddr, NROInfo> nro;
    std::map<VAddr, std::vector<SHA256Hash>> nrr;

-    bool IsValidNROHash(const SHA256Hash& hash) {
-        return std::any_of(
-            nrr.begin(), nrr.end(), [&hash](const std::pair<VAddr, std::vector<SHA256Hash>>& p) {
-                return std::find(p.second.begin(), p.second.end(), hash) != p.second.end();
-            });
+    bool IsValidNROHash(const SHA256Hash& hash) const {
+        return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {
+            return std::find(p.second.begin(), p.second.end(), hash) != p.second.end();
+        });
    }

    static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) {
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -21,12 +21,13 @@
 #include "core/hle/service/vi/display/vi_display.h"
 #include "core/hle/service/vi/layer/vi_layer.h"
 #include "core/perf_stats.h"
+#include "core/settings.h"
 #include "video_core/renderer_base.h"

 namespace Service::NVFlinger {

-constexpr std::size_t SCREEN_REFRESH_RATE = 60;
-constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);

 NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
    displays.emplace_back(0, "Default");
@@ -36,13 +37,15 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
    displays.emplace_back(4, "Null");

    // Schedule the screen composition events
-    composition_event =
-        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
+    const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks;
+
+    composition_event = core_timing.RegisterEvent(
+        "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) {
            Compose();
-            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
+            this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event);
        });

-    core_timing.ScheduleEvent(frame_ticks, composition_event);
+    core_timing.ScheduleEvent(ticks, composition_event);
 }

 NVFlinger::~NVFlinger() {
@@ -62,6 +65,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
                     [&](const VI::Display& display) { return display.GetName() == name; });
+
    if (itr == displays.end()) {
        return {};
    }
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -21,6 +21,8 @@
 #include "core/memory.h"
 #include "core/settings.h"

+#pragma optimize("", off)
+
 namespace Loader {
 namespace {
 struct MODHeader {
@@ -136,13 +138,13 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,

    // Apply patches if necessary
    if (pm && (pm->HasNSOPatch(nso_header.build_id) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
+        std::vector<u8> pi_header;
        pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
                         reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
                         program_image.end());

-        pi_header = pm->PatchNSO(pi_header);
+        pi_header = pm->PatchNSO(pi_header, file.GetName());

        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
    }
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -38,10 +38,6 @@ void SetCurrentPageTable(Common::PageTable* page_table) {
    }
 }

-Common::PageTable* GetCurrentPageTable() {
-    return current_page_table;
-}
-
 static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
                     Common::PageType type) {
    LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -28,16 +28,6 @@ constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

 /// Virtual user-space memory regions
 enum : VAddr {
-    /// Read-only page containing kernel and system configuration values.
-    CONFIG_MEMORY_VADDR = 0x1FF80000,
-    CONFIG_MEMORY_SIZE = 0x00001000,
-    CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
-
-    /// Usually read-only page containing mostly values read from hardware.
-    SHARED_PAGE_VADDR = 0x1FF81000,
-    SHARED_PAGE_SIZE = 0x00001000,
-    SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
-
    /// TLS (Thread-Local Storage) related.
    TLS_ENTRY_SIZE = 0x200,

@@ -50,9 +40,8 @@ enum : VAddr {
    KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };

-/// Currently active page table
+/// Changes the currently active page table.
 void SetCurrentPageTable(Common::PageTable* page_table);
-Common::PageTable* GetCurrentPageTable();

 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
    bool use_disk_shader_cache;
    bool use_accurate_gpu_emulation;
    bool use_asynchronous_gpu_emulation;
+    bool force_30fps_mode;

    float bg_red;
    float bg_green;
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -17,7 +17,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
    : mutable_memory(mutable_memory_),
      test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
    auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
-    kernel.MakeCurrentProcess(process.get());
    page_table = &process->VMManager().page_table;

    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
@@ -28,7 +27,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
    Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
    Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);

-    Memory::SetCurrentPageTable(page_table);
+    kernel.MakeCurrentProcess(process.get());
 }

 TestEnvironment::~TestEnvironment() {
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(video_core STATIC
    textures/decoders.cpp
    textures/decoders.h
    textures/texture.h
+    texture_cache.cpp
+    texture_cache.h
    video_core.cpp
    video_core.h
 )
@@ -127,12 +129,14 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_sampler_cache.h
        renderer_vulkan/vk_scheduler.cpp
        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_shader_decompiler.cpp
+        renderer_vulkan/vk_shader_decompiler.h
        renderer_vulkan/vk_stream_buffer.cpp
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
        renderer_vulkan/vk_swapchain.h)

-    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
+    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
 endif()

@@ -140,3 +144,6 @@ create_target_directory_groups(video_core)

 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad)
+if (ENABLE_VULKAN)
+    target_link_libraries(video_core PRIVATE sirit)
+endif()
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -8,6 +8,7 @@
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"

 namespace Tegra {

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,12 +6,13 @@
 #include "common/logging/log.h"
 #include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

 namespace Tegra::Engines {

 Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : rasterizer{rasterizer}, memory_manager{memory_manager} {}

 void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,7 +10,10 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}

 namespace VideoCore {
 class RasterizerInterface;
@@ -115,10 +118,9 @@ public:
        };
    } regs{};

-    MemoryManager& memory_manager;
-
 private:
    VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;

    /// Performs the copy from the source surface to the destination surface as configured in the
    /// registers.
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -9,7 +9,10 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
+
+namespace Tegra {
+class MemoryManager;
+}

 namespace Tegra::Engines {

@@ -40,10 +43,11 @@ public:
    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                  "KeplerCompute Regs has wrong size");

-    MemoryManager& memory_manager;
-
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);
+
+private:
+    MemoryManager& memory_manager;
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -5,9 +5,9 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"

@@ -15,7 +15,7 @@ namespace Tegra::Engines {

 KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           MemoryManager& memory_manager)
-    : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}

 KeplerMemory::~KeplerMemory() = default;

--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,12 +10,15 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"

 namespace Core {
 class System;
 }

+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -82,8 +85,8 @@ public:

 private:
    Core::System& system;
-    MemoryManager& memory_manager;
    VideoCore::RasterizerInterface& rasterizer;
+    MemoryManager& memory_manager;

    void ProcessData(u32 data);
 };
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,11 +7,10 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "core/memory.h"
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
 #include "video_core/textures/texture.h"

 namespace Tegra::Engines {
@@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;

 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                     MemoryManager& memory_manager)
-    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
-      macro_interpreter(*this) {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
+                                                                                  *this} {
    InitializeRegisterDefaults();
 }

@@ -250,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessQueryGet();
        break;
    }
+    case MAXWELL3D_REG_INDEX(sync_info): {
+        ProcessSyncPoint();
+        break;
+    }
    default:
        break;
    }
@@ -327,6 +330,14 @@ void Maxwell3D::ProcessQueryGet() {
    }
 }

+void Maxwell3D::ProcessSyncPoint() {
+    const u32 sync_point = regs.sync_info.sync_point.Value();
+    const u32 increment = regs.sync_info.increment.Value();
+    const u32 cache_flush = regs.sync_info.unknown.Value();
+    LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
+              cache_flush);
+}
+
 void Maxwell3D::DrawArrays() {
    LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
              regs.vertex_buffer.count);
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -16,13 +16,16 @@
 #include "common/math_util.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
-#include "video_core/memory_manager.h"
 #include "video_core/textures/texture.h"

 namespace Core {
 class System;
 }

+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -576,7 +579,17 @@ public:
                    u32 bind;
                } macros;

-                INSERT_PADDING_WORDS(0x188);
+                INSERT_PADDING_WORDS(0x69);
+
+                struct {
+                    union {
+                        BitField<0, 16, u32> sync_point;
+                        BitField<16, 1, u32> unknown;
+                        BitField<20, 1, u32> increment;
+                    };
+                } sync_info;
+
+                INSERT_PADDING_WORDS(0x11E);

                u32 tfb_enabled;

@@ -1093,7 +1106,6 @@ public:
    };

    State state{};
-    MemoryManager& memory_manager;

    struct DirtyFlags {
        std::bitset<8> color_buffer{0xFF};
@@ -1141,6 +1153,8 @@ private:

    VideoCore::RasterizerInterface& rasterizer;

+    MemoryManager& memory_manager;
+
    /// Start offsets of each macro in macro_memory
    std::unordered_map<u32, u32> macro_offsets;

@@ -1180,6 +1194,9 @@ private:
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

+    /// Handles a write to the sync_info register.
+    void ProcessSyncPoint();
+
    /// Handles a write to the CB_DATA[i] register.
    void ProcessCBData(u32 value);

@@ -1195,6 +1212,7 @@ private:
                  "Field " #field_name " has invalid position")

 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,9 +5,9 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 #include "video_core/textures/decoders.h"
@@ -16,7 +16,7 @@ namespace Tegra::Engines {

 MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                       MemoryManager& memory_manager)
-    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}

 void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,12 +10,15 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"

 namespace Core {
 class System;
 }

+namespace Tegra {
+class MemoryManager;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -139,13 +142,13 @@ public:
        };
    } regs{};

-    MemoryManager& memory_manager;
-
 private:
    Core::System& system;

    VideoCore::RasterizerInterface& rasterizer;

+    MemoryManager& memory_manager;
+
    /// Performs the copy from the source buffer to the destination buffer as configured in the
    /// registers.
    void HandleCopy();
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1238,13 +1238,16 @@ union Instruction {

    union {
        BitField<20, 16, u64> imm20_16;
+        BitField<35, 1, u64> high_b_rr; // used on RR
        BitField<36, 1, u64> product_shift_left;
        BitField<37, 1, u64> merge_37;
        BitField<48, 1, u64> sign_a;
        BitField<49, 1, u64> sign_b;
+        BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
        BitField<50, 3, XmadMode> mode;
        BitField<52, 1, u64> high_b;
        BitField<53, 1, u64> high_a;
+        BitField<55, 1, u64> product_shift_left_second; // used on CR
        BitField<56, 1, u64> merge_56;
    } xmad;

@@ -1662,7 +1665,7 @@ private:
            INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
            INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
            INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
-            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
            INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
            INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
            INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {

 GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
    auto& rasterizer{renderer.Rasterizer()};
-    memory_manager = std::make_unique<Tegra::MemoryManager>();
+    memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,16 +5,13 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
 #include "core/memory.h"
-#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"

 namespace Tegra {

-MemoryManager::MemoryManager() {
+MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
              Common::PageType::Unmapped);
@@ -70,8 +67,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
    const u64 aligned_size{Common::AlignUp(size, page_size)};
    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};

-    Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
-                                                                                 aligned_size);
+    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
    UnmapRange(gpu_addr, aligned_size);

    return gpu_addr;
@@ -204,14 +200,85 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
 }

 void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
-    std::memcpy(dest_buffer, GetPointer(src_addr), size);
+    // Walk the range one page at a time: each page has its own entry in the page table.
+    std::size_t page{src_addr >> page_bits};
+    std::size_t offset_in_page{src_addr & page_mask};
+    auto* out{static_cast<u8*>(dest_buffer)};
+
+    for (std::size_t left{size}; left > 0; ++page, offset_in_page = 0) {
+        // Never copy past the end of the current page or past the end of the request.
+        const std::size_t bytes_to_page_end{static_cast<std::size_t>(page_size) - offset_in_page};
+        const std::size_t chunk{std::min(bytes_to_page_end, left)};
+
+        if (page_table.attributes[page] == Common::PageType::Memory) {
+            const u8* const src{page_table.pointers[page] + offset_in_page};
+            // Have the rasterizer flush this range before it is read.
+            rasterizer.FlushRegion(ToCacheAddr(src), chunk);
+            std::memcpy(out, src, chunk);
+        } else {
+            // Only PageType::Memory pages are handled by this function.
+            UNREACHABLE();
+        }
+
+        // Advance the output cursor; the loop header moves on to the start of the next page.
+        out += chunk;
+        left -= chunk;
+    }
 }
+
 void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
-    std::memcpy(GetPointer(dest_addr), src_buffer, size);
+    // Walk the range one page at a time: each page has its own entry in the page table.
+    std::size_t page{dest_addr >> page_bits};
+    std::size_t offset_in_page{dest_addr & page_mask};
+    const auto* in{static_cast<const u8*>(src_buffer)};
+
+    for (std::size_t left{size}; left > 0; ++page, offset_in_page = 0) {
+        // Never copy past the end of the current page or past the end of the request.
+        const std::size_t bytes_to_page_end{static_cast<std::size_t>(page_size) - offset_in_page};
+        const std::size_t chunk{std::min(bytes_to_page_end, left)};
+
+        if (page_table.attributes[page] == Common::PageType::Memory) {
+            u8* const dest{page_table.pointers[page] + offset_in_page};
+            // Have the rasterizer invalidate this range before it is overwritten.
+            rasterizer.InvalidateRegion(ToCacheAddr(dest), chunk);
+            std::memcpy(dest, in, chunk);
+        } else {
+            // Only PageType::Memory pages are handled by this function.
+            UNREACHABLE();
+        }
+
+        // Advance the input cursor; the loop header moves on to the start of the next page.
+        in += chunk;
+        left -= chunk;
+    }
 }

 void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
-    std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+    // Source pages are resolved here; the destination side is delegated to WriteBlock.
+    std::size_t page{src_addr >> page_bits};
+    std::size_t offset_in_page{src_addr & page_mask};
+
+    for (std::size_t left{size}; left > 0; ++page, offset_in_page = 0) {
+        // Never copy past the end of the current source page or past the end of the request.
+        const std::size_t bytes_to_page_end{static_cast<std::size_t>(page_size) - offset_in_page};
+        const std::size_t chunk{std::min(bytes_to_page_end, left)};
+
+        if (page_table.attributes[page] == Common::PageType::Memory) {
+            const u8* const src{page_table.pointers[page] + offset_in_page};
+            // Have the rasterizer flush the source range before it is read.
+            rasterizer.FlushRegion(ToCacheAddr(src), chunk);
+            // WriteBlock does its own per-page walk of the destination range.
+            WriteBlock(dest_addr, src, chunk);
+        } else {
+            // Only PageType::Memory source pages are handled by this function.
+            UNREACHABLE();
+        }
+
+        // Both GPU addresses advance by the amount just copied.
+        dest_addr += static_cast<VAddr>(chunk);
+        src_addr += static_cast<VAddr>(chunk);
+        left -= chunk;
+    }
 }

 void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
@@ -351,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
    const VirtualMemoryArea& vma{vma_handle->second};
    if (vma.type == VirtualMemoryArea::Type::Mapped) {
        // Region is already allocated
-        return {};
+        return vma_handle;
    }

    const VAddr start_in_vma{base - vma.base};
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
 #include "common/common_types.h"
 #include "common/page_table.h"

+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra {

 /**
@@ -43,7 +47,7 @@ struct VirtualMemoryArea {

 class MemoryManager final {
 public:
-    MemoryManager();
+    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); // kept by reference

    GPUVAddr AllocateSpace(u64 size, u64 align);
    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -144,6 +148,7 @@ private:

    Common::PageTable page_table{page_bits};
    VMAMap vma_map;
+    VideoCore::RasterizerInterface& rasterizer;
 };

 } // namespace Tegra
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,6 +7,7 @@

 #include "common/alignment.h"
 #include "core/core.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"

--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -6,6 +6,7 @@

 #include "common/logging/log.h"
 #include "core/core.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -299,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    BaseBindings base_bindings;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};

+    // Prepare packed bindings
+    bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
+    bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
+
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = gpu.regs.shader_config[index];
        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -321,8 +325,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));

        // Bind the emulation info buffer
-        glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
+                                 static_cast<GLsizeiptr>(sizeof(ubo)));

        Shader shader{shader_cache.GetStageProgram(program)};
        const auto [program_handle, next_bindings] =
@@ -366,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        base_bindings = next_bindings;
    }

+    bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
+
    SyncClipEnabled(clip_distances);

    gpu.dirty_flags.shaders = false;
@@ -900,23 +907,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& entries = shader->GetShaderEntries().const_buffers;

-    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
-    std::array<GLuint, max_binds> bind_buffers;
-    std::array<GLintptr, max_binds> bind_offsets;
-    std::array<GLsizeiptr, max_binds> bind_sizes;
-
-    ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
-
    // Upload only the enabled buffers from the 16 constbuffers of each shader stage
    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
        const auto& used_buffer = entries[bindpoint];
        const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];

        if (!buffer.enabled) {
-            // With disabled buffers set values as zero to unbind them
-            bind_buffers[bindpoint] = 0;
-            bind_offsets[bindpoint] = 0;
-            bind_sizes[bindpoint] = 0;
+            // Set values to zero to unbind buffers
+            bind_ubo_pushbuffer.Push(0, 0, 0);
            continue;
        }

@@ -944,30 +942,19 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
        const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));

-        // Prepare values for multibind
-        bind_buffers[bindpoint] = buffer_cache.GetHandle();
-        bind_offsets[bindpoint] = const_buffer_offset;
-        bind_sizes[bindpoint] = size;
+        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
    }
-
-    // The first binding is reserved for emulation values
-    const GLuint ubo_base_binding = base_bindings.cbuf + 1;
-    glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
-                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
 }

 void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                          const Shader& shader, GLenum primitive_mode,
                                          BaseBindings base_bindings) {
-    // TODO(Rodrigo): Use ARB_multi_bind here
    const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
-    for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = base_bindings.gmem + bindpoint;
-        const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
-        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+    for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+        const auto& entry{entries[bindpoint]};
+        const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
+                                  static_cast<GLsizeiptr>(region->GetSizeInBytes()));
    }
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,6 +28,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"

 namespace Core {
 class System;
@@ -229,6 +230,9 @@ private:
    PrimitiveAssembler primitive_assembler{buffer_cache};
    GLint uniform_buffer_alignment;

+    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
+    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+
    std::size_t CalculateVertexArraysSize() const;

    std::size_t CalculateIndexBufferSize() const;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
@@ -111,11 +112,26 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
                                                       params.srgb_conversion);

-    if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) {
+    if (config.tsc.depth_compare_enabled) {
        // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
        // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
        // causes GetFormatType to properly return 'Depth' below).
-        params.pixel_format = PixelFormat::Z16;
+        if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
+            switch (params.pixel_format) {
+            case PixelFormat::R16S:
+            case PixelFormat::R16U:
+            case PixelFormat::R16F:
+                params.pixel_format = PixelFormat::Z16;
+                break;
+            case PixelFormat::R32F:
+                params.pixel_format = PixelFormat::Z32F;
+                break;
+            default:
+                LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
+                            static_cast<u32>(params.pixel_format));
+                break;
+            }
+        }
    }

    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
@@ -265,6 +281,10 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
+    if (!params.is_tiled) {
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.pitch = config.width * bpp;
+    }
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
@@ -661,8 +681,8 @@ void CachedSurface::FlushGLBuffer() {
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+    glPixelStorei(GL_PACK_ALIGNMENT, align);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
    ASSERT(!tuple.compressed);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -707,8 +727,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);

-    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
@@ -1174,10 +1194,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
        return new_surface;
    }

+    const bool old_compressed =
+        GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
+    const bool new_compressed =
+        GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
+    const bool compatible_formats =
+        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
+        !(old_compressed || new_compressed);
    // For compatible surfaces, we can just do fast glCopyImageSubData based copy
-    if (old_params.target == new_params.target && old_params.type == new_params.type &&
-        old_params.depth == new_params.depth && old_params.depth == 1 &&
-        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
+    if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
+        old_params.depth == 1 && compatible_formats) {
        FastCopySurface(old_surface, new_surface);
        return new_surface;
    }
@@ -1192,7 +1218,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        if (old_params.pixel_format == new_params.pixel_format)
+        if (compatible_formats)
            FastLayeredCopySurface(old_surface, new_surface);
        else {
            AccurateCopySurface(old_surface, new_surface);
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -11,6 +11,7 @@
 #include <vector>

 #include "common/alignment.h"
+#include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -205,6 +206,13 @@ struct SurfaceParams {
        return bd;
    }

+    u32 RowAlign(u32 mip_level) const {
+        // Yields 1 << (count of trailing zero bits in the byte size of one row of this mip).
+        const u32 row_bytes = MipWidth(mip_level) * GetBytesPerPixel(pixel_format);
+        const u32 shift = Common::CountTrailingZeroes32(row_bytes);
+        return 1U << shift;
+    }
+
    /// Creates SurfaceParams from a texture configuration
    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                          const GLShader::SamplerEntry& entry);
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -7,6 +7,7 @@
 #include "common/hash.h"
 #include "core/core.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -39,6 +40,10 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
 /// Gets the shader program code from memory for the specified address
 ProgramCode GetShaderCode(const u8* host_ptr) {
    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, { // null pointer: skip the memcpy and bail out
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code; // filled with zeros by the line above
+    });
    std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
    return program_code;
 }
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@

 namespace OpenGL::GLShader {

+namespace {
+
 using Tegra::Shader::Attribute;
 using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
 using Operation = const OperationNode&;

+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct TextureAoffi {};
+using TextureArgument = std::pair<Type, Node>;
+using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+
 enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
    static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
 constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
    static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);

-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
-
 class ShaderWriter {
 public:
    void AddExpression(std::string_view text) {
@@ -91,7 +97,7 @@ private:
 };

 /// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u32 elem) {
+std::string GetSwizzle(u32 elem) {
    ASSERT(elem <= 3);
    std::string swizzle = ".";
    swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
 }

 /// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
    switch (topology) {
    case Tegra::Shader::OutputTopology::PointList:
        return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
 }

 /// Returns true if an object has to be treated as precise
-static bool IsPrecise(Operation operand) {
+bool IsPrecise(Operation operand) {
    const auto& meta = operand.GetMeta();

    if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
    return false;
 }

-static bool IsPrecise(Node node) {
+bool IsPrecise(Node node) {
    if (const auto operation = std::get_if<OperationNode>(node)) {
        return IsPrecise(*operation);
    }
@@ -546,8 +552,7 @@ private:
            } else if (std::holds_alternative<OperationNode>(*offset)) {
                // Indirect access
                const std::string final_offset = code.GenerateTemporary();
-                code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
-                             std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
+                code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
                return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
                                   final_offset, final_offset);

@@ -723,8 +728,8 @@ private:
                                                         result_type));
    }

-    std::string GenerateTexture(Operation operation, const std::string& func,
-                                const std::vector<std::pair<Type, Node>>& extras) {
+    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+                                const std::vector<TextureIR>& extras) {
        constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -734,11 +739,11 @@ private:
        const bool has_array = meta->sampler.IsArray();
        const bool has_shadow = meta->sampler.IsShadow();

-        std::string expr = func;
-        expr += '(';
-        expr += GetSampler(meta->sampler);
-        expr += ", ";
-
+        std::string expr = "texture" + function_suffix;
+        if (!meta->aoffi.empty()) {
+            expr += "Offset";
+        }
+        expr += '(' + GetSampler(meta->sampler) + ", ";
        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
@@ -756,38 +761,76 @@ private:
        }
        expr += ')';

-        for (const auto& extra_pair : extras) {
-            const auto [type, operand] = extra_pair;
-            if (operand == nullptr) {
-                continue;
-            }
-            expr += ", ";
-
-            switch (type) {
-            case Type::Int:
-                if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
-                    // Inline the string as an immediate integer in GLSL (some extra arguments are
-                    // required to be constant)
-                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
-                } else {
-                    expr += "ftoi(" + Visit(operand) + ')';
-                }
-                break;
-            case Type::Float:
-                expr += Visit(operand);
-                break;
-            default: {
-                const auto type_int = static_cast<u32>(type);
-                UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
-                expr += '0';
-                break;
-            }
+        for (const auto& variant : extras) {
+            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+                expr += GenerateTextureArgument(*argument);
+            } else if (std::get_if<TextureAoffi>(&variant)) {
+                expr += GenerateTextureAoffi(meta->aoffi);
+            } else {
+                UNREACHABLE();
            }
        }

        return expr + ')';
    }

+    /// Emits the text for one extra argument of a texture call.
+    /// @param argument Pair of the argument's type and the node that produces its value.
+    /// @returns ", <expr>" for the argument, or an empty string when it has no operand.
+    std::string GenerateTextureArgument(TextureArgument argument) {
+        const Type kind = argument.first;
+        const Node node = argument.second;
+        if (node == nullptr) {
+            return {};
+        }
+
+        const std::string separator = ", ";
+        if (kind == Type::Float) {
+            return separator + Visit(node);
+        }
+        if (kind != Type::Int) {
+            // Unsupported argument types are reported and replaced by a literal zero.
+            const auto type_int = static_cast<u32>(kind);
+            UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+            return separator + '0';
+        }
+
+        // Integer arguments: immediates are inlined as literals because some extra arguments are
+        // required to be constant in GLSL; anything else goes through ftoi().
+        const auto immediate = std::get_if<ImmediateNode>(node);
+        if (immediate == nullptr) {
+            return separator + "ftoi(" + Visit(node) + ')';
+        }
+        return separator + std::to_string(static_cast<s32>(immediate->GetValue()));
+    }
+
+    /// Emits ", int(..)", ", ivec2(..)" or ", ivec3(..)" built from the AOFFI components.
+    /// @returns The text with its leading ", ", or an empty string if aoffi is empty.
+    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
+        if (aoffi.empty()) {
+            return {};
+        }
+
+        // One constructor name per supported component count (1 to 3).
+        constexpr std::array<const char*, 3> constructors = {"int", "ivec2", "ivec3"};
+        std::string expr = std::string(", ") + constructors.at(aoffi.size() - 1) + '(';
+
+        const char* separator = "";
+        for (const Node operand : aoffi) {
+            expr += separator;
+            separator = ", ";
+            const auto immediate = std::get_if<ImmediateNode>(operand);
+            if (immediate != nullptr) {
+                // AOFFI arguments are required to be constant by the standard, so immediates
+                // are written out as integer literals.
+                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+            } else {
+                expr += "ftoi(" + Visit(operand) + ')';
+            }
+        }
+        return expr + ')';
+    }
+
    std::string Assign(Operation operation) {
        const Node dest = operation[0];
        const Node src = operation[1];
@@ -1164,7 +1207,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+        std::string expr = GenerateTexture(
+            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1175,7 +1219,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+        std::string expr = GenerateTexture(
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1187,7 +1232,8 @@ private:
        ASSERT(meta);

        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+        return GenerateTexture(operation, "Gather",
+                               {TextureArgument{type, meta->component}, TextureAoffi{}}) +
               GetSwizzle(meta->element);
    }

@@ -1217,8 +1263,8 @@ private:
        ASSERT(meta);

        if (meta->element < 2) {
-            return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
-                   " * vec2(256))" + GetSwizzle(meta->element) + "))";
+            return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+                   GetSwizzle(meta->element) + "))";
        }
        return "0";
    }
@@ -1571,6 +1617,8 @@ private:
    ShaderWriter code;
 };

+} // Anonymous namespace
+
 std::string GetCommonDeclarations() {
    const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
    const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -10,8 +10,8 @@
 #include "common/common_types.h"
 #include "common/file_util.h"
 #include "common/logging/log.h"
-#include "common/lz4_compression.h"
 #include "common/scm_rev.h"
+#include "common/zstd_compression.h"

 #include "core/core.h"
 #include "core/hle/kernel/process.h"
@@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
                return {};
            }

-            dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length);
+            dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
            if (dump.binary.empty()) {
                return {};
            }
@@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
        return {};
    }

-    const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size);
+    const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
    if (code.empty()) {
        return {};
    }
@@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
    if (!IsUsable())
        return;

-    const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC(
-        reinterpret_cast<const u8*>(code.data()), code.size(), 9)};
+    const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+        reinterpret_cast<const u8*>(code.data()), code.size())};
    if (compressed_code.empty()) {
        LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
                  unique_identifier);
@@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());

    const std::vector<u8> compressed_binary =
-        Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9);
+        Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());

    if (compressed_binary.empty()) {
        LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,12 +2,44 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"

 namespace OpenGL::GLShader {

 using Tegra::Engines::Maxwell3D;

+ProgramManager::ProgramManager() {
+    pipeline.Create();
+}
+
+ProgramManager::~ProgramManager() = default;
+
+void ProgramManager::ApplyTo(OpenGLState& state) {
+    UpdatePipeline();
+    state.draw.shader_program = 0;
+    state.draw.program_pipeline = pipeline.handle;
+}
+
+void ProgramManager::UpdatePipeline() {
+    // Avoid updating the pipeline when values have not changed
+    if (old_state == current_state) {
+        return;
+    }
+
+    // Workaround for AMD bug
+    constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+                                     GL_FRAGMENT_SHADER_BIT};
+    glUseProgramStages(pipeline.handle, all_used_stages, 0);
+
+    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+    glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+
+    old_state = current_state;
+}
+
 void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
    const auto& regs = maxwell.regs;
    const auto& state = maxwell.state;
@@ -16,7 +48,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;

-    u32 func = static_cast<u32>(regs.alpha_test_func);
+    auto func{static_cast<u32>(regs.alpha_test_func)};
    // Normalize the gl variants of opCompare to be the same as the normal variants
    const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
    if (func >= op_gl_variant_base) {
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <cstddef>
+
 #include <glad/glad.h>

 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,56 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384,

 class ProgramManager {
 public:
-    ProgramManager() {
-        pipeline.Create();
-    }
+    explicit ProgramManager();
+    ~ProgramManager();
+
+    void ApplyTo(OpenGLState& state);

    void UseProgrammableVertexShader(GLuint program) {
-        vs = program;
+        current_state.vertex_shader = program;
    }

    void UseProgrammableGeometryShader(GLuint program) {
-        gs = program;
+        current_state.geometry_shader = program;
    }

    void UseProgrammableFragmentShader(GLuint program) {
-        fs = program;
+        current_state.fragment_shader = program;
    }

    void UseTrivialGeometryShader() {
-        gs = 0;
-    }
-
-    void ApplyTo(OpenGLState& state) {
-        UpdatePipeline();
-        state.draw.shader_program = 0;
-        state.draw.program_pipeline = pipeline.handle;
-        state.geometry_shaders.enabled = (gs != 0);
+        current_state.geometry_shader = 0;
    }

 private:
-    void UpdatePipeline() {
-        // Avoid updating the pipeline when values have no changed
-        if (old_vs == vs && old_fs == fs && old_gs == gs)
-            return;
-        // Workaround for AMD bug
-        glUseProgramStages(pipeline.handle,
-                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
-                           0);
+    struct PipelineState {
+        bool operator==(const PipelineState& rhs) const {
+            return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
+                   geometry_shader == rhs.geometry_shader;
+        }

-        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
-        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
-        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
+        bool operator!=(const PipelineState& rhs) const {
+            return !operator==(rhs);
+        }

-        // Update the old values
-        old_vs = vs;
-        old_fs = fs;
-        old_gs = gs;
-    }
+        GLuint vertex_shader{};
+        GLuint fragment_shader{};
+        GLuint geometry_shader{};
+    };
+
+    void UpdatePipeline();

    OGLPipeline pipeline;
-    GLuint vs{}, fs{}, gs{};
-    GLuint old_vs{}, old_fs{}, old_gs{};
+    PipelineState current_state;
+    PipelineState old_state;
 };

 } // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@

 namespace OpenGL {

-OpenGLState OpenGLState::cur_state;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;

+OpenGLState OpenGLState::cur_state;
 bool OpenGLState::s_rgb_used;

+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) {
+    const bool changed = current_value != new_value;
+    current_value = new_value;
+    return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) {
+    const bool changed = current_value != new_value;
+    current_value = new_value;
+    return changed;
+}
+
+void Enable(GLenum cap, bool enable) {
+    if (enable) {
+        glEnable(cap);
+    } else {
+        glDisable(cap);
+    }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) {
+    if (enable) {
+        glEnablei(cap, index);
+    } else {
+        glDisablei(cap, index);
+    }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) {
+    if (UpdateValue(current_value, new_value))
+        Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
+    if (UpdateValue(current_value, new_value))
+        Enable(cap, index, new_value);
+}
+
+} // namespace
+
 OpenGLState::OpenGLState() {
    // These all match default OpenGL values
-    geometry_shaders.enabled = false;
    framebuffer_srgb.enabled = false;
+
    multisample_control.alpha_to_coverage = false;
    multisample_control.alpha_to_one = false;
+
    cull.enabled = false;
    cull.mode = GL_BACK;
    cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {

    primitive_restart.enabled = false;
    primitive_restart.index = 0;
+
    for (auto& item : color_mask) {
        item.red_enabled = GL_TRUE;
        item.green_enabled = GL_TRUE;
        item.blue_enabled = GL_TRUE;
        item.alpha_enabled = GL_TRUE;
    }
-    stencil.test_enabled = false;
-    auto reset_stencil = [](auto& config) {
+
+    const auto ResetStencil = [](auto& config) {
        config.test_func = GL_ALWAYS;
        config.test_ref = 0;
        config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
        config.action_depth_pass = GL_KEEP;
        config.action_stencil_fail = GL_KEEP;
    };
-    reset_stencil(stencil.front);
-    reset_stencil(stencil.back);
+    stencil.test_enabled = false;
+    ResetStencil(stencil.front);
+    ResetStencil(stencil.back);
+
    for (auto& item : viewports) {
        item.x = 0;
        item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
        item.scissor.width = 0;
        item.scissor.height = 0;
    }
+
    for (auto& item : blend) {
        item.enabled = true;
        item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
        item.src_a_func = GL_ONE;
        item.dst_a_func = GL_ZERO;
    }
+
    independant_blend.enabled = false;
+
    blend_color.red = 0.0f;
    blend_color.green = 0.0f;
    blend_color.blue = 0.0f;
    blend_color.alpha = 0.0f;
+
    logic_op.enabled = false;
    logic_op.operation = GL_COPY;

@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
    clip_distance = {};

    point.size = 1;
+
    fragment_color_clamp.enabled = false;
+
    depth_clamp.far_plane = false;
    depth_clamp.near_plane = false;
+
    polygon_offset.fill_enable = false;
    polygon_offset.line_enable = false;
    polygon_offset.point_enable = false;
@@ -103,132 +159,380 @@ OpenGLState::OpenGLState() {
 }

 void OpenGLState::ApplyDefaultState() {
+    glEnable(GL_BLEND);
    glDisable(GL_FRAMEBUFFER_SRGB);
    glDisable(GL_CULL_FACE);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_PRIMITIVE_RESTART);
    glDisable(GL_STENCIL_TEST);
-    glEnable(GL_BLEND);
    glDisable(GL_COLOR_LOGIC_OP);
    glDisable(GL_SCISSOR_TEST);
 }

+void OpenGLState::ApplyFramebufferState() const {
+    if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+    }
+    if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+    }
+}
+
+void OpenGLState::ApplyVertexArrayState() const {
+    if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
+        glBindVertexArray(draw.vertex_array);
+    }
+}
+
+void OpenGLState::ApplyShaderProgram() const {
+    if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
+        glUseProgram(draw.shader_program);
+    }
+}
+
+void OpenGLState::ApplyProgramPipeline() const {
+    if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
+        glBindProgramPipeline(draw.program_pipeline);
+    }
+}
+
+void OpenGLState::ApplyClipDistances() const {
+    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+        Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+               clip_distance[i]);
+    }
+}
+
+void OpenGLState::ApplyPointSize() const {
+    if (UpdateValue(cur_state.point.size, point.size)) {
+        glPointSize(point.size);
+    }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const {
+    if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
+        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+    }
+}
+
+void OpenGLState::ApplyMultisample() const {
+    Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+           multisample_control.alpha_to_coverage);
+    Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+           multisample_control.alpha_to_one);
+}
+
+void OpenGLState::ApplyDepthClamp() const {
+    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+        return;
+    }
+    cur_state.depth_clamp = depth_clamp;
+
+    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+                         "Unimplemented Depth Clamp Separation!");
+
+    Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
+}
+
 void OpenGLState::ApplySRgb() const {
-    if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
-        if (framebuffer_srgb.enabled) {
-            // Track if sRGB is used
-            s_rgb_used = true;
-            glEnable(GL_FRAMEBUFFER_SRGB);
-        } else {
-            glDisable(GL_FRAMEBUFFER_SRGB);
-        }
+    if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+        return;
+    cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+    if (framebuffer_srgb.enabled) {
+        // Track if sRGB is used
+        s_rgb_used = true;
+        glEnable(GL_FRAMEBUFFER_SRGB);
+    } else {
+        glDisable(GL_FRAMEBUFFER_SRGB);
    }
 }

 void OpenGLState::ApplyCulling() const {
-    if (cull.enabled != cur_state.cull.enabled) {
-        if (cull.enabled) {
-            glEnable(GL_CULL_FACE);
-        } else {
-            glDisable(GL_CULL_FACE);
-        }
-    }
+    Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);

-    if (cull.mode != cur_state.cull.mode) {
+    if (UpdateValue(cur_state.cull.mode, cull.mode)) {
        glCullFace(cull.mode);
    }

-    if (cull.front_face != cur_state.cull.front_face) {
+    if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
        glFrontFace(cull.front_face);
    }
 }

 void OpenGLState::ApplyColorMask() const {
-    if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            const auto& updated = color_mask[i];
-            const auto& current = cur_state.color_mask[i];
-            if (updated.red_enabled != current.red_enabled ||
-                updated.green_enabled != current.green_enabled ||
-                updated.blue_enabled != current.blue_enabled ||
-                updated.alpha_enabled != current.alpha_enabled) {
-                glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
-                             updated.blue_enabled, updated.alpha_enabled);
-            }
-        }
-    } else {
-        const auto& updated = color_mask[0];
-        const auto& current = cur_state.color_mask[0];
+    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+        const auto& updated = color_mask[i];
+        auto& current = cur_state.color_mask[i];
        if (updated.red_enabled != current.red_enabled ||
            updated.green_enabled != current.green_enabled ||
            updated.blue_enabled != current.blue_enabled ||
            updated.alpha_enabled != current.alpha_enabled) {
-            glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
-                        updated.alpha_enabled);
+            current = updated;
+            glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+                         updated.blue_enabled, updated.alpha_enabled);
        }
    }
 }

 void OpenGLState::ApplyDepth() const {
-    if (depth.test_enabled != cur_state.depth.test_enabled) {
-        if (depth.test_enabled) {
-            glEnable(GL_DEPTH_TEST);
-        } else {
-            glDisable(GL_DEPTH_TEST);
-        }
-    }
+    Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);

-    if (depth.test_func != cur_state.depth.test_func) {
+    if (cur_state.depth.test_func != depth.test_func) {
+        cur_state.depth.test_func = depth.test_func;
        glDepthFunc(depth.test_func);
    }

-    if (depth.write_mask != cur_state.depth.write_mask) {
+    if (cur_state.depth.write_mask != depth.write_mask) {
+        cur_state.depth.write_mask = depth.write_mask;
        glDepthMask(depth.write_mask);
    }
 }

 void OpenGLState::ApplyPrimitiveRestart() const {
-    if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
-        if (primitive_restart.enabled) {
-            glEnable(GL_PRIMITIVE_RESTART);
-        } else {
-            glDisable(GL_PRIMITIVE_RESTART);
-        }
-    }
+    Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);

-    if (primitive_restart.index != cur_state.primitive_restart.index) {
+    if (cur_state.primitive_restart.index != primitive_restart.index) {
+        cur_state.primitive_restart.index = primitive_restart.index;
        glPrimitiveRestartIndex(primitive_restart.index);
    }
 }

 void OpenGLState::ApplyStencilTest() const {
-    if (stencil.test_enabled != cur_state.stencil.test_enabled) {
-        if (stencil.test_enabled) {
-            glEnable(GL_STENCIL_TEST);
-        } else {
-            glDisable(GL_STENCIL_TEST);
-        }
-    }
+    Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);

-    const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
-        if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
-            config.test_mask != prev_config.test_mask) {
+    const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+        if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+            current.test_mask != config.test_mask) {
+            current.test_func = config.test_func;
+            current.test_ref = config.test_ref;
+            current.test_mask = config.test_mask;
            glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
        }
-        if (config.action_depth_fail != prev_config.action_depth_fail ||
-            config.action_depth_pass != prev_config.action_depth_pass ||
-            config.action_stencil_fail != prev_config.action_stencil_fail) {
+        if (current.action_depth_fail != config.action_depth_fail ||
+            current.action_depth_pass != config.action_depth_pass ||
+            current.action_stencil_fail != config.action_stencil_fail) {
+            current.action_depth_fail = config.action_depth_fail;
+            current.action_depth_pass = config.action_depth_pass;
+            current.action_stencil_fail = config.action_stencil_fail;
            glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
                                config.action_depth_pass);
        }
-        if (config.write_mask != prev_config.write_mask) {
+        if (current.write_mask != config.write_mask) {
+            current.write_mask = config.write_mask;
            glStencilMaskSeparate(face, config.write_mask);
        }
    };
    ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
    ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
 }
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
+
+void OpenGLState::ApplyViewport() const {
+    for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+        const auto& updated = viewports[i];
+        auto& current = cur_state.viewports[i];
+
+        if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+            current.height != updated.height) {
+            current.x = updated.x;
+            current.y = updated.y;
+            current.width = updated.width;
+            current.height = updated.height;
+            glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+                               static_cast<GLfloat>(updated.width),
+                               static_cast<GLfloat>(updated.height));
+        }
+        if (current.depth_range_near != updated.depth_range_near ||
+            current.depth_range_far != updated.depth_range_far) {
+            current.depth_range_near = updated.depth_range_near;
+            current.depth_range_far = updated.depth_range_far;
+            glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
+        }
+
+        Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+        if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+            current.scissor.width != updated.scissor.width ||
+            current.scissor.height != updated.scissor.height) {
+            current.scissor.x = updated.scissor.x;
+            current.scissor.y = updated.scissor.y;
+            current.scissor.width = updated.scissor.width;
+            current.scissor.height = updated.scissor.height;
+            glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+                             updated.scissor.height);
+        }
+    }
+}
+
+void OpenGLState::ApplyGlobalBlending() const {
+    const Blend& updated = blend[0];
+    Blend& current = cur_state.blend[0];
+
+    Enable(GL_BLEND, current.enabled, updated.enabled);
+
+    if (current.src_rgb_func != updated.src_rgb_func ||
+        current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+        current.dst_a_func != updated.dst_a_func) {
+        current.src_rgb_func = updated.src_rgb_func;
+        current.dst_rgb_func = updated.dst_rgb_func;
+        current.src_a_func = updated.src_a_func;
+        current.dst_a_func = updated.dst_a_func;
+        glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+                            updated.dst_a_func);
+    }
+
+    if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+        current.rgb_equation = updated.rgb_equation;
+        current.a_equation = updated.a_equation;
+        glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
+    }
+}
+
+void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
+    const Blend& updated = blend[target];
+    Blend& current = cur_state.blend[target];
+
+    if (current.enabled != updated.enabled || force) {
+        current.enabled = updated.enabled;
+        Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
+    }
+
+    if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+                           current.dst_a_func),
+                  std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+                           updated.dst_a_func))) {
+        glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
+                             updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
+    }
+
+    if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+                  std::tie(updated.rgb_equation, updated.a_equation))) {
+        glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
+                                 updated.a_equation);
+    }
+}
+
+void OpenGLState::ApplyBlending() const {
+    if (independant_blend.enabled) {
+        const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+        for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+            ApplyTargetBlending(target, force);
+        }
+    } else {
+        ApplyGlobalBlending();
+    }
+    cur_state.independant_blend.enabled = independant_blend.enabled;
+
+    if (UpdateTie(
+            std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+                     cur_state.blend_color.blue, cur_state.blend_color.alpha),
+            std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
+        glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
+    }
+}
+
+void OpenGLState::ApplyLogicOp() const {
+    Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
+
+    if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
+        glLogicOp(logic_op.operation);
+    }
+}
+
+void OpenGLState::ApplyPolygonOffset() const {
+    Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+           polygon_offset.fill_enable);
+    Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+           polygon_offset.line_enable);
+    Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+           polygon_offset.point_enable);
+
+    if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+                           cur_state.polygon_offset.clamp),
+                  std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
+        if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
+            glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
+        } else {
+            UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
+                                 "Unimplemented Depth polygon offset clamp.");
+            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
+        }
+    }
+}
+
+void OpenGLState::ApplyTextures() const {
+    // Rebinds the smallest contiguous span of units whose texture differs from cur_state.
+    bool dirty{};
+    std::size_t lowest{};
+    std::size_t highest{};
+    std::array<GLuint, Maxwell::NumTextureSamplers> names;
+
+    for (std::size_t i = 0; i < std::size(texture_units); ++i) {
+        // Every slot is recorded, since unchanged units inside the span are rebound as well.
+        names[i] = texture_units[i].texture;
+        if (!UpdateValue(cur_state.texture_units[i].texture, texture_units[i].texture))
+            continue;
+        if (!dirty) {
+            lowest = i;
+            dirty = true;
+        }
+        highest = i;
+    }
+    if (!dirty) {
+        return;
+    }
+    glBindTextures(static_cast<GLuint>(lowest), static_cast<GLsizei>(highest - lowest + 1),
+                   names.data() + lowest);
+}
+
+void OpenGLState::ApplySamplers() const {
+    // Rebinds the smallest contiguous span of units whose sampler differs from cur_state.
+    bool has_delta{};
+    std::size_t first{};
+    std::size_t last{};
+    std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+    for (std::size_t i = 0; i < std::size(samplers); ++i) {
+        // Fill every slot: glBindSamplers reads all of [first, last], unchanged units included.
+        samplers[i] = texture_units[i].sampler;
+        if (!UpdateValue(cur_state.texture_units[i].sampler, texture_units[i].sampler))
+            continue;
+        if (!has_delta) {
+            first = i;
+            has_delta = true;
+        }
+        last = i;
+    }
+    if (has_delta) {
+        glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                       samplers.data() + first);
+    }
+}
+
+void OpenGLState::Apply() const {
+    ApplyFramebufferState();
+    ApplyVertexArrayState();
+    ApplyShaderProgram();
+    ApplyProgramPipeline();
+    ApplyClipDistances();
+    ApplyPointSize();
+    ApplyFragmentColorClamp();
+    ApplyMultisample();
+    ApplyDepthClamp();
+    ApplyColorMask();
+    ApplyViewport();
+    ApplyStencilTest();
+    ApplySRgb();
+    ApplyCulling();
+    ApplyDepth();
+    ApplyPrimitiveRestart();
+    ApplyBlending();
+    ApplyLogicOp();
+    ApplyTextures();
+    ApplySamplers();
+    ApplyPolygonOffset();
+}
+
 void OpenGLState::EmulateViewportWithScissor() {
    auto& current = viewports[0];
    if (current.scissor.enabled) {
@@ -251,332 +555,6 @@ void OpenGLState::EmulateViewportWithScissor() {
    }
 }

-void OpenGLState::ApplyViewport() const {
-    if (geometry_shaders.enabled) {
-        for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
-             i++) {
-            const auto& current = cur_state.viewports[i];
-            const auto& updated = viewports[i];
-            if (updated.x != current.x || updated.y != current.y ||
-                updated.width != current.width || updated.height != current.height) {
-                glViewportIndexedf(
-                    i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
-                    static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
-            }
-            if (updated.depth_range_near != current.depth_range_near ||
-                updated.depth_range_far != current.depth_range_far) {
-                glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
-            }
-
-            if (updated.scissor.enabled != current.scissor.enabled) {
-                if (updated.scissor.enabled) {
-                    glEnablei(GL_SCISSOR_TEST, i);
-                } else {
-                    glDisablei(GL_SCISSOR_TEST, i);
-                }
-            }
-
-            if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
-                updated.scissor.width != current.scissor.width ||
-                updated.scissor.height != current.scissor.height) {
-                glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                                 updated.scissor.height);
-            }
-        }
-    } else {
-        const auto& current = cur_state.viewports[0];
-        const auto& updated = viewports[0];
-        if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
-            updated.height != current.height) {
-            glViewport(updated.x, updated.y, updated.width, updated.height);
-        }
-
-        if (updated.depth_range_near != current.depth_range_near ||
-            updated.depth_range_far != current.depth_range_far) {
-            glDepthRange(updated.depth_range_near, updated.depth_range_far);
-        }
-
-        if (updated.scissor.enabled != current.scissor.enabled) {
-            if (updated.scissor.enabled) {
-                glEnable(GL_SCISSOR_TEST);
-            } else {
-                glDisable(GL_SCISSOR_TEST);
-            }
-        }
-
-        if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
-            updated.scissor.width != current.scissor.width ||
-            updated.scissor.height != current.scissor.height) {
-            glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
-                      updated.scissor.height);
-        }
-    }
-}
-
-void OpenGLState::ApplyGlobalBlending() const {
-    const Blend& current = cur_state.blend[0];
-    const Blend& updated = blend[0];
-    if (updated.enabled != current.enabled) {
-        if (updated.enabled) {
-            glEnable(GL_BLEND);
-        } else {
-            glDisable(GL_BLEND);
-        }
-    }
-    if (!updated.enabled) {
-        return;
-    }
-    if (updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
-        glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
-                            updated.dst_a_func);
-    }
-
-    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
-        glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
-    }
-}
-
-void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
-    const Blend& updated = blend[target];
-    const Blend& current = cur_state.blend[target];
-    if (updated.enabled != current.enabled || force) {
-        if (updated.enabled) {
-            glEnablei(GL_BLEND, static_cast<GLuint>(target));
-        } else {
-            glDisablei(GL_BLEND, static_cast<GLuint>(target));
-        }
-    }
-
-    if (updated.src_rgb_func != current.src_rgb_func ||
-        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
-        updated.dst_a_func != current.dst_a_func) {
-        glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
-                             updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
-    }
-
-    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
-        glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
-                                 updated.a_equation);
-    }
-}
-
-void OpenGLState::ApplyBlending() const {
-    if (independant_blend.enabled) {
-        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            ApplyTargetBlending(i,
-                                independant_blend.enabled != cur_state.independant_blend.enabled);
-        }
-    } else {
-        ApplyGlobalBlending();
-    }
-    if (blend_color.red != cur_state.blend_color.red ||
-        blend_color.green != cur_state.blend_color.green ||
-        blend_color.blue != cur_state.blend_color.blue ||
-        blend_color.alpha != cur_state.blend_color.alpha) {
-        glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
-    }
-}
-
-void OpenGLState::ApplyLogicOp() const {
-    if (logic_op.enabled != cur_state.logic_op.enabled) {
-        if (logic_op.enabled) {
-            glEnable(GL_COLOR_LOGIC_OP);
-        } else {
-            glDisable(GL_COLOR_LOGIC_OP);
-        }
-    }
-
-    if (logic_op.operation != cur_state.logic_op.operation) {
-        glLogicOp(logic_op.operation);
-    }
-}
-
-void OpenGLState::ApplyPolygonOffset() const {
-    const bool fill_enable_changed =
-        polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
-    const bool line_enable_changed =
-        polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
-    const bool point_enable_changed =
-        polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
-    const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
-    const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
-    const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
-    if (fill_enable_changed) {
-        if (polygon_offset.fill_enable) {
-            glEnable(GL_POLYGON_OFFSET_FILL);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_FILL);
-        }
-    }
-
-    if (line_enable_changed) {
-        if (polygon_offset.line_enable) {
-            glEnable(GL_POLYGON_OFFSET_LINE);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_LINE);
-        }
-    }
-
-    if (point_enable_changed) {
-        if (polygon_offset.point_enable) {
-            glEnable(GL_POLYGON_OFFSET_POINT);
-        } else {
-            glDisable(GL_POLYGON_OFFSET_POINT);
-        }
-    }
-
-    if (factor_changed || units_changed || clamp_changed) {
-        if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
-            glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
-        } else {
-            glPolygonOffset(polygon_offset.factor, polygon_offset.units);
-            UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
-                                 "Unimplemented Depth polygon offset clamp.");
-        }
-    }
-}
-
-void OpenGLState::ApplyTextures() const {
-    bool has_delta{};
-    std::size_t first{};
-    std::size_t last{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
-
-    for (std::size_t i = 0; i < std::size(texture_units); ++i) {
-        const auto& texture_unit = texture_units[i];
-        const auto& cur_state_texture_unit = cur_state.texture_units[i];
-        textures[i] = texture_unit.texture;
-
-        if (textures[i] != cur_state_texture_unit.texture) {
-            if (!has_delta) {
-                first = i;
-                has_delta = true;
-            }
-            last = i;
-        }
-    }
-
-    if (has_delta) {
-        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       textures.data() + first);
-    }
-}
-
-void OpenGLState::ApplySamplers() const {
-    bool has_delta{};
-    std::size_t first{};
-    std::size_t last{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
-    for (std::size_t i = 0; i < std::size(samplers); ++i) {
-        samplers[i] = texture_units[i].sampler;
-        if (samplers[i] != cur_state.texture_units[i].sampler) {
-            if (!has_delta) {
-                first = i;
-                has_delta = true;
-            }
-            last = i;
-        }
-    }
-    if (has_delta) {
-        glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       samplers.data() + first);
-    }
-}
-
-void OpenGLState::ApplyFramebufferState() const {
-    if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
-        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
-    }
-    if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
-    }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
-    if (draw.vertex_array != cur_state.draw.vertex_array) {
-        glBindVertexArray(draw.vertex_array);
-    }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
-    if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
-        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
-        return;
-    }
-    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
-                         "Unimplemented Depth Clamp Separation!");
-
-    if (depth_clamp.far_plane || depth_clamp.near_plane) {
-        glEnable(GL_DEPTH_CLAMP);
-    } else {
-        glDisable(GL_DEPTH_CLAMP);
-    }
-}
-
-void OpenGLState::Apply() const {
-    ApplyFramebufferState();
-    ApplyVertexArrayState();
-
-    // Shader program
-    if (draw.shader_program != cur_state.draw.shader_program) {
-        glUseProgram(draw.shader_program);
-    }
-
-    // Program pipeline
-    if (draw.program_pipeline != cur_state.draw.program_pipeline) {
-        glBindProgramPipeline(draw.program_pipeline);
-    }
-    // Clip distance
-    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
-        if (clip_distance[i] != cur_state.clip_distance[i]) {
-            if (clip_distance[i]) {
-                glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            } else {
-                glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
-            }
-        }
-    }
-    // Point
-    if (point.size != cur_state.point.size) {
-        glPointSize(point.size);
-    }
-    if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
-        glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
-                     fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
-    }
-    if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
-        if (multisample_control.alpha_to_coverage) {
-            glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
-        }
-    }
-    if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
-        if (multisample_control.alpha_to_one) {
-            glEnable(GL_SAMPLE_ALPHA_TO_ONE);
-        } else {
-            glDisable(GL_SAMPLE_ALPHA_TO_ONE);
-        }
-    }
-    ApplyDepthClamp();
-    ApplyColorMask();
-    ApplyViewport();
-    ApplyStencilTest();
-    ApplySRgb();
-    ApplyCulling();
-    ApplyDepth();
-    ApplyPrimitiveRestart();
-    ApplyBlending();
-    ApplyLogicOp();
-    ApplyTextures();
-    ApplySamplers();
-    ApplyPolygonOffset();
-    cur_state = *this;
-}
-
 OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
    for (auto& unit : texture_units) {
        if (unit.texture == handle) {
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -53,10 +53,6 @@ public:
        bool near_plane;
    } depth_clamp; // GL_DEPTH_CLAMP

-    struct {
-        bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
-    } geometry_shaders;
-
    struct {
        bool enabled;      // GL_CULL_FACE
        GLenum mode;       // GL_CULL_FACE_MODE
@@ -184,34 +180,26 @@ public:
    static OpenGLState GetCurState() {
        return cur_state;
    }
+
    static bool GetsRGBUsed() {
        return s_rgb_used;
    }
+
    static void ClearsRGBUsed() {
        s_rgb_used = false;
    }
+
    /// Apply this state as the current OpenGL state
    void Apply() const;
-    /// Apply only the state affecting the framebuffer
-    void ApplyFramebufferState() const;
-    /// Apply only the state affecting the vertex array
-    void ApplyVertexArrayState() const;
-    /// Set the initial OpenGL state
-    static void ApplyDefaultState();
-    /// Resets any references to the given resource
-    OpenGLState& UnbindTexture(GLuint handle);
-    OpenGLState& ResetSampler(GLuint handle);
-    OpenGLState& ResetProgram(GLuint handle);
-    OpenGLState& ResetPipeline(GLuint handle);
-    OpenGLState& ResetVertexArray(GLuint handle);
-    OpenGLState& ResetFramebuffer(GLuint handle);
-    void EmulateViewportWithScissor();

-private:
-    static OpenGLState cur_state;
-    // Workaround for sRGB problems caused by
-    // QT not supporting srgb output
-    static bool s_rgb_used;
+    void ApplyFramebufferState() const;
+    void ApplyVertexArrayState() const;
+    void ApplyShaderProgram() const;
+    void ApplyProgramPipeline() const;
+    void ApplyClipDistances() const;
+    void ApplyPointSize() const;
+    void ApplyFragmentColorClamp() const;
+    void ApplyMultisample() const;
    void ApplySRgb() const;
    void ApplyCulling() const;
    void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
    void ApplySamplers() const;
    void ApplyDepthClamp() const;
    void ApplyPolygonOffset() const;
+
+    /// Set the initial OpenGL state
+    static void ApplyDefaultState();
+
+    /// Resets any references to the given resource
+    OpenGLState& UnbindTexture(GLuint handle);
+    OpenGLState& ResetSampler(GLuint handle);
+    OpenGLState& ResetProgram(GLuint handle);
+    OpenGLState& ResetPipeline(GLuint handle);
+    OpenGLState& ResetVertexArray(GLuint handle);
+    OpenGLState& ResetFramebuffer(GLuint handle);
+
+    /// Viewport does not affect glClearBuffer, so emulate the viewport using the scissor test
+    void EmulateViewportWithScissor();
+
+private:
+    static OpenGLState cur_state;
+
+    // Workaround for sRGB problems caused by Qt not supporting sRGB output
+    static bool s_rgb_used;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,11 +5,39 @@
 #include <string>
 #include <fmt/format.h>
 #include <glad/glad.h>
+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/utils.h"

 namespace OpenGL {

+BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
+
+BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
+
+void BindBuffersRangePushBuffer::Setup(GLuint first_) {
+    first = first_;
+    buffers.clear();
+    offsets.clear();
+    sizes.clear();
+}
+
+void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) {
+    buffers.push_back(buffer);
+    offsets.push_back(offset);
+    sizes.push_back(size);
+}
+
+void BindBuffersRangePushBuffer::Bind() const {
+    const std::size_t count{buffers.size()};
+    DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
+    if (count == 0) {
+        return;
+    }
+    glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
+                       sizes.data());
+}
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
    if (!GLAD_GL_KHR_debug) {
        return; // We don't need to throw an error as this is just for debugging
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -5,11 +5,31 @@
 #pragma once

 #include <string>
+#include <vector>
 #include <glad/glad.h>
 #include "common/common_types.h"

 namespace OpenGL {

+class BindBuffersRangePushBuffer {
+public:
+    explicit BindBuffersRangePushBuffer(GLenum target);
+    ~BindBuffersRangePushBuffer();
+    /// Sets the first binding index and discards every previously pushed range.
+    void Setup(GLuint first_);
+    /// Queues one buffer range (handle, offset, size) for the next Bind() call.
+    void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+    /// Binds all queued ranges with one glBindBuffersRange call; does nothing when empty.
+    void Bind() const;
+
+private:
+    GLenum target;
+    GLuint first{};
+    std::vector<GLuint> buffers;
+    std::vector<GLintptr> offsets;
+    std::vector<GLsizeiptr> sizes;
+};
+
 void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");

 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "core/memory.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include <sirit/sirit.h>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+}
+
+namespace Vulkan::VKShader {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using SamplerEntry = VideoCommon::Shader::Sampler;
+
+constexpr u32 DESCRIPTOR_SET = 0;
+
+class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
+public:
+    explicit constexpr ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, u32 index)
+        : VideoCommon::Shader::ConstBuffer{entry}, index{index} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+private:
+    u32 index{};
+};
+
+class GlobalBufferEntry {
+public:
+    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+
+    u32 GetCbufIndex() const {
+        return cbuf_index;
+    }
+
+    u32 GetCbufOffset() const {
+        return cbuf_offset;
+    }
+
+private:
+    u32 cbuf_index{};
+    u32 cbuf_offset{};
+};
+
+struct ShaderEntries {
+    u32 const_buffers_base_binding{};
+    u32 global_buffers_base_binding{};
+    u32 samplers_base_binding{};
+    std::vector<ConstBufferEntry> const_buffers;
+    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<SamplerEntry> samplers;
+    std::set<u32> attributes;
+    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+    std::size_t shader_length{};
+    Sirit::Id entry_function{};
+    std::vector<Sirit::Id> interfaces;
+};
+
+using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+
+DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+
+} // namespace Vulkan::VKShader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
 #include <fmt/format.h>

 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {

    switch (opcode->get().GetId()) {
    case OpCode::Id::TEX: {
-        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-
        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
        }

        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
+        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        WriteTexInstructionFloat(
-            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
+            bb, instr,
+            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
        break;
    }
    case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    }
    case OpCode::Id::TLD4: {
        ASSERT(instr.tld4.array == 0);
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
-        WriteTexInstructionFloat(bb, instr,
-                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
+        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        WriteTexInstructionFloat(
+            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
        break;
    }
    case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
-                MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
                SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {

        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporal(bb, element, value);
        }
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,

 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
-                               Node array, Node depth_compare, u32 bias_offset) {
+                               Node array, Node depth_compare, u32 bias_offset,
+                               std::vector<Node> aoffi) {
    const bool is_array = array;
    const bool is_shadow = depth_compare;

@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
-        MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
+        MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 }

 Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
-                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-    const bool lod_bias_enabled =
-        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                           bool is_aoffi) {
+    const bool lod_bias_enabled{
+        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+
+    u64 parameter_register = instr.gpr20.Value();
+    if (lod_bias_enabled) {
+        ++parameter_register;
+    }

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,

    const Node array = is_array ? GetRegister(array_register) : nullptr;

+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
+    }
+
    Node dc{};
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
-        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        dc = GetRegister(depth_register);
+        dc = GetRegister(parameter_register++);
    }

-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
 }

 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
        dc = GetRegister(depth_register);
    }

-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
 }

 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array) {
+                            bool is_array, bool is_aoffi) {
    const std::size_t coord_count = GetCoordCount(texture_type);
    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
-    for (size_t i = 0; i < coord_count; ++i)
+    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
+    }
+
+    u64 parameter_register = instr.gpr20.Value();
+    std::vector<Node> aoffi;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+    }
+
+    Node dc{};
+    if (depth_compare) {
+        dc = GetRegister(parameter_register++);
+    }

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    return {coord_count, total_coord_count};
 }

-} // namespace VideoCommon::Shader
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+                                                bool is_tld4) {
+    const auto [coord_offsets, size, wrap_value,
+                diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
+        if (is_tld4) {
+            return {{0, 8, 16}, 6, 32, 64};
+        } else {
+            return {{0, 4, 8}, 4, 8, 16};
+        }
+    }();
+    const u32 mask = (1U << size) - 1;
+    // Each offset is a signed `size`-bit field of aoffi_reg at bit coord_offsets[coord].
+    std::vector<Node> aoffi;
+    aoffi.reserve(coord_count);
+    // Presumably resolves the register to a constant; an empty result means runtime extraction.
+    const auto aoffi_immediate{
+        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+    if (!aoffi_immediate) {
+        // Variable access, not supported on AMD.
+        LOG_WARNING(HW_GPU,
+                    "AOFFI constant folding failed, some hardware might have graphical issues");
+        for (std::size_t coord = 0; coord < coord_count; ++coord) {
+            const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+            const Node condition =
+                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
+            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
+            aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
+        }
+        return aoffi;
+    }
+    // Constant path: unpack each field here; values >= wrap_value wrap around to negatives.
+    for (std::size_t coord = 0; coord < coord_count; ++coord) {
+        s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+        if (value >= wrap_value) {
+            value -= diff_value;
+        }
+        aoffi.push_back(Immediate(value));
+    }
+    return aoffi;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

-    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
+    auto [is_merge, is_psl, is_high_b, mode, op_b,
+          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56,
+                    instr.xmad.product_shift_left_second,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
-            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+                    instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
-            return {false, GetRegister(instr.gpr39),
+            return {false,
+                    false,
+                    instr.xmad.high_b,
+                    instr.xmad.mode_cbf,
+                    GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::XMAD_IMM:
-            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+            return {instr.xmad.merge_37,
+                    instr.xmad.product_shift_left,
+                    false,
+                    instr.xmad.mode,
+                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
-        return {false, Immediate(0), Immediate(0)};
+        return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
    }();

    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    const Node original_b = op_b;
-    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
+    op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
-    if (instr.xmad.product_shift_left) {
+    if (is_psl) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }
+    SetTemporal(bb, 0, product);
+    product = GetTemporal(0);

    const Node original_c = op_c;
+    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
    op_c = [&]() {
-        switch (instr.xmad.mode) {
+        switch (set_mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
@@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
        }
    }();

+    SetTemporal(bb, 1, op_c);
+    op_c = GetTemporal(1);
+
    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
+    SetTemporal(bb, 2, sum);
+    sum = GetTemporal(2);
    if (is_merge) {
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
@@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <cstring>
 #include <map>
+#include <optional>
 #include <set>
 #include <string>
 #include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
    const Sampler& sampler;
    Node array{};
    Node depth_compare{};
+    std::vector<Node> aoffi;
    Node bias{};
    Node lod{};
    Node component{};
@@ -741,14 +743,14 @@ private:

    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                     bool is_array);
+                     bool is_array, bool is_aoffi);

    Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                      bool is_array);

    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array);
+                      bool depth_compare, bool is_array, bool is_aoffi);

    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool is_array);
@@ -757,9 +759,11 @@ private:
        Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);

+    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+
    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
-                         Node array, Node depth_compare, u32 bias_offset);
+                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);

    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                         u64 byte_height);
@@ -773,6 +777,8 @@ private:

    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);

+    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+
    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);

    template <typename... T>
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
 #include <utility>
 #include <variant>

+#include "common/common_types.h"
 #include "video_core/shader/shader_ir.h"

 namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                   OperationCode operation_code) {
    for (; cursor >= 0; --cursor) {
-        const Node node = code[cursor];
+        const Node node = code.at(cursor);
        if (const auto operation = std::get_if<OperationNode>(node)) {
            if (operation->GetCode() == operation_code)
                return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
    return nullptr;
 }

+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+    // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
+    // register that it uses as operand. std::get throws if `tracked` does not hold a GprNode.
+    const auto [found, found_cursor] =
+        TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
+    if (!found) { // TrackRegister returned no node for this register
+        return {};
+    }
+    if (const auto immediate = std::get_if<ImmediateNode>(found)) { // Only immediates fold
+        return immediate->GetValue();
+    }
+    return {}; // The tracked node is not an immediate
+}
+
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                             s64 cursor) {
    for (; cursor >= 0; --cursor) {
--- a/Show More
+++ b/Show More