Reorder variables to comply with the Azure build pipeline

Add game version to title bar
2020-06-22 15:56:41 +02:00 · 2020-06-08 23:58:04 +02:00
128 changed files with 1786 additions and 4711 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,9 +13,6 @@
 [submodule "soundtouch"]
    path = externals/soundtouch
    url = https://github.com/citra-emu/ext-soundtouch.git
-[submodule "libressl"]
-    path = externals/libressl
-    url = https://github.com/citra-emu/ext-libressl-portable.git
 [submodule "discord-rpc"]
    path = externals/discord-rpc
    url = https://github.com/discordapp/discord-rpc.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,6 +152,7 @@ macro(yuzu_find_packages)
        "Boost             1.71        boost/1.72.0"
        "Catch2            2.11        catch2/2.11.0"
        "fmt               6.2         fmt/6.2.0"
+        "OpenSSL           1.1         openssl/1.1.1f"
    # can't use until https://github.com/bincrafters/community/issues/1173
        #"libzip            1.5         libzip/1.5.2@bincrafters/stable"
        "lz4               1.8         lz4/1.9.2"
@@ -311,6 +312,15 @@ elseif (TARGET Boost::boost)
    add_library(boost ALIAS Boost::boost)
 endif()

+if (NOT TARGET OpenSSL::SSL)
+    set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
+    add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
+endif()
+if (NOT TARGET OpenSSL::Crypto)
+    set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
+    add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
+endif()
+
 if (TARGET sdl2::sdl2)
    # imported from the conan generated sdl2Config.cmake
    set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,8 +51,6 @@ endif()
 # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
 set(VIDEO_CORE "${SRC_DIR}/src/video_core")
 set(HASH_FILES
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -673,6 +673,10 @@ QTabWidget::pane {
    border-bottom-left-radius: 2px;
 }

+QTabWidget::tab-bar {
+    overflow: visible;
+}
+
 QTabBar {
    qproperty-drawBase: 0;
    border-radius: 3px;
--- a/dist/yuzu.manifest
+++ b/dist/yuzu.manifest
@@ -1,58 +1,24 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<assembly manifestVersion="1.0"
-    xmlns="urn:schemas-microsoft-com:asm.v1"
-    xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
-  <asmv3:application>
-    <asmv3:windowsSettings>
-      <!-- Windows 7/8/8.1/10 -->
-      <dpiAware
-        xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">
-        true/pm
-      </dpiAware>
-      <!-- Windows 10, version 1607 or later -->
-      <dpiAwareness
-        xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
-        PerMonitorV2
-      </dpiAwareness>
-      <!-- Windows 10, version 1703 or later -->
-      <gdiScaling
-          xmlns="http://schemas.microsoft.com/SMI/2017/WindowsSettings">
-        true
-      </gdiScaling>
-      <ws2:longPathAware
-          xmlns:ws3="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
-        true
-      </ws2:longPathAware>
-    </asmv3:windowsSettings>
-  </asmv3:application>
-  <compatibility
-      xmlns="urn:schemas-microsoft-com:compatibility.v1">
-    <application>
-      <!-- Windows 10 -->
-      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
-      <!-- Windows 8.1 -->
-      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
-      <!-- Windows 8 -->
-      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
-      <!-- Windows 7 -->
-      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
-    </application>
-  </compatibility>
-  <trustInfo
-      xmlns="urn:schemas-microsoft-com:asm.v3">
-    <security>
-      <requestedPrivileges>
-        <!--
-          UAC settings:
-          - app should run at same integrity level as calling process
-          - app does not need to manipulate windows belonging to
-            higher-integrity-level processes
-          -->
-        <requestedExecutionLevel
-            level="asInvoker"
-            uiAccess="false"
-        />
-      </requestedPrivileges>
-    </security>
-  </trustInfo>
-</assembly>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+ <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+  <security>
+   <requestedPrivileges>
+    <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
+   </requestedPrivileges>
+  </security>
+ </trustInfo>
+ <application xmlns="urn:schemas-microsoft-com:asm.v3">
+  <windowsSettings>
+   <dpiAware xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">True/PM</dpiAware>
+   <longPathAware xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">true</longPathAware>
+  </windowsSettings>
+ </application>
+ <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
+  <application>
+   <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
+   <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
+   <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
+   <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
+  </application>
+ </compatibility>
+</assembly>
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -4,13 +4,6 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
 include(DownloadExternals)

-# xbyak
-if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
-    add_library(xbyak INTERFACE)
-    target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
-    target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
-endif()
-
 # Catch
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)
@@ -73,15 +66,6 @@ if (NOT LIBZIP_FOUND)
 endif()

 if (ENABLE_WEB_SERVICE)
-    # LibreSSL
-    set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
-    add_subdirectory(libressl EXCLUDE_FROM_ALL)
-    target_include_directories(ssl INTERFACE ./libressl/include)
-    target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
-    get_directory_property(OPENSSL_LIBRARIES
-        DIRECTORY libressl
-        DEFINITION OPENSSL_LIBS)
-
    # lurlparser
    add_subdirectory(lurlparser EXCLUDE_FROM_ALL)

@@ -89,5 +73,13 @@ if (ENABLE_WEB_SERVICE)
    add_library(httplib INTERFACE)
    target_include_directories(httplib INTERFACE ./httplib)
    target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
-    target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
+    target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
+endif()
+
+if (NOT TARGET xbyak)
+    if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
+        add_library(xbyak INTERFACE)
+        target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+        target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
+    endif()
 endif()
--- a/externals/libressl
+++ b/externals/libressl
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -180,12 +180,11 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<

    // Copy output header
    UpdateDataHeader response_data{worker_params};
-    if (behavior_info.IsElapsedFrameCountSupported()) {
-        response_data.render_info = sizeof(RendererInfo);
-        response_data.total_size += sizeof(RendererInfo);
-    }
-
    std::vector<u8> output_params(response_data.total_size);
+    if (behavior_info.IsElapsedFrameCountSupported()) {
+        response_data.frame_count = 0x10;
+        response_data.total_size += 0x10;
+    }
    std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));

    // Copy output memory pool entries
@@ -220,17 +219,6 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
        return Audren::ERR_INVALID_PARAMETERS;
    }

-    if (behavior_info.IsElapsedFrameCountSupported()) {
-        const std::size_t renderer_info_offset{
-            sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
-            response_data.effects_size + response_data.sinks_size +
-            response_data.performance_manager_size + response_data.behavior_size};
-        RendererInfo renderer_info{};
-        renderer_info.elasped_frame_count = elapsed_frame_count;
-        std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
-                    sizeof(RendererInfo));
-    }
-
    return MakeResult(output_params);
 }

@@ -459,7 +447,6 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
        }
    }
    audio_out->QueueBuffer(stream, tag, std::move(buffer));
-    elapsed_frame_count++;
 }

 void AudioRenderer::ReleaseAndQueueBuffers() {
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -196,12 +196,6 @@ struct EffectOutStatus {
 };
 static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");

-struct RendererInfo {
-    u64_le elasped_frame_count{};
-    INSERT_PADDING_WORDS(2);
-};
-static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
-
 struct UpdateDataHeader {
    UpdateDataHeader() {}

@@ -215,7 +209,7 @@ struct UpdateDataHeader {
        mixes_size = 0x0;
        sinks_size = config.sink_count * 0x20;
        performance_manager_size = 0x10;
-        render_info = 0;
+        frame_count = 0;
        total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
                     effects_size + sinks_size + performance_manager_size;
    }
@@ -229,8 +223,8 @@ struct UpdateDataHeader {
    u32_le mixes_size{};
    u32_le sinks_size{};
    u32_le performance_manager_size{};
-    u32_le splitter_size{};
-    u32_le render_info{};
+    INSERT_PADDING_WORDS(1);
+    u32_le frame_count{};
    INSERT_PADDING_WORDS(4);
    u32_le total_size{};
 };
@@ -264,7 +258,6 @@ private:
    std::unique_ptr<AudioOut> audio_out;
    StreamPtr stream;
    Core::Memory::Memory& memory;
-    std::size_t elapsed_frame_count{};
 };

 } // namespace AudioCore
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,8 +32,6 @@ add_custom_command(OUTPUT scm_rev.cpp
    DEPENDS
      # WARNING! It was too much work to try and make a common location for this list,
      # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
-      "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-      "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -60,7 +60,6 @@ void AppendCPUInfo(FieldCollection& fc) {
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -110,11 +110,6 @@ static CPUCaps Detect() {
                caps.bmi1 = true;
            if ((cpu_id[1] >> 8) & 1)
                caps.bmi2 = true;
-            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
-            if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
-                (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
-                caps.avx512 = caps.avx2;
-            }
        }
    }

--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -19,7 +19,6 @@ struct CPUCaps {
    bool lzcnt;
    bool avx;
    bool avx2;
-    bool avx512;
    bool bmi1;
    bool bmi2;
    bool fma;
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@

 namespace Common::X64 {

-inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
+inline int RegToIndex(const Xbyak::Reg& reg) {
    using Kind = Xbyak::Reg::Kind;
    ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
               "RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
    return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
 }

-inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
+inline Xbyak::Reg64 IndexToReg64(int reg_index) {
    ASSERT(reg_index < 16);
-    return Xbyak::Reg64(static_cast<int>(reg_index));
+    return Xbyak::Reg64(reg_index);
 }

-inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
+inline Xbyak::Xmm IndexToXmm(int reg_index) {
    ASSERT(reg_index >= 16 && reg_index < 32);
-    return Xbyak::Xmm(static_cast<int>(reg_index - 16));
+    return Xbyak::Xmm(reg_index - 16);
 }

-inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
+inline Xbyak::Reg IndexToReg(int reg_index) {
    if (reg_index < 16) {
        return IndexToReg64(reg_index);
    } else {
@@ -151,13 +151,9 @@ constexpr size_t ABI_SHADOW_SPACE = 0;

 #endif

-struct ABIFrameInfo {
-    s32 subtraction;
-    s32 xmm_offset;
-};
-
-inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
-                                           size_t needed_frame_size) {
+inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+                                   size_t needed_frame_size, s32* out_subtraction,
+                                   s32* out_xmm_offset) {
    const auto count = (regs & ABI_ALL_GPRS).count();
    rsp_alignment -= count * 8;
    size_t subtraction = 0;
@@ -174,28 +170,33 @@ inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alig
    rsp_alignment -= subtraction;
    subtraction += rsp_alignment & 0xF;

-    return ABIFrameInfo{static_cast<s32>(subtraction),
-                        static_cast<s32>(subtraction - xmm_base_subtraction)};
+    *out_subtraction = (s32)subtraction;
+    *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
 }

 inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
-    auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
-
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
    for (std::size_t i = 0; i < regs.size(); ++i) {
        if (regs[i] && ABI_ALL_GPRS[i]) {
+            code.push(IndexToReg64(static_cast<int>(i)));
+        }
+    }
+    if (subtraction != 0) {
+        code.sub(code.rsp, subtraction);
+    }
+
+    for (int i = 0; i < regs.count(); i++) {
+        if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
            code.push(IndexToReg64(i));
        }
    }

-    if (frame_info.subtraction != 0) {
-        code.sub(code.rsp, frame_info.subtraction);
-    }
-
    for (std::size_t i = 0; i < regs.size(); ++i) {
        if (regs[i] && ABI_ALL_XMMS[i]) {
-            code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
-            frame_info.xmm_offset += 0x10;
+            code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
+            xmm_offset += 0x10;
        }
    }

@@ -204,23 +205,59 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b

 inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
                                           size_t rsp_alignment, size_t needed_frame_size = 0) {
-    auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);

    for (std::size_t i = 0; i < regs.size(); ++i) {
        if (regs[i] && ABI_ALL_XMMS[i]) {
-            code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
-            frame_info.xmm_offset += 0x10;
+            code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
+            xmm_offset += 0x10;
        }
    }

-    if (frame_info.subtraction != 0) {
-        code.add(code.rsp, frame_info.subtraction);
+    if (subtraction != 0) {
+        code.add(code.rsp, subtraction);
    }

    // GPRs need to be popped in reverse order
-    for (std::size_t j = 0; j < regs.size(); ++j) {
-        const std::size_t i = regs.size() - j - 1;
+    for (int i = 15; i >= 0; i--) {
+        if (regs[i]) {
+            code.pop(IndexToReg64(i));
+        }
+    }
+}
+
+inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                                 size_t rsp_alignment,
+                                                 size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+    for (std::size_t i = 0; i < regs.size(); ++i) {
        if (regs[i] && ABI_ALL_GPRS[i]) {
+            code.push(IndexToReg64(static_cast<int>(i)));
+        }
+    }
+
+    if (subtraction != 0) {
+        code.sub(code.rsp, subtraction);
+    }
+
+    return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+    if (subtraction != 0) {
+        code.add(code.rsp, subtraction);
+    }
+
+    // GPRs need to be popped in reverse order
+    for (int i = 15; i >= 0; i--) {
+        if (regs[i]) {
            code.pop(IndexToReg64(i));
        }
    }
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -606,11 +606,11 @@ endif()
 create_target_directory_groups(core)

 target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
-target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip)
+target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn)

 if (YUZU_ENABLE_BOXCAT)
    target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
-    target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json)
+    target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json zip)
 endif()

 if (ENABLE_WEB_SERVICE)
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -50,8 +50,7 @@ public:
    }

    void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
-        UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
-                          MemoryReadCode(pc));
+        UNIMPLEMENTED();
    }

    void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -62,7 +61,7 @@ public:
        case Dynarmic::A32::Exception::Breakpoint:
            break;
        }
-        LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
+        LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
                     static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
        UNIMPLEMENTED();
    }
@@ -90,6 +89,8 @@ public:

    ARM_Dynarmic_32& parent;
    std::size_t num_interpreted_instructions{};
+    u64 tpidrro_el0{};
+    u64 tpidr_el0{};
 };

 std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -98,7 +99,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
    config.callbacks = cb.get();
    // TODO(bunnei): Implement page table for 32-bit
    // config.page_table = &page_table.pointers;
-    config.coprocessors[15] = cp15;
+    config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
    config.define_unpredictable_behaviour = true;
    return std::make_unique<Dynarmic::A32::Jit>(config);
 }
@@ -111,13 +112,13 @@ void ARM_Dynarmic_32::Run() {
 }

 void ARM_Dynarmic_32::Step() {
-    jit->Step();
+    cb->InterpreterFallback(jit->Regs()[15], 1);
 }

 ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
                                 std::size_t core_index)
-    : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
-      cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
+    : ARM_Interface{system},
+      cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

 ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -153,19 +154,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
 }

 u64 ARM_Dynarmic_32::GetTlsAddress() const {
-    return cp15->uro;
+    return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
 }

 void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
-    cp15->uro = static_cast<u32>(address);
+    CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
 }

 u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
-    return cp15->uprw;
+    return cb->tpidr_el0;
 }

 void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
-    cp15->uprw = static_cast<u32>(value);
+    cb->tpidr_el0 = value;
 }

 void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -22,7 +22,6 @@ class Memory;
 namespace Core {

 class DynarmicCallbacks32;
-class DynarmicCP15;
 class DynarmicExclusiveMonitor;
 class System;

@@ -67,14 +66,12 @@ private:
        std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;

    friend class DynarmicCallbacks32;
-    friend class DynarmicCP15;
-
    std::unique_ptr<DynarmicCallbacks32> cb;
    JitCacheType jit_cache;
    std::shared_ptr<Dynarmic::A32::Jit> jit;
-    std::shared_ptr<DynarmicCP15> cp15;
    std::size_t core_index;
    DynarmicExclusiveMonitor& exclusive_monitor;
+    std::array<u32, 84> CP15_regs{};
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -98,8 +98,8 @@ public:
            }
            [[fallthrough]];
        default:
-            ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                       static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
+            ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})",
+                       static_cast<std::size_t>(exception), pc);
        }
    }

--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,132 +2,79 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <fmt/format.h>
-#include "common/logging/log.h"
-#include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_cp15.h"
-#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"

 using Callback = Dynarmic::A32::Coprocessor::Callback;
 using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
 using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;

-template <>
-struct fmt::formatter<Dynarmic::A32::CoprocReg> {
-    constexpr auto parse(format_parse_context& ctx) {
-        return ctx.begin();
-    }
-    template <typename FormatContext>
-    auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
-        return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
-    }
-};
-
-namespace Core {
-
-static u32 dummy_value;
-
 std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
                                                               CoprocReg CRd, CoprocReg CRn,
                                                               CoprocReg CRm, unsigned opc2) {
-    LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
-                 CRm, opc2);
    return {};
 }

 CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
                                                         CoprocReg CRm, unsigned opc2) {
+    // TODO(merry): Privileged CP15 registers
+
    if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
-        // CP15_FLUSH_PREFETCH_BUFFER
        // This is a dummy write, we ignore the value written here.
-        return &dummy_value;
+        return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
    }

    if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
        switch (opc2) {
        case 4:
-            // CP15_DATA_SYNC_BARRIER
            // This is a dummy write, we ignore the value written here.
-            return &dummy_value;
+            return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
        case 5:
-            // CP15_DATA_MEMORY_BARRIER
            // This is a dummy write, we ignore the value written here.
-            return &dummy_value;
+            return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
+        default:
+            return {};
        }
    }

    if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
-        // CP15_THREAD_UPRW
-        return &uprw;
+        return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
    }

-    LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
-                 opc2);
    return {};
 }

 CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
-    LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
    return {};
 }

 CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
                                                        CoprocReg CRm, unsigned opc2) {
+    // TODO(merry): Privileged CP15 registers
+
    if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
        switch (opc2) {
        case 2:
-            // CP15_THREAD_UPRW
-            return &uprw;
+            return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
        case 3:
-            // CP15_THREAD_URO
-            return &uro;
+            return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
+        default:
+            return {};
        }
    }

-    LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
-                 opc2);
    return {};
 }

 CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
-    if (!two && opc == 0 && CRm == CoprocReg::C14) {
-        // CNTPCT
-        const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
-            [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
-                ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
-                return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
-            });
-        return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
-    }
-
-    LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
    return {};
 }

 std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
                                                       std::optional<u8> option) {
-    if (option) {
-        LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
-                     long_transfer ? "l" : "", CRd, *option);
-    } else {
-        LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
-                     long_transfer ? "l" : "", CRd);
-    }
    return {};
 }

 std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
                                                        std::optional<u8> option) {
-    if (option) {
-        LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
-                     long_transfer ? "l" : "", CRd, *option);
-    } else {
-        LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
-                     long_transfer ? "l" : "", CRd);
-    }
    return {};
 }
-
-} // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,15 +10,128 @@
 #include <dynarmic/A32/coprocessor.h>
 #include "common/common_types.h"

-namespace Core {
+enum class CP15Register {
+    // c0 - Information registers
+    CP15_MAIN_ID,
+    CP15_CACHE_TYPE,
+    CP15_TCM_STATUS,
+    CP15_TLB_TYPE,
+    CP15_CPU_ID,
+    CP15_PROCESSOR_FEATURE_0,
+    CP15_PROCESSOR_FEATURE_1,
+    CP15_DEBUG_FEATURE_0,
+    CP15_AUXILIARY_FEATURE_0,
+    CP15_MEMORY_MODEL_FEATURE_0,
+    CP15_MEMORY_MODEL_FEATURE_1,
+    CP15_MEMORY_MODEL_FEATURE_2,
+    CP15_MEMORY_MODEL_FEATURE_3,
+    CP15_ISA_FEATURE_0,
+    CP15_ISA_FEATURE_1,
+    CP15_ISA_FEATURE_2,
+    CP15_ISA_FEATURE_3,
+    CP15_ISA_FEATURE_4,

-class ARM_Dynarmic_32;
+    // c1 - Control registers
+    CP15_CONTROL,
+    CP15_AUXILIARY_CONTROL,
+    CP15_COPROCESSOR_ACCESS_CONTROL,
+
+    // c2 - Translation table registers
+    CP15_TRANSLATION_BASE_TABLE_0,
+    CP15_TRANSLATION_BASE_TABLE_1,
+    CP15_TRANSLATION_BASE_CONTROL,
+    CP15_DOMAIN_ACCESS_CONTROL,
+    CP15_RESERVED,
+
+    // c5 - Fault status registers
+    CP15_FAULT_STATUS,
+    CP15_INSTR_FAULT_STATUS,
+    CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
+    CP15_INST_FSR,
+
+    // c6 - Fault Address registers
+    CP15_FAULT_ADDRESS,
+    CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
+    CP15_WFAR,
+    CP15_IFAR,
+
+    // c7 - Cache operation registers
+    CP15_WAIT_FOR_INTERRUPT,
+    CP15_PHYS_ADDRESS,
+    CP15_INVALIDATE_INSTR_CACHE,
+    CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
+    CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
+    CP15_FLUSH_PREFETCH_BUFFER,
+    CP15_FLUSH_BRANCH_TARGET_CACHE,
+    CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
+    CP15_INVALIDATE_DATA_CACHE,
+    CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
+    CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
+    CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
+    CP15_CLEAN_DATA_CACHE,
+    CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
+    CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
+    CP15_DATA_SYNC_BARRIER,
+    CP15_DATA_MEMORY_BARRIER,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
+    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
+
+    // c8 - TLB operations
+    CP15_INVALIDATE_ITLB,
+    CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
+    CP15_INVALIDATE_DTLB,
+    CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
+    CP15_INVALIDATE_UTLB,
+    CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
+    CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
+    CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
+
+    // c9 - Data cache lockdown register
+    CP15_DATA_CACHE_LOCKDOWN,
+
+    // c10 - TLB/Memory map registers
+    CP15_TLB_LOCKDOWN,
+    CP15_PRIMARY_REGION_REMAP,
+    CP15_NORMAL_REGION_REMAP,
+
+    // c13 - Thread related registers
+    CP15_PID,
+    CP15_CONTEXT_ID,
+    CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
+    CP15_THREAD_URO,  // Thread ID register - User Read Only (Privileged R/W)
+    CP15_THREAD_PRW,  // Thread ID register - Privileged R/W only.
+
+    // c15 - Performance and TLB lockdown registers
+    CP15_PERFORMANCE_MONITOR_CONTROL,
+    CP15_CYCLE_COUNTER,
+    CP15_COUNT_0,
+    CP15_COUNT_1,
+    CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
+    CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
+    CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
+    CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
+    CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
+    CP15_TLB_DEBUG_CONTROL,
+
+    // Skyeye defined
+    CP15_TLB_FAULT_ADDR,
+    CP15_TLB_FAULT_STATUS,
+
+    // Not an actual register.
+    // All registers should be defined above this.
+    CP15_REGISTER_COUNT,
+};

 class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
 public:
    using CoprocReg = Dynarmic::A32::CoprocReg;

-    explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
+    explicit DynarmicCP15(u32* cp15) : CP15(cp15){};

    std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
                                                     CoprocReg CRn, CoprocReg CRm,
@@ -34,9 +147,6 @@ public:
    std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
                                              std::optional<u8> option) override;

-    ARM_Dynarmic_32& parent;
-    u32 uprw;
-    u32 uro;
+private:
+    u32* CP15{};
 };
-
-} // namespace Core
--- a/src/core/file_sys/system_archive/mii_model.cpp
+++ b/src/core/file_sys/system_archive/mii_model.cpp
@@ -40,7 +40,7 @@ VirtualDir MiiModel() {
    out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
        MiiModelData::SHAPE_MID, "ShapeMid.dat"));

-    return out;
+    return std::move(out);
 }

 } // namespace FileSys::SystemArchive
--- a/src/core/file_sys/system_archive/shared_font.cpp
+++ b/src/core/file_sys/system_archive/shared_font.cpp
@@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name)

    std::vector<u8> bfttf(Size + sizeof(u64));

-    size_t offset = 0;
+    u64 offset = 0;
    Service::NS::EncryptSharedFont(vec, bfttf, offset);
    return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
 }
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
    // Ensure that we don't leave anything un-freed
    auto group_guard = detail::ScopeExit([&] {
        for (const auto& it : page_list.Nodes()) {
-            const auto min_num_pages{std::min<size_t>(
+            const auto min_num_pages{std::min(
                it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
            chosen_manager.Free(it.GetAddress(), min_num_pages);
        }
@@ -165,7 +165,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,

    // Free all of the pages
    for (const auto& it : page_list.Nodes()) {
-        const auto min_num_pages{std::min<size_t>(
+        const auto min_num_pages{std::min(
            it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
        chosen_manager.Free(it.GetAddress(), min_num_pages);
    }
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -132,8 +132,7 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {

 u64 Process::GetTotalPhysicalMemoryAvailable() const {
    const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
-                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
-                       main_thread_stack_size};
+                       page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};

    if (capacity < memory_usage_capacity) {
        return capacity;
@@ -147,8 +146,7 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
 }

 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
-           GetSystemResourceSize();
+    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
 }

 u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,9 +24,13 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
    const std::size_t index{ResourceTypeToIndex(resource)};

    s64 new_value = current[index] + amount;
-    if (new_value > limit[index] && available[index] + amount <= limit[index]) {
+    while (new_value > limit[index] && available[index] + amount <= limit[index]) {
        // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
        new_value = current[index] + amount;
+
+        if (timeout >= 0) {
+            break;
+        }
    }

    if (new_value <= limit[index]) {
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -44,218 +44,6 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
    return static_cast<u32>(std::min(size, max_jpeg_image_size));
 }

-class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> {
-public:
-    explicit IManagerForSystemService(Common::UUID user_id)
-        : ServiceFramework("IManagerForSystemService") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "CheckAvailability"},
-            {1, nullptr, "GetAccountId"},
-            {2, nullptr, "EnsureIdTokenCacheAsync"},
-            {3, nullptr, "LoadIdTokenCache"},
-            {100, nullptr, "SetSystemProgramIdentification"},
-            {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
-            {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
-            {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
-            {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
-            {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
-            {120, nullptr, "GetNintendoAccountId"},
-            {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
-            {130, nullptr, "GetNintendoAccountUserResourceCache"},
-            {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
-            {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
-            {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
-            {134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+
-            {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
-            {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
-            {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
-            {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
-            {150, nullptr, "CreateAuthorizationRequest"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-// 3.0.0+
-class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> {
-public:
-    explicit IFloatingRegistrationRequest(Common::UUID user_id)
-        : ServiceFramework("IFloatingRegistrationRequest") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSessionId"},
-            {12, nullptr, "GetAccountId"},
-            {13, nullptr, "GetLinkedNintendoAccountId"},
-            {14, nullptr, "GetNickname"},
-            {15, nullptr, "GetProfileImage"},
-            {21, nullptr, "LoadIdTokenCache"},
-            {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync
-            {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync
-            {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+
-            {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+
-            {110, nullptr, "SetSystemProgramIdentification"},
-            {111, nullptr, "EnsureIdTokenCacheAsync"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class IAdministrator final : public ServiceFramework<IAdministrator> {
-public:
-    explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "CheckAvailability"},
-            {1, nullptr, "GetAccountId"},
-            {2, nullptr, "EnsureIdTokenCacheAsync"},
-            {3, nullptr, "LoadIdTokenCache"},
-            {100, nullptr, "SetSystemProgramIdentification"},
-            {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
-            {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
-            {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
-            {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
-            {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
-            {120, nullptr, "GetNintendoAccountId"},
-            {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
-            {130, nullptr, "GetNintendoAccountUserResourceCache"},
-            {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
-            {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
-            {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
-            {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
-            {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
-            {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
-            {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
-            {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
-            {150, nullptr, "CreateAuthorizationRequest"},
-            {200, nullptr, "IsRegistered"},
-            {201, nullptr, "RegisterAsync"},
-            {202, nullptr, "UnregisterAsync"},
-            {203, nullptr, "DeleteRegistrationInfoLocally"},
-            {220, nullptr, "SynchronizeProfileAsync"},
-            {221, nullptr, "UploadProfileAsync"},
-            {222, nullptr, "SynchronizaProfileAsyncIfSecondsElapsed"},
-            {250, nullptr, "IsLinkedWithNintendoAccount"},
-            {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"},
-            {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"},
-            {255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"},
-            {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"},
-            {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
-            {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
-            {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"},
-            {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+
-            {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+
-            {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+
-            {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+
-            {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+
-            {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+
-            {997, nullptr, "DebugUnlinkNintendoAccountAsync"},
-            {998, nullptr, "DebugSetAvailabilityErrorDetail"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> {
-public:
-    explicit IAuthorizationRequest(Common::UUID user_id)
-        : ServiceFramework("IAuthorizationRequest") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSessionId"},
-            {10, nullptr, "InvokeWithoutInteractionAsync"},
-            {19, nullptr, "IsAuthorized"},
-            {20, nullptr, "GetAuthorizationCode"},
-            {21, nullptr, "GetIdToken"},
-            {22, nullptr, "GetState"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> {
-public:
-    explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "PrepareAsync"},
-            {1, nullptr, "GetRequest"},
-            {2, nullptr, "ApplyResponse"},
-            {3, nullptr, "ApplyResponseAsync"},
-            {10, nullptr, "Suspend"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-// 3.0.0+
-class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> {
-public:
-    explicit IOAuthProcedureForExternalNsa(Common::UUID user_id)
-        : ServiceFramework("IOAuthProcedureForExternalNsa") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "PrepareAsync"},
-            {1, nullptr, "GetRequest"},
-            {2, nullptr, "ApplyResponse"},
-            {3, nullptr, "ApplyResponseAsync"},
-            {10, nullptr, "Suspend"},
-            {100, nullptr, "GetAccountId"},
-            {101, nullptr, "GetLinkedNintendoAccountId"},
-            {102, nullptr, "GetNickname"},
-            {103, nullptr, "GetProfileImage"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class IOAuthProcedureForNintendoAccountLinkage final
-    : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> {
-public:
-    explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id)
-        : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "PrepareAsync"},
-            {1, nullptr, "GetRequest"},
-            {2, nullptr, "ApplyResponse"},
-            {3, nullptr, "ApplyResponseAsync"},
-            {10, nullptr, "Suspend"},
-            {100, nullptr, "GetRequestWithTheme"},
-            {101, nullptr, "IsNetworkServiceAccountReplaced"},
-            {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class INotifier final : public ServiceFramework<INotifier> {
-public:
-    explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSystemEvent"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
 class IProfileCommon : public ServiceFramework<IProfileCommon> {
 public:
    explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
@@ -438,54 +226,6 @@ public:
        : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
 };

-class IAsyncContext final : public ServiceFramework<IAsyncContext> {
-public:
-    explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSystemEvent"},
-            {1, nullptr, "Cancel"},
-            {2, nullptr, "HasDone"},
-            {3, nullptr, "GetResult"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class ISessionObject final : public ServiceFramework<ISessionObject> {
-public:
-    explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {999, nullptr, "Dummy"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> {
-public:
-    explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSessionId"},
-            {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew)
-            {12, nullptr, "GetAccountId"},
-            {13, nullptr, "GetLinkedNintendoAccountId"},
-            {14, nullptr, "GetNickname"},
-            {15, nullptr, "GetProfileImage"},
-            {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
 class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
 public:
    explicit IManagerForApplication(Common::UUID user_id)
@@ -525,87 +265,6 @@ private:
    Common::UUID user_id;
 };

-// 6.0.0+
-class IAsyncNetworkServiceLicenseKindContext final
-    : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> {
-public:
-    explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id)
-        : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetSystemEvent"},
-            {1, nullptr, "Cancel"},
-            {2, nullptr, "HasDone"},
-            {3, nullptr, "GetResult"},
-            {4, nullptr, "GetNetworkServiceLicenseKind"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-// 8.0.0+
-class IOAuthProcedureForUserRegistration final
-    : public ServiceFramework<IOAuthProcedureForUserRegistration> {
-public:
-    explicit IOAuthProcedureForUserRegistration(Common::UUID user_id)
-        : ServiceFramework("IOAuthProcedureForUserRegistration") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "PrepareAsync"},
-            {1, nullptr, "GetRequest"},
-            {2, nullptr, "ApplyResponse"},
-            {3, nullptr, "ApplyResponseAsync"},
-            {10, nullptr, "Suspend"},
-            {100, nullptr, "GetAccountId"},
-            {101, nullptr, "GetLinkedNintendoAccountId"},
-            {102, nullptr, "GetNickname"},
-            {103, nullptr, "GetProfileImage"},
-            {110, nullptr, "RegisterUserAsync"},
-            {111, nullptr, "GetUid"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-class DAUTH_O final : public ServiceFramework<DAUTH_O> {
-public:
-    explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData
-            {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+
-            {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+
-            {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+
-            {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+
-            {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
-// 6.0.0+
-class IAsyncResult final : public ServiceFramework<IAsyncResult> {
-public:
-    explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetResult"},
-            {1, nullptr, "Cancel"},
-            {2, nullptr, "IsAvailable"},
-            {3, nullptr, "GetSystemEvent"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-};
-
 void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_ACC, "called");
    IPC::ResponseBuilder rb{ctx, 3};
--- a/src/core/hle/service/acc/acc_aa.cpp
+++ b/src/core/hle/service/acc/acc_aa.cpp
@@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {0, nullptr, "EnsureCacheAsync"},
        {1, nullptr, "LoadCache"},
        {2, nullptr, "GetDeviceAccountId"},
-        {50, nullptr, "RegisterNotificationTokenAsync"},   // 1.0.0 - 6.2.0
-        {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
+        {50, nullptr, "RegisterNotificationTokenAsync"},
+        {51, nullptr, "UnregisterNotificationTokenAsync"},
    };
    RegisterHandlers(functions);
 }
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
        {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
        {5, &ACC_SU::GetProfile, "GetProfile"},
-        {6, nullptr, "GetProfileDigest"}, // 3.0.0+
+        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
        {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
-        {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
-        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
+        {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
        {100, nullptr, "GetUserRegistrationNotifier"},
        {101, nullptr, "GetUserStateChangeNotifier"},
        {102, nullptr, "GetBaasAccountManagerForSystemService"},
        {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
        {104, nullptr, "GetProfileUpdateNotifier"},
-        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
-        {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
+        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"},
+        {106, nullptr, "GetProfileSyncNotifier"},
        {110, nullptr, "StoreSaveDataThumbnail"},
        {111, nullptr, "ClearSaveDataThumbnail"},
        {112, nullptr, "LoadSaveDataThumbnail"},
-        {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
-        {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
-        {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
-        {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
-        {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
-        {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
-        {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
+        {113, nullptr, "GetSaveDataThumbnailExistence"},
+        {120, nullptr, "ListOpenUsersInApplication"},
+        {130, nullptr, "ActivateOpenContextRetention"},
+        {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"},
+        {150, nullptr, "AuthenticateApplicationAsync"},
+        {190, nullptr, "GetUserLastOpenedApplication"},
+        {191, nullptr, "ActivateOpenContextHolder"},
        {200, nullptr, "BeginUserRegistration"},
        {201, nullptr, "CompleteUserRegistration"},
        {202, nullptr, "CancelUserRegistration"},
@@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {204, nullptr, "SetUserPosition"},
        {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
        {206, nullptr, "CompleteUserRegistrationForcibly"},
-        {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+
-        {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
-        {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
+        {210, nullptr, "CreateFloatingRegistrationRequest"},
+        {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"},
+        {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"},
        {230, nullptr, "AuthenticateServiceAsync"},
        {250, nullptr, "GetBaasAccountAdministrator"},
        {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
-        {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+
+        {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"},
        {299, nullptr, "SuspendBackgroundDaemon"},
-        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
+        {997, nullptr, "DebugInvalidateTokenCacheForUser"},
        {998, nullptr, "DebugSetUserStateClose"},
        {999, nullptr, "DebugSetUserStateOpen"},
    };
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
        {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
        {5, &ACC_U0::GetProfile, "GetProfile"},
-        {6, nullptr, "GetProfileDigest"}, // 3.0.0+
+        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
        {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
-        {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
-        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
+        {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
        {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
        {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
        {102, nullptr, "AuthenticateApplicationAsync"},
-        {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
+        {103, nullptr, "CheckNetworkServiceAvailabilityAsync"},
        {110, nullptr, "StoreSaveDataThumbnail"},
        {111, nullptr, "ClearSaveDataThumbnail"},
        {120, nullptr, "CreateGuestLoginRequest"},
-        {130, nullptr, "LoadOpenContext"}, // 5.0.0+
-        {131, nullptr, "ListOpenContextStoredUsers"}, // 6.0.0+
-        {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+
-        {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
-        {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+
+        {130, nullptr, "LoadOpenContext"},
+        {131, nullptr, "ListOpenContextStoredUsers"},
+        {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"},
+        {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"},
+        {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"},
    };
    // clang-format on

--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -17,29 +17,28 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
        {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
        {5, &ACC_U1::GetProfile, "GetProfile"},
-        {6, nullptr, "GetProfileDigest"}, // 3.0.0+
+        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
        {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
-        {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
-        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
+        {60, nullptr, "ListOpenContextStoredUsers"},
+        {99, nullptr, "DebugActivateOpenContextRetention"},
        {100, nullptr, "GetUserRegistrationNotifier"},
        {101, nullptr, "GetUserStateChangeNotifier"},
        {102, nullptr, "GetBaasAccountManagerForSystemService"},
-        {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
-        {104, nullptr, "GetProfileUpdateNotifier"},
-        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
-        {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
+        {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
+        {104, nullptr, "GetProfileUpdateNotifier"},
+        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"},
+        {106, nullptr, "GetProfileSyncNotifier"},
        {110, nullptr, "StoreSaveDataThumbnail"},
        {111, nullptr, "ClearSaveDataThumbnail"},
        {112, nullptr, "LoadSaveDataThumbnail"},
-        {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
-        {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
-        {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
-        {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
-        {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
-        {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
-        {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
-        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
+        {113, nullptr, "GetSaveDataThumbnailExistence"},
+        {130, nullptr, "ActivateOpenContextRetention"},
+        {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"},
+        {150, nullptr, "AuthenticateApplicationAsync"},
+        {190, nullptr, "GetUserLastOpenedApplication"},
+        {191, nullptr, "ActivateOpenContextHolder"},
+        {997, nullptr, "DebugInvalidateTokenCacheForUser"},
        {998, nullptr, "DebugSetUserStateClose"},
        {999, nullptr, "DebugSetUserStateOpen"},
    };
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
                                                                       config.sub_text.size());
    params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
                                                                         config.guide_text.size());
-    params.initial_text = std::move(initial_text);
+    params.initial_text = initial_text;
    params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
    params.password = static_cast<bool>(config.is_password);
    params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
@@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {

    const auto parameters = ConvertToFrontendParameters(config, initial_text);

-    frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },
+    frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); },
                         parameters);
 }

--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -566,14 +566,6 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
    connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
 }

-void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) {
-    gyroscope_zero_drift_mode = drift_mode;
-}
-
-Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const {
-    return gyroscope_zero_drift_mode;
-}
-
 void Controller_NPad::StartLRAssignmentMode() {
    // Nothing internally is used for lr assignment mode. Since we have the ability to set the
    // controller types from boot, it doesn't really matter about showing a selection screen
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -58,12 +58,6 @@ public:
    };
    static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");

-    enum class GyroscopeZeroDriftMode : u32 {
-        Loose = 0,
-        Standard = 1,
-        Tight = 2,
-    };
-
    enum class NpadHoldType : u64 {
        Vertical = 0,
        Horizontal = 1,
@@ -123,8 +117,6 @@ public:

    void ConnectNPad(u32 npad_id);
    void DisconnectNPad(u32 npad_id);
-    void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode);
-    GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;
    LedPattern GetLedPattern(u32 npad_id);
    void SetVibrationEnabled(bool can_vibrate);
    bool IsVibrationEnabled() const;
@@ -332,8 +324,8 @@ private:
    std::array<Kernel::EventPair, 10> styleset_changed_events;
    Vibration last_processed_vibration{};
    std::array<ControllerHolder, 10> connected_controllers{};
-    GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};
    bool can_controllers_vibrate{true};
+
    std::array<ControllerPad, 10> npad_pad_states{};
    bool is_in_lr_assignment_mode{false};
    Core::System& system;
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -185,8 +185,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
        {77, nullptr, "GetAccelerometerPlayMode"},
        {78, nullptr, "ResetAccelerometerPlayMode"},
        {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
-        {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"},
-        {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},
+        {80, nullptr, "GetGyroscopeZeroDriftMode"},
+        {81, nullptr, "ResetGyroscopeZeroDriftMode"},
        {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
        {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
        {91, &Hid::ActivateGesture, "ActivateGesture"},
@@ -419,41 +419,9 @@ void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
    const auto drift_mode{rp.Pop<u32>()};
    const auto applet_resource_user_id{rp.Pop<u64>()};

-    applet_resource->GetController<Controller_NPad>(HidController::NPad)
-        .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode});
-
-    LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
-              drift_mode, applet_resource_user_id);
-
-    IPC::ResponseBuilder rb{ctx, 2};
-    rb.Push(RESULT_SUCCESS);
-}
-
-void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-    const auto handle{rp.Pop<u32>()};
-    const auto applet_resource_user_id{rp.Pop<u64>()};
-
-    LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
-              applet_resource_user_id);
-
-    IPC::ResponseBuilder rb{ctx, 3};
-    rb.Push(RESULT_SUCCESS);
-    rb.Push<u32>(
-        static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad)
-                             .GetGyroscopeZeroDriftMode()));
-}
-
-void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-    const auto handle{rp.Pop<u32>()};
-    const auto applet_resource_user_id{rp.Pop<u64>()};
-
-    applet_resource->GetController<Controller_NPad>(HidController::NPad)
-        .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard);
-
-    LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
-              applet_resource_user_id);
+    LOG_WARNING(Service_HID,
+                "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
+                drift_mode, applet_resource_user_id);

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -95,8 +95,6 @@ private:
    void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
    void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
    void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
-    void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
-    void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
    void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
    void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
    void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
--- a/src/core/hle/service/lm/manager.cpp
+++ b/src/core/hle/service/lm/manager.cpp
@@ -86,8 +86,7 @@ std::string FormatField(Field type, const std::vector<u8>& data) {
        return Common::StringFromFixedZeroTerminatedBuffer(
            reinterpret_cast<const char*>(data.data()), data.size());
    default:
-        UNIMPLEMENTED_MSG("Unimplemented field type={}", type);
-        return "";
+        UNIMPLEMENTED();
    }
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
    case IoctlCommand::IocGetCharacteristicsCommand:
        return GetCharacteristics(input, output, output2, version);
    case IoctlCommand::IocGetTPCMasksCommand:
-        return GetTPCMasks(input, output, output2, version);
+        return GetTPCMasks(input, output);
    case IoctlCommand::IocGetActiveSlotMaskCommand:
        return GetActiveSlotMask(input, output);
    case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,22 +98,17 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
    return 0;
 }

-u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
-                                 std::vector<u8>& output2, IoctlVersion version) {
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlGpuGetTpcMasksArgs params{};
    std::memcpy(&params, input.data(), input.size());
-    LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
-    if (params.mask_buffer_size != 0) {
-        params.tcp_mask = 3;
-    }
-
-    if (version == IoctlVersion::Version3) {
-        std::memcpy(output.data(), input.data(), output.size());
-        std::memcpy(output2.data(), &params.tcp_mask, output2.size());
-    } else {
-        std::memcpy(output.data(), &params, output.size());
-    }
-
+    LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
+             params.mask_buf_addr);
+    // TODO(ogniK): Confirm value on hardware
+    if (params.mask_buf_size)
+        params.tpc_mask_size = 4 * 1; // 4 * num_gpc
+    else
+        params.tpc_mask_size = 0;
+    std::memcpy(output.data(), &params, sizeof(params));
    return 0;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,11 +92,16 @@ private:
                  "IoctlCharacteristics is incorrect size");

    struct IoctlGpuGetTpcMasksArgs {
-        u32_le mask_buffer_size{};
-        INSERT_PADDING_WORDS(1);
-        u64_le mask_buffer_address{};
-        u32_le tcp_mask{};
-        INSERT_PADDING_WORDS(1);
+        /// [in]  TPC mask buffer size reserved by userspace. Should be at least
+        /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
+        /// [out] full kernel buffer size
+        u32_le mask_buf_size;
+        u32_le reserved;
+
+        /// [in]  pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
+        /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
+        u64_le mask_buf_addr;
+        u64_le tpc_mask_size; // Nintendo add this?
    };
    static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
                  "IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -161,8 +166,7 @@ private:

    u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
                           std::vector<u8>& output2, IoctlVersion version);
-    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
-                    IoctlVersion version);
+    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
    u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
    u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
    u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
    bool renderer_debug;
    int vulkan_device;

-    u16 resolution_factor{1};
+    float resolution_factor;
    int aspect_ratio;
    int max_anisotropy;
    bool use_frame_limit;
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage
    int key_code = params.Get("code", 0);
    std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
    key_button_list->AddKeyButton(key_code, button.get());
-    return button;
+    return std::move(button);
 }

 void Keyboard::PressKey(int key_code) {
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag
    // Previously created device is disconnected here. Having two motion devices for 3DS is not
    // expected.
    current_device = device_wrapper->device;
-    return device_wrapper;
+    return std::move(device_wrapper);
 }

 void MotionEmu::BeginTilt(int x, int y) {
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -27,8 +27,6 @@ add_library(video_core STATIC
    engines/shader_type.h
    macro/macro.cpp
    macro/macro.h
-    macro/macro_hle.cpp
-    macro/macro_hle.h
    macro/macro_interpreter.cpp
    macro/macro_interpreter.h
    macro/macro_jit_x64.cpp
@@ -51,11 +49,11 @@ add_library(video_core STATIC
    query_cache.h
    rasterizer_accelerated.cpp
    rasterizer_accelerated.h
+    rasterizer_cache.cpp
+    rasterizer_cache.h
    rasterizer_interface.h
    renderer_base.cpp
    renderer_base.h
-    renderer_opengl/gl_arb_decompiler.cpp
-    renderer_opengl/gl_arb_decompiler.h
    renderer_opengl/gl_buffer_cache.cpp
    renderer_opengl/gl_buffer_cache.h
    renderer_opengl/gl_device.cpp
@@ -95,7 +93,6 @@ add_library(video_core STATIC
    renderer_opengl/utils.h
    sampler_cache.cpp
    sampler_cache.h
-    shader_cache.h
    shader/decode/arithmetic.cpp
    shader/decode/arithmetic_immediate.cpp
    shader/decode/bfe.cpp
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,49 @@ namespace VideoCommon {

 class BufferBlock {
 public:
-    bool Overlaps(VAddr start, VAddr end) const {
+    bool Overlaps(const VAddr start, const VAddr end) const {
        return (cpu_addr < end) && (cpu_addr_end > start);
    }

-    bool IsInside(VAddr other_start, VAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
        return cpu_addr <= other_start && other_end <= cpu_addr_end;
    }

-    std::size_t Offset(VAddr in_addr) const {
+    std::size_t GetOffset(const VAddr in_addr) {
        return static_cast<std::size_t>(in_addr - cpu_addr);
    }

-    VAddr CpuAddr() const {
+    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

-    VAddr CpuAddrEnd() const {
+    VAddr GetCpuAddrEnd() const {
        return cpu_addr_end;
    }

-    void SetCpuAddr(VAddr new_addr) {
+    void SetCpuAddr(const VAddr new_addr) {
        cpu_addr = new_addr;
        cpu_addr_end = new_addr + size;
    }

-    std::size_t Size() const {
+    std::size_t GetSize() const {
        return size;
    }

-    u64 Epoch() const {
-        return epoch;
-    }
-
    void SetEpoch(u64 new_epoch) {
        epoch = new_epoch;
    }

-protected:
-    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
-        SetCpuAddr(cpu_addr_);
+    u64 GetEpoch() {
+        return epoch;
    }

+protected:
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
+    }
+    ~BufferBlock() = default;
+
 private:
    VAddr cpu_addr{};
    VAddr cpu_addr_end{};
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,31 +30,23 @@

 namespace VideoCommon {

-template <typename Buffer, typename BufferType, typename StreamBuffer>
+template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
 class BufferCache {
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using IntervalType = typename IntervalSet::interval_type;
    using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;

-    static constexpr u64 WRITE_PAGE_BIT = 11;
-    static constexpr u64 BLOCK_PAGE_BITS = 21;
-    static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
-
 public:
-    struct BufferInfo {
-        BufferType handle;
-        u64 offset;
-        u64 address;
-    };
+    using BufferInfo = std::pair<BufferType, u64>;

    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                            bool is_written = false, bool use_fast_cbuf = false) {
        std::lock_guard lock{mutex};

-        auto& memory_manager = system.GPU().MemoryManager();
+        const auto& memory_manager = system.GPU().MemoryManager();
        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr_opt) {
-            return GetEmptyBuffer(size);
+            return {GetEmptyBuffer(size), 0};
        }
        const VAddr cpu_addr = *cpu_addr_opt;

@@ -63,6 +55,7 @@ public:
        constexpr std::size_t max_stream_size = 0x800;
        if (use_fast_cbuf || size < max_stream_size) {
            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
                if (use_fast_cbuf) {
                    u8* dest;
@@ -89,10 +82,10 @@ public:
            }
        }

-        Buffer* const block = GetBlock(cpu_addr, size);
+        OwnerBuffer block = GetBlock(cpu_addr, size);
        MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
        if (!map) {
-            return GetEmptyBuffer(size);
+            return {GetEmptyBuffer(size), 0};
        }
        if (is_written) {
            map->MarkAsModified(true, GetModifiedTicks());
@@ -105,7 +98,7 @@ public:
            }
        }

-        return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
+        return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
    }

    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -117,37 +110,31 @@ public:
        });
    }

-    /// Prepares the buffer cache for data uploading
-    /// @param max_size Maximum number of bytes that will be uploaded
-    /// @return True when a stream buffer invalidation was required, false otherwise
-    bool Map(std::size_t max_size) {
+    void Map(std::size_t max_size) {
        std::lock_guard lock{mutex};

-        bool invalidated;
        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
        buffer_offset = buffer_offset_base;
-
-        return invalidated;
    }

-    /// Finishes the upload stream
-    void Unmap() {
+    /// Finishes the upload stream, returns true on bindings invalidation.
+    bool Unmap() {
        std::lock_guard lock{mutex};
+
        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
+        return std::exchange(invalidated, false);
    }

-    /// Function called at the end of each frame, inteded for deferred operations
    void TickFrame() {
        ++epoch;
-
        while (!pending_destruction.empty()) {
            // Delay at least 4 frames before destruction.
            // This is due to triple buffering happening on some drivers.
            static constexpr u64 epochs_to_destroy = 5;
-            if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
+            if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
                break;
            }
-            pending_destruction.pop();
+            pending_destruction.pop_front();
        }
    }

@@ -258,16 +245,28 @@ public:
        committed_flushes.pop_front();
    }

-    virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
+    virtual BufferType GetEmptyBuffer(std::size_t size) = 0;

 protected:
    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                         std::unique_ptr<StreamBuffer> stream_buffer)
-        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
+        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
+          stream_buffer_handle{this->stream_buffer->GetHandle()} {}

    ~BufferCache() = default;

-    virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+    virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
+
+    virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
+
+    virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
+                                 const u8* data) = 0;
+
+    virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
+                                   u8* data) = 0;
+
+    virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
+                           std::size_t dst_offset, std::size_t size) = 0;

    virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
        return {};
@@ -322,7 +321,7 @@ protected:
    }

 private:
-    MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
+    MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
                            std::size_t size) {
        const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
        if (overlaps.empty()) {
@@ -330,11 +329,11 @@ private:
            const VAddr cpu_addr_end = cpu_addr + size;
            if (memory_manager.IsGranularRange(gpu_addr, size)) {
                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
-                block->Upload(block->Offset(cpu_addr), size, host_ptr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
            } else {
                staging_buffer.resize(size);
                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
            }
            return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
        }
@@ -377,7 +376,7 @@ private:
        return map;
    }

-    void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
+    void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
                     const VectorMapInterval& overlaps) {
        const IntervalType base_interval{start, end};
        IntervalSet interval_set{};
@@ -387,13 +386,13 @@ private:
            interval_set.subtract(subtract);
        }
        for (auto& interval : interval_set) {
-            const std::size_t size = interval.upper() - interval.lower();
-            if (size == 0) {
-                continue;
+            std::size_t size = interval.upper() - interval.lower();
+            if (size > 0) {
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
            }
-            staging_buffer.resize(size);
-            system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
-            block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
        }
    }

@@ -423,14 +422,10 @@ private:
    }

    void FlushMap(MapInterval* map) {
-        const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
-        ASSERT_OR_EXECUTE(it != blocks.end(), return;);
-
-        std::shared_ptr<Buffer> block = it->second;
-
        const std::size_t size = map->end - map->start;
+        OwnerBuffer block = blocks[map->start >> block_page_bits];
        staging_buffer.resize(size);
-        block->Download(block->Offset(map->start), size, staging_buffer.data());
+        DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
        system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
        map->MarkAsModified(false, 0);
    }
@@ -443,7 +438,7 @@ private:

        buffer_ptr += size;
        buffer_offset += size;
-        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
+        return {stream_buffer_handle, uploaded_offset};
    }

    void AlignBuffer(std::size_t alignment) {
@@ -453,89 +448,97 @@ private:
        buffer_offset = offset_aligned;
    }

-    std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
-        const std::size_t old_size = buffer->Size();
-        const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
-        const VAddr cpu_addr = buffer->CpuAddr();
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
-        new_buffer->CopyFrom(*buffer, 0, 0, old_size);
-        QueueDestruction(std::move(buffer));
-
+    OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
+        const std::size_t old_size = buffer->GetSize();
+        const std::size_t new_size = old_size + block_page_size;
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
+        CopyBlock(buffer, new_buffer, 0, 0, old_size);
+        buffer->SetEpoch(epoch);
+        pending_destruction.push_back(buffer);
        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
+            blocks[page_start] = new_buffer;
+            ++page_start;
        }
-
        return new_buffer;
    }

-    std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
-                                        std::shared_ptr<Buffer> second) {
-        const std::size_t size_1 = first->Size();
-        const std::size_t size_2 = second->Size();
-        const VAddr first_addr = first->CpuAddr();
-        const VAddr second_addr = second->CpuAddr();
+    OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
+        const std::size_t size_1 = first->GetSize();
+        const std::size_t size_2 = second->GetSize();
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
        const VAddr new_addr = std::min(first_addr, second_addr);
        const std::size_t new_size = size_1 + size_2;
-
-        std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
-        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
-        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
-        QueueDestruction(std::move(first));
-        QueueDestruction(std::move(second));
-
+        OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
+        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
+        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
+        first->SetEpoch(epoch);
+        second->SetEpoch(epoch);
+        pending_destruction.push_back(first);
+        pending_destruction.push_back(second);
        const VAddr cpu_addr_end = new_addr + new_size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
-            blocks.insert_or_assign(page_start, new_buffer);
+        u64 page_start = new_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
+            blocks[page_start] = new_buffer;
+            ++page_start;
        }
        return new_buffer;
    }

-    Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
-        std::shared_ptr<Buffer> found;
-
+    OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
+        OwnerBuffer found;
        const VAddr cpu_addr_end = cpu_addr + size - 1;
-        const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
-        for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
            auto it = blocks.find(page_start);
            if (it == blocks.end()) {
                if (found) {
                    found = EnlargeBlock(found);
-                    continue;
+                } else {
+                    const VAddr start_addr = (page_start << block_page_bits);
+                    found = CreateBlock(start_addr, block_page_size);
+                    blocks[page_start] = found;
+                }
+            } else {
+                if (found) {
+                    if (found == it->second) {
+                        ++page_start;
+                        continue;
+                    }
+                    found = MergeBlocks(found, it->second);
+                } else {
+                    found = it->second;
                }
-                const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
-                found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
-                blocks.insert_or_assign(page_start, found);
-                continue;
-            }
-            if (!found) {
-                found = it->second;
-                continue;
-            }
-            if (found != it->second) {
-                found = MergeBlocks(std::move(found), it->second);
            }
+            ++page_start;
        }
-        return found.get();
+        return found;
    }

-    void MarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
            auto it = written_pages.find(page_start);
            if (it != written_pages.end()) {
                it->second = it->second + 1;
            } else {
-                written_pages.insert_or_assign(page_start, 1);
+                written_pages[page_start] = 1;
            }
+            ++page_start;
        }
    }

-    void UnmarkRegionAsWritten(VAddr start, VAddr end) {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
            auto it = written_pages.find(page_start);
            if (it != written_pages.end()) {
                if (it->second > 1) {
@@ -544,24 +547,22 @@ private:
                    written_pages.erase(it);
                }
            }
+            ++page_start;
        }
    }

-    bool IsRegionWritten(VAddr start, VAddr end) const {
-        const u64 page_end = end >> WRITE_PAGE_BIT;
-        for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
            if (written_pages.count(page_start) > 0) {
                return true;
            }
+            ++page_start;
        }
        return false;
    }

-    void QueueDestruction(std::shared_ptr<Buffer> buffer) {
-        buffer->SetEpoch(epoch);
-        pending_destruction.push(std::move(buffer));
-    }
-
    void MarkForAsyncFlush(MapInterval* map) {
        if (!uncommitted_flushes) {
            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
@@ -573,7 +574,9 @@ private:
    Core::System& system;

    std::unique_ptr<StreamBuffer> stream_buffer;
-    BufferType stream_buffer_handle;
+    BufferType stream_buffer_handle{};
+
+    bool invalidated = false;

    u8* buffer_ptr = nullptr;
    u64 buffer_offset = 0;
@@ -583,15 +586,18 @@ private:
    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
        mapped_addresses;

+    static constexpr u64 write_page_bit = 11;
    std::unordered_map<u64, u32> written_pages;
-    std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;

-    std::queue<std::shared_ptr<Buffer>> pending_destruction;
+    static constexpr u64 block_page_bits = 21;
+    static constexpr u64 block_page_size = 1ULL << block_page_bits;
+    std::unordered_map<u64, OwnerBuffer> blocks;
+
+    std::list<OwnerBuffer> pending_destruction;
    u64 epoch = 0;
    u64 modified_ticks = 0;

    std::vector<u8> staging_buffer;
-
    std::list<MapInterval*> marked_for_unregister;

    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,7 +93,6 @@ public:
    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                    u64 offset) const = 0;
-    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
    virtual u32 GetBoundBuffer() const = 0;

    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,11 +92,8 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
    ASSERT(stage == ShaderType::Compute);
    const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
    const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}

-SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,8 +219,6 @@ public:
    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                            u64 offset) const override;

-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
    u32 GetBoundBuffer() const override {
        return regs.tex_cb_index;
    }
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
        ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());

    // Execute the current macro.
-    macro_engine->Execute(*this, macro_positions[entry], parameters);
+    macro_engine->Execute(macro_positions[entry], parameters);
    if (mme_draw.current_mode != MMEDrawMode::Undefined) {
        FlushMMEInlineDraw();
    }
@@ -740,11 +740,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& tex_info_buffer = shader.const_buffers[const_buffer];
    const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}

-SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -598,7 +598,6 @@ public:
                BitField<4, 3, u32> block_height;
                BitField<8, 3, u32> block_depth;
                BitField<12, 1, InvMemoryLayout> type;
-                BitField<16, 1, u32> is_3d;
            } memory_layout;
            union {
                BitField<0, 16, u32> layers;
@@ -1404,8 +1403,6 @@ public:
    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                            u64 offset) const override;

-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
    u32 GetBoundBuffer() const override {
        return regs.tex_cb_index;
    }
@@ -1418,14 +1415,6 @@ public:
        return execute_on;
    }

-    VideoCore::RasterizerInterface& GetRasterizer() {
-        return rasterizer;
-    }
-
-    const VideoCore::RasterizerInterface& GetRasterizer() const {
-        return rasterizer;
-    }
-
    /// Notify a memory write has happened.
    void OnMemoryWrite() {
        dirty.flags |= dirty.on_write_stores;
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,37 +2,23 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <boost/container_hash/hash.hpp>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/settings.h"
-#include "video_core/engines/maxwell_3d.h"
 #include "video_core/macro/macro.h"
-#include "video_core/macro/macro_hle.h"
 #include "video_core/macro/macro_interpreter.h"
 #include "video_core/macro/macro_jit_x64.h"

 namespace Tegra {

-MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
-    : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
-
-MacroEngine::~MacroEngine() = default;
-
 void MacroEngine::AddCode(u32 method, u32 data) {
    uploaded_macro_code[method].push_back(data);
 }

-void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
-                          const std::vector<u32>& parameters) {
+void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
    auto compiled_macro = macro_cache.find(method);
    if (compiled_macro != macro_cache.end()) {
-        const auto& cache_info = compiled_macro->second;
-        if (cache_info.has_hle_program) {
-            cache_info.hle_program->Execute(parameters, method);
-        } else {
-            cache_info.lle_program->Execute(parameters, method);
-        }
+        compiled_macro->second->Execute(parameters, method);
    } else {
        // Macro not compiled, check if it's uploaded and if so, compile it
        auto macro_code = uploaded_macro_code.find(method);
@@ -40,21 +26,8 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
            UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
            return;
        }
-        auto& cache_info = macro_cache[method];
-        cache_info.hash = boost::hash_value(macro_code->second);
-        cache_info.lle_program = Compile(macro_code->second);
-
-        auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
-        if (hle_program.has_value()) {
-            cache_info.has_hle_program = true;
-            cache_info.hle_program = std::move(hle_program.value());
-        }
-
-        if (cache_info.has_hle_program) {
-            cache_info.hle_program->Execute(parameters, method);
-        } else {
-            cache_info.lle_program->Execute(parameters, method);
-        }
+        macro_cache[method] = Compile(macro_code->second);
+        macro_cache[method]->Execute(parameters, method);
    }
 }

--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,11 +11,9 @@
 #include "common/common_types.h"

 namespace Tegra {
-
 namespace Engines {
 class Maxwell3D;
 }
-
 namespace Macro {
 constexpr std::size_t NUM_MACRO_REGISTERS = 8;
 enum class Operation : u32 {
@@ -96,8 +94,6 @@ union MethodAddress {

 } // namespace Macro

-class HLEMacro;
-
 class CachedMacro {
 public:
    virtual ~CachedMacro() = default;
@@ -111,29 +107,20 @@ public:

 class MacroEngine {
 public:
-    explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
-    virtual ~MacroEngine();
+    virtual ~MacroEngine() = default;

    // Store the uploaded macro code to compile them when they're called.
    void AddCode(u32 method, u32 data);

    // Compiles the macro if its not in the cache, and executes the compiled macro
-    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
+    void Execute(u32 method, const std::vector<u32>& parameters);

 protected:
    virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;

 private:
-    struct CacheInfo {
-        std::unique_ptr<CachedMacro> lle_program{};
-        std::unique_ptr<CachedMacro> hle_program{};
-        u64 hash{};
-        bool has_hle_program{};
-    };
-
-    std::unordered_map<u32, CacheInfo> macro_cache;
+    std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
    std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
-    std::unique_ptr<HLEMacro> hle_macros;
 };

 std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -1,113 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <vector>
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/macro/macro_hle.h"
-#include "video_core/rasterizer_interface.h"
-
-namespace Tegra {
-
-namespace {
-// HLE'd functions
-static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
-                                 const std::vector<u32>& parameters) {
-    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
-
-    maxwell3d.regs.draw.topology.Assign(
-        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
-                                                                        ~(0x3ffffff << 26)));
-    maxwell3d.regs.vb_base_instance = parameters[5];
-    maxwell3d.mme_draw.instance_count = instance_count;
-    maxwell3d.regs.vb_element_base = parameters[3];
-    maxwell3d.regs.index_array.count = parameters[1];
-    maxwell3d.regs.index_array.first = parameters[4];
-
-    if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
-    }
-    maxwell3d.regs.index_array.count = 0;
-    maxwell3d.mme_draw.instance_count = 0;
-    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
-}
-
-static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
-                                 const std::vector<u32>& parameters) {
-    const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
-
-    maxwell3d.regs.vertex_buffer.first = parameters[3];
-    maxwell3d.regs.vertex_buffer.count = parameters[1];
-    maxwell3d.regs.vb_base_instance = parameters[4];
-    maxwell3d.regs.draw.topology.Assign(
-        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
-    maxwell3d.mme_draw.instance_count = count;
-
-    if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(false, true);
-    }
-    maxwell3d.regs.vertex_buffer.count = 0;
-    maxwell3d.mme_draw.instance_count = 0;
-    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
-}
-
-static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
-                                 const std::vector<u32>& parameters) {
-    const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
-    const u32 element_base = parameters[4];
-    const u32 base_instance = parameters[5];
-    maxwell3d.regs.index_array.first = parameters[3];
-    maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
-    maxwell3d.regs.index_array.count = parameters[1];
-    maxwell3d.regs.vb_element_base = element_base;
-    maxwell3d.regs.vb_base_instance = base_instance;
-    maxwell3d.mme_draw.instance_count = instance_count;
-    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
-    maxwell3d.CallMethodFromMME(0x8e4, element_base);
-    maxwell3d.CallMethodFromMME(0x8e5, base_instance);
-    maxwell3d.regs.draw.topology.Assign(
-        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
-    if (maxwell3d.ShouldExecute()) {
-        maxwell3d.GetRasterizer().Draw(true, true);
-    }
-    maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
-    maxwell3d.regs.index_array.count = 0;
-    maxwell3d.regs.vb_element_base = 0x0;
-    maxwell3d.regs.vb_base_instance = 0x0;
-    maxwell3d.mme_draw.instance_count = 0;
-    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
-    maxwell3d.CallMethodFromMME(0x8e4, 0x0);
-    maxwell3d.CallMethodFromMME(0x8e5, 0x0);
-    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
-}
-} // namespace
-
-constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
-    std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
-    std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
-    std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
-}};
-
-HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
-HLEMacro::~HLEMacro() = default;
-
-std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
-    const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
-                                 [hash](const auto& pair) { return pair.first == hash; });
-    if (it == hle_funcs.end()) {
-        return std::nullopt;
-    }
-    return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
-}
-
-HLEMacroImpl::~HLEMacroImpl() = default;
-
-HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
-    : maxwell3d(maxwell3d), func(func) {}
-
-void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
-    func(maxwell3d, parameters);
-}
-
-} // namespace Tegra
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -1,44 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <optional>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/macro/macro.h"
-
-namespace Tegra {
-
-namespace Engines {
-class Maxwell3D;
-}
-
-using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
-
-class HLEMacro {
-public:
-    explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
-    ~HLEMacro();
-
-    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
-
-private:
-    Engines::Maxwell3D& maxwell3d;
-};
-
-class HLEMacroImpl : public CachedMacro {
-public:
-    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
-    ~HLEMacroImpl();
-
-    void Execute(const std::vector<u32>& parameters, u32 method) override;
-
-private:
-    Engines::Maxwell3D& maxwell3d;
-    HLEFunction func;
-};
-
-} // namespace Tegra
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,8 +11,7 @@
 MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));

 namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
-    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}

 std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,22 +14,27 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));

 namespace Tegra {
-static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
-static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
+static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
+static const Xbyak::Reg64 STATE = Xbyak::util::r11;
+static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
+static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
+static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;

 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
-    STATE,
-    RESULT,
    PARAMETERS,
+    REGISTERS,
+    STATE,
+    NEXT_PARAMETER,
+    RESULT,
    METHOD_ADDRESS,
    BRANCH_HOLDER,
 });

-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
-    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}

 std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
    return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -48,32 +53,32 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
    JITState state{};
    state.maxwell3d = &maxwell3d;
    state.registers = {};
-    program(&state, parameters.data());
+    state.parameters = parameters.data();
+    program(&state);
 }

 void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
    const bool is_a_zero = opcode.src_a == 0;
    const bool is_b_zero = opcode.src_b == 0;
    const bool valid_operation = !is_a_zero && !is_b_zero;
-    [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero;
+    const bool is_move_operation = !is_a_zero && is_b_zero;
    const bool has_zero_register = is_a_zero || is_b_zero;
-    const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
-                                  opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;

-    Xbyak::Reg32 src_a;
+    Xbyak::Reg64 src_a;
    Xbyak::Reg32 src_b;

-    if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
-        src_a = Compile_GetRegister(opcode.src_a, RESULT);
-        src_b = Compile_GetRegister(opcode.src_b, eax);
+    if (!optimizer.zero_reg_skip) {
+        src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+        src_b = Compile_GetRegister(opcode.src_b, ebx);
    } else {
        if (!is_a_zero) {
-            src_a = Compile_GetRegister(opcode.src_a, RESULT);
+            src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
        }
        if (!is_b_zero) {
-            src_b = Compile_GetRegister(opcode.src_b, eax);
+            src_b = Compile_GetRegister(opcode.src_b, ebx);
        }
    }
+    Xbyak::Label skip_carry{};

    bool has_emitted = false;

@@ -185,8 +190,7 @@ void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
        if (next_opcode.has_value()) {
            const auto next = *next_opcode;
-            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
-                opcode.dst == next.dst) {
+            if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
                return;
            }
        }
@@ -240,10 +244,10 @@ void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
 }

 void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
-    const auto dst = Compile_GetRegister(opcode.src_a, ecx);
-    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
+    auto dst = Compile_GetRegister(opcode.src_a, eax);
+    auto src = Compile_GetRegister(opcode.src_b, RESULT);

-    shr(src, dst.cvt8());
+    shr(src, al);
    if (opcode.bf_size != 0 && opcode.bf_size != 31) {
        and_(src, opcode.GetBitfieldMask());
    } else if (opcode.bf_size == 0) {
@@ -259,8 +263,8 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
 }

 void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
-    const auto dst = Compile_GetRegister(opcode.src_a, ecx);
-    const auto src = Compile_GetRegister(opcode.src_b, RESULT);
+    auto dst = Compile_GetRegister(opcode.src_a, eax);
+    auto src = Compile_GetRegister(opcode.src_b, RESULT);

    if (opcode.bf_src_bit != 0) {
        shr(src, opcode.bf_src_bit);
@@ -269,11 +273,18 @@ void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
    if (opcode.bf_size != 31) {
        and_(src, opcode.GetBitfieldMask());
    }
-    shl(src, dst.cvt8());
-
+    shl(src, al);
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
 }

+static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) {
+    return maxwell3d->GetRegisterValue(method); // JIT calls this (Compile_Read)
+}
+
+static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+    maxwell3d->CallMethodFromMME(method_address.address, value); // JIT calls this (Compile_Send)
+}
+
 void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
        if (opcode.immediate == 0) {
@@ -291,34 +302,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
            sub(result, opcode.immediate * -1);
        }
    }
-
-    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
-    if (optimizer.enable_asserts) {
-        Xbyak::Label pass_range_check;
-        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
-        jb(pass_range_check);
-        int3();
-        L(pass_range_check);
-    }
-    mov(rax, qword[STATE]);
-    mov(RESULT,
-        dword[rax + offsetof(Engines::Maxwell3D, regs) +
-              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
-
+    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+    mov(Common::X64::ABI_PARAM1, qword[STATE]);
+    mov(Common::X64::ABI_PARAM2, RESULT);
+    Common::X64::CallFarFunction(*this, &Read);
+    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
+    mov(RESULT, Common::X64::ABI_RETURN.cvt32());
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
 }

-static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
-    maxwell3d->CallMethodFromMME(method_address.address, value);
-}
-
 void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
-    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);
    mov(Common::X64::ABI_PARAM1, qword[STATE]);
    mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
    mov(Common::X64::ABI_PARAM3, value);
    Common::X64::CallFarFunction(*this, &Send);
-    Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0);

    Xbyak::Label dont_process{};
    // Get increment
@@ -330,7 +329,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
    and_(METHOD_ADDRESS, 0xfff);
    shr(ecx, 12);
    and_(ecx, 0x3f);
-    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
+    lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
    sal(ecx, 12);
    or_(eax, ecx);

@@ -422,15 +421,19 @@ void MacroJITx64Impl::Compile() {
    bool keep_executing = true;
    labels.fill(Xbyak::Label());

-    Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
    // JIT state
    mov(STATE, Common::X64::ABI_PARAM1);
-    mov(PARAMETERS, Common::X64::ABI_PARAM2);
+    mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
+                          static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
+    mov(REGISTERS, Common::X64::ABI_PARAM1);
+    add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
    xor_(RESULT, RESULT);
    xor_(METHOD_ADDRESS, METHOD_ADDRESS);
+    xor_(NEXT_PARAMETER, NEXT_PARAMETER);
    xor_(BRANCH_HOLDER, BRANCH_HOLDER);

-    mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
+    mov(dword[REGISTERS + 4], Compile_FetchParameter());

    // Track get register for zero registers and mark it as no-op
    optimizer.zero_reg_skip = true;
@@ -443,9 +446,6 @@ void MacroJITx64Impl::Compile() {
    // one if our register isn't "dirty"
    optimizer.optimize_for_method_move = true;

-    // Enable run-time assertions in JITted code
-    optimizer.enable_asserts = false;
-
    // Check to see if we can skip emitting certain instructions
    Optimizer_ScanFlags();

@@ -463,7 +463,7 @@ void MacroJITx64Impl::Compile() {

    L(end_of_code);

-    Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
    ret();
    ready();
    program = getCode<ProgramType>();
@@ -537,8 +537,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
 }

 Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
-    mov(eax, dword[PARAMETERS]);
-    add(PARAMETERS, sizeof(u32));
+    mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
+    inc(NEXT_PARAMETER);
    return eax;
 }

@@ -547,22 +547,41 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
        // Register 0 is always zero
        xor_(dst, dst);
    } else {
-        mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
+        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
    }

    return dst;
 }

+Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
+    // Emits code that leaves macro register `index` in `dst`; hands `dst` back to the caller.
+    if (index != 0) {
+        // NOTE(review): dst is 64-bit but the operand is tagged dword - confirm the load width.
+        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
+        return dst;
+    }
+    xor_(dst, dst); // register 0 always reads as zero, so clear dst instead of loading
+    return dst;
+}
+
+void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
+    // Emits: carry_flag = (dst >> 32) != 0. Clobbers ecx and leaves dst shifted right by 32.
+    xor_(ecx, ecx); // cleared before the shift so the flags shr produces reach setne intact
+    shr(dst, 32);   // ZF ends up set only when nothing remains after the shift
+    setne(cl);      // ecx becomes 1 if any of the upper 32 bits were set, else stays 0
+    mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
+}
+
 void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
-    const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
+    auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
        // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
        // register.
        if (reg == 0) {
            return;
        }
-        mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
+        mov(dword[REGISTERS + reg * sizeof(u32)], result);
    };
-    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
+    auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };

    switch (operation) {
    case Macro::ResultOperation::IgnoreAndFetch:
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,6 +55,8 @@ private:

    Xbyak::Reg32 Compile_FetchParameter();
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
+    Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
+    void Compile_WriteCarry(Xbyak::Reg64 dst);

    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
    void Compile_Send(Xbyak::Reg32 value);
@@ -65,10 +67,11 @@ private:
    struct JITState {
        Engines::Maxwell3D* maxwell3d{};
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
+        const u32* parameters{};
        u32 carry_flag{};
    };
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
-    using ProgramType = void (*)(JITState*, const u32*);
+    using ProgramType = void (*)(JITState*);

    struct OptimizerState {
        bool can_skip_carry{};
@@ -76,15 +79,14 @@ private:
        bool zero_reg_skip{};
        bool skip_dummy_addimmediate{};
        bool optimize_for_method_move{};
-        bool enable_asserts{};
    };
    OptimizerState optimizer{};

    std::optional<Macro::Opcode> next_opcode{};
    ProgramType program{nullptr};

-    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
-    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
+    std::array<Xbyak::Label, MAX_CODE_SIZE> labels{};
+    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip{};
    Xbyak::Label end_of_code{};

    bool is_delay_slot{};
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,11 +210,10 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
    return range == inner_size;
 }

-void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
-                              const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_src_addr >> page_bits};
-    std::size_t page_offset{gpu_src_addr & page_mask};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};

    auto& memory = system.Memory();

@@ -235,11 +234,11 @@ void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
    }
 }

-void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
+void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
                                    const std::size_t size) const {
    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_src_addr >> page_bits};
-    std::size_t page_offset{gpu_src_addr & page_mask};
+    std::size_t page_index{src_addr >> page_bits};
+    std::size_t page_offset{src_addr & page_mask};

    auto& memory = system.Memory();

@@ -260,11 +259,10 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
    }
 }

-void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
-                               const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_dest_addr >> page_bits};
-    std::size_t page_offset{gpu_dest_addr & page_mask};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};

    auto& memory = system.Memory();

@@ -285,11 +283,11 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
    }
 }

-void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
+void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
                                     const std::size_t size) {
    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_dest_addr >> page_bits};
-    std::size_t page_offset{gpu_dest_addr & page_mask};
+    std::size_t page_index{dest_addr >> page_bits};
+    std::size_t page_offset{dest_addr & page_mask};

    auto& memory = system.Memory();

@@ -308,18 +306,16 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
    }
 }

-void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
-                              const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
    std::vector<u8> tmp_buffer(size);
-    ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
-    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
+    ReadBlock(src_addr, tmp_buffer.data(), size);
+    WriteBlock(dest_addr, tmp_buffer.data(), size);
 }

-void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
-                                    const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
    std::vector<u8> tmp_buffer(size);
-    ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
-    WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
+    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
+    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
 }

 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
     * Flushes and Invalidations, respectively to each operation.
     */
-    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

    /**
     * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
     * being flushed.
     */
-    void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

    /**
     * IsGranularRange checks if a gpu region can be simply read with a pointer
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -220,8 +220,8 @@ private:
            return cache_begin < addr_end && addr_begin < cache_end;
        };

-        const u64 page_end = addr_end >> PAGE_BITS;
-        for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
            const auto& it = cached_queries.find(page);
            if (it == std::end(cached_queries)) {
                continue;
@@ -242,14 +242,14 @@ private:
    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
-        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
+        const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
                                                  host_ptr);
    }

    /// Tries to a get a cached query. Returns nullptr on failure.
    CachedQuery* TryGet(VAddr addr) {
-        const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
+        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
        const auto it = cached_queries.find(page);
        if (it == std::end(cached_queries)) {
            return nullptr;
@@ -268,7 +268,7 @@ private:
    }

    static constexpr std::uintptr_t PAGE_SIZE = 4096;
-    static constexpr unsigned PAGE_BITS = 12;
+    static constexpr unsigned PAGE_SHIFT = 12;

    Core::System& system;
    VideoCore::RasterizerInterface& rasterizer;
--- a/src/video_core/rasterizer_cache.cpp
+++ b/src/video_core/rasterizer_cache.cpp
@@ -0,0 +1,7 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/rasterizer_cache.h"
+
+RasterizerCacheObject::~RasterizerCacheObject() = default;
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -0,0 +1,253 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+#include <set>
+#include <unordered_map>
+
+#include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range_core.hpp>
+
+#include "common/common_types.h"
+#include "core/settings.h"
+#include "video_core/gpu.h"
+#include "video_core/rasterizer_interface.h"
+
+class RasterizerCacheObject {
+public:
+    explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
+
+    virtual ~RasterizerCacheObject();
+
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
+    }
+
+    /// Gets the size of the cached object in guest memory, required for cache management
+    virtual std::size_t GetSizeInBytes() const = 0;
+
+    /// Sets whether the cached object should be considered registered
+    void SetIsRegistered(bool registered) {
+        is_registered = registered;
+    }
+
+    /// Returns true if the cached object is registered
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    /// Returns true if the cached object is dirty
+    bool IsDirty() const {
+        return is_dirty;
+    }
+
+    /// Returns ticks from when this cached object was last modified
+    u64 GetLastModifiedTicks() const {
+        return last_modified_ticks;
+    }
+
+    /// Marks an object as recently modified, used to specify whether it is clean or dirty
+    template <class T>
+    void MarkAsModified(bool dirty, T& cache) {
+        is_dirty = dirty;
+        last_modified_ticks = cache.GetModifiedTicks();
+    }
+
+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
+private:
+    bool is_registered{};      ///< Whether the object is currently registered with the cache
+    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
+    bool is_memory_marked{};   ///< Whether the object is marking rasterizer memory.
+    bool is_sync_pending{};    ///< Whether the object is pending deletion.
+    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
+    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space
+};
+
+template <class T>
+class RasterizerCache : NonCopyable {
+    friend class RasterizerCacheObject;
+
+public:
+    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+
+    /// Write any cached resources overlapping the specified region back to memory
+    void FlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
+        for (auto& object : objects) {
+            FlushObject(object);
+        }
+    }
+
+    /// Mark the specified region as being invalidated
+    void InvalidateRegion(VAddr addr, u64 size) {
+        std::lock_guard lock{mutex};
+
+        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
+        for (auto& object : objects) {
+            if (!object->IsRegistered()) {
+                // Skip duplicates
+                continue;
+            }
+            Unregister(object);
+        }
+    }
+
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
+            if (object->IsRegistered()) {
+                UnmarkMemory(object);
+                object->SetSyncPending(true);
+                marked_for_unregister.emplace_back(object);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : marked_for_unregister) {
+            if (object->IsRegistered()) {
+                object->SetSyncPending(false);
+                Unregister(object);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
+    /// Invalidates everything in the cache
+    void InvalidateAll() {
+        std::lock_guard lock{mutex};
+
+        while (interval_cache.begin() != interval_cache.end()) {
+            Unregister(*interval_cache.begin()->second.begin());
+        }
+    }
+
+protected:
+    /// Tries to get an object from the cache with the specified cache address
+    T TryGet(VAddr addr) const {
+        const auto iter = map_cache.find(addr);
+        if (iter != map_cache.end())
+            return iter->second;
+        return nullptr;
+    }
+
+    /// Register an object into the cache
+    virtual void Register(const T& object) {
+        std::lock_guard lock{mutex};
+
+        object->SetIsRegistered(true);
+        interval_cache.add({GetInterval(object), ObjectSet{object}});
+        map_cache.insert({object->GetCpuAddr(), object});
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
+        object->SetMemoryMarked(true);
+    }
+
+    /// Unregisters an object from the cache
+    virtual void Unregister(const T& object) {
+        std::lock_guard lock{mutex};
+
+        UnmarkMemory(object);
+        object->SetIsRegistered(false);
+        if (object->IsSyncPending()) {
+            marked_for_unregister.remove(object);
+            object->SetSyncPending(false);
+        }
+        const VAddr addr = object->GetCpuAddr();
+        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
+        map_cache.erase(addr);
+    }
+
+    void UnmarkMemory(const T& object) {
+        if (!object->IsMemoryMarked()) {
+            return;
+        }
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
+        object->SetMemoryMarked(false);
+    }
+
+    /// Returns a ticks counter used for tracking when cached objects were last modified
+    u64 GetModifiedTicks() {
+        std::lock_guard lock{mutex};
+
+        return ++modified_ticks;
+    }
+
+    virtual void FlushObjectInner(const T& object) = 0;
+
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        std::lock_guard lock{mutex};
+
+        if (!object->IsDirty()) {
+            return;
+        }
+        FlushObjectInner(object);
+        object->MarkAsModified(false, *this);
+    }
+
+    std::recursive_mutex mutex;
+
+private:
+    /// Returns the cached objects overlapping the given region, sorted by last-modified ticks
+    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
+        if (size == 0) {
+            return {};
+        }
+
+        std::vector<T> objects;
+        const ObjectInterval interval{addr, addr + size};
+        for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
+            for (auto& cached_object : pair.second) {
+                if (!cached_object) {
+                    continue;
+                }
+                objects.push_back(cached_object);
+            }
+        }
+
+        std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
+            return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
+        });
+
+        return objects;
+    }
+
+    using ObjectSet = std::set<T>;
+    using ObjectCache = std::unordered_map<VAddr, T>;
+    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+    using ObjectInterval = typename IntervalCache::interval_type;
+
+    static auto GetInterval(const T& object) {
+        return ObjectInterval::right_open(object->GetCpuAddr(),
+                                          object->GetCpuAddr() + object->GetSizeInBytes());
+    }
+
+    ObjectCache map_cache;
+    IntervalCache interval_cache; ///< Cache of objects
+    u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
+    VideoCore::RasterizerInterface& rasterizer;
+    std::list<T> marked_for_unregister;
+};
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
-                                    const VideoCommon::Shader::Registry& registry,
-                                    Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,46 +22,22 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;

 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));

-Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
+CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
    : VideoCommon::BufferBlock{cpu_addr, size} {
    gl_buffer.Create();
    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
-    if (device.HasVertexBufferUnifiedMemory()) {
-        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
-        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
-    }
 }

-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
-    glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
-                         data);
-}
-
-void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
-    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
-    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
-    glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
-                            data);
-}
-
-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                      std::size_t size) const {
-    glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
-                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
-}
+CachedBufferBlock::~CachedBufferBlock() = default;

 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
-                               const Device& device_, std::size_t stream_size)
-    : GenericBufferCache{rasterizer, system,
-                         std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
-      device{device_} {
+                               const Device& device, std::size_t stream_size)
+    : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
    if (!device.HasFastBufferSubData()) {
        return;
    }

-    static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+    static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
    glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
    for (const GLuint cbuf : cbufs) {
        glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -72,21 +48,44 @@ OGLBufferCache::~OGLBufferCache() {
    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
 }

-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, cpu_addr, size);
+Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
 }

-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
-    return {0, 0, 0};
+GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
+    return buffer->GetHandle();
+}
+
+GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
+    return 0;
+}
+
+void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                     const u8* data) {
+    glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
+                         static_cast<GLsizeiptr>(size), data);
+}
+
+void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                       u8* data) {
+    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+    glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
+                            static_cast<GLsizeiptr>(size), data);
+}
+
+void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                               std::size_t dst_offset, std::size_t size) {
+    glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
+                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
 }

 OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
                                                             std::size_t size) {
    DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
-    const GLuint cbuf = cbufs[cbuf_cursor++];
-
+    const GLuint& cbuf = cbufs[cbuf_cursor++];
    glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
-    return {cbuf, 0, 0};
+    return {cbuf, 0};
 }

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

@@ -23,57 +24,57 @@ class Device;
 class OGLStreamBuffer;
 class RasterizerOpenGL;

-class Buffer : public VideoCommon::BufferBlock {
+class CachedBufferBlock;
+
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
+
+class CachedBufferBlock : public VideoCommon::BufferBlock {
 public:
-    explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
-    ~Buffer();
+    explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
+    ~CachedBufferBlock();

-    void Upload(std::size_t offset, std::size_t size, const u8* data) const;
-
-    void Download(std::size_t offset, std::size_t size, u8* data) const;
-
-    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                  std::size_t size) const;
-
-    GLuint Handle() const noexcept {
+    GLuint GetHandle() const {
        return gl_buffer.handle;
    }

-    u64 Address() const noexcept {
-        return gpu_address;
-    }
-
 private:
    OGLBuffer gl_buffer;
-    u64 gpu_address = 0;
 };

-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
 class OGLBufferCache final : public GenericBufferCache {
 public:
    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
                            const Device& device, std::size_t stream_size);
    ~OGLBufferCache();

-    BufferInfo GetEmptyBuffer(std::size_t) override;
+    GLuint GetEmptyBuffer(std::size_t) override;

    void Acquire() noexcept {
        cbuf_cursor = 0;
    }

 protected:
-    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
+
+    GLuint ToHandle(const Buffer& buffer) override;
+
+    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                         const u8* data) override;
+
+    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                           u8* data) override;
+
+    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                   std::size_t dst_offset, std::size_t size) override;

    BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;

 private:
-    static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-                                             Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
-
-    const Device& device;
-
    std::size_t cbuf_cursor = 0;
-    std::array<GLuint, NUM_CBUFS> cbufs{};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+                           Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
+        cbufs{}; ///< Zeroed so glDeleteBuffers in the destructor ignores never-created names
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -123,24 +123,16 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
    u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
    u32 base_images = 0;

-    // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
-    // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
-    // fragment stage, and at least 1 for the rest of the stages.
-    // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
-
-    // Reserve at least 4 image bindings on the fragment stage.
+    // Reserve more image bindings on fragment and vertex stages.
    bindings[4].image =
-        Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
-
-    // This is guaranteed to be at least 1.
-    const u32 total_extracted_images = num_images / (NumStages - 1);
+        Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
+    bindings[0].image =
+        Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);

    // Reserve the other image bindings.
-    for (std::size_t i = 0; i < NumStages; ++i) {
+    const u32 total_extracted_images = num_images / (NumStages - 2);
+    for (std::size_t i = 2; i < NumStages; ++i) {
        const std::size_t stage = stage_swizzle[i];
-        if (stage == 4) {
-            continue;
-        }
        bindings[stage].image =
            Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
    }
@@ -193,7 +185,6 @@ bool IsASTCSupported() {
 Device::Device()
    : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
-    const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
    const std::vector extensions = GetExtensions();

@@ -217,21 +208,13 @@ Device::Device()
    has_shader_ballot = GLAD_GL_ARB_shader_ballot;
    has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
    has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
-    has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
    has_astc = IsASTCSupported();
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
-    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
-
-    // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
-    // uniform buffers as "push constants"
    has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
-
    use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
-                           GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
-                           GLAD_GL_NV_transform_feedback2;
+                           GLAD_GL_NV_compute_program5;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
    LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
@@ -252,7 +235,6 @@ Device::Device(std::nullptr_t) {
    has_shader_ballot = true;
    has_vertex_viewport_layer = true;
    has_image_load_formatted = true;
-    has_texture_shadow_lod = true;
    has_variable_aoffi = true;
 }

--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -68,14 +68,6 @@ public:
        return has_image_load_formatted;
    }

-    bool HasTextureShadowLod() const {
-        return has_texture_shadow_lod;
-    }
-
-    bool HasVertexBufferUnifiedMemory() const {
-        return has_vertex_buffer_unified_memory;
-    }
-
    bool HasASTC() const {
        return has_astc;
    }
@@ -96,10 +88,6 @@ public:
        return has_fast_buffer_sub_data;
    }

-    bool HasNvViewportArray2() const {
-        return has_nv_viewport_array2;
-    }
-
    bool UseAssemblyShaders() const {
        return use_assembly_shaders;
    }
@@ -118,14 +106,11 @@ private:
    bool has_shader_ballot{};
    bool has_vertex_viewport_layer{};
    bool has_image_load_formatted{};
-    bool has_texture_shadow_lod{};
-    bool has_vertex_buffer_unified_memory{};
    bool has_astc{};
    bool has_variable_aoffi{};
    bool has_component_indexing_bug{};
    bool has_precise_bug{};
    bool has_fast_buffer_sub_data{};
-    bool has_nv_viewport_array2{};
    bool use_assembly_shaders{};
 };

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,7 +30,6 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
-#include "video_core/shader_cache.h"

 namespace OpenGL {

@@ -61,28 +60,15 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
 constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
    NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;

-constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+constexpr std::size_t NumSupportedVertexAttributes = 16;

 template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
                                               ShaderType shader_type, std::size_t index = 0) {
-    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
-        if (entry.is_separated) {
-            const u32 buffer_1 = entry.buffer;
-            const u32 buffer_2 = entry.secondary_buffer;
-            const u32 offset_1 = entry.offset;
-            const u32 offset_2 = entry.secondary_offset;
-            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
-            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
-            return engine.GetTextureInfo(handle_1 | handle_2);
-        }
-    }
    if (entry.is_bindless) {
-        const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
-        return engine.GetTextureInfo(handle);
+        const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+        return engine.GetTextureInfo(tex_handle);
    }
-
    const auto& gpu_profile = engine.AccessGuestDriverProfile();
    const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -107,34 +93,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
    return buffer.size;
 }

-/// Translates hardware transform feedback indices
-/// @param location Hardware location
-/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
-/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
-std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
-    const u8 index = location / 4;
-    if (index >= 8 && index <= 39) {
-        return {GL_GENERIC_ATTRIB_NV, index - 8};
-    }
-    if (index >= 48 && index <= 55) {
-        return {GL_TEXTURE_COORD_NV, index - 48};
-    }
-    switch (index) {
-    case 7:
-        return {GL_POSITION, 0};
-    case 40:
-        return {GL_PRIMARY_COLOR_NV, 0};
-    case 41:
-        return {GL_SECONDARY_COLOR_NV, 0};
-    case 42:
-        return {GL_BACK_PRIMARY_COLOR_NV, 0};
-    case 43:
-        return {GL_BACK_SECONDARY_COLOR_NV, 0};
-    }
-    UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
-    return {GL_POSITION, 0};
-}
-
 void oglEnable(GLenum cap, bool state) {
    (state ? glEnable : glDisable)(cap);
 }
@@ -194,7 +152,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
    // avoid OpenGL errors.
    // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
    // assume every shader uses them all.
-    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
+    for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
        if (!flags[Dirty::VertexFormat0 + index]) {
            continue;
        }
@@ -232,11 +190,9 @@ void RasterizerOpenGL::SetupVertexBuffer() {

    MICROPROFILE_SCOPE(OpenGL_VB);

-    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
    // Upload all guest vertex arrays sequentially to our buffer
    const auto& regs = gpu.regs;
-    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
+    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
@@ -249,25 +205,16 @@ void RasterizerOpenGL::SetupVertexBuffer() {

        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-        ASSERT(end >= start);

-        const GLuint gl_index = static_cast<GLuint>(index);
+        ASSERT(end >= start);
        const u64 size = end - start;
        if (size == 0) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            if (use_unified_memory) {
-                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
-            }
+            glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
            continue;
        }
-        const auto info = buffer_cache.UploadMemory(start, size);
-        if (use_unified_memory) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
-                                   info.address + info.offset, size);
-        } else {
-            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
-        }
+        const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
+        glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
+                           vertex_array.stride);
    }
 }

@@ -280,7 +227,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
    flags[Dirty::VertexInstances] = false;

    const auto& regs = gpu.regs;
-    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
+    for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
        if (!flags[Dirty::VertexInstance0 + index]) {
            continue;
        }
@@ -297,9 +244,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
    MICROPROFILE_SCOPE(OpenGL_Index);
    const auto& regs = system.GPU().Maxwell3D().regs;
    const std::size_t size = CalculateIndexBufferSize();
-    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
-    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
-    return info.offset;
+    const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
+    return offset;
 }

 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -335,7 +282,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            continue;
        }

-        Shader* const shader = shader_cache.GetStageProgram(program);
+        Shader shader{shader_cache.GetStageProgram(program)};

        if (device.UseAssemblyShaders()) {
            // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -629,16 +576,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());

    // Prepare the vertex array.
-    const bool invalidated = buffer_cache.Map(buffer_size);
-
-    if (invalidated) {
-        // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
-        auto& dirty = gpu.dirty.flags;
-        dirty[Dirty::VertexBuffers] = true;
-        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
-            dirty[index] = true;
-        }
-    }
+    buffer_cache.Map(buffer_size);

    // Prepare vertex array format.
    SetupVertexFormat();
@@ -655,9 +593,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    if (!device.UseAssemblyShaders()) {
        MaxwellUniformData ubo;
        ubo.SetFromRegs(gpu);
-        const auto info =
+        const auto [buffer, offset] =
            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
+        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
                          static_cast<GLsizeiptr>(sizeof(ubo)));
    }

@@ -904,7 +842,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
    return true;
 }

-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
    static constexpr std::array PARAMETER_LUT = {
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -934,7 +872,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
    }
 }

-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
    const auto& entries = kernel->GetEntries();
@@ -968,7 +906,8 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
        if (device.UseAssemblyShaders()) {
            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
        } else {
-            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
+            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+                              buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
        }
        return;
    }
@@ -981,29 +920,28 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,

    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
    const GPUVAddr gpu_addr = buffer.address;
-    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
+    auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);

    if (device.UseAssemblyShaders()) {
        UNIMPLEMENTED_IF(use_unified);
-        if (info.offset != 0) {
+        if (offset != 0) {
            const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
-            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
-            info.handle = staging_cbuf;
-            info.offset = 0;
+            glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
+            cbuf = staging_cbuf;
+            offset = 0;
        }
-        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
+        glBindBufferRangeNV(stage, binding, cbuf, offset, size);
        return;
    }

    if (use_unified) {
-        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
-                                 unified_offset, size);
+        glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
    } else {
-        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
    }
 }

-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
    auto& gpu{system.GPU()};
    auto& memory_manager{gpu.MemoryManager()};
    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -1018,7 +956,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
    }
 }

-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
+void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
    auto& gpu{system.GPU()};
    auto& memory_manager{gpu.MemoryManager()};
    const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -1035,12 +973,13 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
 void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
                                         GPUVAddr gpu_addr, std::size_t size) {
    const auto alignment{device.GetShaderStorageBufferAlignment()};
-    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
-    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
+    const auto [ssbo, buffer_offset] =
+        buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
+    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
                      static_cast<GLsizeiptr>(size));
 }

-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& maxwell3d = system.GPU().Maxwell3D();
    u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -1053,7 +992,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader
    }
 }

-void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
+void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& compute = system.GPU().KeplerCompute();
    u32 binding = 0;
@@ -1082,7 +1021,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
    }
 }

-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
    const auto& maxwell3d = system.GPU().Maxwell3D();
    u32 binding = device.GetBaseBindings(stage_index).image;
    for (const auto& entry : shader->GetEntries().images) {
@@ -1092,7 +1031,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader)
    }
 }

-void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
+void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
    const auto& compute = system.GPU().KeplerCompute();
    u32 binding = 0;
    for (const auto& entry : shader->GetEntries().images) {
@@ -1608,70 +1547,12 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
    oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
 }

-void RasterizerOpenGL::SyncTransformFeedback() {
-    // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
-    // when this is required.
-    const auto& regs = system.GPU().Maxwell3D().regs;
-
-    static constexpr std::size_t STRIDE = 3;
-    std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
-    std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
-
-    GLint* cursor = attribs.data();
-    GLint* current_stream = streams.data();
-
-    for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
-        const auto& layout = regs.tfb_layouts[feedback];
-        UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
-        if (layout.varying_count == 0) {
-            continue;
-        }
-
-        *current_stream = static_cast<GLint>(feedback);
-        if (current_stream != streams.data()) {
-            // When stepping one stream, push the expected token
-            cursor[0] = GL_NEXT_BUFFER_NV;
-            cursor[1] = 0;
-            cursor[2] = 0;
-            cursor += STRIDE;
-        }
-        ++current_stream;
-
-        const auto& locations = regs.tfb_varying_locs[feedback];
-        std::optional<u8> current_index;
-        for (u32 offset = 0; offset < layout.varying_count; ++offset) {
-            const u8 location = locations[offset];
-            const u8 index = location / 4;
-
-            if (current_index == index) {
-                // Increase number of components of the previous attachment
-                ++cursor[-2];
-                continue;
-            }
-            current_index = index;
-
-            std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
-            cursor[1] = 1;
-            cursor += STRIDE;
-        }
-    }
-
-    const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
-    const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
-    glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
-                                       GL_INTERLEAVED_ATTRIBS);
-}
-
 void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
    const auto& regs = system.GPU().Maxwell3D().regs;
    if (regs.tfb_enabled == 0) {
        return;
    }

-    if (device.UseAssemblyShaders()) {
-        SyncTransformFeedback();
-    }
-
    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1698,10 +1579,6 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
                          static_cast<GLsizeiptr>(size));
    }

-    // We may have to call BeginTransformFeedbackNV here since they seem to call different
-    // implementations on Nvidia's driver (the pointer is different) but we are using
-    // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
-    // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
    glBeginTransformFeedback(GL_POINTS);
 }

@@ -1723,9 +1600,8 @@ void RasterizerOpenGL::EndTransformFeedback() {
        const GLuint handle = transform_feedback_buffers[index].handle;
        const GPUVAddr gpu_addr = binding.Address();
        const std::size_t size = binding.buffer_size;
-        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
-                                 static_cast<GLsizeiptr>(size));
+        const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+        glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
    }
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_accelerated.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
@@ -99,10 +100,10 @@ private:
    void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);

    /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
+    void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);

    /// Configures the current constbuffers to use for the kernel invocation.
-    void SetupComputeConstBuffers(Shader* kernel);
+    void SetupComputeConstBuffers(const Shader& kernel);

    /// Configures a constant buffer.
    void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -110,30 +111,30 @@ private:
                          std::size_t unified_offset);

    /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
+    void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);

    /// Configures the current global memory entries to use for the kernel invocation.
-    void SetupComputeGlobalMemory(Shader* kernel);
+    void SetupComputeGlobalMemory(const Shader& kernel);

    /// Configures a constant buffer.
    void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                           std::size_t size);

    /// Configures the current textures to use for the draw command.
-    void SetupDrawTextures(std::size_t stage_index, Shader* shader);
+    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);

    /// Configures the textures used in a compute shader.
-    void SetupComputeTextures(Shader* kernel);
+    void SetupComputeTextures(const Shader& kernel);

    /// Configures a texture.
    void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                      const SamplerEntry& entry);

    /// Configures images in a graphics shader.
-    void SetupDrawImages(std::size_t stage_index, Shader* shader);
+    void SetupDrawImages(std::size_t stage_index, const Shader& shader);

    /// Configures images in a compute shader.
-    void SetupComputeImages(Shader* shader);
+    void SetupComputeImages(const Shader& shader);

    /// Configures an image.
    void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -201,10 +202,6 @@ private:
    /// Syncs the framebuffer sRGB state to match the guest state
    void SyncFramebufferSRGB();

-    /// Syncs transform feedback state to match guest state
-    /// @note Only valid on assembly shaders
-    void SyncTransformFeedback();
-
    /// Begin a transform feedback
    void BeginTransformFeedback(GLenum primitive_mode);

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,7 +20,6 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -30,7 +29,6 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
-#include "video_core/shader_cache.h"

 namespace OpenGL {

@@ -149,8 +147,7 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
    auto program = std::make_shared<ProgramHandle>();

    if (device.UseAssemblyShaders()) {
-        const std::string arb =
-            DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
+        const std::string arb = "Not implemented";

        GLuint& arb_prog = program->assembly_program.handle;

@@ -197,9 +194,12 @@ std::unordered_set<GLenum> GetSupportedFormats() {

 } // Anonymous namespace

-Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
-               ProgramSharedPtr program_)
-    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
+CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
+                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
+                           ShaderEntries entries, ProgramSharedPtr program_)
+    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
+      size_in_bytes{size_in_bytes}, program{std::move(program_)} {
+    // Assign either the assembly program or source program. We can't have both.
    handle = program->assembly_program.handle;
    if (handle == 0) {
        handle = program->source_program.handle;
@@ -207,16 +207,16 @@ Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderE
    ASSERT(handle != 0);
 }

-Shader::~Shader() = default;
+CachedShader::~CachedShader() = default;

-GLuint Shader::GetHandle() const {
+GLuint CachedShader::GetHandle() const {
    DEBUG_ASSERT(registry->IsConsistent());
    return handle;
 }

-std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
-                                                      Maxwell::ShaderProgram program_type,
-                                                      ProgramCode code, ProgramCode code_b) {
+Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
+                                           Maxwell::ShaderProgram program_type, ProgramCode code,
+                                           ProgramCode code_b) {
    const auto shader_type = GetShaderType(program_type);
    const std::size_t size_in_bytes = code.size() * sizeof(u64);

@@ -241,12 +241,12 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& pa
    entry.bindless_samplers = registry->GetBindlessSamplers();
    params.disk_cache.SaveEntry(std::move(entry));

-    return std::unique_ptr<Shader>(new Shader(
-        std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
+                         MakeEntries(params.device, ir, shader_type), std::move(program)));
 }

-std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
-                                                       ProgramCode code) {
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
    const std::size_t size_in_bytes = code.size() * sizeof(u64);

    auto& engine = params.system.GPU().KeplerCompute();
@@ -266,23 +266,23 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p
    entry.bindless_samplers = registry->GetBindlessSamplers();
    params.disk_cache.SaveEntry(std::move(entry));

-    return std::unique_ptr<Shader>(new Shader(std::move(registry),
-                                              MakeEntries(params.device, ir, ShaderType::Compute),
-                                              std::move(program)));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
+                         MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
 }

-std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
-                                                const PrecompiledShader& precompiled_shader) {
-    return std::unique_ptr<Shader>(new Shader(
-        precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
+Shader CachedShader::CreateFromCache(const ShaderParameters& params,
+                                     const PrecompiledShader& precompiled_shader,
+                                     std::size_t size_in_bytes) {
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
+                         precompiled_shader.entries, precompiled_shader.program));
 }

 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                     Core::Frontend::EmuWindow& emu_window, const Device& device)
-    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
-      emu_window{emu_window}, device{device}, disk_cache{system} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
+    : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
+      disk_cache{system} {}

 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                      const VideoCore::DiskResourceLoadCallback& callback) {
@@ -436,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
    return program;
 }

-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
        return last_shaders[static_cast<std::size_t>(program)];
    }
@@ -446,7 +446,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {

    // Look up shader in the cache based on address
    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
+    if (shader) {
        return last_shaders[static_cast<std::size_t>(program)] = shader;
    }

@@ -467,29 +468,30 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    const ShaderParameters params{system,    disk_cache, device,
                                  *cpu_addr, host_ptr,   unique_identifier};

-    std::unique_ptr<Shader> shader;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
-        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
+        shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
+                                                     std::move(code_b));
    } else {
-        shader = Shader::CreateFromCache(params, found->second);
+        const std::size_t size_in_bytes = code.size() * sizeof(u64);
+        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
    }

-    Shader* const result = shader.get();
    if (cpu_addr) {
-        Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
+        Register(shader);
    } else {
-        null_shader = std::move(shader);
+        null_shader = shader;
    }

-    return last_shaders[static_cast<std::size_t>(program)] = result;
+    return last_shaders[static_cast<std::size_t>(program)] = shader;
 }

-Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
    auto& memory_manager{system.GPU().MemoryManager()};
    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};

-    if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+    if (kernel) {
        return kernel;
    }

@@ -501,21 +503,20 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
    const ShaderParameters params{system,    disk_cache, device,
                                  *cpu_addr, host_ptr,   unique_identifier};

-    std::unique_ptr<Shader> kernel;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
-        kernel = Shader::CreateKernelFromMemory(params, std::move(code));
+        kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
    } else {
-        kernel = Shader::CreateFromCache(params, found->second);
+        const std::size_t size_in_bytes = code.size() * sizeof(u64);
+        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
    }

-    Shader* const result = kernel.get();
    if (cpu_addr) {
-        Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
+        Register(kernel);
    } else {
-        null_kernel = std::move(kernel);
+        null_kernel = kernel;
    }
-    return result;
+    return kernel;
 }

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@

 #include "common/common_types.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
-#include "video_core/shader_cache.h"

 namespace Core {
 class System;
@@ -35,10 +35,12 @@ class EmuWindow;

 namespace OpenGL {

+class CachedShader;
 class Device;
 class RasterizerOpenGL;
 struct UnspecializedShader;

+using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

 struct ProgramHandle {
@@ -62,53 +64,62 @@ struct ShaderParameters {
    u64 unique_identifier;
 };

-class Shader final {
+class CachedShader final : public RasterizerCacheObject {
 public:
-    ~Shader();
+    ~CachedShader();

    /// Gets the GL program handle for the shader
    GLuint GetHandle() const;

+    /// Returns the size in bytes of the shader
+    std::size_t GetSizeInBytes() const override {
+        return size_in_bytes;
+    }
+
    /// Gets the shader entries for the shader
    const ShaderEntries& GetEntries() const {
        return entries;
    }

-    static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
-                                                         Maxwell::ShaderProgram program_type,
-                                                         ProgramCode program_code,
-                                                         ProgramCode program_code_b);
-    static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
-                                                          ProgramCode code);
+    static Shader CreateStageFromMemory(const ShaderParameters& params,
+                                        Maxwell::ShaderProgram program_type,
+                                        ProgramCode program_code, ProgramCode program_code_b);
+    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);

-    static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
-                                                   const PrecompiledShader& precompiled_shader);
+    static Shader CreateFromCache(const ShaderParameters& params,
+                                  const PrecompiledShader& precompiled_shader,
+                                  std::size_t size_in_bytes);

 private:
-    explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
-                    ProgramSharedPtr program);
+    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
+                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
+                          ShaderEntries entries, ProgramSharedPtr program);

    std::shared_ptr<VideoCommon::Shader::Registry> registry;
    ShaderEntries entries;
+    std::size_t size_in_bytes = 0;
    ProgramSharedPtr program;
    GLuint handle = 0;
 };

-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
+class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
 public:
    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                               Core::Frontend::EmuWindow& emu_window, const Device& device);
-    ~ShaderCacheOpenGL() override;

    /// Loads disk cache for the current game
    void LoadDiskCache(const std::atomic_bool& stop_loading,
                       const VideoCore::DiskResourceLoadCallback& callback);

    /// Gets the current specified shader stage program
-    Shader* GetStageProgram(Maxwell::ShaderProgram program);
+    Shader GetStageProgram(Maxwell::ShaderProgram program);

    /// Gets a compute kernel in the passed address
-    Shader* GetComputeKernel(GPUVAddr code_addr);
+    Shader GetComputeKernel(GPUVAddr code_addr);
+
+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const Shader& object) override {}

 private:
    ProgramSharedPtr GeneratePrecompiledProgram(
@@ -121,10 +132,10 @@ private:
    ShaderDiskCacheOpenGL disk_cache;
    std::unordered_map<u64, PrecompiledShader> runtime_cache;

-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
+    Shader null_shader{};
+    Shader null_kernel{};

-    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,7 +37,6 @@ using Tegra::Shader::IpaMode;
 using Tegra::Shader::IpaSampleMode;
 using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
 using VideoCommon::Shader::BuildTransformFeedback;
 using VideoCommon::Shader::Registry;

@@ -527,9 +526,6 @@ private:
        if (device.HasImageLoadFormatted()) {
            code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
        }
-        if (device.HasTextureShadowLod()) {
-            code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
-        }
        if (device.HasWarpIntrinsics()) {
            code.AddLine("#extension GL_NV_gpu_shader5 : require");
            code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -913,13 +909,13 @@ private:
                    return "samplerBuffer";
                }
                switch (sampler.type) {
-                case TextureType::Texture1D:
+                case Tegra::Shader::TextureType::Texture1D:
                    return "sampler1D";
-                case TextureType::Texture2D:
+                case Tegra::Shader::TextureType::Texture2D:
                    return "sampler2D";
-                case TextureType::Texture3D:
+                case Tegra::Shader::TextureType::Texture3D:
                    return "sampler3D";
-                case TextureType::TextureCube:
+                case Tegra::Shader::TextureType::TextureCube:
                    return "samplerCube";
                default:
                    UNREACHABLE();
@@ -1384,19 +1380,8 @@ private:
        const std::size_t count = operation.GetOperandsCount();
        const bool has_array = meta->sampler.is_array;
        const bool has_shadow = meta->sampler.is_shadow;
-        const bool workaround_lod_array_shadow_as_grad =
-            !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
-            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
-             meta->sampler.type == TextureType::TextureCube);
-
-        std::string expr = "texture";
-
-        if (workaround_lod_array_shadow_as_grad) {
-            expr += "Grad";
-        } else {
-            expr += function_suffix;
-        }

+        std::string expr = "texture" + function_suffix;
        if (!meta->aoffi.empty()) {
            expr += "Offset";
        } else if (!meta->ptp.empty()) {
@@ -1430,16 +1415,6 @@ private:
            expr += ')';
        }

-        if (workaround_lod_array_shadow_as_grad) {
-            switch (meta->sampler.type) {
-            case TextureType::Texture2D:
-                return expr + ", vec2(0.0), vec2(0.0))";
-            case TextureType::TextureCube:
-                return expr + ", vec3(0.0), vec3(0.0))";
-            }
-            UNREACHABLE();
-        }
-
        for (const auto& variant : extras) {
            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                expr += GenerateTextureArgument(*argument);
@@ -2066,19 +2041,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr{};
-
-        if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
-            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
-             meta->sampler.type == TextureType::TextureCube)) {
-            LOG_ERROR(Render_OpenGL,
-                      "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
-            expr = GenerateTexture(operation, "Lod", {});
-        } else {
-            expr = GenerateTexture(operation, "Lod",
-                                   {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
-        }
-
+        std::string expr = GenerateTexture(
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        if (meta->sampler.is_shadow) {
            expr = "vec4(" + expr + ')';
        }
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,8 +29,6 @@ using VideoCommon::Shader::KeyMap;

 namespace {

-using VideoCommon::Shader::SeparateSamplerKey;
-
 using ShaderCacheVersionHash = std::array<u8, 64>;

 struct ConstBufferKey {
@@ -39,26 +37,18 @@ struct ConstBufferKey {
    u32 value = 0;
 };

-struct BoundSamplerEntry {
+struct BoundSamplerKey {
    u32 offset = 0;
    Tegra::Engines::SamplerDescriptor sampler;
 };

-struct SeparateSamplerEntry {
-    u32 cbuf1 = 0;
-    u32 cbuf2 = 0;
-    u32 offset1 = 0;
-    u32 offset2 = 0;
-    Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct BindlessSamplerEntry {
+struct BindlessSamplerKey {
    u32 cbuf = 0;
    u32 offset = 0;
    Tegra::Engines::SamplerDescriptor sampler;
 };

-constexpr u32 NativeVersion = 21;
+constexpr u32 NativeVersion = 20;

 ShaderCacheVersionHash GetShaderCacheVersionHash() {
    ShaderCacheVersionHash hash{};
@@ -97,14 +87,12 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
    u32 texture_handler_size_value;
    u32 num_keys;
    u32 num_bound_samplers;
-    u32 num_separate_samplers;
    u32 num_bindless_samplers;
    if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
        file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
        file.ReadArray(&texture_handler_size_value, 1) != 1 ||
        file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
        file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
-        file.ReadArray(&num_separate_samplers, 1) != 1 ||
        file.ReadArray(&num_bindless_samplers, 1) != 1) {
        return false;
    }
@@ -113,32 +101,23 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
    }

    std::vector<ConstBufferKey> flat_keys(num_keys);
-    std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
-    std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
-    std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
+    std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
+    std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
    if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
        file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
            flat_bound_samplers.size() ||
-        file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
-            flat_separate_samplers.size() ||
        file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
            flat_bindless_samplers.size()) {
        return false;
    }
-    for (const auto& entry : flat_keys) {
-        keys.insert({{entry.cbuf, entry.offset}, entry.value});
+    for (const auto& key : flat_keys) {
+        keys.insert({{key.cbuf, key.offset}, key.value});
    }
-    for (const auto& entry : flat_bound_samplers) {
-        bound_samplers.emplace(entry.offset, entry.sampler);
+    for (const auto& key : flat_bound_samplers) {
+        bound_samplers.emplace(key.offset, key.sampler);
    }
-    for (const auto& entry : flat_separate_samplers) {
-        SeparateSamplerKey key;
-        key.buffers = {entry.cbuf1, entry.cbuf2};
-        key.offsets = {entry.offset1, entry.offset2};
-        separate_samplers.emplace(key, entry.sampler);
-    }
-    for (const auto& entry : flat_bindless_samplers) {
-        bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
+    for (const auto& key : flat_bindless_samplers) {
+        bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
    }

    return true;
@@ -163,7 +142,6 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
        file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
        file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
        file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
-        file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
        file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
        return false;
    }
@@ -174,34 +152,22 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
        flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
    }

-    std::vector<BoundSamplerEntry> flat_bound_samplers;
+    std::vector<BoundSamplerKey> flat_bound_samplers;
    flat_bound_samplers.reserve(bound_samplers.size());
    for (const auto& [address, sampler] : bound_samplers) {
-        flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
+        flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
    }

-    std::vector<SeparateSamplerEntry> flat_separate_samplers;
-    flat_separate_samplers.reserve(separate_samplers.size());
-    for (const auto& [key, sampler] : separate_samplers) {
-        SeparateSamplerEntry entry;
-        std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
-        std::tie(entry.offset1, entry.offset2) = key.offsets;
-        entry.sampler = sampler;
-        flat_separate_samplers.push_back(entry);
-    }
-
-    std::vector<BindlessSamplerEntry> flat_bindless_samplers;
+    std::vector<BindlessSamplerKey> flat_bindless_samplers;
    flat_bindless_samplers.reserve(bindless_samplers.size());
    for (const auto& [address, sampler] : bindless_samplers) {
        flat_bindless_samplers.push_back(
-            BindlessSamplerEntry{address.first, address.second, sampler});
+            BindlessSamplerKey{address.first, address.second, sampler});
    }

    return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
           file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
               flat_bound_samplers.size() &&
-           file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
-               flat_separate_samplers.size() &&
           file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
               flat_bindless_samplers.size();
 }
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,7 +57,6 @@ struct ShaderDiskCacheEntry {
    VideoCommon::Shader::ComputeInfo compute_info;
    VideoCommon::Shader::KeyMap keys;
    VideoCommon::Shader::BoundSamplerMap bound_samplers;
-    VideoCommon::Shader::SeparateSamplerMap separate_samplers;
    VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
 };

--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,13 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <tuple>
+#include <deque>
 #include <vector>
-
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

 MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,7 +14,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",

 namespace OpenGL {

-OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
+OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
+                                 bool use_persistent)
    : buffer_size(size) {
    gl_buffer.Create();

@@ -30,22 +29,34 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
        allocate_size *= 2;
    }

-    static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
-    glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
-    mapped_ptr = static_cast<u8*>(
-        glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
-
-    if (device.HasVertexBufferUnifiedMemory()) {
-        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
-        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+    if (use_persistent) {
+        persistent = true;
+        coherent = prefer_coherent;
+        const GLbitfield flags =
+            GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
+        glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+        mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
+            gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
+    } else {
+        glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
    }
 }

 OGLStreamBuffer::~OGLStreamBuffer() {
-    glUnmapNamedBuffer(gl_buffer.handle);
+    if (persistent) {
+        glUnmapNamedBuffer(gl_buffer.handle);
+    }
    gl_buffer.Release();
 }

+GLuint OGLStreamBuffer::GetHandle() const {
+    return gl_buffer.handle;
+}
+
+GLsizeiptr OGLStreamBuffer::GetSize() const {
+    return buffer_size;
+}
+
 std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
    ASSERT(size <= buffer_size);
    ASSERT(alignment <= buffer_size);
@@ -57,21 +68,36 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a

    bool invalidate = false;
    if (buffer_pos + size > buffer_size) {
-        MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
-        glInvalidateBufferData(gl_buffer.handle);
-
        buffer_pos = 0;
        invalidate = true;
+
+        if (persistent) {
+            glUnmapNamedBuffer(gl_buffer.handle);
+        }
    }

-    return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
+    if (invalidate || !persistent) {
+        MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
+        GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
+                           (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
+                           (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
+        mapped_ptr = static_cast<u8*>(
+            glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
+        mapped_offset = buffer_pos;
+    }
+
+    return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
 }

 void OGLStreamBuffer::Unmap(GLsizeiptr size) {
    ASSERT(size <= mapped_size);

-    if (size > 0) {
-        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
+    if (!coherent && size > 0) {
+        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
+    }
+
+    if (!persistent) {
+        glUnmapNamedBuffer(gl_buffer.handle);
    }

    buffer_pos += size;
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,13 +11,15 @@

 namespace OpenGL {

-class Device;
-
 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
+    explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
+                             bool use_persistent = true);
    ~OGLStreamBuffer();

+    GLuint GetHandle() const;
+    GLsizeiptr GetSize() const;
+
    /*
     * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
     * and the optional alignment requirement.
@@ -30,24 +32,15 @@ public:

    void Unmap(GLsizeiptr size);

-    GLuint Handle() const {
-        return gl_buffer.handle;
-    }
-
-    u64 Address() const {
-        return gpu_address;
-    }
-
-    GLsizeiptr Size() const noexcept {
-        return buffer_size;
-    }
-
 private:
    OGLBuffer gl_buffer;

-    GLuint64EXT gpu_address = 0;
+    bool coherent = false;
+    bool persistent = false;
+
    GLintptr buffer_pos = 0;
    GLsizeiptr buffer_size = 0;
+    GLintptr mapped_offset = 0;
    GLsizeiptr mapped_size = 0;
    u8* mapped_ptr = nullptr;
 };
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -263,14 +263,9 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
    target = GetTextureTarget(params.target);
    texture = CreateTexture(params, target, internal_format, texture_buffer);
    DecorateSurfaceName();
-
-    u32 num_layers = 1;
-    if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
-        num_layers = params.depth;
-    }
-
-    main_view =
-        CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
+    main_view = CreateViewInner(
+        ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
+        true);
 }

 CachedSurface::~CachedSurface() = default;
@@ -418,23 +413,20 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p

 CachedSurfaceView::~CachedSurfaceView() = default;

-void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
+void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
    ASSERT(params.num_levels == 1);

-    if (params.target == SurfaceTarget::Texture3D) {
-        if (params.num_layers > 1) {
-            ASSERT(params.base_layer == 0);
-            glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
-        } else {
-            glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
-                                   params.base_level, params.base_layer);
-        }
-        return;
-    }
-
    if (params.num_layers > 1) {
+        // Layered framebuffer attachments
        UNIMPLEMENTED_IF(params.base_layer != 0);
-        glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
+
+        switch (params.target) {
+        case SurfaceTarget::Texture2DArray:
+            glFramebufferTexture(target, attachment, GetTexture(), 0);
+            break;
+        default:
+            UNIMPLEMENTED();
+        }
        return;
    }

@@ -442,16 +434,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
    const GLuint texture = surface.GetTexture();
    switch (surface.GetSurfaceParams().target) {
    case SurfaceTarget::Texture1D:
-        glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
+        glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
        break;
    case SurfaceTarget::Texture2D:
-        glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
+        glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
        break;
    case SurfaceTarget::Texture1DArray:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::TextureCubeArray:
-        glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
+        glFramebufferTextureLayer(target, attachment, texture, params.base_level,
                                  params.base_layer);
        break;
    default:
@@ -508,13 +500,8 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
    OGLTextureView texture_view;
    texture_view.Create();

-    if (target == GL_TEXTURE_3D) {
-        glTextureView(texture_view.handle, target, surface.texture.handle, format,
-                      params.base_level, params.num_levels, 0, 1);
-    } else {
-        glTextureView(texture_view.handle, target, surface.texture.handle, format,
-                      params.base_level, params.num_levels, params.base_layer, params.num_layers);
-    }
+    glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
+                  params.num_levels, params.base_layer, params.num_layers);
    ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);

    return texture_view;
@@ -557,8 +544,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
                                   const Tegra::Engines::Fermi2D::Config& copy_config) {
    const auto& src_params{src_view->GetSurfaceParams()};
    const auto& dst_params{dst_view->GetSurfaceParams()};
-    UNIMPLEMENTED_IF(src_params.depth != 1);
-    UNIMPLEMENTED_IF(dst_params.depth != 1);
+    UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
+    UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);

    state_tracker.NotifyScissor0();
    state_tracker.NotifyFramebuffer();
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,10 +80,8 @@ public:
    explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
    ~CachedSurfaceView();

-    /// @brief Attaches this texture view to the currently bound fb_target framebuffer
-    /// @param attachment   Attachment to bind textures to
-    /// @param fb_target    Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
-    void Attach(GLenum attachment, GLenum fb_target) const;
+    /// Attaches this texture view to the framebuffer currently bound to the given target
+    void Attach(GLenum attachment, GLenum target) const;

    GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
                      Tegra::Texture::SwizzleSource y_source,
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -46,8 +46,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
            return GL_UNSIGNED_INT;
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_UNSIGNED_INT_2_10_10_10_REV;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
        }
-        break;
    case Maxwell::VertexAttribute::Type::SignedInt:
    case Maxwell::VertexAttribute::Type::SignedNorm:
        switch (attrib.size) {
@@ -68,8 +70,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
            return GL_INT;
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_INT_2_10_10_10_REV;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
        }
-        break;
    case Maxwell::VertexAttribute::Type::Float:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_16:
@@ -82,8 +86,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_32_32_32:
        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
            return GL_FLOAT;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
        }
-        break;
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8:
@@ -96,8 +102,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_16_16_16:
        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
            return GL_UNSIGNED_SHORT;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
        }
-        break;
    case Maxwell::VertexAttribute::Type::SignedScaled:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8:
@@ -110,12 +118,14 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_16_16_16:
        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
            return GL_SHORT;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
        }
-        break;
+    default:
+        LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+        return {};
    }
-    UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
-                      attrib.SizeString());
-    return {};
 }

 inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -127,7 +137,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
    case Maxwell::IndexFormat::UnsignedInt:
        return GL_UNSIGNED_INT;
    }
-    UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
+    LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
+    UNREACHABLE();
    return {};
 }

@@ -169,20 +180,10 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
 }

 inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
-                                Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
+                                Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
    switch (filter_mode) {
-    case Tegra::Texture::TextureFilter::Nearest:
-        switch (mipmap_filter_mode) {
-        case Tegra::Texture::TextureMipmapFilter::None:
-            return GL_NEAREST;
-        case Tegra::Texture::TextureMipmapFilter::Nearest:
-            return GL_NEAREST_MIPMAP_NEAREST;
-        case Tegra::Texture::TextureMipmapFilter::Linear:
-            return GL_NEAREST_MIPMAP_LINEAR;
-        }
-        break;
-    case Tegra::Texture::TextureFilter::Linear:
-        switch (mipmap_filter_mode) {
+    case Tegra::Texture::TextureFilter::Linear: {
+        switch (mip_filter_mode) {
        case Tegra::Texture::TextureMipmapFilter::None:
            return GL_LINEAR;
        case Tegra::Texture::TextureMipmapFilter::Nearest:
@@ -192,9 +193,20 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
        }
        break;
    }
-    UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
-                    static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
-    return GL_NEAREST;
+    case Tegra::Texture::TextureFilter::Nearest: {
+        switch (mip_filter_mode) {
+        case Tegra::Texture::TextureMipmapFilter::None:
+            return GL_NEAREST;
+        case Tegra::Texture::TextureMipmapFilter::Nearest:
+            return GL_NEAREST_MIPMAP_NEAREST;
+        case Tegra::Texture::TextureMipmapFilter::Linear:
+            return GL_NEAREST_MIPMAP_LINEAR;
+        }
+        break;
+    }
+    }
+    LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
+    return GL_LINEAR;
 }

 inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -217,9 +229,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
        } else {
            return GL_MIRROR_CLAMP_TO_EDGE;
        }
+    default:
+        LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+        return GL_REPEAT;
    }
-    UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
-    return GL_REPEAT;
 }

 inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -241,7 +254,8 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
    case Tegra::Texture::DepthCompareFunc::Always:
        return GL_ALWAYS;
    }
-    UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
+    LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
+              static_cast<u32>(func));
    return GL_GREATER;
 }

@@ -263,7 +277,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
    case Maxwell::Blend::Equation::MaxGL:
        return GL_MAX;
    }
-    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+    LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
    return GL_FUNC_ADD;
 }

@@ -327,7 +341,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
    case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
        return GL_ONE_MINUS_CONSTANT_ALPHA;
    }
-    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+    LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
    return GL_ZERO;
 }

@@ -347,7 +361,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
    case Tegra::Texture::SwizzleSource::OneFloat:
        return GL_ONE;
    }
-    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
+    LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
    return GL_ZERO;
 }

@@ -378,7 +392,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
    case Maxwell::ComparisonOp::AlwaysOld:
        return GL_ALWAYS;
    }
-    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+    LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
    return GL_ALWAYS;
 }

@@ -409,7 +423,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
    case Maxwell::StencilOp::DecrWrapOGL:
        return GL_DECR_WRAP;
    }
-    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
+    LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
    return GL_KEEP;
 }

@@ -420,7 +434,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
    case Maxwell::FrontFace::CounterClockWise:
        return GL_CCW;
    }
-    UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
+    LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
    return GL_CCW;
 }

@@ -433,7 +447,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
    case Maxwell::CullFace::FrontAndBack:
        return GL_FRONT_AND_BACK;
    }
-    UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+    LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
    return GL_BACK;
 }

@@ -472,7 +486,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
    case Maxwell::LogicOperation::Set:
        return GL_SET;
    }
-    UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
+    LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
    return GL_COPY;
 }

--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -488,15 +488,6 @@ void RendererOpenGL::InitOpenGLObjects() {

    // Clear screen to black
    LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
-
-    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
-    if (device.HasVertexBufferUnifiedMemory()) {
-        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
-
-        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
-        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
-                                         &vertex_buffer_address);
-    }
 }

 void RendererOpenGL::AddTelemetryFields() {
@@ -665,13 +656,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
                         offsetof(ScreenRectVertex, tex_coord));
    glVertexAttribBinding(PositionLocation, 0);
    glVertexAttribBinding(TexCoordLocation, 0);
-    if (device.HasVertexBufferUnifiedMemory()) {
-        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
-        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
-                               sizeof(vertices));
-    } else {
-        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
-    }
+    glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));

    glBindTextureUnit(0, screen_info.display_texture);
    glBindSampler(0, 0);
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -107,9 +107,6 @@ private:
    OGLPipeline pipeline;
    OGLFramebuffer screenshot_framebuffer;

-    // GPU address of the vertex buffer
-    GLuint64EXT vertex_buffer_address = 0;
-
    /// Display information for Switch screen
    ScreenInfo screen_info;

--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {

 VkFilter Filter(Tegra::Texture::TextureFilter filter) {
    switch (filter) {
-    case Tegra::Texture::TextureFilter::Nearest:
-        return VK_FILTER_NEAREST;
    case Tegra::Texture::TextureFilter::Linear:
        return VK_FILTER_LINEAR;
+    case Tegra::Texture::TextureFilter::Nearest:
+        return VK_FILTER_NEAREST;
    }
-    UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
+    UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
    return {};
 }

 VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
    switch (mipmap_filter) {
    case Tegra::Texture::TextureMipmapFilter::None:
-        // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
-        // of GL_LINEAR or GL_NEAREST, but they can be emulated using
-        // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
-        // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
-        return VK_SAMPLER_MIPMAP_MODE_NEAREST;
-    case Tegra::Texture::TextureMipmapFilter::Nearest:
-        return VK_SAMPLER_MIPMAP_MODE_NEAREST;
+        // TODO(Rodrigo): None seems to map to OpenGL's mag and min filters without mipmapping
+        // (e.g. GL_NEAREST and GL_LINEAR). Vulkan has no such mode; find out whether we have to
+        // use an image view with a single mipmap level to emulate this.
+        return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+        ;
    case Tegra::Texture::TextureMipmapFilter::Linear:
        return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+    case Tegra::Texture::TextureMipmapFilter::Nearest:
+        return VK_SAMPLER_MIPMAP_MODE_NEAREST;
    }
-    UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+    UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
    return {};
 }

@@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
        UNIMPLEMENTED();
        return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+        return {};
    }
-    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
-    return {};
 }

 VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -287,9 +288,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    case Maxwell::PrimitiveTopology::Patches:
        return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+        return {};
    }
-    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
-    return {};
 }

 VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,7 +13,6 @@
 #include <fmt/format.h>

 #include "common/dynamic_library.h"
-#include "common/file_util.h"
 #include "common/logging/log.h"
 #include "common/telemetry.h"
 #include "core/core.h"
@@ -77,8 +76,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
    char* libvulkan_env = getenv("LIBVULKAN_PATH");
    if (!libvulkan_env || !library.Open(libvulkan_env)) {
        // Use the libvulkan.dylib from the application bundle.
-        const std::string filename =
-            FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+        std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
        library.Open(filename.c_str());
    }
 #else
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch

 } // Anonymous namespace

-Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
-               VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
-    : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                                     VAddr cpu_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cpu_addr, size} {
    VkBufferCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    ci.pNext = nullptr;
@@ -54,17 +54,46 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKSchedu
    buffer.commit = memory_manager.Commit(buffer.handle, false);
 }

-Buffer::~Buffer() = default;
+CachedBufferBlock::~CachedBufferBlock() = default;

-void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                             const VKDevice& device, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
+                                                                 CreateStreamBuffer(device,
+                                                                                    scheduler)},
+      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+                                                                                staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
+}
+
+VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
+    return buffer->GetHandle();
+}
+
+VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
+    size = std::max(size, std::size_t(4));
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+        cmdbuf.FillBuffer(buffer, 0, size, 0);
+    });
+    return *empty.handle;
+}
+
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                    const u8* data) {
    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
    std::memcpy(staging.commit->Map(size), data, size);

    scheduler.RequestOutsideRenderPassOperationContext();
-
-    const VkBuffer handle = Handle();
-    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
-        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
+    scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
+                      size](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});

        VkBufferMemoryBarrier barrier;
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -73,7 +102,7 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const
        barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = handle;
+        barrier.buffer = buffer;
        barrier.offset = offset;
        barrier.size = size;
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -81,12 +110,12 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const
    });
 }

-void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                      u8* data) {
    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
    scheduler.RequestOutsideRenderPassOperationContext();
-
-    const VkBuffer handle = Handle();
-    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
+                      size](vk::CommandBuffer cmdbuf) {
        VkBufferMemoryBarrier barrier;
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.pNext = nullptr;
@@ -94,7 +123,7 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = handle;
+        barrier.buffer = buffer;
        barrier.offset = offset;
        barrier.size = size;

@@ -102,20 +131,18 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
-        cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
+        cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
    });
    scheduler.Finish();

    std::memcpy(data, staging.commit->Map(size), size);
 }

-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                      std::size_t size) const {
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                              std::size_t dst_offset, std::size_t size) {
    scheduler.RequestOutsideRenderPassOperationContext();
-
-    const VkBuffer dst_buffer = Handle();
-    scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
-                      size](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
+                      dst_offset, size](vk::CommandBuffer cmdbuf) {
        cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});

        std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -142,30 +169,4 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
    });
 }

-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                             const VKDevice& device, VKMemoryManager& memory_manager,
-                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
-    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
-                                                                 CreateStreamBuffer(device,
-                                                                                    scheduler)},
-      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
-                                                                                staging_pool} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
-                                    size);
-}
-
-VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
-    size = std::max(size, std::size_t(4));
-    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
-        cmdbuf.FillBuffer(buffer, 0, size, 0);
-    });
-    return {*empty.handle, 0, 0};
-}
-
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,6 +8,7 @@

 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -23,34 +24,22 @@ class VKDevice;
 class VKMemoryManager;
 class VKScheduler;

-class Buffer final : public VideoCommon::BufferBlock {
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
 public:
-    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
-                    VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
-    ~Buffer();
+    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VAddr cpu_addr, std::size_t size);
+    ~CachedBufferBlock();

-    void Upload(std::size_t offset, std::size_t size, const u8* data) const;
-
-    void Download(std::size_t offset, std::size_t size, u8* data) const;
-
-    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                  std::size_t size) const;
-
-    VkBuffer Handle() const {
+    VkBuffer GetHandle() const {
        return *buffer.handle;
    }

-    u64 Address() const {
-        return 0;
-    }
-
 private:
-    VKScheduler& scheduler;
-    VKStagingBufferPool& staging_pool;
-
    VKBuffer buffer;
 };

+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -58,10 +47,21 @@ public:
                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
    ~VKBufferCache();

-    BufferInfo GetEmptyBuffer(std::size_t size) override;
+    VkBuffer GetEmptyBuffer(std::size_t size) override;

 protected:
-    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    VkBuffer ToHandle(const Buffer& buffer) override;
+
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
+
+    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                         const u8* data) override;
+
+    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                           u8* data) override;
+
+    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                   std::size_t dst_offset, std::size_t size) override;

 private:
    const VKDevice& device;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,7 +27,6 @@
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
-#include "video_core/shader_cache.h"

 namespace Vulkan {

@@ -133,18 +132,19 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
    return std::memcmp(&rhs, this, sizeof *this) == 0;
 }

-Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-               VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
-    : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
+                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
+                           u32 main_offset)
+    : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
      registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
                                                           compiler_settings, registry},
      entries{GenerateShaderEntries(shader_ir)} {}

-Shader::~Shader() = default;
+CachedShader::~CachedShader() = default;

-Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
-                                                              Tegra::Engines::ShaderType stage) {
-    if (stage == ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
+    Core::System& system, Tegra::Engines::ShaderType stage) {
+    if (stage == Tegra::Engines::ShaderType::Compute) {
        return system.GPU().KeplerCompute();
    } else {
        return system.GPU().Maxwell3D();
@@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
                                 VKDescriptorPool& descriptor_pool,
                                 VKUpdateDescriptorQueue& update_descriptor_queue,
                                 VKRenderPassCache& renderpass_cache)
-    : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
-      scheduler{scheduler}, descriptor_pool{descriptor_pool},
-      update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
+    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
+      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
+      renderpass_cache{renderpass_cache} {}

 VKPipelineCache::~VKPipelineCache() = default;

-std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
    const auto& gpu = system.GPU().Maxwell3D();

-    std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
+    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};

@@ -178,28 +178,24 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
        ASSERT(cpu_addr);
-
-        Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
-        if (!result) {
+        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
+        if (!shader) {
            const auto host_ptr{memory_manager.GetPointer(program_addr)};

            // No shader found - create a new one
            constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
-            const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
+            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
            ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
-            const std::size_t size_in_bytes = code.size() * sizeof(u64);
-
-            auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
-                                                   stage_offset);
-            result = shader.get();

+            shader = std::make_shared<CachedShader>( // cpu_addr may be empty; never deref it
+                system, stage, program_addr, cpu_addr.value_or(0), std::move(code), stage_offset);
            if (cpu_addr) {
-                Register(std::move(shader), *cpu_addr, size_in_bytes);
+                Register(shader);
            } else {
-                null_shader = std::move(shader);
+                null_shader = shader;
            }
        }
-        shaders[index] = result;
+        shaders[index] = std::move(shader);
    }
    return last_shaders = shaders;
 }
@@ -240,22 +236,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
    ASSERT(cpu_addr);

-    Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
+    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
    if (!shader) {
        // No shader found - create a new one
        const auto host_ptr = memory_manager.GetPointer(program_addr);

        ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
-        const std::size_t size_in_bytes = code.size() * sizeof(u64);
-
-        auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
-                                                    std::move(code), KERNEL_MAIN_OFFSET);
-        shader = shader_info.get();
-
+        shader = std::make_shared<CachedShader>(
+            system, Tegra::Engines::ShaderType::Compute, program_addr, cpu_addr.value_or(0),
+            std::move(code), KERNEL_MAIN_OFFSET); // cpu_addr may be empty here; never deref it
        if (cpu_addr) {
-            Register(std::move(shader_info), *cpu_addr, size_in_bytes);
+            Register(shader);
        } else {
-            null_kernel = std::move(shader_info);
+            null_kernel = shader;
        }
    }

@@ -271,7 +264,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
    return *entry;
 }

-void VKPipelineCache::OnShaderRemoval(Shader* shader) {
+void VKPipelineCache::Unregister(const Shader& shader) {
    bool finished = false;
    const auto Finish = [&] {
        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -303,6 +296,8 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
        Finish();
        it = compute_cache.erase(it);
    }
+
+    RasterizerCache::Unregister(shader);
 }

 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -337,11 +332,12 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
        }

        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
-        const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+        const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
+        ASSERT(shader);

        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
-        const ShaderType program_type = GetShaderType(program_enum);
+        const auto program_type = GetShaderType(program_enum);
        const auto& entries = shader->GetEntries();
        program[stage] = {
            Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,6 +17,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -25,7 +26,6 @@
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
-#include "video_core/shader_cache.h"

 namespace Core {
 class System;
@@ -41,6 +41,8 @@ class VKFence;
 class VKScheduler;
 class VKUpdateDescriptorQueue;

+class CachedShader;
+using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

 struct GraphicsPipelineCacheKey {
@@ -100,16 +102,21 @@ struct hash<Vulkan::ComputePipelineCacheKey> {

 namespace Vulkan {

-class Shader {
+class CachedShader final : public RasterizerCacheObject {
 public:
-    explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                    VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
-    ~Shader();
+    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
+                          u32 main_offset);
+    ~CachedShader();

    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }

+    std::size_t GetSizeInBytes() const override {
+        return program_code.size() * sizeof(u64);
+    }
+
    VideoCommon::Shader::ShaderIR& GetIR() {
        return shader_ir;
    }
@@ -137,23 +144,25 @@ private:
    ShaderEntries entries;
 };

-class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class VKPipelineCache final : public RasterizerCache<Shader> {
 public:
    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
                             const VKDevice& device, VKScheduler& scheduler,
                             VKDescriptorPool& descriptor_pool,
                             VKUpdateDescriptorQueue& update_descriptor_queue,
                             VKRenderPassCache& renderpass_cache);
-    ~VKPipelineCache() override;
+    ~VKPipelineCache();

-    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
+    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();

    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);

    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);

 protected:
-    void OnShaderRemoval(Shader* shader) final;
+    void Unregister(const Shader& shader) override;
+
+    void FlushObjectInner(const Shader& object) override {}

 private:
    std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -166,10 +175,10 @@ private:
    VKUpdateDescriptorQueue& update_descriptor_queue;
    VKRenderPassCache& renderpass_cache;

-    std::unique_ptr<Shader> null_shader;
-    std::unique_ptr<Shader> null_kernel;
+    Shader null_shader{};
+    Shader null_kernel{};

-    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;

    GraphicsPipelineCacheKey last_graphics_key;
    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,7 +38,6 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/shader_cache.h"

 namespace Vulkan {

@@ -99,7 +98,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
 }

 std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
-    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
    for (std::size_t i = 0; i < std::size(addresses); ++i) {
        addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -118,17 +117,6 @@ template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
                                               std::size_t stage, std::size_t index = 0) {
    const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
-    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
-        if (entry.is_separated) {
-            const u32 buffer_1 = entry.buffer;
-            const u32 buffer_2 = entry.secondary_buffer;
-            const u32 offset_1 = entry.offset;
-            const u32 offset_2 = entry.secondary_offset;
-            const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
-            const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
-            return engine.GetTextureInfo(handle_1 | handle_2);
-        }
-    }
    if (entry.is_bindless) {
        const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
        return engine.GetTextureInfo(tex_handle);
@@ -728,7 +716,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
        if (!view) {
            return false;
        }
-        key.views.push_back(view->GetAttachment());
+        key.views.push_back(view->GetHandle());
        key.width = std::min(key.width, view->GetWidth());
        key.height = std::min(key.height, view->GetHeight());
        key.layers = std::min(key.layers, view->GetNumLayers());
@@ -788,12 +776,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
 }

 void RasterizerVulkan::SetupShaderDescriptors(
-    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
+    const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
    texture_cache.GuardSamplers(true);

    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        // Skip VertexA stage
-        Shader* const shader = shaders[stage + 1];
+        const auto& shader = shaders[stage + 1];
        if (!shader) {
            continue;
        }
@@ -870,10 +858,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
    UNIMPLEMENTED_IF(binding.buffer_offset != 0);

    const GPUVAddr gpu_addr = binding.Address();
-    const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
-    const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+    const std::size_t size = binding.buffer_size;
+    const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);

-    scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
        cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
    });
@@ -925,8 +913,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
            buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
            continue;
        }
-        const auto info = buffer_cache.UploadMemory(start, size);
-        buffer_bindings.AddVertexBinding(info.handle, info.offset);
+        const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
+        buffer_bindings.AddVertexBinding(buffer, offset);
    }
 }

@@ -948,9 +936,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
            break;
        }
        const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
-        VkBuffer buffer = info.handle;
-        u64 offset = info.offset;
+        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
        std::tie(buffer, offset) = quad_indexed_pass.Assemble(
            regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);

@@ -964,9 +950,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
            break;
        }
        const GPUVAddr gpu_addr = regs.index_array.IndexStart();
-        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
-        VkBuffer buffer = info.handle;
-        u64 offset = info.offset;
+        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());

        auto format = regs.index_array.format;
        const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -1113,9 +1097,10 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
        Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
    ASSERT(size <= MaxConstbufferSize);

-    const auto info =
+    const auto [buffer_handle, offset] =
        buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
-    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
+
+    update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
 }

 void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1129,14 +1114,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
        // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
        // default buffer.
        static constexpr std::size_t dummy_size = 4;
-        const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
-        update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
+        const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
+        update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
        return;
    }

-    const auto info = buffer_cache.UploadMemory(
+    const auto [buffer, offset] = buffer_cache.UploadMemory(
        actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
-    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
+    update_descriptor_queue.AddBuffer(buffer, offset, size);
 }

 void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
@@ -1152,12 +1137,12 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
    auto view = texture_cache.GetTextureSurface(texture.tic, entry);
    ASSERT(!view->IsBufferView());

-    const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
-                                                      texture.tic.z_source, texture.tic.w_source);
+    const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
+                                            texture.tic.z_source, texture.tic.w_source);
    const auto sampler = sampler_cache.GetSampler(texture.tsc);
    update_descriptor_queue.AddSampledImage(sampler, image_view);

-    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
+    const auto image_layout = update_descriptor_queue.GetLastImageLayout();
    *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    sampled_views.push_back(ImageView{std::move(view), image_layout});
 }
@@ -1179,11 +1164,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima

    UNIMPLEMENTED_IF(tic.IsBuffer());

-    const VkImageView image_view =
-        view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+    const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
    update_descriptor_queue.AddImage(image_view);

-    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
+    const auto image_layout = update_descriptor_queue.GetLastImageLayout();
    *image_layout = VK_IMAGE_LAYOUT_GENERAL;
    image_views.push_back(ImageView{std::move(view), image_layout});
 }
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -168,7 +168,7 @@ private:
                                 bool is_indexed, bool is_instanced);

    /// Setup descriptors in the graphics pipeline.
-    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
+    void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);

    void SetupImageTransitions(Texceptions texceptions,
                               const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,8 +9,6 @@
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/textures/texture.h"

-using Tegra::Texture::TextureMipmapFilter;
-
 namespace Vulkan {

 namespace {
@@ -65,8 +63,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
    ci.maxAnisotropy = tsc.GetMaxAnisotropy();
    ci.compareEnable = tsc.depth_compare_enabled;
    ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
-    ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
-    ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
+    ci.minLod = tsc.GetMinLod();
+    ci.maxLod = tsc.GetMaxLod();
    ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
    ci.unnormalizedCoordinates = VK_FALSE;
    return device.GetLogical().CreateSampler(ci);
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,14 +35,10 @@ public:
    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
    void Unmap(u64 size);

-    VkBuffer Handle() const noexcept {
+    VkBuffer GetHandle() const {
        return *buffer;
    }

-    u64 Address() const noexcept {
-        return 0;
-    }
-
 private:
    struct Watch final {
        VKFenceWatch fence;
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -167,7 +167,6 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
        ci.extent = {params.width, params.height, 1};
        break;
    case SurfaceTarget::Texture3D:
-        ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
        ci.extent = {params.width, params.height, params.depth};
        break;
    case SurfaceTarget::TextureBuffer:
@@ -177,12 +176,6 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
    return ci;
 }

-u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
-                  Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
-    return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
-           (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
-}
-
 } // Anonymous namespace

 CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -210,11 +203,9 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
    }

    // TODO(Rodrigo): Move this to a virtual function.
-    u32 num_layers = 1;
-    if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
-        num_layers = params.depth;
-    }
-    main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
+    main_view = CreateViewInner(
+        ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
+        true);
 }

 CachedSurface::~CachedSurface() = default;
@@ -262,8 +253,12 @@ void CachedSurface::DecorateSurfaceName() {
 }

 View CachedSurface::CreateView(const ViewParams& params) {
+    return CreateViewInner(params, false);
+}
+
+View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
    // TODO(Rodrigo): Add name decorations
-    return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
+    return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
 }

 void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -347,27 +342,18 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
 }

 CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
-                                     const ViewParams& params)
+                                     const ViewParams& params, bool is_proxy)
    : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
      image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
      aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
-      base_level{params.base_level}, num_levels{params.num_levels},
-      image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
-    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
-        base_layer = 0;
-        num_layers = 1;
-        base_slice = params.base_layer;
-        num_slices = params.num_layers;
-    } else {
-        base_layer = params.base_layer;
-        num_layers = params.num_layers;
-    }
-}
+      base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
+      num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
+                                                           : VK_IMAGE_VIEW_TYPE_1D} {}

 CachedSurfaceView::~CachedSurfaceView() = default;

-VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
-                                            SwizzleSource z_source, SwizzleSource w_source) {
+VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
+                                         SwizzleSource z_source, SwizzleSource w_source) {
    const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
    if (last_image_view && last_swizzle == new_swizzle) {
        return last_image_view;
@@ -413,11 +399,6 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
            });
    }

-    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
-        ASSERT(base_slice == 0);
-        ASSERT(num_slices == params.depth);
-    }
-
    VkImageViewCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
    ci.pNext = nullptr;
@@ -436,35 +417,6 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc
    return last_image_view = *image_view;
 }

-VkImageView CachedSurfaceView::GetAttachment() {
-    if (render_target) {
-        return *render_target;
-    }
-
-    VkImageViewCreateInfo ci;
-    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-    ci.pNext = nullptr;
-    ci.flags = 0;
-    ci.image = surface.GetImageHandle();
-    ci.format = surface.GetImage().GetFormat();
-    ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
-                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
-    ci.subresourceRange.aspectMask = aspect_mask;
-    ci.subresourceRange.baseMipLevel = base_level;
-    ci.subresourceRange.levelCount = num_levels;
-    if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
-        ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
-        ci.subresourceRange.baseArrayLayer = base_slice;
-        ci.subresourceRange.layerCount = num_slices;
-    } else {
-        ci.viewType = image_view_type;
-        ci.subresourceRange.baseArrayLayer = base_layer;
-        ci.subresourceRange.layerCount = num_layers;
-    }
-    render_target = device.GetLogical().CreateImageView(ci);
-    return *render_target;
-}
-
 VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                               const VKDevice& device, VKResourceManager& resource_manager,
                               VKMemoryManager& memory_manager, VKScheduler& scheduler,
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,6 +91,7 @@ protected:
    void DecorateSurfaceName();

    View CreateView(const ViewParams& params) override;
+    View CreateViewInner(const ViewParams& params, bool is_proxy);

 private:
    void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -119,20 +120,23 @@ private:
 class CachedSurfaceView final : public VideoCommon::ViewBase {
 public:
    explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
-                               const ViewParams& params);
+                               const ViewParams& params, bool is_proxy);
    ~CachedSurfaceView();

-    VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
-                             Tegra::Texture::SwizzleSource y_source,
-                             Tegra::Texture::SwizzleSource z_source,
-                             Tegra::Texture::SwizzleSource w_source);
-
-    VkImageView GetAttachment();
+    VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
+                          Tegra::Texture::SwizzleSource y_source,
+                          Tegra::Texture::SwizzleSource z_source,
+                          Tegra::Texture::SwizzleSource w_source);

    bool IsSameSurface(const CachedSurfaceView& rhs) const {
        return &surface == &rhs.surface;
    }

+    VkImageView GetHandle() {
+        return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
+                         Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
+    }
+
    u32 GetWidth() const {
        return params.GetMipWidth(base_level);
    }
@@ -176,6 +180,14 @@ public:
    }

 private:
+    static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
+                             Tegra::Texture::SwizzleSource y_source,
+                             Tegra::Texture::SwizzleSource z_source,
+                             Tegra::Texture::SwizzleSource w_source) {
+        return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+               (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+    }
+
    // Store a copy of these values to avoid double dereference when reading them
    const SurfaceParams params;
    const VkImage image;
@@ -184,18 +196,15 @@ private:

    const VKDevice& device;
    CachedSurface& surface;
+    const u32 base_layer;
+    const u32 num_layers;
    const u32 base_level;
    const u32 num_levels;
    const VkImageViewType image_view_type;
-    u32 base_layer = 0;
-    u32 num_layers = 0;
-    u32 base_slice = 0;
-    u32 num_slices = 0;

    VkImageView last_image_view = nullptr;
    u32 last_swizzle = 0;

-    vk::ImageView render_target;
    std::unordered_map<u32, vk::ImageView> view_cache;
 };

--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,25 +24,35 @@ void VKUpdateDescriptorQueue::TickFrame() {
 }

 void VKUpdateDescriptorQueue::Acquire() {
-    // Minimum number of entries required.
-    // This is the maximum number of entries a single draw call migth use.
-    static constexpr std::size_t MIN_ENTRIES = 0x400;
-
-    if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
-        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
-        scheduler.WaitWorker();
-        payload.clear();
-    }
-    upload_start = &*payload.end();
+    entries.clear();
 }

 void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
                                   VkDescriptorSet set) {
-    const void* const data = upload_start;
-    const vk::Device* const logical = &device.GetLogical();
-    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
-        logical->UpdateDescriptorSet(set, update_template, data);
-    });
+    if (payload.size() + entries.size() >= payload.max_size()) {
+        LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
+        scheduler.WaitWorker();
+        payload.clear();
+    }
+
+    // TODO(Rodrigo): Rework to write the payload directly
+    const auto payload_start = payload.data() + payload.size();
+    for (const auto& entry : entries) {
+        if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
+            payload.push_back(*image);
+        } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
+            payload.push_back(*buffer);
+        } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
+            payload.push_back(*texel);
+        } else {
+            UNREACHABLE();
+        }
+    }
+
+    scheduler.Record(
+        [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
+            logical->UpdateDescriptorSet(set, update_template, payload_start);
+        });
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,13 +15,17 @@ namespace Vulkan {
 class VKDevice;
 class VKScheduler;

-struct DescriptorUpdateEntry {
-    DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
+class DescriptorUpdateEntry {
+public:
+    explicit DescriptorUpdateEntry() {}

-    DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
+    DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}

-    DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
+    DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}

+    DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
+
+private:
    union {
        VkDescriptorImageInfo image;
        VkDescriptorBufferInfo buffer;
@@ -41,34 +45,32 @@ public:
    void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);

    void AddSampledImage(VkSampler sampler, VkImageView image_view) {
-        payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+        entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
    }

    void AddImage(VkImageView image_view) {
-        payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+        entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
    }

    void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
-        payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+        entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
    }

    void AddTexelBuffer(VkBufferView texel_buffer) {
-        payload.emplace_back(texel_buffer);
+        entries.emplace_back(texel_buffer);
    }

-    VkImageLayout* LastImageLayout() {
-        return &payload.back().image.imageLayout;
-    }
-
-    const VkImageLayout* LastImageLayout() const {
-        return &payload.back().image.imageLayout;
+    VkImageLayout* GetLastImageLayout() {
+        return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
    }

 private:
+    using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
+
    const VKDevice& device;
    VKScheduler& scheduler;

-    const DescriptorUpdateEntry* upload_start = nullptr;
+    boost::container::static_vector<Variant, 0x400> entries;
    boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
 };

--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -725,7 +725,8 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
    return supported == VK_TRUE;
 }

-VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
+    noexcept {
    VkSurfaceCapabilitiesKHR capabilities;
    Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
    return capabilities;
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -779,7 +779,7 @@ public:

    bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;

-    VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
+    VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;

    std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;

--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,7 +97,6 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
        break;
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
        if (component == 0) {
            return descriptor.b_type;
        }
@@ -120,7 +119,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
        }
        break;
    }
-    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+    UNIMPLEMENTED_MSG("texture format not implement={}", format);
    return ComponentType::FLOAT;
 }

@@ -192,14 +191,6 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
            return 6;
        }
        return 0;
-    case TextureFormat::BF10GF11RF11:
-        if (component == 1 || component == 2) {
-            return 11;
-        }
-        if (component == 0) {
-            return 10;
-        }
-        return 0;
    case TextureFormat::G8R24:
        if (component == 0) {
            return 8;
@@ -220,9 +211,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
        return (component == 0 || component == 1) ? 8 : 0;
    case TextureFormat::G4R4:
        return (component == 0 || component == 1) ? 4 : 0;
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return 0;
    }
-    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
-    return 0;
 }

 std::size_t GetImageComponentMask(TextureFormat format) {
@@ -243,7 +235,6 @@ std::size_t GetImageComponentMask(TextureFormat format) {
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
        return std::size_t{R | G | B};
    case TextureFormat::R32_G32:
    case TextureFormat::R16_G16:
@@ -257,9 +248,10 @@ std::size_t GetImageComponentMask(TextureFormat format) {
    case TextureFormat::R8:
    case TextureFormat::R1:
        return std::size_t{R};
+    default:
+        UNIMPLEMENTED_MSG("texture format not implement={}", format);
+        return std::size_t{R | G | B | A};
    }
-    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
-    return std::size_t{R | G | B | A};
 }

 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -307,7 +299,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
            return {std::move(original_value), true};
        }
    default:
-        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
+        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
        return {std::move(original_value), true};
    }
 }
@@ -467,7 +459,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
            default:
                break;
            }
-            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
+            UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
                              static_cast<u64>(instr.suatom_d.operation.Value()),
                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
            return OperationCode::AtomicImageAdd;
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,11 +357,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    return pc;
 }

-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
-    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
+                                               std::optional<u32> buffer) {
    if (info.IsComplete()) {
        return info;
    }
+    const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
+                                : registry.ObtainBoundSampler(offset);
    if (!sampler) {
        LOG_WARNING(HW_GPU, "Unknown sampler info");
        info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -379,8 +381,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(

 std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                            SamplerInfo sampler_info) {
-    const u32 offset = static_cast<u32>(sampler.index.Value());
-    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
+    const auto offset = static_cast<u32>(sampler.index.Value());
+    const auto info = GetSamplerInfo(sampler_info, offset);

    // If this sampler has already been used, return the existing mapping.
    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -402,19 +404,20 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
    const Node sampler_register = GetRegister(reg);
    const auto [base_node, tracked_sampler_info] =
        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    if (!base_node) {
-        UNREACHABLE();
+    ASSERT(base_node != nullptr);
+    if (base_node == nullptr) {
        return std::nullopt;
    }

-    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
-        const u32 buffer = sampler_info->index;
-        const u32 offset = sampler_info->offset;
-        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
+    if (const auto bindless_sampler_info =
+            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = bindless_sampler_info->GetIndex();
+        const u32 offset = bindless_sampler_info->GetOffset();
+        info = GetSamplerInfo(info, offset, buffer);

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                     [buffer, offset](const Sampler& entry) {
+                                     [buffer = buffer, offset = offset](const Sampler& entry) {
                                         return entry.buffer == buffer && entry.offset == offset;
                                     });
        if (it != used_samplers.end()) {
@@ -428,32 +431,10 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, false);
    }
-    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
-        const std::pair indices = sampler_info->indices;
-        const std::pair offsets = sampler_info->offsets;
-        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
-
-        // Try to use an already created sampler if it exists
-        const auto it = std::find_if(
-            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
-                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
-                       indices == std::pair{entry.buffer, entry.secondary_buffer};
-            });
-        if (it != used_samplers.end()) {
-            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
-                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
-            return *it;
-        }
-
-        // Otherwise create a new mapping for this sampler
-        const u32 next_index = static_cast<u32>(used_samplers.size());
-        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
-                                          *info.is_shadow, *info.is_buffer);
-    }
-    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
-        const u32 base_offset = sampler_info->base_offset / 4;
-        index_var = GetCustomVariable(sampler_info->bindless_var);
-        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
+    if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
+        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
+        info = GetSamplerInfo(info, base_offset);

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add

 u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
                        const ProgramCode& code_b) {
-    size_t unique_identifier = boost::hash_value(code);
+    u64 unique_identifier = boost::hash_value(code);
    if (is_a) {
        // VertexA programs include two programs
        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
    }
-    return static_cast<u64>(unique_identifier);
+    return unique_identifier;
 }

 } // namespace VideoCommon::Shader
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
unknown	8cf6efe677	Reorder variables to comply with the Auzure build pipeline	2020-06-22 15:56:41 +02:00
unknown	20a779299a	Add game versio to title bar	2020-06-08 23:58:04 +02:00