texture_cache: Reintroduce preserve_contents accurately

This reverts commit 94b0e2e5da. preserve_contents proved to be a meaningful optimization. This commit reintroduces it but properly implemented on OpenGL. We have to make sure the clear removes all the previous contents of the image. It's not currently implemented on Vulkan because we can do smart things there that's preferred to be introduced in a separate commit.
2020-04-26 19:53:02 -03:00
63 changed files with 434 additions and 666 deletions
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,9 +185,10 @@ void ARM_Dynarmic_64::Step() {

 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
                                 std::size_t core_index)
-    : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
-      inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
-      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
+    : ARM_Interface{system},
+      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system},
+      core_index{core_index}, exclusive_monitor{
+                                  dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

 ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;

--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -62,9 +62,8 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
    return false;
 }

-ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} {
-    const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
-    CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
+ARM_Unicorn::ARM_Unicorn(System& system) : ARM_Interface{system} {
+    CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));

    auto fpv = 3 << 20;
    CHECKED(uc_reg_write(uc, UC_ARM64_REG_CPACR_EL1, &fpv));
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -15,12 +15,7 @@ class System;

 class ARM_Unicorn final : public ARM_Interface {
 public:
-    enum class Arch {
-        AArch32, // 32-bit ARM
-        AArch64, // 64-bit ARM
-    };
-
-    explicit ARM_Unicorn(System& system, Arch architecture);
+    explicit ARM_Unicorn(System& system);
    ~ARM_Unicorn() override;

    void SetPC(u64 pc) override;
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -30,7 +30,6 @@ HandleTable::~HandleTable() = default;

 ResultCode HandleTable::SetSize(s32 handle_table_size) {
    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
-        LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
        return ERR_OUT_OF_MEMORY;
    }

@@ -81,7 +80,6 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {

 ResultCode HandleTable::Close(Handle handle) {
    if (!IsValid(handle)) {
-        LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
        return ERR_INVALID_HANDLE;
    }

--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -7,7 +7,6 @@
 #include <vector>

 #include "common/assert.h"
-#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
@@ -68,7 +67,6 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
                             Handle requesting_thread_handle) {
    // The mutex address must be 4-byte aligned
    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
        return ERR_INVALID_ADDRESS;
    }

@@ -90,8 +88,6 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
    }

    if (holding_thread == nullptr) {
-        LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}",
-                  holding_thread_handle);
        return ERR_INVALID_HANDLE;
    }

@@ -113,7 +109,6 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
 ResultCode Mutex::Release(VAddr address) {
    // The mutex address must be 4-byte aligned
    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
        return ERR_INVALID_ADDRESS;
    }

--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -27,9 +27,7 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
        std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);

 #else
-    using Core::ARM_Unicorn;
-    arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32);
-    arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64);
+    arm_interface = std::make_shared<Core::ARM_Unicorn>(system);
    LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif

--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.

 #include "common/bit_util.h"
-#include "common/logging/log.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/memory/page_table.h"
@@ -120,30 +119,22 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
            // The MapPhysical type uses two descriptor flags for its parameters.
            // If there's only one, then there's a problem.
            if (i >= num_capabilities) {
-                LOG_ERROR(Kernel, "Invalid combination! i={}", i);
                return ERR_INVALID_COMBINATION;
            }

            const auto size_flags = capabilities[i];
            if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
-                LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
                return ERR_INVALID_COMBINATION;
            }

            const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
            if (result.IsError()) {
-                LOG_ERROR(Kernel, "Failed to map physical flags! descriptor={}, size_flags={}",
-                          descriptor, size_flags);
                return result;
            }
        } else {
            const auto result =
                ParseSingleFlagCapability(set_flags, set_svc_bits, descriptor, page_table);
            if (result.IsError()) {
-                LOG_ERROR(
-                    Kernel,
-                    "Failed to parse capability flag! set_flags={}, set_svc_bits={}, descriptor={}",
-                    set_flags, set_svc_bits, descriptor);
                return result;
            }
        }
@@ -171,9 +162,6 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
    const u32 flag_length = GetFlagBitOffset(type);
    const u32 set_flag = 1U << flag_length;
    if ((set_flag & set_flags & InitializeOnceMask) != 0) {
-        LOG_ERROR(Kernel,
-                  "Attempted to initialize flags that may only be initialized once. set_flags={}",
-                  set_flags);
        return ERR_INVALID_COMBINATION;
    }
    set_flags |= set_flag;
@@ -199,7 +187,6 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
        break;
    }

-    LOG_ERROR(Kernel, "Invalid capability type! type={}", static_cast<u32>(type));
    return ERR_INVALID_CAPABILITY_DESCRIPTOR;
 }

@@ -221,31 +208,23 @@ void ProcessCapabilities::Clear() {

 ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
    if (priority_mask != 0 || core_mask != 0) {
-        LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
-                  priority_mask, core_mask);
        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
    }

    const u32 core_num_min = (flags >> 16) & 0xFF;
    const u32 core_num_max = (flags >> 24) & 0xFF;
    if (core_num_min > core_num_max) {
-        LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
-                  core_num_min, core_num_max);
        return ERR_INVALID_COMBINATION;
    }

    const u32 priority_min = (flags >> 10) & 0x3F;
    const u32 priority_max = (flags >> 4) & 0x3F;
    if (priority_min > priority_max) {
-        LOG_ERROR(Kernel,
-                  "Priority min is greater than priority max! priority_min={}, priority_max={}",
-                  core_num_min, priority_max);
        return ERR_INVALID_COMBINATION;
    }

    // The switch only has 4 usable cores.
    if (core_num_max >= 4) {
-        LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
        return ERR_INVALID_PROCESSOR_ID;
    }

@@ -280,7 +259,6 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)
        }

        if (svc_number >= svc_capabilities.size()) {
-            LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
            return ERR_OUT_OF_RANGE;
        }

@@ -317,8 +295,6 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
        // emulate that, it's sufficient to mark every interrupt as defined.

        if (interrupt >= interrupt_capabilities.size()) {
-            LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
-                      interrupt);
            return ERR_OUT_OF_RANGE;
        }

@@ -331,7 +307,6 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
 ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
    const u32 reserved = flags >> 17;
    if (reserved != 0) {
-        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
        return ERR_RESERVED_VALUE;
    }

@@ -349,9 +324,6 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
    const u32 major_version = kernel_version >> 19;

    if (major_version != 0 || flags < 0x80000) {
-        LOG_ERROR(Kernel,
-                  "Kernel version is non zero or flags are too small! major_version={}, flags={}",
-                  major_version, flags);
        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
    }

@@ -362,7 +334,6 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
 ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
    const u32 reserved = flags >> 26;
    if (reserved != 0) {
-        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
        return ERR_RESERVED_VALUE;
    }

@@ -373,7 +344,6 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
 ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
    const u32 reserved = flags >> 19;
    if (reserved != 0) {
-        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
        return ERR_RESERVED_VALUE;
    }

--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -4,7 +4,6 @@

 #include <algorithm>
 #include "common/assert.h"
-#include "common/logging/log.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/readable_event.h"
@@ -36,8 +35,6 @@ void ReadableEvent::Clear() {

 ResultCode ReadableEvent::Reset() {
    if (!is_signaled) {
-        LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
-                  GetObjectId(), GetTypeName(), GetName());
        return ERR_INVALID_STATE;
    }

--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -69,8 +69,6 @@ ResultCode ResourceLimit::SetLimitValue(ResourceType resource, s64 value) {
        limit[index] = value;
        return RESULT_SUCCESS;
    } else {
-        LOG_ERROR(Kernel, "Limit value is too large! resource={}, value={}, index={}",
-                  static_cast<u32>(resource), value, index);
        return ERR_INVALID_STATE;
    }
 }
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -685,8 +685,6 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
        if (info_sub_id != 0) {
-            LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
-                      info_sub_id);
            return ERR_INVALID_ENUM_VALUE;
        }

@@ -694,8 +692,6 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
            system.Kernel().CurrentProcess()->GetHandleTable();
        const auto process = current_process_handle_table.Get<Process>(static_cast<Handle>(handle));
        if (!process) {
-            LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
-                      info_id, info_sub_id, handle);
            return ERR_INVALID_HANDLE;
        }

@@ -777,7 +773,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
            break;
        }

-        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
+        LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
        return ERR_INVALID_ENUM_VALUE;
    }

@@ -787,13 +783,10 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha

    case GetInfoType::RegisterResourceLimit: {
        if (handle != 0) {
-            LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
            return ERR_INVALID_HANDLE;
        }

        if (info_sub_id != 0) {
-            LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
-                      info_sub_id);
            return ERR_INVALID_COMBINATION;
        }

@@ -873,7 +866,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    }

    default:
-        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
+        LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
        return ERR_INVALID_ENUM_VALUE;
    }
 }
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -423,8 +423,6 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
    if (new_core == THREADPROCESSORID_DONT_UPDATE) {
        new_core = use_override ? ideal_core_override : ideal_core;
        if ((new_affinity_mask & (1ULL << new_core)) == 0) {
-            LOG_ERROR(Kernel, "New affinity mask is incorrect! new_core={}, new_affinity_mask={}",
-                      new_core, new_affinity_mask);
            return ERR_INVALID_COMBINATION;
        }
    }
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,9 +43,9 @@

 namespace Service::AM {

-constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 2};
-constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 3};
-constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 503};
+constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 0x2};
+constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 0x3};
+constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 0x1F7};

 enum class LaunchParameterKind : u32 {
    ApplicationSpecific = 1,
--- a/src/core/hle/service/glue/errors.h
+++ b/src/core/hle/service/glue/errors.h
@@ -8,9 +8,9 @@

 namespace Service::Glue {

-constexpr ResultCode ERR_INVALID_RESOURCE{ErrorModule::ARP, 30};
-constexpr ResultCode ERR_INVALID_PROCESS_ID{ErrorModule::ARP, 31};
-constexpr ResultCode ERR_INVALID_ACCESS{ErrorModule::ARP, 42};
-constexpr ResultCode ERR_NOT_REGISTERED{ErrorModule::ARP, 102};
+constexpr ResultCode ERR_INVALID_RESOURCE{ErrorModule::ARP, 0x1E};
+constexpr ResultCode ERR_INVALID_PROCESS_ID{ErrorModule::ARP, 0x1F};
+constexpr ResultCode ERR_INVALID_ACCESS{ErrorModule::ARP, 0x2A};
+constexpr ResultCode ERR_NOT_REGISTERED{ErrorModule::ARP, 0x66};

 } // namespace Service::Glue
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -233,7 +233,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
        {302, nullptr, "StopConsoleSixAxisSensor"},
        {303, nullptr, "ActivateSevenSixAxisSensor"},
        {304, nullptr, "StartSevenSixAxisSensor"},
-        {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
+        {305, nullptr, "StopSevenSixAxisSensor"},
        {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
        {307, nullptr, "FinalizeSevenSixAxisSensor"},
        {308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
@@ -853,17 +853,6 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
    rb.Push(RESULT_SUCCESS);
 }

-void Hid::StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
-    IPC::RequestParser rp{ctx};
-    const auto applet_resource_user_id{rp.Pop<u64>()};
-
-    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
-                applet_resource_user_id);
-
-    IPC::ResponseBuilder rb{ctx, 2};
-    rb.Push(RESULT_SUCCESS);
-}
-
 void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_HID, "(STUBBED) called");

--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -128,7 +128,6 @@ private:
    void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
    void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
    void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
-    void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
    void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);

    std::shared_ptr<IAppletResource> applet_resource;
--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -371,15 +371,10 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage(
    // Convert to application language, get priority list
    const auto application_language = ConvertToApplicationLanguage(language_code);
    if (application_language == std::nullopt) {
-        LOG_ERROR(Service_NS, "Could not convert application language! language_code={}",
-                  language_code);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }
    const auto priority_list = GetApplicationLanguagePriorityList(*application_language);
    if (!priority_list) {
-        LOG_ERROR(Service_NS,
-                  "Could not find application language priorities! application_language={}",
-                  *application_language);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }

@@ -391,8 +386,6 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage(
        }
    }

-    LOG_ERROR(Service_NS, "Could not find a valid language! supported_languages={:08X}",
-              supported_languages);
    return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
 }

@@ -417,7 +410,6 @@ ResultVal<u64> IApplicationManagerInterface::ConvertApplicationLanguageToLanguag
    const auto language_code =
        ConvertToLanguageCode(static_cast<ApplicationLanguage>(application_language));
    if (language_code == std::nullopt) {
-        LOG_ERROR(Service_NS, "Language not found! application_language={}", application_language);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }

--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -67,7 +67,6 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
    const auto index = rp.Pop<u32>();

    if (index >= available_language_codes.size()) {
-        LOG_ERROR(Service_SET, "Invalid language code index! index={}", index);
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_INVALID_LANGUAGE);
        return;
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -28,11 +28,9 @@ void ServiceManager::InvokeControlRequest(Kernel::HLERequestContext& context) {

 static ResultCode ValidateServiceName(const std::string& name) {
    if (name.size() <= 0 || name.size() > 8) {
-        LOG_ERROR(Service_SM, "Invalid service name! service={}", name);
        return ERR_INVALID_NAME;
    }
    if (name.find('\0') != std::string::npos) {
-        LOG_ERROR(Service_SM, "A non null terminated service was passed");
        return ERR_INVALID_NAME;
    }
    return RESULT_SUCCESS;
@@ -53,10 +51,8 @@ ResultVal<std::shared_ptr<Kernel::ServerPort>> ServiceManager::RegisterService(

    CASCADE_CODE(ValidateServiceName(name));

-    if (registered_services.find(name) != registered_services.end()) {
-        LOG_ERROR(Service_SM, "Service is already registered! service={}", name);
+    if (registered_services.find(name) != registered_services.end())
        return ERR_ALREADY_REGISTERED;
-    }

    auto& kernel = Core::System::GetInstance().Kernel();
    auto [server_port, client_port] =
@@ -70,10 +66,9 @@ ResultCode ServiceManager::UnregisterService(const std::string& name) {
    CASCADE_CODE(ValidateServiceName(name));

    const auto iter = registered_services.find(name);
-    if (iter == registered_services.end()) {
-        LOG_ERROR(Service_SM, "Server is not registered! service={}", name);
+    if (iter == registered_services.end())
        return ERR_SERVICE_NOT_REGISTERED;
-    }
+
    registered_services.erase(iter);
    return RESULT_SUCCESS;
 }
@@ -84,7 +79,6 @@ ResultVal<std::shared_ptr<Kernel::ClientPort>> ServiceManager::GetServicePort(
    CASCADE_CODE(ValidateServiceName(name));
    auto it = registered_services.find(name);
    if (it == registered_services.end()) {
-        LOG_ERROR(Service_SM, "Server is not registered! service={}", name);
        return ERR_SERVICE_NOT_REGISTERED;
    }

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -267,7 +267,7 @@ protected:

 private:
    struct Data {
-        u32_le unk_0{};
+        u32_le unk_0;
    };

    Data data{};
@@ -614,14 +614,6 @@ private:
            ctx.WriteBuffer(response.Serialize());
            break;
        }
-        case TransactionId::SetBufferCount: {
-            LOG_WARNING(Service_VI, "(STUBBED) called, transaction=SetBufferCount");
-            [[maybe_unused]] const auto buffer = ctx.ReadBuffer();
-
-            IGBPEmptyResponseParcel response{};
-            ctx.WriteBuffer(response.Serialize());
-            break;
-        }
        default:
            ASSERT_MSG(false, "Unimplemented");
        }
@@ -867,7 +859,6 @@ private:

        const auto layer_id = nv_flinger->CreateLayer(display);
        if (!layer_id) {
-            LOG_ERROR(Service_VI, "Layer not found! display=0x{:016X}", display);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -984,7 +975,6 @@ private:

        const auto display_id = nv_flinger->OpenDisplay(name);
        if (!display_id) {
-            LOG_ERROR(Service_VI, "Display not found! display_name={}", name);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1084,7 +1074,6 @@ private:

        const auto display_id = nv_flinger->OpenDisplay(display_name);
        if (!display_id) {
-            LOG_ERROR(Service_VI, "Layer not found! layer_id={}", layer_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1092,7 +1081,6 @@ private:

        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
        if (!buffer_queue_id) {
-            LOG_ERROR(Service_VI, "Buffer queue id not found! display_id={}", *display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1128,7 +1116,6 @@ private:

        const auto layer_id = nv_flinger->CreateLayer(display_id);
        if (!layer_id) {
-            LOG_ERROR(Service_VI, "Layer not found! layer_id={}", *layer_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1136,7 +1123,6 @@ private:

        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
        if (!buffer_queue_id) {
-            LOG_ERROR(Service_VI, "Buffer queue id not found! display_id={}", display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1167,7 +1153,6 @@ private:

        const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
        if (!vsync_event) {
-            LOG_ERROR(Service_VI, "Vsync event was not found for display_id={}", display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1208,7 +1193,6 @@ private:
        case NintendoScaleMode::PreserveAspectRatio:
            return MakeResult(ConvertedScaleMode::PreserveAspectRatio);
        default:
-            LOG_ERROR(Service_VI, "Invalid scaling mode specified, mode={}", mode);
            return ERR_OPERATION_FAILED;
        }
    }
@@ -1265,7 +1249,6 @@ void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
    const auto policy = rp.PopEnum<Policy>();

    if (!IsValidServiceAccess(permission, policy)) {
-        LOG_ERROR(Service_VI, "Permission denied for policy {}", static_cast<u32>(policy));
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_PERMISSION_DENIED);
        return;
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -446,7 +446,6 @@ struct Values {
    bool use_asynchronous_gpu_emulation;
    bool use_vsync;
    bool force_30fps_mode;
-    bool use_fast_gpu_time;

    float bg_red;
    float bg_green;
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -124,8 +124,6 @@ add_library(video_core STATIC
    shader/decode.cpp
    shader/expr.cpp
    shader/expr.h
-    shader/memory_util.cpp
-    shader/memory_util.h
    shader/node_helper.cpp
    shader/node_helper.h
    shader/node.h
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -71,22 +71,16 @@ bool DmaPusher::Step() {
    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                        command_list_header.size * sizeof(u32));

-    for (std::size_t index = 0; index < command_headers.size();) {
-        const CommandHeader& command_header = command_headers[index];
+    for (const CommandHeader& command_header : command_headers) {

-        if (dma_state.method_count) {
+        // now, see if we're in the middle of a command
+        if (dma_state.length_pending) {
+            // Second word of long non-inc methods command - method count
+            dma_state.length_pending = 0;
+            dma_state.method_count = command_header.method_count_;
+        } else if (dma_state.method_count) {
            // Data word of methods command
-            if (dma_state.non_incrementing) {
-                const u32 max_write = static_cast<u32>(
-                    std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) -
-                    index);
-                CallMultiMethod(&command_header.argument, max_write);
-                dma_state.method_count -= max_write;
-                index += max_write;
-                continue;
-            } else {
-                CallMethod(command_header.argument);
-            }
+            CallMethod(command_header.argument);

            if (!dma_state.non_incrementing) {
                dma_state.method++;
@@ -126,7 +120,6 @@ bool DmaPusher::Step() {
                break;
            }
        }
-        index++;
    }

    if (!non_main) {
@@ -147,9 +140,4 @@ void DmaPusher::CallMethod(u32 argument) const {
    gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
 }

-void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
-    gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
-                        dma_state.method_count);
-}
-
 } // namespace Tegra
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,7 +75,6 @@ private:
    void SetState(const CommandHeader& command_header);

    void CallMethod(u32 argument) const;
-    void CallMultiMethod(const u32* base_start, u32 num_methods) const;

    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,12 +28,6 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

-void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
-    }
-}
-
 static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
    const u32 line_a = src_2 - src_1;
    const u32 line_b = dst_2 - dst_1;
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -39,9 +39,6 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
-
    enum class Origin : u32 {
        Center = 0,
        Corner = 1,
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -51,13 +51,6 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

-void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                    u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
-    }
-}
-
 Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
    const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
    ASSERT(cbuf_mask[regs.tex_cb_index]);
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -202,9 +202,6 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
-
    Texture::FullTextureInfo GetTexture(std::size_t offset) const;

    /// Given a texture handle, returns the TSC and TIC entries.
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,11 +41,4 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

-void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                   u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
-    }
-}
-
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -40,9 +40,6 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
-
    struct Regs {
        static constexpr size_t NUM_REGS = 0x7F;

--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -280,58 +280,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

-void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                u32 methods_pending) {
-    // Methods after 0xE00 are special, they're actually triggers for some microcode that was
-    // uploaded to the GPU during initialization.
-    if (method >= MacroRegistersStart) {
-        // We're trying to execute a macro
-        if (executing_macro == 0) {
-            // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method % 2) == 0,
-                       "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method;
-        }
-
-        for (std::size_t i = 0; i < amount; i++) {
-            macro_params.push_back(base_start[i]);
-        }
-
-        // Call the macro when there are no more parameters in the command buffer
-        if (amount == methods_pending) {
-            CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
-            macro_params.clear();
-        }
-        return;
-    }
-    switch (method) {
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
-    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
-        ProcessCBMultiData(method, base_start, amount);
-        break;
-    }
-    default: {
-        for (std::size_t i = 0; i < amount; i++) {
-            CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
-        }
-    }
-    }
-}
-
 void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
    if (mme_draw.current_mode == MMEDrawMode::Undefined) {
        if (mme_draw.gl_begin_consume) {
@@ -622,28 +570,6 @@ void Maxwell3D::StartCBData(u32 method) {
    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
 }

-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
-    if (cb_data_state.current != method) {
-        if (cb_data_state.current != null_cb_data) {
-            FinishCBData();
-        }
-        constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
-        cb_data_state.start_pos = regs.const_buffer.cb_pos;
-        cb_data_state.id = method - first_cb_data;
-        cb_data_state.current = method;
-        cb_data_state.counter = 0;
-    }
-    const std::size_t id = cb_data_state.id;
-    const std::size_t size = amount;
-    std::size_t i = 0;
-    for (; i < size; i++) {
-        cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
-        cb_data_state.counter++;
-    }
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
 void Maxwell3D::FinishCBData() {
    // Write the input value to the current const buffer at the current position.
    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1359,9 +1359,6 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
-
    /// Write the value to the register identified by method.
    void CallMethodFromMME(const GPU::MethodCall& method_call);

@@ -1515,7 +1512,6 @@ private:
    /// Handles a write to the CB_DATA[i] register.
    void StartCBData(u32 method);
    void ProcessCBData(u32 value);
-    void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
    void FinishCBData();

    /// Handles a write to the CB_BIND register.
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -36,13 +36,6 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
 #undef MAXWELLDMA_REG_INDEX
 }

-void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
-                                 u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
-    }
-}
-
 void MaxwellDMA::HandleCopy() {
    LOG_TRACE(HW_GPU, "Requested a DMA copy");

--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -35,9 +35,6 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

-    /// Write multiple values to the register identified by method.
-    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
-
    struct Regs {
        static constexpr std::size_t NUM_REGS = 0x1D6;

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -9,7 +9,6 @@
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/memory.h"
-#include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/kepler_memory.h"
@@ -155,10 +154,7 @@ u64 GPU::GetTicks() const {
    constexpr u64 gpu_ticks_den = 625;

    const u64 cpu_ticks = system.CoreTiming().GetTicks();
-    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
-    if (Settings::values.use_fast_gpu_time) {
-        nanoseconds /= 256;
-    }
+    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
@@ -213,32 +209,16 @@ void GPU::CallMethod(const MethodCall& method_call) {

    ASSERT(method_call.subchannel < bound_engines.size());

-    if (ExecuteMethodOnEngine(method_call.method)) {
+    if (ExecuteMethodOnEngine(method_call)) {
        CallEngineMethod(method_call);
    } else {
        CallPullerMethod(method_call);
    }
 }

-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                          u32 methods_pending) {
-    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
-
-    ASSERT(subchannel < bound_engines.size());
-
-    if (ExecuteMethodOnEngine(method)) {
-        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-    } else {
-        for (std::size_t i = 0; i < amount; i++) {
-            CallPullerMethod(
-                {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
-        }
-    }
-}
-
-bool GPU::ExecuteMethodOnEngine(u32 method) {
-    const auto buffer_method = static_cast<BufferMethods>(method);
-    return buffer_method >= BufferMethods::NonPullerMethods;
+bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
+    const auto method = static_cast<BufferMethods>(method_call.method);
+    return method >= BufferMethods::NonPullerMethods;
 }

 void GPU::CallPullerMethod(const MethodCall& method_call) {
@@ -318,31 +298,6 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
    }
 }

-void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                                u32 methods_pending) {
-    const EngineID engine = bound_engines[subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::MAXWELL_B:
-        maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::KEPLER_COMPUTE_B:
-        kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
-
 void GPU::ProcessBindMethod(const MethodCall& method_call) {
    // Bind the current subchannel to the desired engine id.
    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,10 +155,6 @@ public:
    /// Calls a GPU method.
    void CallMethod(const MethodCall& method_call);

-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending);
-
    /// Flush all current written commands into the host GPU for execution.
    void FlushCommands();
    /// Synchronizes CPU writes with Host GPU memory.
@@ -313,12 +309,8 @@ private:
    /// Calls a GPU engine method.
    void CallEngineMethod(const MethodCall& method_call);

-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending);
-
    /// Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(u32 method);
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);

 protected:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,7 +348,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {

    texture_cache.GuardRenderTargets(true);

-    View depth_surface = texture_cache.GetDepthBufferSurface();
+    View depth_surface = texture_cache.GetDepthBufferSurface(true);

    const auto& regs = gpu.regs;
    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -357,7 +357,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
    FramebufferCacheKey key;
    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
    for (std::size_t index = 0; index < colors_count; ++index) {
-        View color_surface{texture_cache.GetColorBufferSurface(index)};
+        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
        if (!color_surface) {
            continue;
        }
@@ -381,28 +381,52 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
 }

-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
-                                                 bool using_stencil_fb) {
+void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
    auto& gpu = system.GPU().Maxwell3D();
    const auto& regs = gpu.regs;

    texture_cache.GuardRenderTargets(true);
    View color_surface;
-    if (using_color_fb) {
+
+    if (using_color) {
+        // Determine if we have to preserve the contents.
+        // First we have to make sure all clear masks are enabled.
+        bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
+                                 !regs.clear_buffers.B || !regs.clear_buffers.A;
        const std::size_t index = regs.clear_buffers.RT;
-        color_surface = texture_cache.GetColorBufferSurface(index);
+        if (regs.clear_flags.scissor) {
+            // Then we have to confirm scissor testing clears the whole image.
+            const auto& scissor = regs.scissor_test[0];
+            preserve_contents |= scissor.min_x > 0;
+            preserve_contents |= scissor.min_y > 0;
+            preserve_contents |= scissor.max_x < regs.rt[index].width;
+            preserve_contents |= scissor.max_y < regs.rt[index].height;
+        }
+
+        color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
        texture_cache.MarkColorBufferInUse(index);
    }
+
    View depth_surface;
-    if (using_depth_fb || using_stencil_fb) {
-        depth_surface = texture_cache.GetDepthBufferSurface();
+    if (using_depth_stencil) {
+        bool preserve_contents = false;
+        if (regs.clear_flags.scissor) {
+            // For depth stencil clears we only have to confirm scissor test covers the whole image.
+            const auto& scissor = regs.scissor_test[0];
+            preserve_contents |= scissor.min_x > 0;
+            preserve_contents |= scissor.min_y > 0;
+            preserve_contents |= scissor.max_x < regs.zeta_width;
+            preserve_contents |= scissor.max_y < regs.zeta_height;
+        }
+
+        depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
        texture_cache.MarkDepthBufferInUse();
    }
    texture_cache.GuardRenderTargets(false);

    FramebufferCacheKey key;
-    key.colors[0] = color_surface;
-    key.zeta = depth_surface;
+    key.colors[0] = std::move(color_surface);
+    key.zeta = std::move(depth_surface);

    state_tracker.NotifyFramebuffer();
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
@@ -422,8 +446,7 @@ void RasterizerOpenGL::Clear() {
    if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
        regs.clear_buffers.A) {
        use_color = true;
-    }
-    if (use_color) {
+
        state_tracker.NotifyColorMask0();
        glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
                     regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
@@ -461,7 +484,7 @@ void RasterizerOpenGL::Clear() {

    UNIMPLEMENTED_IF(regs.clear_flags.viewport);

-    ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
+    ConfigureClearFramebuffer(use_color, use_depth || use_stencil);

    if (use_color) {
        glClearBufferfv(GL_COLOR, 0, regs.clear_color);
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -95,7 +95,8 @@ private:
    /// Configures the color and depth framebuffer states.
    void ConfigureFramebuffers();

-    void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
+    /// Configures the color and depth framebuffer for clearing.
+    void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);

    /// Configures the current constbuffers to use for the draw command.
    void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,6 +10,8 @@
 #include <thread>
 #include <unordered_set>

+#include <boost/functional/hash.hpp>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -26,26 +28,76 @@
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/utils.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"

 namespace OpenGL {

 using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::GetUniqueIdentifier;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;

 namespace {

+constexpr u32 STAGE_MAIN_OFFSET = 10;
+constexpr u32 KERNEL_MAIN_OFFSET = 0;
+
 constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};

+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+/// Gets if the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program) {
+    constexpr std::size_t start_offset = 10;
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & mask) == self_jumping_branch) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        offset++;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr) {
+    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(code.begin(), code.end(), 0);
+        return code;
+    });
+    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
+    code.resize(CalculateProgramSize(code));
+    return code;
+}
+
 /// Gets the shader type from a Maxwell program type
 constexpr GLenum GetGLShaderType(ShaderType shader_type) {
    switch (shader_type) {
@@ -62,6 +114,17 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
    }
 }

+/// Hashes one (or two) program streams
+u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
+                        const ProgramCode& code_b = {}) {
+    u64 unique_identifier = boost::hash_value(code);
+    if (is_a) {
+        // VertexA programs include two programs
+        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
+    }
+    return unique_identifier;
+}
+
 constexpr const char* GetShaderTypeName(ShaderType shader_type) {
    switch (shader_type) {
    case ShaderType::Vertex:
@@ -393,12 +456,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    const auto host_ptr{memory_manager.GetPointer(address)};

    // No shader found - create a new one
-    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
    ProgramCode code_b;
    if (program == Maxwell::ShaderProgram::VertexA) {
        const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
-        const u8* host_ptr_b = memory_manager.GetPointer(address_b);
-        code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+        code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
    }

    const auto unique_identifier = GetUniqueIdentifier(
@@ -436,7 +498,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {

    const auto host_ptr{memory_manager.GetPointer(code_addr)};
    // No kernel found, create a new one
-    auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
    const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};

    const ShaderParameters params{system,    disk_cache, device,
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -140,12 +140,6 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
    enable.Assign(1);
 }

-void FixedPipelineState::Fill(const Maxwell& regs) {
-    rasterizer.Fill(regs);
-    depth_stencil.Fill(regs);
-    color_blending.Fill(regs);
-}
-
 std::size_t FixedPipelineState::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
@@ -155,6 +149,15 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
    return std::memcmp(this, &rhs, sizeof *this) == 0;
 }

+FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
+    FixedPipelineState fixed_state;
+    fixed_state.rasterizer.Fill(regs);
+    fixed_state.depth_stencil.Fill(regs);
+    fixed_state.color_blending.Fill(regs);
+    fixed_state.padding = {};
+    return fixed_state;
+}
+
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
    // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
    // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -17,7 +17,7 @@ namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-struct FixedPipelineState {
+struct alignas(32) FixedPipelineState {
    static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
    static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;

@@ -237,8 +237,7 @@ struct FixedPipelineState {
    Rasterizer rasterizer;
    DepthStencil depth_stencil;
    ColorBlending color_blending;
-
-    void Fill(const Maxwell& regs);
+    std::array<u8, 20> padding;

    std::size_t Hash() const noexcept;

@@ -251,6 +250,9 @@ struct FixedPipelineState {
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
 static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
 static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
+static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
+
+FixedPipelineState GetFixedPipelineState(const Maxwell& regs);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -82,6 +82,11 @@ public:
        return present_family;
    }

+    /// Returns true if the device is integrated with the host CPU.
+    bool IsIntegrated() const {
+        return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+    }
+
    /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
    u32 GetApiVersion() const {
        return properties.apiVersion;
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -288,7 +288,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
    depth_stencil_ci.maxDepthBounds = 0.0f;

    std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
-    const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
+    const std::size_t num_attachments = renderpass_params.color_attachments.size();
    for (std::size_t index = 0; index < num_attachments; ++index) {
        static constexpr std::array COMPONENT_TABLE = {
            VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -118,7 +118,8 @@ private:
 };

 VKMemoryManager::VKMemoryManager(const VKDevice& device)
-    : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {}
+    : device{device}, properties{device.GetPhysical().GetMemoryProperties()},
+      is_memory_unified{GetMemoryUnified(properties)} {}

 VKMemoryManager::~VKMemoryManager() = default;

@@ -208,6 +209,16 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi
    return {};
 }

+bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) {
+    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
+        if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) {
+            // Memory is considered unified when heaps are device local only.
+            return false;
+        }
+    }
+    return true;
+}
+
 VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
                                       const vk::DeviceMemory& memory, u64 begin, u64 end)
    : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {}
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -40,6 +40,11 @@ public:
    /// Commits memory required by the image and binds it.
    VKMemoryCommit Commit(const vk::Image& image, bool host_visible);

+    /// Returns true if the memory allocations are done always in host visible and coherent memory.
+    bool IsMemoryUnified() const {
+        return is_memory_unified;
+    }
+
 private:
    /// Allocates a chunk of memory.
    bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
@@ -48,8 +53,12 @@ private:
    VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
                                  VkMemoryPropertyFlags wanted_properties);

-    const VKDevice& device;                                       ///< Device handler.
-    const VkPhysicalDeviceMemoryProperties properties;            ///< Physical device properties.
+    /// Returns true if the device uses an unified memory model.
+    static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties);
+
+    const VKDevice& device;                            ///< Device handler.
+    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+    const bool is_memory_unified;                      ///< True if memory model is unified.
    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };

--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,18 +27,12 @@
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"

 namespace Vulkan {

 MICROPROFILE_DECLARE(Vulkan_PipelineCache);

 using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;

 namespace {

@@ -51,6 +45,60 @@ constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
 constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
    VideoCommon::Shader::CompileDepth::FullDecompile};

+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+/// Gets if the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    const std::size_t start_offset = is_compute ? 0 : 10;
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & mask) == self_jumping_branch) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        ++offset;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code;
+    });
+    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+                                   program_code.size() * sizeof(u64));
+    program_code.resize(CalculateProgramSize(program_code, is_compute));
+    return program_code;
+}
+
 constexpr std::size_t GetStageFromProgram(std::size_t program) {
    return program == 0 ? 0 : program - 1;
 }
@@ -113,24 +161,6 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,

 } // Anonymous namespace

-std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
-    return static_cast<std::size_t>(hash);
-}
-
-bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
-    return std::memcmp(&rhs, this, sizeof *this) == 0;
-}
-
-std::size_t ComputePipelineCacheKey::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
-    return static_cast<std::size_t>(hash);
-}
-
-bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
-    return std::memcmp(&rhs, this, sizeof *this) == 0;
-}
-
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
                           u32 main_offset)
@@ -182,9 +212,9 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
            const auto host_ptr{memory_manager.GetPointer(program_addr)};

            // No shader found - create a new one
-            constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+            constexpr u32 stage_offset = 10;
            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
-            ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);

            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                    std::move(code), stage_offset);
@@ -240,10 +270,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
        // No shader found - create a new one
        const auto host_ptr = memory_manager.GetPointer(program_addr);

-        ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        constexpr u32 kernel_main_offset = 0;
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                program_addr, *cpu_addr, std::move(code),
-                                                KERNEL_MAIN_OFFSET);
+                                                kernel_main_offset);
        if (cpu_addr) {
            Register(shader);
        } else {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <cstddef>
 #include <memory>
+#include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
@@ -24,7 +25,6 @@
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/surface.h"
@@ -47,40 +47,46 @@ class CachedShader;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

+using ProgramCode = std::vector<u64>;
+
 struct GraphicsPipelineCacheKey {
    FixedPipelineState fixed_state;
-    RenderPassParams renderpass_params;
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    u64 padding; // This is necessary for unique object representations
+    RenderPassParams renderpass_params;

-    std::size_t Hash() const noexcept;
+    std::size_t Hash() const noexcept {
+        std::size_t hash = fixed_state.Hash();
+        for (const auto& shader : shaders) {
+            boost::hash_combine(hash, shader);
+        }
+        boost::hash_combine(hash, renderpass_params.Hash());
+        return hash;
+    }

-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return std::tie(fixed_state, shaders, renderpass_params) ==
+               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
    }
 };
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);

 struct ComputePipelineCacheKey {
-    GPUVAddr shader;
-    u32 shared_memory_size;
-    std::array<u32, 3> workgroup_size;
+    GPUVAddr shader{};
+    u32 shared_memory_size{};
+    std::array<u32, 3> workgroup_size{};

-    std::size_t Hash() const noexcept;
+    std::size_t Hash() const noexcept {
+        return static_cast<std::size_t>(shader) ^
+               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
+               static_cast<std::size_t>(workgroup_size[0]) ^
+               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
+               (static_cast<std::size_t>(workgroup_size[2]) << 24);
+    }

-    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+        return std::tie(shader, shared_memory_size, workgroup_size) ==
+               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
    }
 };
-static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);

 } // namespace Vulkan

@@ -107,8 +113,7 @@ namespace Vulkan {
 class CachedShader final : public RasterizerCacheObject {
 public:
    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
-                          u32 main_offset);
+                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
    ~CachedShader();

    GPUVAddr GetGpuAddr() const {
@@ -140,7 +145,7 @@ private:
                                                                 Tegra::Engines::ShaderType stage);

    GPUVAddr gpu_addr{};
-    VideoCommon::Shader::ProgramCode program_code;
+    ProgramCode program_code;
    VideoCommon::Shader::Registry registry;
    VideoCommon::Shader::ShaderIR shader_ir;
    ShaderEntries entries;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -316,8 +316,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    query_cache.UpdateCounters();

    const auto& gpu = system.GPU().Maxwell3D();
-    GraphicsPipelineCacheKey key;
-    key.fixed_state.Fill(gpu.regs);
+    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};

    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));

@@ -335,11 +334,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {

    buffer_cache.Unmap();

-    const Texceptions texceptions = UpdateAttachments();
+    const auto texceptions = UpdateAttachments();
    SetupImageTransitions(texceptions, color_attachments, zeta_attachment);

    key.renderpass_params = GetRenderPassParams(texceptions);
-    key.padding = 0;

    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
@@ -455,12 +453,10 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    query_cache.UpdateCounters();

    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    ComputePipelineCacheKey key;
-    key.shader = code_addr;
-    key.shared_memory_size = launch_desc.shared_alloc;
-    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
-                          launch_desc.block_dim_z};
-
+    const ComputePipelineCacheKey key{
+        code_addr,
+        launch_desc.shared_alloc,
+        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
    auto& pipeline = pipeline_cache.GetComputePipeline(key);

    // Compute dispatches can't be executed inside a renderpass
@@ -656,7 +652,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
    Texceptions texceptions;
    for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
        if (update_rendertargets) {
-            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
+            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
        }
        if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
            texceptions[rt] = true;
@@ -664,7 +660,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
    }

    if (update_rendertargets) {
-        zeta_attachment = texture_cache.GetDepthBufferSurface();
+        zeta_attachment = texture_cache.GetDepthBufferSurface(true);
    }
    if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
        texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -692,7 +688,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
    FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
                            std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};

-    const auto try_push = [&key](const View& view) {
+    const auto try_push = [&](const View& view) {
        if (!view) {
            return false;
        }
@@ -703,9 +699,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
        return true;
    };

-    const auto& regs = system.GPU().Maxwell3D().regs;
-    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
-    for (std::size_t index = 0; index < num_attachments; ++index) {
+    for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
        if (try_push(color_attachments[index])) {
            texture_cache.MarkColorBufferInUse(index);
        }
@@ -1256,29 +1250,28 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
 }

 RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
+    using namespace VideoCore::Surface;
+
    const auto& regs = system.GPU().Maxwell3D().regs;
-    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
+    RenderPassParams renderpass_params;

-    RenderPassParams params;
-    params.color_formats = {};
-    std::size_t color_texceptions = 0;
-
-    std::size_t index = 0;
-    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
+    for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
        const auto& rendertarget = regs.rt[rt];
        if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
            continue;
        }
-        params.color_formats[index] = static_cast<u8>(rendertarget.format);
-        color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
-        ++index;
+        renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
+            static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
+            texceptions[rt]});
    }
-    params.num_color_attachments = static_cast<u8>(index);
-    params.texceptions = static_cast<u8>(color_texceptions);

-    params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
-    params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
-    return params;
+    renderpass_params.has_zeta = regs.zeta_enable;
+    if (renderpass_params.has_zeta) {
+        renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
+        renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
+    }
+
+    return renderpass_params;
 }

 VkBuffer RasterizerVulkan::DefaultBuffer() {
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -2,11 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <cstring>
 #include <memory>
 #include <vector>

-#include "common/cityhash.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_device.h"
@@ -15,15 +13,6 @@

 namespace Vulkan {

-std::size_t RenderPassParams::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
-    return static_cast<std::size_t>(hash);
-}
-
-bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
-    return std::memcmp(&rhs, this, sizeof *this) == 0;
-}
-
 VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}

 VKRenderPassCache::~VKRenderPassCache() = default;
@@ -38,22 +27,20 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
 }

 vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
-    using namespace VideoCore::Surface;
    std::vector<VkAttachmentDescription> descriptors;
    std::vector<VkAttachmentReference> color_references;

-    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
-    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
-        const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
-        const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
-        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
+    for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
+        const auto attachment = params.color_attachments[rt];
+        const auto format =
+            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<int>(pixel_format));
+                   static_cast<u32>(attachment.pixel_format));

-        // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
-        const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
-                                               ? VK_IMAGE_LAYOUT_GENERAL
-                                               : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+        // TODO(Rodrigo): Add eMayAlias when it's needed.
+        const auto color_layout = attachment.is_texception
+                                      ? VK_IMAGE_LAYOUT_GENERAL
+                                      : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        VkAttachmentDescription& descriptor = descriptors.emplace_back();
        descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
        descriptor.format = format.format;
@@ -71,17 +58,15 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
    }

    VkAttachmentReference zeta_attachment_ref;
-    const bool has_zeta = params.zeta_format != 0;
-    if (has_zeta) {
-        const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
-        const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
-        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
+    if (params.has_zeta) {
+        const auto format =
+            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<int>(pixel_format));
+                   static_cast<u32>(params.zeta_pixel_format));

-        const VkImageLayout zeta_layout = params.zeta_texception != 0
-                                              ? VK_IMAGE_LAYOUT_GENERAL
-                                              : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+        const auto zeta_layout = params.zeta_texception
+                                     ? VK_IMAGE_LAYOUT_GENERAL
+                                     : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        VkAttachmentDescription& descriptor = descriptors.emplace_back();
        descriptor.flags = 0;
        descriptor.format = format.format;
@@ -93,7 +78,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
        descriptor.initialLayout = zeta_layout;
        descriptor.finalLayout = zeta_layout;

-        zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
+        zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size());
        zeta_attachment_ref.layout = zeta_layout;
    }

@@ -105,7 +90,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
    subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
    subpass_description.pColorAttachments = color_references.data();
    subpass_description.pResolveAttachments = nullptr;
-    subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
+    subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr;
    subpass_description.preserveAttachmentCount = 0;
    subpass_description.pPreserveAttachments = nullptr;

@@ -116,7 +101,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
        stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    }

-    if (has_zeta) {
+    if (params.has_zeta) {
        access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -4,7 +4,8 @@

 #pragma once

-#include <type_traits>
+#include <memory>
+#include <tuple>
 #include <unordered_map>

 #include <boost/container/static_vector.hpp>
@@ -18,25 +19,51 @@ namespace Vulkan {

 class VKDevice;

+// TODO(Rodrigo): Optimize this structure for faster hashing
+
 struct RenderPassParams {
-    std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
-    u8 num_color_attachments;
-    u8 texceptions;
+    struct ColorAttachment {
+        u32 index = 0;
+        VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+        bool is_texception = false;

-    u8 zeta_format;
-    u8 zeta_texception;
+        std::size_t Hash() const noexcept {
+            return static_cast<std::size_t>(pixel_format) |
+                   static_cast<std::size_t>(is_texception) << 6 |
+                   static_cast<std::size_t>(index) << 7;
+        }

-    std::size_t Hash() const noexcept;
+        bool operator==(const ColorAttachment& rhs) const noexcept {
+            return std::tie(index, pixel_format, is_texception) ==
+                   std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
+        }
+    };

-    bool operator==(const RenderPassParams& rhs) const noexcept;
+    boost::container::static_vector<ColorAttachment,
+                                    Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
+        color_attachments{};
+    // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
+    VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+    bool has_zeta = false;
+    bool zeta_texception = false;

-    bool operator!=(const RenderPassParams& rhs) const noexcept {
-        return !operator==(rhs);
+    std::size_t Hash() const noexcept {
+        std::size_t hash = 0;
+        for (const auto& rt : color_attachments) {
+            boost::hash_combine(hash, rt.Hash());
+        }
+        boost::hash_combine(hash, zeta_pixel_format);
+        boost::hash_combine(hash, has_zeta);
+        boost::hash_combine(hash, zeta_texception);
+        return hash;
+    }
+
+    bool operator==(const RenderPassParams& rhs) const {
+        return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
+               std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
+                        rhs.zeta_texception);
    }
 };
-static_assert(std::has_unique_object_representations_v<RenderPassParams>);
-static_assert(std::is_trivially_copyable_v<RenderPassParams>);
-static_assert(std::is_trivially_constructible_v<RenderPassParams>);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -39,7 +39,8 @@ VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator

 VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
                                         VKScheduler& scheduler)
-    : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}
+    : device{device}, memory_manager{memory_manager}, scheduler{scheduler},
+      is_device_integrated{device.IsIntegrated()} {}

 VKStagingBufferPool::~VKStagingBufferPool() = default;

@@ -55,7 +56,9 @@ void VKStagingBufferPool::TickFrame() {
    current_delete_level = (current_delete_level + 1) % NumLevels;

    ReleaseCache(true);
-    ReleaseCache(false);
+    if (!is_device_integrated) {
+        ReleaseCache(false);
+    }
 }

 VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
@@ -92,7 +95,7 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
 }

 VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
-    return host_visible ? host_staging_buffers : device_staging_buffers;
+    return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
 }

 void VKStagingBufferPool::ReleaseCache(bool host_visible) {
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -71,6 +71,7 @@ private:
    const VKDevice& device;
    VKMemoryManager& memory_manager;
    VKScheduler& scheduler;
+    const bool is_device_integrated;

    StagingBuffersCache host_staging_buffers;
    StagingBuffersCache device_staging_buffers;
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,7 +13,6 @@
 #include "common/common_types.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"

@@ -116,6 +115,17 @@ Pred GetPredicate(u32 index, bool negated) {
    return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
 }

+/**
+ * Returns whether the instruction at the specified offset is a 'sched' instruction.
+ * Sched instructions always appear before a sequence of 3 instructions.
+ */
+constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
+    constexpr u32 SchedPeriod = 4;
+    u32 absolute_offset = offset - main_offset;
+
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
 enum class ParseResult : u32 {
    ControlCaught,
    BlockEnd,
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -13,7 +13,6 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/control_flow.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"

@@ -24,6 +23,17 @@ using Tegra::Shader::OpCode;

 namespace {

+/**
+ * Returns whether the instruction at the specified offset is a 'sched' instruction.
+ * Sched instructions always appear before a sequence of 3 instructions.
+ */
+constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
+    constexpr u32 SchedPeriod = 4;
+    u32 absolute_offset = offset - main_offset;
+
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                              const std::list<Sampler>& used_samplers) {
    if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -1,77 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <cstddef>
-
-#include <boost/container_hash/hash.hpp>
-
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-GPUVAddr GetShaderAddress(Core::System& system,
-                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
-    static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
-
-    const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        ++offset;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
-                          const u8* host_ptr, bool is_compute) {
-    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
-    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
-    code.resize(CalculateProgramSize(code, is_compute));
-    return code;
-}
-
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b) {
-    u64 unique_identifier = boost::hash_value(code);
-    if (is_a) {
-        // VertexA programs include two programs
-        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
-    }
-    return unique_identifier;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -1,47 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-
-namespace Core {
-class System;
-}
-
-namespace Tegra {
-class MemoryManager;
-}
-
-namespace VideoCommon::Shader {
-
-using ProgramCode = std::vector<u64>;
-
-constexpr u32 STAGE_MAIN_OFFSET = 10;
-constexpr u32 KERNEL_MAIN_OFFSET = 0;
-
-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
-                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
-
-/// Gets if the current instruction offset is a scheduler instruction
-bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
-                          const u8* host_ptr, bool is_compute);
-
-/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b = {});
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,7 +18,6 @@
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/registry.h"

@@ -26,6 +25,8 @@ namespace VideoCommon::Shader {

 struct ShaderBlock;

+using ProgramCode = std::vector<u64>;
+
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;

 class ConstBuffer {
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -143,7 +143,7 @@ public:
        }

        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
@@ -163,7 +163,7 @@ public:
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }
        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
+        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
@@ -178,7 +178,7 @@ public:
        return any_rt;
    }

-    TView GetDepthBufferSurface() {
+    TView GetDepthBufferSurface(bool preserve_contents) {
        std::lock_guard lock{mutex};
        auto& maxwell3d = system.GPU().Maxwell3D();
        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -199,7 +199,7 @@ public:
            return {};
        }
        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
-        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
+        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = surface_view.first;
@@ -209,7 +209,7 @@ public:
        return surface_view.second;
    }

-    TView GetColorBufferSurface(std::size_t index) {
+    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
        std::lock_guard lock{mutex};
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
        auto& maxwell3d = system.GPU().Maxwell3D();
@@ -239,8 +239,9 @@ public:
            return {};
        }

-        auto surface_view = GetSurface(gpu_addr, *cpu_addr,
-                                       SurfaceParams::CreateForFramebuffer(system, index), true);
+        auto surface_view =
+            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+                       preserve_contents, true);
        if (render_targets[index].target) {
            auto& surface = render_targets[index].target;
            surface->MarkAsRenderTarget(false, NO_RT);
@@ -300,9 +301,9 @@ public:
        const std::optional<VAddr> src_cpu_addr =
            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
        std::pair<TSurface, TView> dst_surface =
-            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
+            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
        std::pair<TSurface, TView> src_surface =
-            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
+            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
        ImageBlit(src_surface.second, dst_surface.second, copy_config);
        dst_surface.first->MarkAsModified(true, Tick());
    }
@@ -532,18 +533,22 @@ private:
     * @param overlaps          The overlapping surfaces registered in the cache.
     * @param params            The parameters for the new surface.
     * @param gpu_addr          The starting address of the new surface.
+     * @param preserve_contents Indicates that the new surface should be loaded from memory or left
+     *                          blank.
     * @param untopological     Indicates to the recycler that the texture has no way to match the
     *                          overlaps due to topological reasons.
     **/
    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                              const SurfaceParams& params, const GPUVAddr gpu_addr,
+                                              const bool preserve_contents,
                                              const MatchTopologyResult untopological) {
+        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
        for (auto& surface : overlaps) {
            Unregister(surface);
        }
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
        case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
+            return InitializeSurface(gpu_addr, params, do_load);
        }
        case RecycleStrategy::Flush: {
            std::sort(overlaps.begin(), overlaps.end(),
@@ -553,7 +558,7 @@ private:
            for (auto& surface : overlaps) {
                FlushSurface(surface);
            }
-            return InitializeSurface(gpu_addr, params);
+            return InitializeSurface(gpu_addr, params, preserve_contents);
        }
        case RecycleStrategy::BufferCopy: {
            auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -562,7 +567,7 @@ private:
        }
        default: {
            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
-            return InitializeSurface(gpu_addr, params);
+            return InitializeSurface(gpu_addr, params, do_load);
        }
        }
    }
@@ -700,11 +705,14 @@ private:
     * @param params    The parameters on the new surface.
     * @param gpu_addr  The starting address of the new surface.
     * @param cpu_addr  The starting address of the new surface on physical memory.
+     * @param preserve_contents Indicates that the new surface should be loaded from memory or
+     *                          left blank.
     */
    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                               const SurfaceParams& params,
                                                               const GPUVAddr gpu_addr,
-                                                               const VAddr cpu_addr) {
+                                                               const VAddr cpu_addr,
+                                                               bool preserve_contents) {
        if (params.target == SurfaceTarget::Texture3D) {
            bool failed = false;
            if (params.num_levels > 1) {
@@ -754,7 +762,7 @@ private:
                            return std::nullopt;
                        }
                        Unregister(surface);
-                        return InitializeSurface(gpu_addr, params);
+                        return InitializeSurface(gpu_addr, params, preserve_contents);
                    }
                    return std::nullopt;
                }
@@ -765,7 +773,7 @@ private:
                    return {{surface, surface->GetMainView()}};
                }
            }
-            return InitializeSurface(gpu_addr, params);
+            return InitializeSurface(gpu_addr, params, preserve_contents);
        }
    }

@@ -788,10 +796,13 @@ private:
     *
     * @param gpu_addr          The starting address of the candidate surface.
     * @param params            The parameters on the candidate surface.
+     * @param preserve_contents Indicates that the new surface should be loaded from memory or
+     *                          left blank.
     * @param is_render         Whether or not the surface is a render target.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                                          const SurfaceParams& params, bool is_render) {
+                                          const SurfaceParams& params, bool preserve_contents,
+                                          bool is_render) {
        // Step 1
        // Check Level 1 Cache for a fast structural match. If candidate surface
        // matches at certain level we are pretty much done.
@@ -800,7 +811,8 @@ private:
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                std::vector<TSurface> overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      topological_result);
            }

            const auto struct_result = current_surface->MatchesStructure(params);
@@ -825,7 +837,7 @@ private:

        // If none are found, we are done. we just load the surface and create it.
        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params);
+            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        // Step 3
@@ -835,13 +847,15 @@ private:
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, topological_result);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      topological_result);
            }
        }

        // Check if it's a 3D texture
        if (params.block_depth > 0) {
-            auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
+            auto surface =
+                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
            if (surface) {
                return *surface;
            }
@@ -861,7 +875,8 @@ private:
                        return *view;
                    }
                }
-                return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
+                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                                      MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
@@ -885,7 +900,7 @@ private:
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view)
                        return {pair.first, *mirage_view};
-                    return RecycleSurface(overlaps, params, gpu_addr,
+                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
@@ -901,7 +916,8 @@ private:
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
-        return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
+        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
+                              MatchTopologyResult::FullMatch);
    }

    /**
@@ -1059,10 +1075,10 @@ private:
    }

    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
-                                                 bool do_load = true) {
+                                                 bool preserve_contents) {
        auto new_surface{GetUncachedSurface(gpu_addr, params)};
        Register(new_surface);
-        if (do_load) {
+        if (preserve_contents) {
            LoadSurface(new_surface);
        }
        return {new_surface, new_surface->GetMainView()};
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -643,8 +643,6 @@ void Config::ReadRendererValues() {
    Settings::values.use_asynchronous_gpu_emulation =
        ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
    Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
-    Settings::values.use_fast_gpu_time =
-        ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
    Settings::values.force_30fps_mode =
        ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool();

@@ -1086,7 +1084,6 @@ void Config::SaveRendererValues() {
    WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
                 Settings::values.use_asynchronous_gpu_emulation, false);
    WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
-    WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
    WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);

    // Cast to double because Qt's written float values are not human-readable
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -22,7 +22,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
    ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
    ui->use_vsync->setEnabled(runtime_lock);
    ui->use_vsync->setChecked(Settings::values.use_vsync);
-    ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
    ui->force_30fps_mode->setEnabled(runtime_lock);
    ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
    ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
@@ -33,7 +32,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
    auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
    Settings::values.gpu_accuracy = gpu_accuracy;
    Settings::values.use_vsync = ui->use_vsync->isChecked();
-    Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
    Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
    Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
 }
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -69,13 +69,6 @@
          </property>
         </widget>
        </item>
-        <item>
-         <widget class="QCheckBox" name="use_fast_gpu_time">
-          <property name="text">
-           <string>Use Fast GPU Time</string>
-          </property>
-         </widget>
-        </item>
        <item>
         <layout class="QHBoxLayout" name="horizontalLayout_1">
          <item>
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -1304,9 +1304,7 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa
    FileSys::VirtualFile romfs;

    if (*romfs_title_id == program_id) {
-        const u64 ivfc_offset = loader->ReadRomFSIVFCOffset();
-        FileSys::PatchManager pm{program_id};
-        romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program);
+        romfs = file;
    } else {
        romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS();
    }
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -394,8 +394,6 @@ void Config::ReadValues() {
        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
    Settings::values.use_vsync =
        static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
-    Settings::values.use_fast_gpu_time =
-        sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);

    Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
    Settings::values.bg_green =
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -130,8 +130,6 @@ void Config::ReadValues() {
    Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
    Settings::values.use_asynchronous_gpu_emulation =
        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
-    Settings::values.use_fast_gpu_time =
-        sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);

    Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
    Settings::values.bg_green =