Compare commits

...

57 Commits

Author SHA1 Message Date
ReinUsesLisp
e2a2a556b9 shader_ir/memory: Implement u16 and u8 for STG and LDG
Using the same technique we used for u8 on LDG, implement u16.

In the case of STG, load the memory, insert the value we want to set
into it with bitfieldInsert, and then store the result.
2020-01-09 02:12:29 -03:00
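A minimal sketch of that read-modify-write idea in plain C++ rather than the GLSL the decompiler emits; InsertBits mirrors what bitfieldInsert does, and the helper names are invented for illustration:

#include <cstddef>
#include <cstdint>

// Mirror of GLSL's bitfieldInsert: replace `bits` bits of `base` starting at `offset`.
std::uint32_t InsertBits(std::uint32_t base, std::uint32_t value, unsigned offset, unsigned bits) {
    const std::uint32_t mask = ((1u << bits) - 1u) << offset;
    return (base & ~mask) | ((value << offset) & mask);
}

// Store a 16-bit value into 32-bit backed memory: load the word, insert the
// new bits at the right byte offset, then write the whole word back.
void StoreU16(std::uint32_t* memory, std::size_t byte_address, std::uint16_t value) {
    std::uint32_t word = memory[byte_address / 4];              // load (LDG-style)
    word = InsertBits(word, value, (byte_address % 4) * 8, 16); // bitfieldInsert
    memory[byte_address / 4] = word;                            // store (STG-style)
}
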
Fernando Sahmkow
80436c1330 Merge pull request #3279 from ReinUsesLisp/vk-pipeline-cache
vk_pipeline_cache: Initial implementation
2020-01-08 17:31:20 -04:00
bunnei
319c4d2108 Merge pull request #3272 from bunnei/vi-close-layer
service: vi: Implement CloseLayer.
2020-01-07 12:45:34 -05:00
ReinUsesLisp
6888d776ff vk_pipeline_cache: Initial implementation
Given a pipeline key, this cache returns a pipeline abstraction (for
graphics or compute).
2020-01-06 22:02:26 -03:00
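A rough sketch of the lookup-or-build shape of such a cache, using standard containers only; PipelineKey, Pipeline, and the hash field are placeholders, not the actual yuzu types:

#include <cstddef>
#include <memory>
#include <unordered_map>

struct PipelineKey {
    std::size_t fixed_state_hash = 0; // e.g. hashed fixed state plus shader identifiers
    bool operator==(const PipelineKey& rhs) const {
        return fixed_state_hash == rhs.fixed_state_hash;
    }
};

struct PipelineKeyHasher {
    std::size_t operator()(const PipelineKey& key) const { return key.fixed_state_hash; }
};

struct Pipeline {}; // stand-in for a graphics or compute pipeline abstraction

class PipelineCache {
public:
    Pipeline& GetPipeline(const PipelineKey& key) {
        auto [it, is_new] = entries.try_emplace(key);
        if (is_new) {
            it->second = std::make_unique<Pipeline>(); // built only on a cache miss
        }
        return *it->second;
    }

private:
    std::unordered_map<PipelineKey, std::unique_ptr<Pipeline>, PipelineKeyHasher> entries;
};
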
ReinUsesLisp
2effdeb924 vk_graphics_pipeline: Initial implementation
This abstraction represents the state of the 3D engine at a given draw.
Instead of changing individual bits of the pipeline as is done in
APIs like D3D11, OpenGL and NVN, on Vulkan we are forced to put
everything together into a single, immutable object.

It takes advantage of the few dynamic states Vulkan offers.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
dc96a59fa0 vk_compute_pipeline: Initial implementation
This abstraction represents a Vulkan compute pipeline.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
b392a5986e vk_pipeline_cache: Add file and define descriptor update template filler
This function allows us to share code between compute and graphics
pipeline compilation.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
3142f1b597 fixed_pipeline_state: Add depth clamp 2020-01-06 22:02:26 -03:00
ReinUsesLisp
9c548146ca vk_rasterizer: Add placeholder 2020-01-06 22:02:26 -03:00
bunnei
5be00cba15 Merge pull request #3276 from ReinUsesLisp/pipeline-reqs
vk_update_descriptor/vk_renderpass_cache: Add pipeline cache dependencies
2020-01-06 17:03:34 -05:00
bunnei
ee9b4a7f9a Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
2020-01-06 17:03:04 -05:00
ReinUsesLisp
5aeff9aff5 vk_renderpass_cache: Initial implementation
The renderpass cache is used to avoid creating renderpasses on each
draw. The hashed structure is not currently optimized.
2020-01-06 18:28:32 -03:00
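A sketch of a cache keyed on a trivially copyable parameter struct; the fields and the byte-wise hash are illustrative and, as the message notes, deliberately unoptimized:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <unordered_map>

struct RenderPassParams { // key describing the renderpass configuration
    std::uint32_t color_format = 0;
    std::uint32_t zeta_format = 0;
    std::uint32_t num_color_attachments = 0;
    std::uint32_t has_zeta = 0;

    bool operator==(const RenderPassParams& rhs) const {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
    std::size_t Hash() const { // naive byte-wise hash over the whole struct
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        std::size_t hash = 0;
        for (std::size_t i = 0; i < sizeof *this; ++i) {
            hash = hash * 31 + bytes[i];
        }
        return hash;
    }
};

struct RenderPassParamsHasher {
    std::size_t operator()(const RenderPassParams& params) const { return params.Hash(); }
};

struct RenderPass {}; // stand-in for the Vulkan renderpass handle

class RenderPassCache {
public:
    RenderPass& GetRenderPass(const RenderPassParams& params) {
        auto [it, is_new] = cache.try_emplace(params);
        if (is_new) {
            it->second = std::make_unique<RenderPass>(); // created once per unique state
        }
        return *it->second;
    }

private:
    std::unordered_map<RenderPassParams, std::unique_ptr<RenderPass>, RenderPassParamsHasher> cache;
};
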
ReinUsesLisp
322d6a0311 vk_update_descriptor: Initial implementation
The update descriptor stores, in flat memory, a large chunk of staging
data used to update descriptor sets through templates. It
provides a push interface to easily insert descriptors following the
current pipeline. The order used in the descriptor update template has
to be implicitly followed. We can catch bugs here using validation
layers.
2020-01-06 18:28:32 -03:00
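A rough sketch of that push interface: descriptor data is appended to one flat array in exactly the order the update template will read it. The entry layout and names are invented for illustration:

#include <cstdint>
#include <vector>

struct BufferEntry { // mimics what a buffer descriptor needs
    std::uint64_t buffer;
    std::uint64_t offset;
    std::uint64_t size;
};

struct ImageEntry { // mimics what a sampled image descriptor needs
    std::uint64_t sampler;
    std::uint64_t image_view;
    std::uint32_t layout;
};

union DescriptorUpdateEntry {
    BufferEntry buffer_info;
    ImageEntry image_info;
};

class UpdateDescriptorQueue {
public:
    // Push order must match the descriptor update template of the current
    // pipeline; mismatches show up as validation layer errors.
    void AddBuffer(std::uint64_t buffer, std::uint64_t offset, std::uint64_t size) {
        DescriptorUpdateEntry entry{};
        entry.buffer_info = {buffer, offset, size};
        payload.push_back(entry);
    }

    void AddSampledImage(std::uint64_t sampler, std::uint64_t image_view, std::uint32_t layout) {
        DescriptorUpdateEntry entry{};
        entry.image_info = {sampler, image_view, layout};
        payload.push_back(entry);
    }

    const DescriptorUpdateEntry* Data() const { return payload.data(); } // flat staging memory
    void Clear() { payload.clear(); }

private:
    std::vector<DescriptorUpdateEntry> payload;
};
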
ReinUsesLisp
5b01f80a12 vk_stream_buffer/vk_buffer_cache: Avoid halting and use generic cache
Before this commit, once the stream buffer was full (no more bytes to
write before looping) it waited for all previous operations to finish.
This was a temporary solution and had a noticeable performance penalty
(as a profiler showed).

To avoid this, mark stream buffer usages with fences and, once the
buffer loops, wait for them to be signaled. On average this never waits.
Each fence knows where its usage finishes, resulting in a non-paged
stream buffer.

On the other hand, the buffer cache is reimplemented on top of the
generic buffer cache. It makes use of the staging buffer pool and the
new stream buffer.
2020-01-06 18:13:41 -03:00
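A simplified model of that fence-watermark scheme in portable C++; Fence is a trivially waitable stand-in for a Vulkan fence, and the bookkeeping uses a virtual write cursor so a lap over the buffer never has to stall on everything at once:

#include <cstddef>
#include <deque>

struct Fence {
    void Wait() const {} // stand-in: block until the GPU signals the fence
};

class StreamBuffer {
public:
    explicit StreamBuffer(std::size_t size) : buffer_size{size} {}

    // Reserve `size` bytes and return the physical offset to write to. Before
    // reusing memory from a previous lap, wait only on fences whose marked
    // region is about to be overwritten; on average this never blocks.
    std::size_t Reserve(std::size_t size) {
        if (cursor % buffer_size + size > buffer_size) {
            cursor += buffer_size - cursor % buffer_size; // keep allocations contiguous
        }
        while (!usages.empty() && cursor + size > buffer_size &&
               usages.front().start < cursor + size - buffer_size) {
            usages.front().fence.Wait();
            usages.pop_front();
        }
        const std::size_t physical_offset = cursor % buffer_size;
        cursor += size;
        return physical_offset;
    }

    // Mark everything written since the last mark as guarded by `fence`,
    // so each fence knows where its usage finishes.
    void MarkUsage(Fence fence) {
        usages.push_back({marked_until, fence});
        marked_until = cursor;
    }

private:
    struct Usage {
        std::size_t start; // virtual position where this usage begins
        Fence fence;
    };
    std::size_t buffer_size;
    std::size_t cursor = 0;      // virtual write position, never wraps
    std::size_t marked_until = 0;
    std::deque<Usage> usages;
};
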
ReinUsesLisp
ceb851b590 vk_memory_manager: Misc changes
* Allocate memory in discrete, exponentially increasing chunks until the
128 MiB threshold. Allocations larger than that increase linearly by
256 MiB (depending on the required size). This allows using small
allocations for small resources (see the sizing sketch after this commit entry).

* Move memory maps to a RAII abstraction. To optimize for debugging
tools (like RenderDoc), users map/unmap on usage. If this ever
becomes a noticeable overhead (from my profiling it doesn't), we can
transparently move to persistent memory maps without harming the API,
getting optimal performance for both gameplay and debugging.

* Improve messages on exceptional situations.

* Fix typos "requeriments" -> "requirements".

* Small style changes.
2020-01-06 18:13:41 -03:00
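A sketch of just the chunk-sizing policy from the first bullet above; the 128 MiB threshold and 256 MiB step come from the message, while the starting chunk size and helper name are invented:

#include <cstdint>

constexpr std::uint64_t MiB = 1024ull * 1024ull;

// Small requirements get discrete, exponentially growing chunks so small
// resources live in small allocations; past 128 MiB, grow linearly in
// 256 MiB steps based on the required size.
std::uint64_t AllocationChunkSize(std::uint64_t required_size) {
    constexpr std::uint64_t threshold = 128 * MiB;
    if (required_size <= threshold) {
        std::uint64_t chunk = 4 * MiB; // illustrative smallest chunk
        while (chunk < required_size) {
            chunk *= 2;
        }
        return chunk;
    }
    constexpr std::uint64_t step = 256 * MiB;
    return ((required_size + step - 1) / step) * step; // round up to a 256 MiB multiple
}
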
ReinUsesLisp
85bb6a6f08 vk_buffer_cache: Temporarily remove buffer cache
This is intended for a follow up commit to avoid circular dependencies.
2020-01-06 17:58:46 -03:00
bunnei
984563b773 Merge pull request #3277 from ReinUsesLisp/make-current
yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
2020-01-06 14:09:19 -05:00
ReinUsesLisp
8306703a7d yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
MakeCurrent is a costly call (according to Nsight's profiler it takes a tenth
of a millisecond to complete), and we don't have a reason to call it
because:
- Qt no longer signals a warning if it's not called
- yuzu no longer supports macOS
2020-01-06 14:02:47 -03:00
bunnei
09908207fb Merge pull request #3261 from degasus/page_table
core/memory + arm/dynarmic: Use a global offset within our arm page table.
2020-01-06 11:56:59 -05:00
bunnei
89fc75d769 Merge pull request #3257 from degasus/no_busy_loops
video_core: Block in WaitFence.
2020-01-06 00:09:57 -05:00
Fernando Sahmkow
56e450a3f7 Merge pull request #3264 from ReinUsesLisp/vk-descriptor-pool
vk_descriptor_pool: Initial implementation
2020-01-05 15:54:41 -04:00
bunnei
6fe51f398f Merge pull request #2945 from FernandoS27/fix-bcat
nifm: Only return that there's an internet connection when there's a BCATServer
2020-01-05 02:17:16 -05:00
bunnei
cd0a7dfdbc Merge pull request #3258 from FernandoS27/shader-amend
Shader_IR: add the ability to amend code in the shader ir.
2020-01-04 14:05:17 -05:00
Fernando Sahmkow
3dd6b55851 Shader_IR: Address Feedback 2020-01-04 14:40:57 -04:00
bunnei
64c5631579 service: vi: Implement CloseLayer.
- Needed for Undertale.
2020-01-04 00:45:06 -05:00
Rodrigo Locatti
6e347d8d1b Update src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-01-03 17:34:30 -03:00
bunnei
624a0f7f3f Merge pull request #3247 from FernandoS27/remap-fix
NvServices: Correct Ioctl Remap.
2020-01-03 12:30:56 -05:00
bunnei
c332c66eb2 Merge pull request #3267 from ReinUsesLisp/remove-maxwell-debugger
yuzu: Remove Maxwell debugger
2020-01-02 22:03:30 -05:00
ReinUsesLisp
0d6d8129c4 yuzu: Remove Maxwell debugger
This was carried from Citra and wasn't really used on yuzu. It also adds
some runtime overhead. This commit removes it from yuzu's codebase.
2020-01-02 23:09:44 -03:00
bunnei
ae0e481677 Merge pull request #3243 from ReinUsesLisp/topologies
maxwell_to_gl: Implement missing primitive topologies
2020-01-01 20:33:33 -05:00
ReinUsesLisp
1fe7df4517 vk_descriptor_pool: Initial implementation
Create a large descriptor pool where we allocate all our descriptors
from. It has to be wide enough to support any pipeline, hence its large
numbers.

If the descriptor pool is filled, we allocate more memory at that moment.
This way we can take advantage of permissive drivers like Nvidia's that
allocate more descriptors than what the spec requires.
2020-01-01 16:44:06 -03:00
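A sketch of that grow-on-exhaustion idea with placeholder types; TryAllocate stands in for a descriptor set allocation that can fail once the pool runs out:

#include <memory>
#include <optional>
#include <vector>

struct DescriptorPool {
    explicit DescriptorPool(int capacity) : remaining{capacity} {}
    std::optional<int> TryAllocate() { // stand-in for allocating one descriptor set
        if (remaining == 0) {
            return std::nullopt;       // pool exhausted
        }
        return remaining--;
    }
    int remaining;
};

class DescriptorAllocator {
public:
    int Allocate() {
        if (!pools.empty()) {
            if (const auto set = pools.back()->TryAllocate()) {
                return *set;
            }
        }
        // The active pool is filled (or none exists yet): create another big
        // pool and retry, relying on generous per-pool limits.
        pools.push_back(std::make_unique<DescriptorPool>(big_pool_capacity));
        return *pools.back()->TryAllocate();
    }

private:
    static constexpr int big_pool_capacity = 0x1000; // deliberately oversized
    std::vector<std::unique_ptr<DescriptorPool>> pools;
};
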
Markus Wick
0986caa8d8 core/memory + arm/dynarmic: Use a global offset within our arm page table.
This saves us two x64 instructions per load/store instruction.

TODO: Clean up our memory code. We can use this optimization here as well.
2020-01-01 12:24:54 +01:00
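A sketch of the trick in plain C++: each page-table entry is stored pre-biased by the page's virtual base, so a read adds the full virtual address instead of masking out the page offset first. The diffs below show the real change; the names here are illustrative and the fast path assumes the page is mapped:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

constexpr std::size_t PAGE_BITS = 12;

struct PageTable {
    explicit PageTable(std::size_t num_pages) : pointers(num_pages, nullptr) {}
    std::vector<std::uint8_t*> pointers; // one biased pointer per page
};

// Map host `memory` at virtual page `base_page`: store the pointer minus the
// page's virtual base so that `pointers[page] + vaddr` lands on the right byte.
void MapPage(PageTable& table, std::size_t base_page, std::uint8_t* memory) {
    table.pointers[base_page] = memory - (base_page << PAGE_BITS);
}

std::uint32_t Read32(const PageTable& table, std::uint64_t vaddr) {
    const std::uint8_t* page_pointer = table.pointers[vaddr >> PAGE_BITS];
    std::uint32_t value;
    std::memcpy(&value, page_pointer + vaddr, sizeof value); // no (vaddr & PAGE_MASK) needed
    return value;
}
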
bunnei
028b2718ed Merge pull request #3239 from ReinUsesLisp/p2r
shader/p2r: Implement P2R Pr
2019-12-31 20:37:16 -05:00
Fernando Sahmkow
b3371ed09e Shader_IR: add the ability to amend code in the shader ir.
This commit introduces a mechanism by which shader IR code can be
amended and extended. This is useful for tracking algorithms where
certain information can be derived before the track, such as indices
to array samplers.
2019-12-30 15:31:48 -04:00
Fernando Sahmkow
7bd447355f Merge pull request #3248 from ReinUsesLisp/vk-image
vk_image: Add an image object abstraction
2019-12-30 14:25:14 -04:00
Rodrigo Locatti
4cbb363d3f vk_image: Avoid unnecessary equals 2019-12-30 13:28:23 -03:00
Fernando Sahmkow
287d5921cf Merge pull request #3249 from ReinUsesLisp/vk-staging-buffer-pool
vk_staging_buffer_pool: Add a staging pool for temporary operations
2019-12-30 12:25:59 -04:00
Markus Wick
cb9dd01ffd video_core: Block in WaitFence.
This function is called rarely and blocks quite often for a long time.
So don't waste power and let the CPU sleep.

This might also increase performance, as the other cores might be allowed to clock higher.
2019-12-30 13:04:53 +01:00
Rodrigo Locatti
f2c61bbe13 vk_staging_buffer_pool: Initialize last epoch to zero 2019-12-29 19:19:43 -03:00
Fernando Sahmkow
f846e3d6d0 Merge pull request #3250 from ReinUsesLisp/empty-fragment
gl_rasterizer: Allow rendering without fragment shader
2019-12-28 14:33:53 -04:00
bunnei
8a76f816a4 Merge pull request #3228 from ReinUsesLisp/ptp
shader/texture: Implement AOFFI and PTP for TLD4 and TLD4S
2019-12-26 21:43:44 -05:00
ReinUsesLisp
5b989f189f gl_rasterizer: Allow rendering without fragment shader
Rendering without a fragment shader is usually used in depth-only
passes.
2019-12-26 16:38:49 -03:00
ReinUsesLisp
3813af2f3c vk_staging_buffer_pool: Add a staging pool for temporary operations
The job of this abstraction is to provide staging buffers for temporary
operations. Think of image uploads or buffer uploads to device memory.

It automatically deletes unused buffers.
2019-12-25 18:12:17 -03:00
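A sketch of that reuse-or-create-and-expire pattern; Buffer, the epoch counter, and the deletion threshold are placeholders for the real Vulkan buffers and per-frame ticking:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

struct Buffer {
    explicit Buffer(std::size_t size) : size{size} {}
    std::size_t size;
};

class StagingBufferPool {
public:
    // Hand out a free buffer big enough for `size`, creating one if needed.
    Buffer& GetUnusedBuffer(std::size_t size) {
        for (auto& entry : entries) {
            if (!entry.in_use && entry.buffer->size >= size) {
                entry.in_use = true;
                entry.last_epoch = epoch;
                return *entry.buffer;
            }
        }
        entries.push_back({std::make_unique<Buffer>(size), true, epoch});
        return *entries.back().buffer;
    }

    // Called once per frame: release buffers and drop the ones nobody touched recently.
    void TickFrame() {
        ++epoch;
        for (auto& entry : entries) {
            entry.in_use = false; // sketch: assume the GPU is done with them
        }
        entries.erase(std::remove_if(entries.begin(), entries.end(),
                                     [this](const Entry& entry) {
                                         return epoch - entry.last_epoch > deletion_threshold;
                                     }),
                      entries.end());
    }

private:
    struct Entry {
        std::unique_ptr<Buffer> buffer;
        bool in_use = false;
        std::uint64_t last_epoch = 0;
    };
    static constexpr std::uint64_t deletion_threshold = 10; // frames of inactivity
    std::uint64_t epoch = 0;
    std::vector<Entry> entries;
};
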
ReinUsesLisp
c83bf7cd1e vk_image: Add an image object abstraction
This object's job is to contain an image and manage its transitions.
Since Nvidia hardware doesn't know what a transition is but Vulkan
requires them anyway, we have to track the state of image subresources
individually.

To avoid the overhead of tracking each subresource in images with many
subresources (think of cubemap arrays with several mipmaps), this commit
tracks when subresources have diverged. As long as this doesn't happen
we can check the state of the first subresource (that will be shared
with all subresources) and update accordingly.

Image transitions are deferred to the scheduler command buffer.
2019-12-25 18:00:16 -03:00
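A sketch of that divergence-tracking idea with plain enums standing in for Vulkan image layouts and a flat array standing in for subresources; all names are invented:

#include <cstddef>
#include <vector>

enum class Layout { Undefined, TransferDst, ShaderRead };

class TrackedImage {
public:
    explicit TrackedImage(std::size_t num_subresources)
        : layouts(num_subresources, Layout::Undefined) {}

    // Transition every subresource at once: the cheap, common path.
    void TransitionAll(Layout new_layout) {
        if (!diverged && layouts.front() == new_layout) {
            return; // a single check covers all subresources
        }
        for (auto& layout : layouts) {
            layout = new_layout;
        }
        diverged = false;
    }

    // Transition a single subresource: from here on the image has diverged and
    // every subresource has to be inspected individually.
    void TransitionOne(std::size_t index, Layout new_layout) {
        if (layouts[index] != new_layout) {
            layouts[index] = new_layout;
            diverged = true;
        }
    }

private:
    std::vector<Layout> layouts; // one entry per mip level / array layer
    bool diverged = false;
};
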
Fernando Sahmkow
a5bb1ac6e3 NvServices: Correct Ioctl Remap.
This commit corrects a padding value in Ioctl Remap that was actually an 
offset to the mapping address.
2019-12-25 14:37:28 -04:00
Fernando Sahmkow
5619d24377 Merge pull request #3244 from ReinUsesLisp/vk-fps
fixed_pipeline_state: Define structure and loaders
2019-12-25 14:31:29 -04:00
bunnei
4af569ee47 Merge pull request #3236 from ReinUsesLisp/rasterize-enable
gl_rasterizer: Implement RASTERIZE_ENABLE
2019-12-24 22:54:10 -05:00
ReinUsesLisp
91d35559e5 maxwell_to_gl: Implement missing primitive topologies
Many of these topologies are exclusively available in OpenGL.
2019-12-22 22:33:01 -03:00
ReinUsesLisp
38d3a48873 shader/p2r: Implement P2R Pr
P2R dumps the predicate or condition code state to a register. This is
useful for unit testing.
2019-12-20 18:02:41 -03:00
ReinUsesLisp
cf27b59493 shader/r2p: Refactor R2P to support P2R 2019-12-20 17:55:42 -03:00
ReinUsesLisp
da0aa4da6b gl_rasterizer: Implement RASTERIZE_ENABLE
RASTERIZE_ENABLE is the opposite of GL_RASTERIZER_DISCARD. Implement it
naturally using this.

NVN games expect rasterize to be enabled by default; reflect that in our
initial GPU state.
2019-12-18 19:28:23 -03:00
ReinUsesLisp
e09c1fbc1f shader/texture: Implement TLD4.PTP 2019-12-16 04:09:24 -03:00
ReinUsesLisp
844e4a297b shader/texture: Enable arrayed TLD4 2019-12-16 02:37:21 -03:00
ReinUsesLisp
a87c85eba2 gl_shader_decompiler: Rename "sepparate" to "separate" 2019-12-16 02:12:51 -03:00
ReinUsesLisp
3d2c44848b shader/texture: Implement AOFFI for TLD4S 2019-12-16 02:06:42 -03:00
ReinUsesLisp
3d9fff82c0 shader/texture: Remove unnecessary parenthesis 2019-12-16 01:52:33 -03:00
Fernando Sahmkow
3c95e49c42 nifm: Only return that there's an internet connection when there's a BCATServer
This helps games that need internet for other purposes to boot, as the
rest of our internet infrastructure is incomplete.
2019-11-06 23:10:32 -05:00
69 changed files with 2939 additions and 1147 deletions

View File

@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
// Multi-process state
config.processor_id = core_index;

View File

@@ -46,7 +46,6 @@
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "core/tools/freezer.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -341,7 +340,6 @@ struct System::Impl {
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<VideoCore::RendererBase> renderer;
std::unique_ptr<Tegra::GPU> gpu_core;
std::shared_ptr<Tegra::DebugContext> debug_context;
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
Memory::Memory memory;
CpuCoreManager cpu_core_manager;
@@ -580,14 +578,6 @@ Loader::AppLoader& System::GetAppLoader() const {
return *impl->app_loader;
}
void System::SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
impl->debug_context = std::move(context);
}
Tegra::DebugContext* System::GetGPUDebugContext() const {
return impl->debug_context.get();
}
void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
impl->virtual_filesystem = std::move(vfs);
}

View File

@@ -307,10 +307,6 @@ public:
Service::SM::ServiceManager& ServiceManager();
const Service::SM::ServiceManager& ServiceManager() const;
void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context);
Tegra::DebugContext* GetGPUDebugContext() const;
void SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs);
std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;

View File

@@ -9,6 +9,7 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
#include "core/settings.h"
namespace Service::NIFM {
@@ -86,7 +87,12 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(RequestState::Connected);
if (Settings::values.bcat_backend == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
rb.PushEnum(RequestState::Connected);
}
}
void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
Core::System& system;
};

View File

@@ -104,10 +104,12 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
ASSERT(object->status == nvmap::Object::Status::Allocated);
u64 size = static_cast<u64>(entry.pages) << 0x10;
const u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
const u64 map_offset = static_cast<u64>(entry.map_offset) << 0x10;
GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
const GPUVAddr returned =
gpu.MemoryManager().MapBufferEx(object->addr + map_offset, offset, size);
ASSERT(returned == offset);
}
std::memcpy(output.data(), entries.data(), output.size());

View File

@@ -62,7 +62,7 @@ private:
u16_le flags;
u16_le kind;
u32_le nvmap_handle;
INSERT_PADDING_WORDS(1);
u32_le map_offset;
u32_le offset;
u32_le pages;
};

View File

@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return layer_id;
}
void NVFlinger::CloseLayer(u64 layer_id) {
for (auto& display : displays) {
display.CloseLayer(layer_id);
}
}
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
const auto& gpu = system.GPU();
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];

View File

@@ -54,6 +54,9 @@ public:
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
/// Closes a layer on all displays for the given layer ID.
void CloseLayer(u64 layer_id);
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.

View File

@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
return *layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
return *layers.at(index);
}
std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
}
void Display::CloseLayer(u64 id) {
layers.erase(
std::remove_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
layers.end());
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
} // namespace Service::VI

View File

@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
@@ -69,6 +70,12 @@ public:
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Closes and removes a layer from this display with the given ID.
///
/// @param id The ID assigned to the layer to close.
///
void CloseLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
u64 id;
std::string name;
std::vector<Layer> layers;
std::vector<std::shared_ptr<Layer>> layers;
Kernel::EventPair vsync_event;
};

View File

@@ -1066,6 +1066,18 @@ private:
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
void CloseLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto layer_id{rp.Pop<u64>()};
LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
nv_flinger->CloseLayer(layer_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
{1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
{2021, nullptr, "CloseLayer"},
{2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},

View File

@@ -146,7 +146,7 @@ struct Memory::Impl {
u8* GetPointer(const VAddr vaddr) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
return page_pointer + (vaddr & PAGE_MASK);
return page_pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +229,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -276,7 +277,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -322,7 +324,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
u8* dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -368,7 +371,8 @@ struct Memory::Impl {
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
const u8* src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -446,7 +450,8 @@ struct Memory::Impl {
page_type = Common::PageType::Unmapped;
} else {
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
current_page_table->pointers[vaddr >> PAGE_BITS] =
pointer - (vaddr & ~PAGE_MASK);
}
break;
}
@@ -493,7 +498,9 @@ struct Memory::Impl {
memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
page_table.pointers[base] = memory - (base << PAGE_BITS);
ASSERT_MSG(page_table.pointers[base],
"memory mapping base yield a nullptr within the table");
base += 1;
memory += PAGE_SIZE;
@@ -518,7 +525,7 @@ struct Memory::Impl {
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
return value;
}
@@ -559,7 +566,7 @@ struct Memory::Impl {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
return;
}

View File

@@ -4,8 +4,6 @@ add_library(video_core STATIC
buffer_cache/map_interval.h
dma_pusher.cpp
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_upload.cpp
@@ -157,10 +155,23 @@ if (ENABLE_VULKAN)
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_compute_pipeline.cpp
renderer_vulkan/vk_compute_pipeline.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
@@ -169,10 +180,14 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
renderer_vulkan/vk_shader_decompiler.h
renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h)
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -1,49 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <mutex>
#include "video_core/debug_utils/debug_utils.h"
namespace Tegra {
void DebugContext::DoOnEvent(Event event, void* data) {
{
std::unique_lock lock{breakpoint_mutex};
// TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
// show on debug widgets
// TODO: Should stop the CPU thread here once we multithread emulation.
active_breakpoint = event;
at_breakpoint = true;
// Tell all observers that we hit a breakpoint
for (auto& breakpoint_observer : breakpoint_observers) {
breakpoint_observer->OnMaxwellBreakPointHit(event, data);
}
// Wait until another thread tells us to Resume()
resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
}
}
void DebugContext::Resume() {
{
std::lock_guard lock{breakpoint_mutex};
// Tell all observers that we are about to resume
for (auto& breakpoint_observer : breakpoint_observers) {
breakpoint_observer->OnMaxwellResume();
}
// Resume the waiting thread (i.e. OnEvent())
at_breakpoint = false;
}
resume_from_breakpoint.notify_one();
}
} // namespace Tegra

View File

@@ -1,157 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
namespace Tegra {
class DebugContext {
public:
enum class Event {
FirstEvent = 0,
MaxwellCommandLoaded = FirstEvent,
MaxwellCommandProcessed,
IncomingPrimitiveBatch,
FinishedPrimitiveBatch,
NumEvents
};
/**
* Inherit from this class to be notified of events registered to some debug context.
* Most importantly this is used for our debugger GUI.
*
* To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
* @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
* access
* @todo Evaluate an alternative interface, in which there is only one managing observer and
* multiple child observers running (by design) on the same thread.
*/
class BreakPointObserver {
public:
/// Constructs the object such that it observes events of the given DebugContext.
explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
: context_weak(debug_context) {
std::unique_lock lock{debug_context->breakpoint_mutex};
debug_context->breakpoint_observers.push_back(this);
}
virtual ~BreakPointObserver() {
auto context = context_weak.lock();
if (context) {
{
std::unique_lock lock{context->breakpoint_mutex};
context->breakpoint_observers.remove(this);
}
// If we are the last observer to be destroyed, tell the debugger context that
// it is free to continue. In particular, this is required for a proper yuzu
// shutdown, when the emulation thread is waiting at a breakpoint.
if (context->breakpoint_observers.empty())
context->Resume();
}
}
/**
* Action to perform when a breakpoint was reached.
* @param event Type of event which triggered the breakpoint
* @param data Optional data pointer (if unused, this is a nullptr)
* @note This function will perform nothing unless it is overridden in the child class.
*/
virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
/**
* Action to perform when emulation is resumed from a breakpoint.
* @note This function will perform nothing unless it is overridden in the child class.
*/
virtual void OnMaxwellResume() {}
protected:
/**
* Weak context pointer. This need not be valid, so when requesting a shared_ptr via
* context_weak.lock(), always compare the result against nullptr.
*/
std::weak_ptr<DebugContext> context_weak;
};
/**
* Simple structure defining a breakpoint state
*/
struct BreakPoint {
bool enabled = false;
};
/**
* Static constructor used to create a shared_ptr of a DebugContext.
*/
static std::shared_ptr<DebugContext> Construct() {
return std::shared_ptr<DebugContext>(new DebugContext);
}
/**
* Used by the emulation core when a given event has happened. If a breakpoint has been set
* for this event, OnEvent calls the event handlers of the registered breakpoint observers.
* The current thread then is halted until Resume() is called from another thread (or until
* emulation is stopped).
* @param event Event which has happened
* @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
* Resume() is called.
*/
void OnEvent(Event event, void* data) {
// This check is left in the header to allow the compiler to inline it.
if (!breakpoints[(int)event].enabled)
return;
// For the rest of event handling, call a separate function.
DoOnEvent(event, data);
}
void DoOnEvent(Event event, void* data);
/**
* Resume from the current breakpoint.
* @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
* Calling from any other thread is safe.
*/
void Resume();
/**
* Delete all set breakpoints and resume emulation.
*/
void ClearBreakpoints() {
for (auto& bp : breakpoints) {
bp.enabled = false;
}
Resume();
}
// TODO: Evaluate if access to these members should be hidden behind a public interface.
std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints;
Event active_breakpoint{};
bool at_breakpoint = false;
private:
/**
* Private default constructor to make sure people always construct this through Construct()
* instead.
*/
DebugContext() = default;
/// Mutex protecting current breakpoint state and the observer list.
std::mutex breakpoint_mutex;
/// Used by OnEvent to wait for resumption.
std::condition_variable resume_from_breakpoint;
/// List of registered observers
std::list<BreakPointObserver*> breakpoint_observers;
};
} // namespace Tegra

View File

@@ -7,7 +7,6 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
@@ -88,11 +87,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
color_mask.A.Assign(1);
}
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
// NVN games expect these values to be enabled at boot
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
// Some games (like Super Mario Odyssey) assume that SRGB is enabled.
regs.framebuffer_srgb = 1;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
@@ -273,8 +272,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = system.GetGPUDebugContext();
const u32 method = method_call.method;
if (method == cb_data_state.current) {
@@ -315,10 +312,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
const std::size_t dirty_reg = dirty_pointers[method];
@@ -424,10 +417,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
default:
break;
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
}
}
void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
@@ -485,12 +474,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
}
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
@@ -500,10 +483,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
rasterizer.DrawMultiBatch(is_indexed);
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
// it's possible that it is incorrect and that there is some other register used to specify the
@@ -650,12 +629,6 @@ void Maxwell3D::DrawArrays() {
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
}
// Both instance configuration registers can not be set at the same time.
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
@@ -673,10 +646,6 @@ void Maxwell3D::DrawArrays() {
rasterizer.DrawBatch(is_indexed);
}
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
// the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
// it's possible that it is incorrect and that there is some other register used to specify the

View File

@@ -657,7 +657,11 @@ public:
std::array<f32, 4> tess_level_outer;
std::array<f32, 2> tess_level_inner;
INSERT_UNION_PADDING_WORDS(0x102);
INSERT_UNION_PADDING_WORDS(0x10);
u32 rasterize_enable;
INSERT_UNION_PADDING_WORDS(0xF1);
u32 tfb_enabled;
@@ -1427,6 +1431,7 @@ ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);

View File

@@ -1051,7 +1051,7 @@ union Instruction {
BitField<40, 1, R2pMode> mode;
BitField<41, 2, u64> byte;
BitField<20, 7, u64> immediate_mask;
} r2p;
} p2r_r2p;
union {
BitField<39, 3, u64> pred39;
@@ -1239,7 +1239,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<54, 2, u64> info;
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1251,9 +1251,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
return info == 1;
return offset_mode == 1;
case TextureMiscMode::PTP:
return info == 2;
return offset_mode == 2;
default:
break;
}
@@ -1265,7 +1265,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<33, 2, u64> info;
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1277,9 +1277,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
return info == 1;
return offset_mode == 1;
case TextureMiscMode::PTP:
return info == 2;
return offset_mode == 2;
default:
break;
}
@@ -1801,6 +1801,7 @@ public:
PSET,
CSETP,
R2P_IMM,
P2R_IMM,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -2106,6 +2107,7 @@ private:
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),

View File

@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
}
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
@@ -181,7 +182,7 @@ public:
virtual void WaitIdle() const = 0;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value) const;
void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPoint(u32 syncpoint_id);
@@ -312,6 +313,8 @@ private:
std::mutex sync_mutex;
std::condition_variable sync_cv;
const bool is_async;
};

View File

@@ -271,6 +271,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseTrivialFragmentShader();
break;
default:
break;
}
@@ -514,6 +517,7 @@ void RasterizerOpenGL::Clear() {
ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
SyncViewport(clear_state);
SyncRasterizeEnable(clear_state);
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -541,6 +545,7 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::DrawPrelude() {
auto& gpu = system.GPU().Maxwell3D();
SyncRasterizeEnable(state);
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -1133,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
}
void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
const auto& regs = system.GPU().Maxwell3D().regs;
current_state.rasterizer_discard = regs.rasterize_enable == 0;
}
void RasterizerOpenGL::SyncColorMask() {
auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {

View File

@@ -168,6 +168,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable(OpenGLState& current_state);
/// Syncs Color Mask
void SyncColorMask();

View File

@@ -48,10 +48,10 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -751,6 +751,9 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -872,6 +875,9 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
@@ -1077,7 +1083,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras, bool sepparate_dc = false) {
const std::vector<TextureIR>& extras, bool separate_dc = false) {
constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1090,10 +1096,12 @@ private:
std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
} else if (!meta->ptp.empty()) {
expr += "Offsets";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
(has_shadow && !sepparate_dc ? 1 : 0) - 1);
(has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]).AsFloat();
@@ -1106,7 +1114,7 @@ private:
expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
if (sepparate_dc) {
if (separate_dc) {
expr += "), " + Visit(meta->depth_compare).AsFloat();
} else {
expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
@@ -1118,8 +1126,12 @@ private:
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
} else if (std::holds_alternative<TextureAoffi>(variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else if (std::holds_alternative<TextureOffset>(variant)) {
if (!meta->aoffi.empty()) {
expr += GenerateTextureAoffi(meta->aoffi);
} else if (!meta->ptp.empty()) {
expr += GenerateTexturePtp(meta->ptp);
}
} else if (std::holds_alternative<TextureDerivates>(variant)) {
expr += GenerateTextureDerivates(meta->derivates);
} else {
@@ -1160,6 +1172,20 @@ private:
return expr;
}
std::string ReadTextureOffset(const Node& value) {
if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
return std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
return Visit(value).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
return "0";
}
}
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
if (aoffi.empty()) {
return {};
@@ -1170,18 +1196,7 @@ private:
expr += '(';
for (std::size_t index = 0; index < aoffi.size(); ++index) {
const auto operand{aoffi.at(index)};
if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
expr += Visit(operand).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
}
expr += ReadTextureOffset(aoffi.at(index));
if (index + 1 < aoffi.size()) {
expr += ", ";
}
@@ -1191,6 +1206,20 @@ private:
return expr;
}
std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
static constexpr std::size_t num_vectors = 4;
ASSERT(ptp.size() == num_vectors * 2);
std::string expr = ", ivec2[](";
for (std::size_t vector = 0; vector < num_vectors; ++vector) {
const bool has_next = vector + 1 < num_vectors;
expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
}
expr += ')';
return expr;
}
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
if (derivates.empty()) {
return {};
@@ -1689,7 +1718,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1701,7 +1730,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1709,21 +1738,19 @@ private:
}
Expression TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
if (meta->sampler.IsShadow()) {
return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) +
GetSwizzle(meta->element),
Type::Float};
const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
const bool separate_dc = meta.sampler.IsShadow();
std::vector<TextureIR> ir;
if (meta.sampler.IsShadow()) {
ir = {TextureOffset{}};
} else {
return {GenerateTexture(operation, "Gather",
{TextureAoffi{}, TextureArgument{type, meta->component}},
false) +
GetSwizzle(meta->element),
Type::Float};
ir = {TextureOffset{}, TextureArgument{type, meta.component}};
}
return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
Type::Float};
}
Expression TextureQueryDimensions(Operation operation) {
@@ -1794,7 +1821,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
std::string expr =
GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
}

View File

@@ -50,6 +50,10 @@ public:
current_state.geometry_shader = 0;
}
void UseTrivialFragmentShader() {
current_state.fragment_shader = 0;
}
private:
struct PipelineState {
bool operator==(const PipelineState& rhs) const {

View File

@@ -182,6 +182,10 @@ void OpenGLState::ApplyCulling() {
}
}
void OpenGLState::ApplyRasterizerDiscard() {
Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
}
void OpenGLState::ApplyColorMask() {
if (!dirty.color_mask) {
return;
@@ -455,6 +459,7 @@ void OpenGLState::Apply() {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
ApplyRasterizerDiscard();
ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();

View File

@@ -48,6 +48,8 @@ public:
GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
struct ColorMask {
GLboolean red_enabled = GL_TRUE;
GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled = false; // GL_STENCIL_TEST
struct {
@@ -174,6 +177,7 @@ public:
void ApplyMultisample();
void ApplySRgb();
void ApplyCulling();
void ApplyRasterizerDiscard();
void ApplyColorMask();
void ApplyDepth();
void ApplyPrimitiveRestart();

View File

@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_POINTS;
case Maxwell::PrimitiveTopology::Lines:
return GL_LINES;
case Maxwell::PrimitiveTopology::LineLoop:
return GL_LINE_LOOP;
case Maxwell::PrimitiveTopology::LineStrip:
return GL_LINE_STRIP;
case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLE_FAN;
case Maxwell::PrimitiveTopology::Quads:
return GL_QUADS;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
case Maxwell::PrimitiveTopology::QuadStrip:
return GL_QUAD_STRIP;
case Maxwell::PrimitiveTopology::Polygon:
return GL_POLYGON;
case Maxwell::PrimitiveTopology::LinesAdjacency:
return GL_LINES_ADJACENCY;
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return GL_LINE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
return GL_TRIANGLES_ADJACENCY;
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return GL_TRIANGLE_STRIP_ADJACENCY;
case Maxwell::PrimitiveTopology::Patches:
return GL_PATCHES;
}
UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
return GL_POINTS;
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,

View File

@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
regs.cull.cull_face, front_face);
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
front_face);
}
} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
(static_cast<std::size_t>(depth_clamp_enable) << 2) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face,
rhs.front_face);
return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {

View File

@@ -170,15 +170,17 @@ struct FixedPipelineState {
};
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
Maxwell::Cull::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
front_face} {}
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
bool cull_enable;
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face;
Maxwell::Cull::FrontFace front_face;

View File

@@ -2,124 +2,145 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <memory>
#include <optional>
#include <tuple>
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "common/bit_util.h"
#include "core/core.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
std::size_t alignment, u8* host_ptr)
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
alignment{alignment} {}
namespace {
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
cpu_memory_} {
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
vk::AccessFlagBits::eUniformRead;
stream_buffer =
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
vk::PipelineStageFlagBits::eAllCommands);
buffer_handle = stream_buffer->GetBuffer();
const auto BufferUsage =
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
const auto UploadPipelineStage =
vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader;
const auto UploadAccessBarriers =
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
vk::AccessFlagBits::eIndexRead;
auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
}
} // Anonymous namespace
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
vk::SharingMode::eExclusive, 0, nullptr);
const auto& dld{device.GetDispatchLoader()};
const auto dev{device.GetLogical()};
buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
buffer.commit = memory_manager.Commit(*buffer.handle, false);
}
CachedBufferBlock::~CachedBufferBlock() = default;
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
: VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
CreateStreamBuffer(device,
scheduler)},
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
staging_pool} {}
VKBufferCache::~VKBufferCache() = default;
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
if (cache) {
const auto entry = TryGet(host_ptr);
if (entry) {
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
return entry->GetOffset();
}
Unregister(entry);
}
}
AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;
if (host_ptr == nullptr) {
return uploaded_offset;
}
std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
alignment, host_ptr);
Register(entry);
}
return uploaded_offset;
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
}
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return {uploaded_ptr, uploaded_offset};
const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
});
return &*empty.handle;
}
void VKBufferCache::Reserve(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
buffer_offset = buffer_offset_base;
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
if (invalidate) {
InvalidateAll();
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
offset, size)},
{}, dld);
});
}
void VKBufferCache::Send() {
stream_buffer->Send(buffer_offset - buffer_offset_base);
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
{}, dld);
cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
void VKBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
dst_offset, size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
dst_offset, size)},
{}, dld);
});
}
} // namespace Vulkan
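
Both upload paths above reduce to the same pattern: record a transfer outside of a render pass, then issue a buffer memory barrier so later consumers observe the write. The following is a minimal sketch of that pattern in plain Vulkan-Hpp, not part of the change itself; the names UploadWithStaging, staging and device_buffer are illustrative only.

void UploadWithStaging(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld,
                       vk::Buffer staging, vk::Buffer device_buffer, vk::DeviceSize offset,
                       vk::DeviceSize size) {
    // Copy the freshly written staging contents into the destination block.
    cmdbuf.copyBuffer(staging, device_buffer, {vk::BufferCopy(0, offset, size)}, dld);
    // Make the transfer visible to the vertex/index/uniform/shader reads that follow.
    const vk::BufferMemoryBarrier barrier(
        vk::AccessFlagBits::eTransferWrite,
        vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
            vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eShaderRead,
        VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, device_buffer, offset, size);
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eAllCommands, {}, {}, {barrier}, {}, dld);
}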


@@ -5,105 +5,74 @@
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Memory {
class Memory;
}
namespace Tegra {
class MemoryManager;
namespace Core {
class System;
}
namespace Vulkan {
class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
class VKScheduler;
class CachedBufferEntry final : public RasterizerCacheObject {
class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
u8* host_ptr);
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size);
~CachedBufferBlock();
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
std::size_t GetSize() const {
return size;
}
u64 GetOffset() const {
return offset;
}
std::size_t GetAlignment() const {
return alignment;
const vk::Buffer* GetHandle() const {
return &*buffer.handle;
}
private:
VAddr cpu_addr{};
std::size_t size{};
u64 offset{};
std::size_t alignment{};
VKBuffer buffer;
};
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
using Buffer = std::shared_ptr<CachedBufferBlock>;
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
public:
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
void Reserve(std::size_t max_size);
/// Ensures that the set data is sent to the device.
void Send();
/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
return buffer_handle;
}
const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
void WriteBarrier() override {}
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
const vk::Buffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) override;
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
private:
void AlignBuffer(std::size_t alignment);
Tegra::MemoryManager& tegra_memory_manager;
Memory::Memory& cpu_memory;
std::unique_ptr<VKStreamBuffer> stream_buffer;
vk::Buffer buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
};
} // namespace Vulkan


@@ -0,0 +1,112 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader)
: device{device}, scheduler{scheduler}, entries{shader.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
std::vector<vk::DescriptorSetLayoutBinding> bindings;
u32 binding = 0;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
const auto dev = device.GetLogical();
return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
}
UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
const auto dev = device.GetLogical();
return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
}
UniquePipeline VKComputePipeline::CreatePipeline() const {
vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
*shader_module, "main", nullptr);
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
shader_stage_ci.pNext = &subgroup_size_ci;
}
const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
const auto dev = device.GetLogical();
return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
}
} // namespace Vulkan
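
A rough usage sketch, with assumed call sites that are not part of this diff: a dispatch commits a descriptor set against the fence of the work currently being recorded and binds both handles through the scheduler. The descriptors themselves must already have been pushed to the update descriptor queue, in the same order the bindings were declared above (const buffers, global buffers, texel buffers, samplers, images).

void DispatchWith(VKComputePipeline& pipeline, VKScheduler& scheduler, u32 groups_x, u32 groups_y,
                  u32 groups_z) {
    // CommitDescriptorSet() returns a null handle when the shader uses no descriptors.
    const vk::DescriptorSet set = pipeline.CommitDescriptorSet();
    scheduler.Record([handle = pipeline.GetHandle(), layout = pipeline.GetLayout(), set, groups_x,
                      groups_y, groups_z](vk::CommandBuffer cmdbuf, auto& dld) {
        cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, handle, dld);
        if (set) {
            cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET,
                                      {set}, {}, dld);
        }
        cmdbuf.dispatch(groups_x, groups_y, groups_z, dld);
    });
}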


@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader);
~VKComputePipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
const ShaderEntries& GetEntries() {
return entries;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
UniquePipeline CreatePipeline() const;
const VKDevice& device;
VKScheduler& scheduler;
ShaderEntries entries;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
UniqueShaderModule shader_module;
UniquePipeline pipeline;
};
} // namespace Vulkan


@@ -0,0 +1,89 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
vk::DescriptorSetLayout layout)
: VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
DescriptorAllocator::~DescriptorAllocator() = default;
vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
return *descriptors[CommitResource(fence)];
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
std::make_move_iterator(new_sets.end()));
}
VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
: device{device}, active_pool{AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
static constexpr u32 num_sets = 0x20000;
static constexpr vk::DescriptorPoolSize pool_sizes[] = {
{vk::DescriptorType::eUniformBuffer, num_sets * 90},
{vk::DescriptorType::eStorageBuffer, num_sets * 60},
{vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
{vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
{vk::DescriptorType::eStorageImage, num_sets * 40}};
const vk::DescriptorPoolCreateInfo create_info(
vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
const auto dev = device.GetLogical();
return *pools.emplace_back(
dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
}
std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
vk::DescriptorSetLayout layout, std::size_t count) {
std::vector layout_copies(count, layout);
vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
layout_copies.data());
std::vector<vk::DescriptorSet> sets(count);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
case vk::Result::eSuccess:
break;
case vk::Result::eErrorOutOfPoolMemory:
active_pool = AllocateNewPool();
allocate_info.descriptorPool = active_pool;
if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
break;
}
[[fallthrough]];
default:
vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
}
vk::PoolFree deleter(dev, active_pool, dld);
std::vector<UniqueDescriptorSet> unique_sets;
unique_sets.reserve(count);
for (const auto set : sets) {
unique_sets.push_back(UniqueDescriptorSet{set, deleter});
}
return unique_sets;
}
} // namespace Vulkan


@@ -0,0 +1,56 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
class VKDescriptorPool;
class DescriptorAllocator final : public VKFencedPool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
~DescriptorAllocator() override;
DescriptorAllocator(const DescriptorAllocator&) = delete;
vk::DescriptorSet Commit(VKFence& fence);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool& descriptor_pool;
const vk::DescriptorSetLayout layout;
std::vector<UniqueDescriptorSet> descriptors;
};
class VKDescriptorPool final {
friend DescriptorAllocator;
public:
explicit VKDescriptorPool(const VKDevice& device);
~VKDescriptorPool();
private:
vk::DescriptorPool AllocateNewPool();
std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
std::size_t count);
const VKDevice& device;
std::vector<UniqueDescriptorPool> pools;
vk::DescriptorPool active_pool;
};
} // namespace Vulkan
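
A brief, assumed usage sketch (device, descriptor_set_layout and scheduler are stand-ins): the pool is long lived, one DescriptorAllocator is created per descriptor set layout, and each draw or dispatch commits a set against the fence of the work being recorded, so the set is only recycled once that fence signals.

VKDescriptorPool descriptor_pool{device};
DescriptorAllocator allocator{descriptor_pool, *descriptor_set_layout};
// Per draw/dispatch, while recording:
const vk::DescriptorSet set = allocator.Commit(scheduler.GetFence());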


@@ -0,0 +1,271 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
MaxwellToVK::StencilOp(face.action_depth_pass),
MaxwellToVK::StencilOp(face.action_depth_fail),
MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
}
bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
static constexpr std::array unsupported_topologies = {
vk::PrimitiveTopology::ePointList,
vk::PrimitiveTopology::eLineList,
vk::PrimitiveTopology::eTriangleList,
vk::PrimitiveTopology::eLineListWithAdjacency,
vk::PrimitiveTopology::eTriangleListWithAdjacency,
vk::PrimitiveTopology::ePatchList};
return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
topology) == std::end(unsupported_topologies);
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program)
: device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
key.renderpass_params,
program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}
UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
template_entries);
}
}
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
std::vector<UniqueShaderModule> modules;
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
continue;
}
const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
stage->code.data());
modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
}
return modules;
}
UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
const auto& vi = fixed_state.vertex_input;
const auto& ia = fixed_state.input_assembly;
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& ts = fixed_state.tessellation;
const auto& rs = fixed_state.rasterizer;
std::vector<vk::VertexInputBindingDescription> vertex_bindings;
std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) {
const auto& binding = vi.bindings[i];
const bool instanced = binding.divisor != 0;
const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
vertex_bindings.emplace_back(binding.index, binding.stride, rate);
if (instanced) {
vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
}
}
std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) {
const auto& attribute = vi.attributes[i];
if (input_attributes.find(attribute.index) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders.
continue;
}
vertex_attributes.emplace_back(attribute.index, attribute.buffer,
MaxwellToVK::VertexFormat(attribute.type, attribute.size),
attribute.offset);
}
vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
{}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &vertex_input_divisor_ci;
}
const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
{}, primitive_topology,
ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
Maxwell::NumViewports, nullptr);
// TODO(Rodrigo): Find out what's the default register value for front face
const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
{}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
{}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
const vk::CompareOp depth_test_compare = ds.depth_test_enable
? MaxwellToVK::ComparisonOp(ds.depth_test_function)
: vk::CompareOp::eAlways;
const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
{}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const std::size_t num_attachments =
std::min(cd.attachments_count, renderpass_params.color_attachments.size());
for (std::size_t i = 0; i < num_attachments; ++i) {
constexpr std::array component_table{
vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
const auto& blend = cd.attachments[i];
vk::ColorComponentFlags color_components{};
for (std::size_t j = 0; j < component_table.size(); ++j) {
if (blend.components[j])
color_components |= component_table[j];
}
cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
MaxwellToVK::BlendFactor(blend.dst_rgb_func),
MaxwellToVK::BlendEquation(blend.rgb_equation),
MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
MaxwellToVK::BlendEquation(blend.a_equation), color_components);
}
const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
static_cast<u32>(num_attachments),
cb_attachments.data(), {});
constexpr std::array dynamic_states = {
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
{}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
if (!program[stage]) {
continue;
}
const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
*modules[module_index++], "main", nullptr);
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
}
const vk::GraphicsPipelineCreateInfo create_info(
{}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
&input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
&depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
}
} // namespace Vulkan
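
Since viewport, scissor, depth bias, blend constants, depth bounds and the stencil masks are declared dynamic above, they are not baked into the pipeline and have to be recorded per draw. An illustrative call site, assuming a scheduler in scope and a fixed 1280x720 viewport purely for the example:

scheduler.Record([viewport = vk::Viewport(0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f),
                  scissor = vk::Rect2D({0, 0}, {1280, 720})](vk::CommandBuffer cmdbuf, auto& dld) {
    cmdbuf.setViewport(0, {viewport}, dld);
    cmdbuf.setScissor(0, {scissor}, dld);
});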


@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey;
class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program);
~VKGraphicsPipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
vk::RenderPass GetRenderPass() const {
return renderpass;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const;
std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const;
const VKDevice& device;
VKScheduler& scheduler;
const FixedPipelineState fixed_state;
const u64 hash;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
std::vector<UniqueShaderModule> modules;
vk::RenderPass renderpass;
UniquePipeline pipeline;
};
} // namespace Vulkan


@@ -0,0 +1,106 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask)
: device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
"Queue family tracking is not implemented");
const auto dev = device.GetLogical();
image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader());
const u32 num_ranges = image_num_layers * image_num_levels;
barriers.resize(num_ranges);
subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
}
VKImage::~VKImage() = default;
void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
vk::ImageLayout new_layout) {
if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
return;
}
std::size_t cursor = 0;
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
const u32 layer = base_layer + layer_it;
const u32 level = base_level + level_it;
auto& state = GetSubrangeState(layer, level);
barriers[cursor] = vk::ImageMemoryBarrier(
state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1});
state.access = new_access;
state.layout = new_layout;
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) {
// TODO(Rodrigo): Implement a way to use the latest stage across subresources.
constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands;
cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr,
static_cast<u32>(cursor), barriers.data(), dld);
});
}
bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept {
const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
base_level == 0 && num_levels == image_num_levels;
if (!is_full_range) {
state_diverged = true;
}
if (!state_diverged) {
auto& state = GetSubrangeState(0, 0);
if (state.access != new_access || state.layout != new_layout) {
return true;
}
}
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
for (u32 level_it = 0; level_it < num_levels; ++level_it) {
const u32 layer = base_layer + layer_it;
const u32 level = base_level + level_it;
auto& state = GetSubrangeState(layer, level);
if (state.access != new_access || state.layout != new_layout) {
return true;
}
}
}
return false;
}
void VKImage::CreatePresentView() {
// Image type has to be 2D to be presented.
const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {},
{aspect_mask, 0, 1, 0, 1});
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld);
}
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
static_cast<std::size_t>(level)];
}
} // namespace Vulkan
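
An assumed call, for illustration only: transitioning every layer and level of an image so a fragment shader can sample it. If the tracked per-subresource state already matches the requested access and layout, HasChanged returns false and no barrier is recorded.

image.Transition(0, num_layers, 0, num_levels, vk::PipelineStageFlagBits::eFragmentShader,
                 vk::AccessFlagBits::eShaderRead, vk::ImageLayout::eShaderReadOnlyOptimal);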


@@ -0,0 +1,84 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKImage {
public:
explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask);
~VKImage();
/// Records in the passed command buffer an image transition and updates the state of the image.
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
vk::ImageLayout new_layout);
/// Returns a view compatible with presentation; the image has to be 2D.
vk::ImageView GetPresentView() {
if (!present_view) {
CreatePresentView();
}
return *present_view;
}
/// Returns the Vulkan image handle.
vk::Image GetHandle() const {
return *image;
}
/// Returns the Vulkan format for this image.
vk::Format GetFormat() const {
return format;
}
/// Returns the Vulkan aspect mask.
vk::ImageAspectFlags GetAspectMask() const {
return aspect_mask;
}
private:
struct SubrangeState final {
vk::AccessFlags access{}; ///< Current access bits.
vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout.
};
bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept;
/// Creates a presentation view.
void CreatePresentView();
/// Returns the subrange state for a given layer and level.
SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
const VKDevice& device; ///< Device handler.
VKScheduler& scheduler; ///< Device scheduler.
const vk::Format format; ///< Vulkan format.
const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
const u32 image_num_layers; ///< Number of layers.
const u32 image_num_levels; ///< Number of mipmap levels.
UniqueImage image; ///< Image handle.
UniqueImageView present_view; ///< Image view compatible with presentation.
std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers.
std::vector<SubrangeState> subrange_states; ///< Current subrange state.
bool state_diverged = false; ///< True when subresources mismatch in layout.
};
} // namespace Vulkan


@@ -6,6 +6,7 @@
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -16,34 +17,32 @@
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
namespace {
u64 GetAllocationChunkSize(u64 required_size) {
static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
}
} // Anonymous namespace
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
: device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
shifted_type{ShiftType(type)} {}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
auto found = TryFindFreeSection(free_iterator, allocation_size,
static_cast<u64>(commit_size), static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
*found + commit_size);
commits.push_back(commit.get());
@@ -65,12 +63,10 @@ public:
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
const auto it = std::find(std::begin(commits), std::end(commits), commit);
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
UNREACHABLE_MSG("Freeing unallocated commit!");
return;
}
commits.erase(it);
@@ -88,11 +84,11 @@ private:
}
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
/// requeriments.
/// requirements.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
u64 iterator = Common::AlignUp(start, alignment);
while (iterator + size <= end) {
const u64 try_left = iterator;
const u64 try_right = try_left + size;
bool overlap = false;
@@ -100,7 +96,7 @@ private:
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
iterator = Common::AlignUp(commit_right, alignment);
overlap = true;
break;
}
@@ -110,6 +106,7 @@ private:
return try_left;
}
}
// No free regions were found, return an empty optional.
return std::nullopt;
}
@@ -117,12 +114,8 @@ private:
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u64 allocation_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
: device{device}, properties{device.GetPhysical().getMemoryProperties(
device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(properties)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
bool host_visible) {
const u64 chunk_size = GetAllocationChunkSize(requirements.size);
// When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
// TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
// Allocation has failed, panic.
UNREACHABLE_MSG("Ran out of VRAM!");
return {};
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
auto commit = TryAllocCommit(requirements, wanted_properties);
ASSERT(commit);
return commit;
}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
const u32 type = [&] {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const auto flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
UNREACHABLE_MSG("Couldn't find a compatible memory type!");
return 0U;
}();
const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
allocations.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties) {
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
return commit;
}
}
return {};
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(
const vk::PhysicalDeviceMemoryProperties& properties) {
for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end)
: device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
const auto dev = device.GetLogical();
const auto address = reinterpret_cast<u8*>(
dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
return MemoryMap{this, address};
}
void VKMemoryCommitImpl::Unmap() const {
const auto dev = device.GetLogical();
dev.unmapMemory(memory, device.GetDispatchLoader());
}
MemoryMap VKMemoryCommitImpl::Map() const {
return Map(interval.second - interval.first);
}
} // namespace Vulkan
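
For reference, GetAllocationChunkSize rounds a request up to the next fixed bucket (16, 32, 64 or 128 MiB) and falls back to the request aligned up to a 256 MiB multiple for anything larger; a few worked values:

//   GetAllocationChunkSize(10ULL << 20)  -> 16 MiB   (first fixed bucket that fits)
//   GetAllocationChunkSize(100ULL << 20) -> 128 MiB  (largest fixed bucket)
//   GetAllocationChunkSize(300ULL << 20) -> 512 MiB  (Common::AlignUp(300 MiB, 256 MiB))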


@@ -12,6 +12,7 @@
namespace Vulkan {
class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
VKMemoryManager(const VKMemoryManager&) = delete;
~VKMemoryManager();
/**
* Commits a memory with the specified requeriments.
* @param reqs Requeriments returned from a Vulkan call.
* @param requirements Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
/// Tries to allocate a memory commit.
VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
/// Returns true if the device uses a unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
friend MemoryMap;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Maps a memory region and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map(u64 size, u64 offset = 0) const;
/// Maps the whole commit and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map() const;
/// Returns the Vulkan memory handle.
vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
}
private:
/// Unmaps memory.
void Unmap() const;
const VKDevice& device; ///< Vulkan device.
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
/// Holds ownership of a memory map.
class MemoryMap final {
public:
explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
: commit{commit}, address{address} {}
~MemoryMap() {
if (commit) {
commit->Unmap();
}
}
/// Prematurely releases the memory map.
void Release() {
commit->Unmap();
commit = nullptr;
}
/// Returns the address of the memory map.
u8* GetAddress() const {
return address;
}
/// Returns the address of the memory map.
operator u8*() const {
return address;
}
private:
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
u8* address{}; ///< Address to the mapped memory.
};
} // namespace Vulkan
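
The MemoryMap type above turns mapping into an RAII operation: Map() maps the commit's interval and the destructor (or Release()) unmaps it, which is why the buffer cache can write through a temporary map in a single statement. A small sketch under those assumptions, reusing the staging pool shown earlier:

const auto& staging = staging_pool.GetUnusedBuffer(size, true);
{
    // Named map: lives until the end of the scope, then VKMemoryCommitImpl::Unmap() runs.
    MemoryMap map = staging.commit->Map(size);
    std::memcpy(map.GetAddress(), data, size);
}
// One-liner variant relying on the implicit u8* conversion; the temporary map is released at the
// end of the full expression.
std::memcpy(staging.commit->Map(size), data, size);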


@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
namespace {
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Returns whether the instruction at the given offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
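// For example, with main_offset = 10 the offsets 10, 14, 18, ... contain scheduling data.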
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
const std::size_t start_offset = is_compute ? 0 : 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
++offset;
}
// The last instruction is included in the program size
return std::min(offset + 1, program.size());
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr, bool is_compute) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
program_code.resize(CalculateProgramSize(program_code, is_compute));
return program_code;
}
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
}
constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
return ShaderType::Vertex;
}
}
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
for (std::size_t i = 0; i < num_entries; ++i) {
bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
return binding;
}
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
Core::System& system, Tegra::Engines::ShaderType stage) {
if (stage == Tegra::Engines::ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
renderpass_cache(device) {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
if (!dirty) {
return last_shaders;
}
dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
}
return last_shaders = shaders;
}
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
return *last_graphics_pipeline;
}
last_graphics_key = key;
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
}
return *(last_graphics_pipeline = entry.get());
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
kernel_main_offset);
Register(shader);
}
Specialization specialization;
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
}
void VKPipelineCache::Unregister(const Shader& shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
// flush.
if (finished) {
return;
}
finished = true;
scheduler.Finish();
};
const GPUVAddr invalidated_addr = shader->GetGpuAddr();
for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
auto& entry = it->first;
if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
entry.shaders.end()) {
++it;
continue;
}
Finish();
it = graphics_cache.erase(it);
}
for (auto it = compute_cache.begin(); it != compute_cache.end();) {
auto& entry = it->first;
if (entry.shader != invalidated_addr) {
++it;
continue;
}
Finish();
it = compute_cache.erase(it);
}
RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& fixed_state = key.fixed_state;
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
for (const auto& rt : key.renderpass_params.color_attachments) {
specialization.enabled_rendertargets.set(rt.index);
}
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
}
return {std::move(program), std::move(bindings)};
}
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
const u32 count = static_cast<u32>(count_);
if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash.
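// For example, three texel buffers starting at binding 5 are emitted as three one-descriptor
// entries at bindings 5, 6 and 7 instead of a single entry with a descriptor count of 3.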
for (u32 i = 0; i < count; ++i) {
template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
offset + i * entry_size, entry_size);
}
} else if (count != 0) {
template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
}
offset += count * entry_size;
binding += count;
};
AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}
} // namespace Vulkan


@@ -0,0 +1,200 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace Vulkan {
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramCode = std::vector<u64>;
struct GraphicsPipelineCacheKey {
FixedPipelineState fixed_state;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
RenderPassParams renderpass_params;
std::size_t Hash() const noexcept {
std::size_t hash = fixed_state.Hash();
for (const auto& shader : shaders) {
boost::hash_combine(hash, shader);
}
boost::hash_combine(hash, renderpass_params.Hash());
return hash;
}
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::tie(fixed_state, shaders, renderpass_params) ==
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
}
};
struct ComputePipelineCacheKey {
GPUVAddr shader{};
u32 shared_memory_size{};
std::array<u32, 3> workgroup_size{};
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(shader) ^
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
static_cast<std::size_t>(workgroup_size[0]) ^
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
(static_cast<std::size_t>(workgroup_size[2]) << 24);
}
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
return std::tie(shader, shared_memory_size, workgroup_size) ==
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
class VKPipelineCache final : public RasterizerCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
void Unregister(const Shader& shader) override;
void FlushObjectInner(const Shader& object) override {}
private:
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key);
Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan


@@ -0,0 +1,13 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_interface.h"
namespace Vulkan {
class RasterizerVulkan : public VideoCore::RasterizerInterface {};
} // namespace Vulkan


@@ -0,0 +1,100 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
VKRenderPassCache::~VKRenderPassCache() = default;
vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
const auto [pair, is_cache_miss] = cache.try_emplace(params);
auto& entry = pair->second;
if (is_cache_miss) {
entry = CreateRenderPass(params);
}
return *entry;
}
UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
std::vector<vk::AttachmentDescription> descriptors;
std::vector<vk::AttachmentReference> color_references;
for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
const auto attachment = params.color_attachments[rt];
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(attachment.pixel_format));
// TODO(Rodrigo): Add eMayAlias when it's needed.
const auto color_layout = attachment.is_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eColorAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
color_references.emplace_back(static_cast<u32>(rt), color_layout);
}
vk::AttachmentReference zeta_attachment_ref;
if (params.has_zeta) {
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(params.zeta_pixel_format));
const auto zeta_layout = params.zeta_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eDepthStencilAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
zeta_attachment_ref =
vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
}
const vk::SubpassDescription subpass_description(
{}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
nullptr);
vk::AccessFlags access;
vk::PipelineStageFlags stage;
if (!color_references.empty()) {
access |=
vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
}
if (params.has_zeta) {
access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
}
const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
{});
const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
descriptors.data(), 1, &subpass_description, 1,
&subpass_dependency);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createRenderPassUnique(create_info, nullptr, dld);
}
} // namespace Vulkan


@@ -0,0 +1,97 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/surface.h"
namespace Vulkan {
class VKDevice;
// TODO(Rodrigo): Optimize this structure for faster hashing
struct RenderPassParams {
struct ColorAttachment {
u32 index = 0;
VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool is_texception = false;
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(pixel_format) |
static_cast<std::size_t>(is_texception) << 6 |
static_cast<std::size_t>(index) << 7;
}
bool operator==(const ColorAttachment& rhs) const noexcept {
return std::tie(index, pixel_format, is_texception) ==
std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
}
};
boost::container::static_vector<ColorAttachment,
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_attachments{};
// TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool has_zeta = false;
bool zeta_texception = false;
std::size_t Hash() const noexcept {
std::size_t hash = 0;
for (const auto& rt : color_attachments) {
boost::hash_combine(hash, rt.Hash());
}
boost::hash_combine(hash, zeta_pixel_format);
boost::hash_combine(hash, has_zeta);
boost::hash_combine(hash, zeta_texception);
return hash;
}
bool operator==(const RenderPassParams& rhs) const {
return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
rhs.zeta_texception);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassParams> {
std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class VKRenderPassCache final {
public:
explicit VKRenderPassCache(const VKDevice& device);
~VKRenderPassCache();
vk::RenderPass GetRenderPass(const RenderPassParams& params);
private:
UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
const VKDevice& device;
std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
};
} // namespace Vulkan


@@ -954,6 +954,10 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
// It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel();
const Id skip_label = OpLabel();


@@ -0,0 +1,127 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
namespace Vulkan {
VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
u64 last_epoch)
: buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
buffer = std::move(rhs.buffer);
watch = std::move(rhs.watch);
last_epoch = rhs.last_epoch;
}
VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
StagingBuffer&& rhs) noexcept {
buffer = std::move(rhs.buffer);
watch = std::move(rhs.watch);
last_epoch = rhs.last_epoch;
return *this;
}
VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler)
: device{device}, memory_manager{memory_manager}, scheduler{scheduler},
is_device_integrated{device.IsIntegrated()} {}
VKStagingBufferPool::~VKStagingBufferPool() = default;
VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
return *buffer;
}
return CreateStagingBuffer(size, host_visible);
}
void VKStagingBufferPool::TickFrame() {
++epoch;
current_delete_level = (current_delete_level + 1) % NumLevels;
ReleaseCache(true);
if (!is_device_integrated) {
ReleaseCache(false);
}
}
VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
if (entry.watch.TryWatch(scheduler.GetFence())) {
entry.last_epoch = epoch;
return &*entry.buffer;
}
}
return nullptr;
}
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const auto usage =
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
const u32 log2 = Common::Log2Ceil64(size);
const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
nullptr);
const auto dev = device.GetLogical();
auto buffer = std::make_unique<VKBuffer>();
buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader());
buffer->commit = memory_manager.Commit(*buffer->handle, host_visible);
auto& entries = GetCache(host_visible)[log2].entries;
return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
}
VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
}
void VKStagingBufferPool::ReleaseCache(bool host_visible) {
auto& cache = GetCache(host_visible);
const u64 size = ReleaseLevel(cache, current_delete_level);
if (size == 0) {
return;
}
}
u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
static constexpr u64 epochs_to_destroy = 180;
static constexpr std::size_t deletions_per_tick = 16;
auto& staging = cache[log2];
auto& entries = staging.entries;
const std::size_t old_size = entries.size();
const auto is_deleteable = [this](const auto& entry) {
return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
};
const std::size_t begin_offset = staging.delete_index;
const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
const auto begin = std::begin(entries) + begin_offset;
const auto end = std::begin(entries) + end_offset;
entries.erase(std::remove_if(begin, end, is_deleteable), end);
const std::size_t new_size = entries.size();
staging.delete_index += deletions_per_tick;
if (staging.delete_index >= new_size) {
staging.delete_index = 0;
}
return (1ULL << log2) * (old_size - new_size);
}
} // namespace Vulkan


@@ -0,0 +1,83 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <climits>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFenceWatch;
class VKScheduler;
struct VKBuffer final {
UniqueBuffer handle;
VKMemoryCommit commit;
};
class VKStagingBufferPool final {
public:
explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler);
~VKStagingBufferPool();
VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
void TickFrame();
private:
struct StagingBuffer final {
explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
StagingBuffer(StagingBuffer&& rhs) noexcept;
StagingBuffer(const StagingBuffer&) = delete;
~StagingBuffer();
StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
std::unique_ptr<VKBuffer> buffer;
VKFenceWatch watch;
u64 last_epoch = 0;
};
struct StagingBuffers final {
std::vector<StagingBuffer> entries;
std::size_t delete_index = 0;
};
static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
StagingBuffersCache& GetCache(bool host_visible);
void ReleaseCache(bool host_visible);
u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
const bool is_device_integrated;
StagingBuffersCache host_staging_buffers;
StagingBuffersCache device_staging_buffers;
u64 epoch = 0;
std::size_t current_delete_level = 0;
};
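// Usage sketch (illustrative only; `staging_pool` is an assumed VKStagingBufferPool instance):
//   VKBuffer& staging = staging_pool.GetUnusedBuffer(copy_size, /*host_visible=*/true);
//   // ... record the transfer using *staging.handle and staging.commit ...
//   staging_pool.TickFrame(); // once per frame, so aged and unused buffers get released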
} // namespace Vulkan


@@ -3,86 +3,144 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
namespace {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
pipeline_stage} {
CreateBuffers(memory_manager, usage);
ReserveWatches(WATCHES_INITIAL_RESERVE);
constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
vk::MemoryPropertyFlags wanted) {
const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
for (u32 i = 0; i < properties.memoryTypeCount; i++) {
if (!(filter & (1 << i))) {
continue;
}
if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
return i;
}
}
return {};
}
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
vk::BufferUsageFlags usage)
: device{device}, scheduler{scheduler} {
CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
ASSERT(size <= buffer_size);
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= STREAM_BUFFER_SIZE);
mapped_size = size;
if (offset + size > buffer_size) {
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
// reset the state.
invalidation_mark = used_watches;
used_watches = 0;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
WaitPendingOperations(offset);
bool invalidated = false;
if (offset + size > STREAM_BUFFER_SIZE) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
}
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
void VKStreamBuffer::Send(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
invalidated = true;
}
if (used_watches + 1 >= watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
watches[used_watches++]->Watch(scheduler.GetFence());
offset += size;
}
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
commit = memory_manager.Commit(*buffer, true);
mapped_pointer = commit->GetData();
const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
return {pointer, offset, invalidated};
}
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
const std::size_t previous_size = watches.size();
watches.resize(previous_size + grow_size);
std::generate(watches.begin() + previous_size, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
const auto dev = device.GetLogical();
dev.unmapMemory(*memory, device.GetDispatchLoader());
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.fence.Watch(scheduler.GetFence());
}
void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
0, nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
// Prefer device local host visible allocations (this should hit AMD's pinned memory).
auto type = FindMemoryType(device, requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent |
vk::MemoryPropertyFlagBits::eDeviceLocal);
if (!type) {
// Otherwise search for a host visible allocation.
type = FindMemoryType(device, requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent);
ASSERT_MSG(type, "No host visible and coherent memory type found");
}
const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
dev.bindBufferMemory(*buffer, *memory, 0, dld);
}
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
watch.fence.Wait();
++wait_cursor;
}
}
} // namespace Vulkan


@@ -4,28 +4,24 @@
#pragma once
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;
class VKStreamBuffer {
class VKStreamBuffer final {
public:
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
vk::BufferUsageFlags usage);
~VKStreamBuffer();
/**
@@ -34,39 +30,47 @@ public:
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
*/
std::tuple<u8*, u64, bool> Reserve(u64 size);
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Send(u64 size);
void Unmap(u64 size);
vk::Buffer GetBuffer() const {
vk::Buffer GetHandle() const {
return *buffer;
}
private:
struct Watch final {
VKFenceWatch fence;
u64 upper_bound{};
};
/// Creates Vulkan buffer handles committing the required memory.
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
void CreateBuffers(vk::BufferUsageFlags usage);
/// Increases the amount of watches available.
void ReserveWatches(std::size_t grow_size);
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
UniqueBuffer buffer; ///< Mapped buffer.
VKMemoryCommit commit; ///< Memory commit.
u8* mapped_pointer{}; ///< Pointer to the host visible commit
UniqueBuffer buffer; ///< Mapped buffer.
UniqueDeviceMemory memory; ///< Memory allocation.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t>
invalidation_mark{}; ///< Number of watches used in the current invalidation.
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
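// Usage sketch (illustrative only; `stream_buffer` is an assumed VKStreamBuffer instance):
//   const auto [pointer, buffer_offset, invalidated] = stream_buffer.Map(size, alignment);
//   std::memcpy(pointer, src, size);
//   stream_buffer.Unmap(size); // commits the written size and registers a fence watch
//   // `invalidated` signals that previously returned offsets are no longer valid.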
} // namespace Vulkan


@@ -0,0 +1,57 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
: device{device}, scheduler{scheduler} {}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
payload.clear();
}
void VKUpdateDescriptorQueue::Acquire() {
entries.clear();
}
void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
vk::DescriptorSet set) {
if (payload.size() + entries.size() >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) {
if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
} else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
payload.push_back(*texel);
} else {
UNREACHABLE();
}
}
scheduler.Record([dev = device.GetLogical(), payload_start, set,
update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
});
}
} // namespace Vulkan


@@ -0,0 +1,86 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class DescriptorUpdateEntry {
public:
explicit DescriptorUpdateEntry() : image{} {}
DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}
private:
union {
vk::DescriptorImageInfo image;
vk::DescriptorBufferInfo buffer;
vk::BufferView texel_buffer;
};
};
class VKUpdateDescriptorQueue final {
public:
explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
~VKUpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);
void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size});
}
void AddTexelBuffer(vk::BufferView texel_buffer) {
entries.emplace_back(texel_buffer);
}
vk::ImageLayout* GetLastImageLayout() {
return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
}
private:
struct Buffer {
const vk::Buffer* buffer{};
u64 offset{};
std::size_t size{};
};
using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Variant>);
const VKDevice& device;
VKScheduler& scheduler;
boost::container::static_vector<Variant, 0x400> entries;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
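// Usage sketch (illustrative only; variables other than the class members are assumed):
//   update_descriptor_queue.Acquire();
//   update_descriptor_queue.AddBuffer(&buffer, offset, size);     // push order must match
//   update_descriptor_queue.AddSampledImage(sampler, image_view); // the update template
//   update_descriptor_queue.Send(update_template, descriptor_set);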
} // namespace Vulkan


@@ -6,6 +6,7 @@
#include <vector>
#include <fmt/format.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -22,34 +23,39 @@ using Tegra::Shader::Register;
namespace {
u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
uniform_type == Tegra::Shader::UniformType::UnsignedShort;
}
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
case Tegra::Shader::UniformType::Single:
return 1;
case Tegra::Shader::UniformType::Double:
return 2;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
return 0b11;
case Tegra::Shader::UniformType::UnsignedShort:
return 0b10;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
return 1;
UNREACHABLE();
return 0;
}
}
u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
return 8;
case Tegra::Shader::UniformType::UnsignedShort:
return 16;
case Tegra::Shader::UniformType::Single:
return 1;
return 32;
case Tegra::Shader::UniformType::Double:
return 2;
return 64;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
return 128;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
return 1;
return 32;
}
}
@@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, false);
TrackGlobalMemory(bb, instr, true, false);
const u32 count = GetLdgMemorySize(type);
const u32 size = GetMemorySize(type);
const u32 count = Common::AlignUp(size, 32) / 32;
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
if (type == Tegra::Shader::UniformType::UnsignedByte) {
// To handle unaligned loads get the byte used to dereferenced global memory
// and extract that byte from the loaded uint32.
Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
// To handle unaligned loads get the bytes used to dereference global memory and extract
// those bytes from the loaded u32.
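// For example, an u8 load whose address has low bits 0b10 yields offset = 2 * 8 = 16, so
// bits [16, 24) of the aligned 32-bit word are extracted.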
if (IsUnaligned(type)) {
Node mask = Immediate(GetUnalignedMask(type));
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
Immediate(8));
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
std::move(offset), Immediate(size));
}
SetTemporary(bb, i, gmem);
@@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
// For unaligned reads we have to read memory too.
const bool is_read = IsUnaligned(type);
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true);
TrackGlobalMemory(bb, instr, is_read, true);
if (!real_address_base || !base_address) {
// Tracking failed, skip the store.
break;
}
const u32 count = GetStgMemorySize(type);
const u32 size = GetMemorySize(type);
const u32 count = Common::AlignUp(size, 32) / 32;
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
const Node value = GetRegister(instr.gpr0.Value() + i);
Node value = GetRegister(instr.gpr0.Value() + i);
if (IsUnaligned(type)) {
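// Read-modify-write: take the aligned word from global memory, insert the narrow value at
// its bit offset with UBitfieldInsert, then store the whole word back.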
Node mask = Immediate(GetUnalignedMask(type));
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
Immediate(size));
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
@@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
Instruction instr,
bool is_write) {
bool is_read, bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
@@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
if (is_write) {
usage.is_written = true;
} else {
usage.is_read = true;
}
usage.is_written |= is_write;
usage.is_read |= is_read;
const auto real_address =
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);


@@ -13,37 +13,65 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {
constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
}
u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);
const Node apply_mask = [&]() {
const Node apply_mask = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::R2P_IMM:
return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
case OpCode::Id::P2R_IMM:
return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
default:
UNREACHABLE();
return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
return Immediate(0);
}
}();
const Node mask = GetRegister(instr.gpr8);
const auto offset = static_cast<u32>(instr.r2p.byte) * 8;
constexpr u32 programmable_preds = 7;
for (u64 pred = 0; pred < programmable_preds; ++pred) {
const auto shift = static_cast<u32>(pred);
const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
const Node condition =
Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
switch (opcode->get().GetId()) {
case OpCode::Id::R2P_IMM: {
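// R2P: for every programmable predicate selected by the immediate mask, copy the matching
// bit of the chosen byte of gpr8 into that predicate.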
const Node mask = GetRegister(instr.gpr8);
const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
const auto shift = static_cast<u32>(pred);
const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
bb.push_back(Conditional(condition, {code}));
const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
const Node condition =
Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
const Node value =
Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
bb.push_back(Conditional(condition, {code}));
}
break;
}
case OpCode::Id::P2R_IMM: {
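// P2R: pack the programmable predicates into a bit mask, keep only the bits selected by the
// immediate mask, and insert that byte into the value read from gpr8 before writing gpr0.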
Node value = Immediate(0);
for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
Immediate(0));
value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
}
value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);
SetRegister(bb, instr.gpr0, std::move(value));
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName());
break;
}
return pc;


@@ -89,59 +89,62 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
[[fallthrough]];
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
: instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
is_ptp, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented");
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
constexpr std::size_t num_coords = 2;
const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
Node dc_reg;
if (depth_compare) {
std::vector<Node> aoffi;
Node depth_compare;
if (is_depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(op_b, num_coords, true);
depth_compare = GetRegister(instr.gpr20.Value() + 1);
} else {
depth_compare = op_b;
}
} else {
// There's no depth compare
coords.push_back(op_a);
if (uses_aoffi) {
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_y);
if (is_aoffi) {
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
aoffi = GetAoffiCoordinates(op_b, num_coords, true);
} else {
coords.push_back(op_b);
}
dc_reg = {};
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
const Sampler& sampler = *GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element};
MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
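// Operand layout implied by the TLD4S branches above (summary derived from this hunk):
//   no DC, no AOFFI : coords = {gpr8, gpr20}
//   no DC,    AOFFI : coords = {gpr8, gpr8+1}, offsets packed in gpr20
//      DC, no AOFFI : coords = {gpr8, gpr8+1}, depth compare in gpr20
//      DC,    AOFFI : coords = {gpr8, gpr8+1}, offsets in gpr20, depth compare in gpr20+1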
@@ -190,7 +193,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
for (u32 element = 0; element < values.size(); ++element) {
MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element};
MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
@@ -230,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -299,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -367,7 +370,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
if (it != used_samplers.end()) {
ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
return &(*it);
return &*it;
}
// Otherwise create a new mapping for this sampler
@@ -397,7 +400,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
if (it != used_samplers.end()) {
ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow);
return &(*it);
return &*it;
}
// Otherwise create a new mapping for this sampler
@@ -538,7 +541,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -635,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array, bool is_aoffi, bool is_bindless) {
bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
const std::size_t coord_count = GetCoordCount(texture_type);
// If enabled, the array index is always stored in the gpr8 field
@@ -661,12 +666,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
return values;
}
std::vector<Node> aoffi;
std::vector<Node> aoffi, ptp;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
} else if (is_ptp) {
ptp = GetPtpCoordinates(
{GetRegister(parameter_register++), GetRegister(parameter_register++)});
}
Node dc{};
Node dc;
if (depth_compare) {
dc = GetRegister(parameter_register++);
}
@@ -676,8 +684,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
element};
MetaTexture meta{
*sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -710,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -760,7 +768,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -825,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
return aoffi;
}
std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
static constexpr u32 num_entries = 8;
std::vector<Node> ptp;
ptp.reserve(num_entries);
const auto global_size = static_cast<s64>(global_code.size());
const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
if (!low || !high) {
for (u32 entry = 0; entry < num_entries; ++entry) {
const u32 reg = entry / 4;
const u32 offset = entry % 4;
const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
const Node condition =
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
}
return ptp;
}
const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
for (u32 entry = 0; entry < num_entries; ++entry) {
s32 value = (immediate >> (entry * 8)) & 0b111111;
if (value >= 32) {
value -= 64;
}
ptp.push_back(Immediate(value));
}
return ptp;
}
} // namespace VideoCommon::Shader
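GetPtpCoordinates above decodes the eight TLD4.PTP texel offsets, one per byte across the two source registers, keeping only the low 6 bits of each byte and treating values of 32 and above as negative (6-bit two's complement). When TrackImmediate resolves both registers to constants, the offsets fold to immediates at decode time. A standalone sketch of that constant path with a worked value (hypothetical helper mirroring the loop above, not part of the shader IR):
#include <array>
#include <cstdint>

// Decode eight signed 6-bit PTP offsets, one per byte of a 64-bit immediate.
std::array<int, 8> DecodePtpImmediate(std::uint64_t immediate) {
    std::array<int, 8> offsets{};
    for (unsigned entry = 0; entry < 8; ++entry) {
        int value = static_cast<int>((immediate >> (entry * 8)) & 0b111111);
        if (value >= 32) {
            value -= 64; // sign-extend from 6 bits: 0b100000..0b111111 are negative
        }
        offsets[entry] = value;
    }
    return offsets;
}
// Example: DecodePtpImmediate(0x203F) -> {-1, -32, 0, 0, 0, 0, 0, 0}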

View File

@@ -374,6 +374,7 @@ struct MetaTexture {
Node array;
Node depth_compare;
std::vector<Node> aoffi;
std::vector<Node> ptp;
std::vector<Node> derivates;
Node bias;
Node lod;
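// MetaTexture is aggregate-initialized positionally at its call sites, so the new
// `ptp` member is what adds one more `{}` slot to every MetaTexture{...} in this diff.
// Member order as reconstructed from those call sites (the struct is only partially
// visible in this hunk):
//   sampler, array, depth_compare, aoffi, ptp, derivates, bias, lod, component, element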
@@ -391,8 +392,30 @@ struct MetaImage {
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
class AmendNode {
public:
std::optional<std::size_t> GetAmendIndex() const {
if (amend_index == amend_null_index) {
return std::nullopt;
}
return {amend_index};
}
void SetAmendIndex(std::size_t index) {
amend_index = index;
}
void ClearAmend() {
amend_index = amend_null_index;
}
private:
static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
std::size_t amend_index{amend_null_index};
};
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
class OperationNode final : public AmendNode {
public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
@@ -432,7 +455,7 @@ private:
};
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final {
class ConditionalNode final : public AmendNode {
public:
explicit ConditionalNode(Node condition, std::vector<Node>&& code)
: condition{std::move(condition)}, code{std::move(code)} {}

View File

@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
const std::size_t id = amend_code.size();
amend_code.push_back(new_amend);
return id;
}
} // namespace VideoCommon::Shader

View File

@@ -176,6 +176,10 @@ public:
/// Returns a condition code evaluated from internal flags
Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
const Node& GetAmendNode(std::size_t index) const {
return amend_code[index];
}
private:
friend class ASTDecoder;
@@ -350,7 +354,8 @@ private:
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
bool is_bindless);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
@@ -363,6 +368,8 @@ private:
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
@@ -387,7 +394,10 @@ private:
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
Tegra::Shader::Instruction instr,
bool is_write);
bool is_read, bool is_write);
/// Registers new amending code and returns its reference id.
std::size_t DeclareAmend(Node new_amend);
const ProgramCode& program_code;
const u32 main_offset;
@@ -403,6 +413,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
ASTManager program_manager{true, true};
std::vector<Node> amend_code;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;

View File

@@ -78,11 +78,6 @@ add_executable(yuzu
configuration/configure_web.cpp
configuration/configure_web.h
configuration/configure_web.ui
debugger/graphics/graphics_breakpoint_observer.cpp
debugger/graphics/graphics_breakpoint_observer.h
debugger/graphics/graphics_breakpoints.cpp
debugger/graphics/graphics_breakpoints.h
debugger/graphics/graphics_breakpoints_p.h
debugger/console.cpp
debugger/console.h
debugger/profiler.cpp

View File

@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
}
void GRenderWindow::SwapBuffers() {
// In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
// since we never call `doneCurrent` in this thread.
// However:
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
// since the last time `swapBuffers` was executed;
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
context->makeCurrent(child);
context->swapBuffers(child);
if (!first_frame) {
emit FirstFrameDisplayed();
first_frame = true;
emit FirstFrameDisplayed();
}
}

View File

@@ -1,27 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <QMetaType>
#include "yuzu/debugger/graphics/graphics_breakpoint_observer.h"
BreakPointObserverDock::BreakPointObserverDock(std::shared_ptr<Tegra::DebugContext> debug_context,
const QString& title, QWidget* parent)
: QDockWidget(title, parent), BreakPointObserver(debug_context) {
qRegisterMetaType<Tegra::DebugContext::Event>("Tegra::DebugContext::Event");
connect(this, &BreakPointObserverDock::Resumed, this, &BreakPointObserverDock::OnResumed);
// NOTE: This signal is emitted from a non-GUI thread, but connect() takes
// care of delaying its handling to the GUI thread.
connect(this, &BreakPointObserverDock::BreakPointHit, this,
&BreakPointObserverDock::OnBreakPointHit, Qt::BlockingQueuedConnection);
}
void BreakPointObserverDock::OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) {
emit BreakPointHit(event, data);
}
void BreakPointObserverDock::OnMaxwellResume() {
emit Resumed();
}

View File

@@ -1,33 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <QDockWidget>
#include "video_core/debug_utils/debug_utils.h"
/**
* Utility class which forwards calls to OnMaxwellBreakPointHit and OnMaxwellResume to public slots.
* This is because the Maxwell breakpoint callbacks are called from a non-GUI thread, while
* the widget usually wants to perform reactions in the GUI thread.
*/
class BreakPointObserverDock : public QDockWidget,
protected Tegra::DebugContext::BreakPointObserver {
Q_OBJECT
public:
BreakPointObserverDock(std::shared_ptr<Tegra::DebugContext> debug_context, const QString& title,
QWidget* parent = nullptr);
void OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
void OnMaxwellResume() override;
signals:
void Resumed();
void BreakPointHit(Tegra::DebugContext::Event event, void* data);
private:
virtual void OnBreakPointHit(Tegra::DebugContext::Event event, void* data) = 0;
virtual void OnResumed() = 0;
};

View File

@@ -1,221 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <QLabel>
#include <QMetaType>
#include <QPushButton>
#include <QTreeView>
#include <QVBoxLayout>
#include "common/assert.h"
#include "yuzu/debugger/graphics/graphics_breakpoints.h"
#include "yuzu/debugger/graphics/graphics_breakpoints_p.h"
BreakPointModel::BreakPointModel(std::shared_ptr<Tegra::DebugContext> debug_context,
QObject* parent)
: QAbstractListModel(parent), context_weak(debug_context),
at_breakpoint(debug_context->at_breakpoint),
active_breakpoint(debug_context->active_breakpoint) {}
int BreakPointModel::columnCount(const QModelIndex& parent) const {
return 1;
}
int BreakPointModel::rowCount(const QModelIndex& parent) const {
return static_cast<int>(Tegra::DebugContext::Event::NumEvents);
}
QVariant BreakPointModel::data(const QModelIndex& index, int role) const {
const auto event = static_cast<Tegra::DebugContext::Event>(index.row());
switch (role) {
case Qt::DisplayRole: {
if (index.column() == 0) {
return DebugContextEventToString(event);
}
break;
}
case Qt::CheckStateRole: {
if (index.column() == 0)
return data(index, Role_IsEnabled).toBool() ? Qt::Checked : Qt::Unchecked;
break;
}
case Qt::BackgroundRole: {
if (at_breakpoint && index.row() == static_cast<int>(active_breakpoint)) {
return QBrush(QColor(0xE0, 0xE0, 0x10));
}
break;
}
case Role_IsEnabled: {
auto context = context_weak.lock();
return context && context->breakpoints[(int)event].enabled;
}
default:
break;
}
return QVariant();
}
Qt::ItemFlags BreakPointModel::flags(const QModelIndex& index) const {
if (!index.isValid())
return 0;
Qt::ItemFlags flags = Qt::ItemIsEnabled;
if (index.column() == 0)
flags |= Qt::ItemIsUserCheckable;
return flags;
}
bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, int role) {
const auto event = static_cast<Tegra::DebugContext::Event>(index.row());
switch (role) {
case Qt::CheckStateRole: {
if (index.column() != 0)
return false;
auto context = context_weak.lock();
if (!context)
return false;
context->breakpoints[(int)event].enabled = value == Qt::Checked;
QModelIndex changed_index = createIndex(index.row(), 0);
emit dataChanged(changed_index, changed_index);
return true;
}
}
return false;
}
void BreakPointModel::OnBreakPointHit(Tegra::DebugContext::Event event) {
auto context = context_weak.lock();
if (!context)
return;
active_breakpoint = context->active_breakpoint;
at_breakpoint = context->at_breakpoint;
emit dataChanged(createIndex(static_cast<int>(event), 0),
createIndex(static_cast<int>(event), 0));
}
void BreakPointModel::OnResumed() {
auto context = context_weak.lock();
if (!context)
return;
at_breakpoint = context->at_breakpoint;
emit dataChanged(createIndex(static_cast<int>(active_breakpoint), 0),
createIndex(static_cast<int>(active_breakpoint), 0));
active_breakpoint = context->active_breakpoint;
}
QString BreakPointModel::DebugContextEventToString(Tegra::DebugContext::Event event) {
switch (event) {
case Tegra::DebugContext::Event::MaxwellCommandLoaded:
return tr("Maxwell command loaded");
case Tegra::DebugContext::Event::MaxwellCommandProcessed:
return tr("Maxwell command processed");
case Tegra::DebugContext::Event::IncomingPrimitiveBatch:
return tr("Incoming primitive batch");
case Tegra::DebugContext::Event::FinishedPrimitiveBatch:
return tr("Finished primitive batch");
case Tegra::DebugContext::Event::NumEvents:
break;
}
return tr("Unknown debug context event");
}
GraphicsBreakPointsWidget::GraphicsBreakPointsWidget(
std::shared_ptr<Tegra::DebugContext> debug_context, QWidget* parent)
: QDockWidget(tr("Maxwell Breakpoints"), parent), Tegra::DebugContext::BreakPointObserver(
debug_context) {
setObjectName(QStringLiteral("TegraBreakPointsWidget"));
status_text = new QLabel(tr("Emulation running"));
resume_button = new QPushButton(tr("Resume"));
resume_button->setEnabled(false);
breakpoint_model = new BreakPointModel(debug_context, this);
breakpoint_list = new QTreeView;
breakpoint_list->setRootIsDecorated(false);
breakpoint_list->setHeaderHidden(true);
breakpoint_list->setModel(breakpoint_model);
qRegisterMetaType<Tegra::DebugContext::Event>("Tegra::DebugContext::Event");
connect(breakpoint_list, &QTreeView::doubleClicked, this,
&GraphicsBreakPointsWidget::OnItemDoubleClicked);
connect(resume_button, &QPushButton::clicked, this,
&GraphicsBreakPointsWidget::OnResumeRequested);
connect(this, &GraphicsBreakPointsWidget::BreakPointHit, this,
&GraphicsBreakPointsWidget::OnBreakPointHit, Qt::BlockingQueuedConnection);
connect(this, &GraphicsBreakPointsWidget::Resumed, this, &GraphicsBreakPointsWidget::OnResumed);
connect(this, &GraphicsBreakPointsWidget::BreakPointHit, breakpoint_model,
&BreakPointModel::OnBreakPointHit, Qt::BlockingQueuedConnection);
connect(this, &GraphicsBreakPointsWidget::Resumed, breakpoint_model,
&BreakPointModel::OnResumed);
connect(this, &GraphicsBreakPointsWidget::BreakPointsChanged,
[this](const QModelIndex& top_left, const QModelIndex& bottom_right) {
breakpoint_model->dataChanged(top_left, bottom_right);
});
QWidget* main_widget = new QWidget;
auto main_layout = new QVBoxLayout;
{
auto sub_layout = new QHBoxLayout;
sub_layout->addWidget(status_text);
sub_layout->addWidget(resume_button);
main_layout->addLayout(sub_layout);
}
main_layout->addWidget(breakpoint_list);
main_widget->setLayout(main_layout);
setWidget(main_widget);
}
void GraphicsBreakPointsWidget::OnMaxwellBreakPointHit(Event event, void* data) {
// Process in GUI thread
emit BreakPointHit(event, data);
}
void GraphicsBreakPointsWidget::OnBreakPointHit(Tegra::DebugContext::Event event, void* data) {
status_text->setText(tr("Emulation halted at breakpoint"));
resume_button->setEnabled(true);
}
void GraphicsBreakPointsWidget::OnMaxwellResume() {
// Process in GUI thread
emit Resumed();
}
void GraphicsBreakPointsWidget::OnResumed() {
status_text->setText(tr("Emulation running"));
resume_button->setEnabled(false);
}
void GraphicsBreakPointsWidget::OnResumeRequested() {
if (auto context = context_weak.lock())
context->Resume();
}
void GraphicsBreakPointsWidget::OnItemDoubleClicked(const QModelIndex& index) {
if (!index.isValid())
return;
QModelIndex check_index = breakpoint_list->model()->index(index.row(), 0);
QVariant enabled = breakpoint_list->model()->data(check_index, Qt::CheckStateRole);
QVariant new_state = Qt::Unchecked;
if (enabled == Qt::Unchecked)
new_state = Qt::Checked;
breakpoint_list->model()->setData(check_index, new_state, Qt::CheckStateRole);
}

View File

@@ -1,45 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <QDockWidget>
#include "video_core/debug_utils/debug_utils.h"
class QLabel;
class QPushButton;
class QTreeView;
class BreakPointModel;
class GraphicsBreakPointsWidget : public QDockWidget, Tegra::DebugContext::BreakPointObserver {
Q_OBJECT
using Event = Tegra::DebugContext::Event;
public:
explicit GraphicsBreakPointsWidget(std::shared_ptr<Tegra::DebugContext> debug_context,
QWidget* parent = nullptr);
void OnMaxwellBreakPointHit(Tegra::DebugContext::Event event, void* data) override;
void OnMaxwellResume() override;
signals:
void Resumed();
void BreakPointHit(Tegra::DebugContext::Event event, void* data);
void BreakPointsChanged(const QModelIndex& topLeft, const QModelIndex& bottomRight);
private:
void OnBreakPointHit(Tegra::DebugContext::Event event, void* data);
void OnItemDoubleClicked(const QModelIndex&);
void OnResumeRequested();
void OnResumed();
QLabel* status_text;
QPushButton* resume_button;
BreakPointModel* breakpoint_model;
QTreeView* breakpoint_list;
};

View File

@@ -1,37 +0,0 @@
// Copyright 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <QAbstractListModel>
#include "video_core/debug_utils/debug_utils.h"
class BreakPointModel : public QAbstractListModel {
Q_OBJECT
public:
enum {
Role_IsEnabled = Qt::UserRole,
};
BreakPointModel(std::shared_ptr<Tegra::DebugContext> context, QObject* parent);
int columnCount(const QModelIndex& parent = QModelIndex()) const override;
int rowCount(const QModelIndex& parent = QModelIndex()) const override;
QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
Qt::ItemFlags flags(const QModelIndex& index) const override;
bool setData(const QModelIndex& index, const QVariant& value, int role = Qt::EditRole) override;
void OnBreakPointHit(Tegra::DebugContext::Event event);
void OnResumed();
private:
static QString DebugContextEventToString(Tegra::DebugContext::Event event);
std::weak_ptr<Tegra::DebugContext> context_weak;
bool at_breakpoint;
Tegra::DebugContext::Event active_breakpoint;
};

View File

@@ -93,7 +93,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/debug_utils/debug_utils.h"
#include "yuzu/about_dialog.h"
#include "yuzu/bootmanager.h"
#include "yuzu/compatdb.h"
@@ -101,7 +100,6 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "yuzu/configuration/config.h"
#include "yuzu/configuration/configure_dialog.h"
#include "yuzu/debugger/console.h"
#include "yuzu/debugger/graphics/graphics_breakpoints.h"
#include "yuzu/debugger/profiler.h"
#include "yuzu/debugger/wait_tree.h"
#include "yuzu/discord.h"
@@ -187,8 +185,6 @@ GMainWindow::GMainWindow()
provider(std::make_unique<FileSys::ManualContentProvider>()) {
InitializeLogging();
debug_context = Tegra::DebugContext::Construct();
setAcceptDrops(true);
ui.setupUi(this);
statusBar()->hide();
@@ -495,11 +491,6 @@ void GMainWindow::InitializeDebugWidgets() {
debug_menu->addAction(microProfileDialog->toggleViewAction());
#endif
graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this);
addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
graphicsBreakpointsWidget->hide();
debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
waitTreeWidget = new WaitTreeWidget(this);
addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget);
waitTreeWidget->hide();
@@ -869,8 +860,6 @@ bool GMainWindow::LoadROM(const QString& filename) {
Core::System& system{Core::System::GetInstance()};
system.SetFilesystem(vfs);
system.SetGPUDebugContext(debug_context);
system.SetAppletFrontendSet({
nullptr, // Parental Controls
std::make_unique<QtErrorDisplay>(*this), //

View File

@@ -22,7 +22,6 @@ class Config;
class EmuThread;
class GameList;
class GImageInfo;
class GraphicsBreakPointsWidget;
class GRenderWindow;
class LoadingScreen;
class MicroProfileDialog;
@@ -42,10 +41,6 @@ class ManualContentProvider;
class VfsFilesystem;
} // namespace FileSys
namespace Tegra {
class DebugContext;
}
enum class EmulatedDirectoryTarget {
NAND,
SDMC,
@@ -223,8 +218,6 @@ private:
Ui::MainWindow ui;
std::shared_ptr<Tegra::DebugContext> debug_context;
GRenderWindow* render_window;
GameList* game_list;
LoadingScreen* loading_screen;
@@ -255,7 +248,6 @@ private:
// Debugger panes
ProfilerWidget* profilerWidget;
MicroProfileDialog* microProfileDialog;
GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
WaitTreeWidget* waitTreeWidget;
QAction* actions_recent_files[max_recent_files_item];