vk_stream_buffer: Remove copy code path

vk_stream_buffer: Implement a stream buffer
This manages two kinds of streaming buffers: one for unified memory models and one for dedicated GPUs. The first one skips the copy from the staging buffer to the real buffer, since it creates a unified buffer. This implementation waits for all fences to finish their operations before "invalidating". This is suboptimal, since it should instead allocate another buffer or start searching from the beginning. There is room for improvement here. This could also handle AMD's "pinned" memory (a heap with 256 MiB) that seems to be designed for buffer streaming.
2019-02-26 02:09:43 -03:00 · 2019-02-24 04:27:51 -03:00 · 2019-02-24 04:19:04 -03:00
31 changed files with 847 additions and 935 deletions
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,7 +73,6 @@ set(HASH_FILES
    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/memory.cpp"
-    "${VIDEO_CORE}/shader/decode/texture.cpp"
    "${VIDEO_CORE}/shader/decode/other.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
        }
    }

-    state.yn1 = static_cast<s16>(yn1);
-    state.yn2 = static_cast<s16>(yn2);
+    state.yn1 = yn1;
+    state.yn2 = yn2;

    return ret;
 }
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -46,7 +46,7 @@ public:
        }
    }

-    ~CubebSinkStream() override {
+    ~CubebSinkStream() {
        if (!ctx) {
            return;
        }
@@ -75,11 +75,11 @@ public:
        queue.Push(samples);
    }

-    std::size_t SamplesInQueue(u32 channel_count) const override {
+    std::size_t SamplesInQueue(u32 num_channels) const override {
        if (!ctx)
            return 0;

-        return queue.Size() / channel_count;
+        return queue.Size() / num_channels;
    }

    void Flush() override {
@@ -98,7 +98,7 @@ private:
    u32 num_channels{};

    Common::RingBuffer<s16, 0x10000> queue;
-    std::array<s16, 2> last_frame{};
+    std::array<s16, 2> last_frame;
    std::atomic<bool> should_flush{};
    TimeStretcher time_stretch;

--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ add_custom_command(OUTPUT scm_rev.cpp
      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/memory.cpp"
-      "${VIDEO_CORE}/shader/decode/texture.cpp"
      "${VIDEO_CORE}/shader/decode/other.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
 #include <cstring>
 #include "common/common_types.h"

-// GCC
-#ifdef __GNUC__
+// GCC 4.6+
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)

 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
 #endif

 // LLVM/clang
-#elif defined(__clang__)
+#elif __clang__

 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
    if (offset + length > data.size())
        data.resize(offset + length);
    const auto write = std::min(length, data.size() - offset);
-    std::memcpy(data.data() + offset, data_, write);
+    std::memcpy(data.data(), data_, write);
    return write;
 }

--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -28,13 +28,9 @@ namespace Service::NVFlinger {
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

-NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
-    displays.emplace_back(0, "Default");
-    displays.emplace_back(1, "External");
-    displays.emplace_back(2, "Edid");
-    displays.emplace_back(3, "Internal");
-    displays.emplace_back(4, "Null");
-
+NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing)
+    : displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}},
+      core_timing{core_timing} {
    // Schedule the screen composition events
    composition_event =
        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -59,14 +55,13 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
    // TODO(Subv): Currently we only support the Default display.
    ASSERT(name == "Default");

-    const auto itr =
-        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetName() == name; });
+    const auto itr = std::find_if(displays.begin(), displays.end(),
+                                  [&](const VI::Display& display) { return display.name == name; });
    if (itr == displays.end()) {
        return {};
    }

-    return itr->GetID();
+    return itr->id;
 }

 std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -76,10 +71,13 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
        return {};
    }

+    ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
+
    const u64 layer_id = next_layer_id++;
    const u32 buffer_queue_id = next_buffer_queue_id++;
-    buffer_queues.emplace_back(buffer_queue_id, layer_id);
-    display->CreateLayer(layer_id, buffer_queues.back());
+    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
+    display->layers.emplace_back(layer_id, buffer_queue);
+    buffer_queues.emplace_back(std::move(buffer_queue));
    return layer_id;
 }

@@ -90,7 +88,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
        return {};
    }

-    return layer->GetBufferQueue().GetId();
+    return layer->buffer_queue->GetId();
 }

 Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -100,20 +98,12 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
        return nullptr;
    }

-    return display->GetVSyncEvent();
+    return display->vsync_event.readable;
 }

-BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
+std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
-
-    ASSERT(itr != buffer_queues.end());
-    return *itr;
-}
-
-const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
-    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
+                                  [&](const auto& queue) { return queue->GetId() == id; });

    ASSERT(itr != buffer_queues.end());
    return *itr;
@@ -122,7 +112,7 @@ const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
 VI::Display* NVFlinger::FindDisplay(u64 display_id) {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetID() == display_id; });
+                     [&](const VI::Display& display) { return display.id == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -134,7 +124,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
 const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetID() == display_id; });
+                     [&](const VI::Display& display) { return display.id == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -150,7 +140,14 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
        return nullptr;
    }

-    return display->FindLayer(layer_id);
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
+                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
+
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

 const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
@@ -160,24 +157,33 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
        return nullptr;
    }

-    return display->FindLayer(layer_id);
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
+                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
+
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

 void NVFlinger::Compose() {
    for (auto& display : displays) {
        // Trigger vsync for this display at the end of drawing
-        SCOPE_EXIT({ display.SignalVSyncEvent(); });
+        SCOPE_EXIT({ display.vsync_event.writable->Signal(); });

        // Don't do anything for displays without layers.
-        if (!display.HasLayers())
+        if (display.layers.empty())
            continue;

        // TODO(Subv): Support more than 1 layer.
-        VI::Layer& layer = display.GetLayer(0);
-        auto& buffer_queue = layer.GetBufferQueue();
+        ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
+
+        VI::Layer& layer = display.layers[0];
+        auto& buffer_queue = layer.buffer_queue;

        // Search for a queued buffer and acquire it
-        auto buffer = buffer_queue.AcquireBuffer();
+        auto buffer = buffer_queue->AcquireBuffer();

        MicroProfileFlip();

@@ -202,7 +208,7 @@ void NVFlinger::Compose() {
                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                     buffer->get().transform, buffer->get().crop_rect);

-        buffer_queue.ReleaseBuffer(buffer->get().slot);
+        buffer_queue->ReleaseBuffer(buffer->get().slot);
    }
 }

--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -28,8 +28,8 @@ class Module;
 } // namespace Service::Nvidia

 namespace Service::VI {
-class Display;
-class Layer;
+struct Display;
+struct Layer;
 } // namespace Service::VI

 namespace Service::NVFlinger {
@@ -65,10 +65,7 @@ public:
    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;

    /// Obtains a buffer queue identified by the ID.
-    BufferQueue& FindBufferQueue(u32 id);
-
-    /// Obtains a buffer queue identified by the ID.
-    const BufferQueue& FindBufferQueue(u32 id) const;
+    std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;

    /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
    /// finished.
@@ -90,7 +87,7 @@ private:
    std::shared_ptr<Nvidia::Module> nvdrv;

    std::vector<VI::Display> displays;
-    std::vector<BufferQueue> buffer_queues;
+    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;

    /// Id to use for the next layer that is created, this counter is shared among all displays.
    u64 next_layer_id = 1;
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -2,12 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <algorithm>
-#include <utility>
-
 #include <fmt/format.h>

-#include "common/assert.h"
 #include "core/core.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/service/vi/display/vi_display.h"
@@ -23,49 +19,4 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {

 Display::~Display() = default;

-Layer& Display::GetLayer(std::size_t index) {
-    return layers.at(index);
-}
-
-const Layer& Display::GetLayer(std::size_t index) const {
-    return layers.at(index);
-}
-
-Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
-    return vsync_event.readable;
-}
-
-void Display::SignalVSyncEvent() {
-    vsync_event.writable->Signal();
-}
-
-void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
-    // TODO(Subv): Support more than 1 layer.
-    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
-
-    layers.emplace_back(id, buffer_queue);
-}
-
-Layer* Display::FindLayer(u64 id) {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
-
-    if (itr == layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
-}
-
-const Layer* Display::FindLayer(u64 id) const {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
-
-    if (itr == layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
-}
-
 } // namespace Service::VI
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -10,84 +10,14 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"

-namespace Service::NVFlinger {
-class BufferQueue;
-}
-
 namespace Service::VI {

-class Layer;
+struct Layer;

-/// Represents a single display type
-class Display {
-public:
-    /// Constructs a display with a given unique ID and name.
-    ///
-    /// @param id   The unique ID for this display.
-    /// @param name The name for this display.
-    ///
+struct Display {
    Display(u64 id, std::string name);
    ~Display();

-    Display(const Display&) = delete;
-    Display& operator=(const Display&) = delete;
-
-    Display(Display&&) = default;
-    Display& operator=(Display&&) = default;
-
-    /// Gets the unique ID assigned to this display.
-    u64 GetID() const {
-        return id;
-    }
-
-    /// Gets the name of this display
-    const std::string& GetName() const {
-        return name;
-    }
-
-    /// Whether or not this display has any layers added to it.
-    bool HasLayers() const {
-        return !layers.empty();
-    }
-
-    /// Gets a layer for this display based off an index.
-    Layer& GetLayer(std::size_t index);
-
-    /// Gets a layer for this display based off an index.
-    const Layer& GetLayer(std::size_t index) const;
-
-    /// Gets the readable vsync event.
-    Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
-
-    /// Signals the internal vsync event.
-    void SignalVSyncEvent();
-
-    /// Creates and adds a layer to this display with the given ID.
-    ///
-    /// @param id           The ID to assign to the created layer.
-    /// @param buffer_queue The buffer queue for the layer instance to use.
-    ///
-    void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
-
-    /// Attempts to find a layer with the given ID.
-    ///
-    /// @param id The layer ID.
-    ///
-    /// @returns If found, the Layer instance with the given ID.
-    ///          If not found, then nullptr is returned.
-    ///
-    Layer* FindLayer(u64 id);
-
-    /// Attempts to find a layer with the given ID.
-    ///
-    /// @param id The layer ID.
-    ///
-    /// @returns If found, the Layer instance with the given ID.
-    ///          If not found, then nullptr is returned.
-    ///
-    const Layer* FindLayer(u64 id) const;
-
-private:
    u64 id;
    std::string name;

--- a/src/core/hle/service/vi/layer/vi_layer.cpp
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -6,7 +6,8 @@

 namespace Service::VI {

-Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
+Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue)
+    : id{id}, buffer_queue{std::move(queue)} {}

 Layer::~Layer() = default;

--- a/src/core/hle/service/vi/layer/vi_layer.h
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <memory>
+
 #include "common/common_types.h"

 namespace Service::NVFlinger {
@@ -12,41 +14,12 @@ class BufferQueue;

 namespace Service::VI {

-/// Represents a single display layer.
-class Layer {
-public:
-    /// Constructs a layer with a given ID and buffer queue.
-    ///
-    /// @param id    The ID to assign to this layer.
-    /// @param queue The buffer queue for this layer to use.
-    ///
-    Layer(u64 id, NVFlinger::BufferQueue& queue);
+struct Layer {
+    Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue);
    ~Layer();

-    Layer(const Layer&) = delete;
-    Layer& operator=(const Layer&) = delete;
-
-    Layer(Layer&&) = default;
-    Layer& operator=(Layer&&) = delete;
-
-    /// Gets the ID for this layer.
-    u64 GetID() const {
-        return id;
-    }
-
-    /// Gets a reference to the buffer queue this layer is using.
-    NVFlinger::BufferQueue& GetBufferQueue() {
-        return buffer_queue;
-    }
-
-    /// Gets a const reference to the buffer queue this layer is using.
-    const NVFlinger::BufferQueue& GetBufferQueue() const {
-        return buffer_queue;
-    }
-
-private:
    u64 id;
-    NVFlinger::BufferQueue& buffer_queue;
+    std::shared_ptr<NVFlinger::BufferQueue> buffer_queue;
 };

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -525,7 +525,7 @@ private:
        LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                  static_cast<u32>(transaction), flags);

-        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+        auto buffer_queue = nv_flinger->FindBufferQueue(id);

        if (transaction == TransactionId::Connect) {
            IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
            IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
+            buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);

            IGBPSetPreallocatedBufferResponseParcel response{};
            ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
            IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
            const u32 width{request.data.width};
            const u32 height{request.data.height};
-            std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
+            std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);

            if (slot) {
                // Buffer is available
@@ -559,8 +559,8 @@ private:
                    [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
-                        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
-                        std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
+                        auto buffer_queue = nv_flinger->FindBufferQueue(id);
+                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
                        ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");

                        IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
                        IPC::ResponseBuilder rb{ctx, 2};
                        rb.Push(RESULT_SUCCESS);
                    },
-                    buffer_queue.GetWritableBufferWaitEvent());
+                    buffer_queue->GetWritableBufferWaitEvent());
            }
        } else if (transaction == TransactionId::RequestBuffer) {
            IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};

-            auto& buffer = buffer_queue.RequestBuffer(request.slot);
+            auto& buffer = buffer_queue->RequestBuffer(request.slot);

            IGBPRequestBufferResponseParcel response{buffer};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::QueueBuffer) {
            IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
-                                     request.data.GetCropRect());
+            buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
+                                      request.data.GetCropRect());

            IGBPQueueBufferResponseParcel response{1280, 720};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::Query) {
            IGBPQueryRequestParcel request{ctx.ReadBuffer()};

-            const u32 value =
-                buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
+            u32 value =
+                buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));

            IGBPQueryResponseParcel response{value};
            ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);

-        const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+        const auto buffer_queue = nv_flinger->FindBufferQueue(id);

        // TODO(Subv): Find out what this actually is.
        IPC::ResponseBuilder rb{ctx, 2, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
+        rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
    }

    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,7 +752,6 @@ public:
            {1102, nullptr, "GetDisplayResolution"},
            {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
            {2011, nullptr, "DestroyManagedLayer"},
-            {2012, nullptr, "CreateStrayLayer"},
            {2050, nullptr, "CreateIndirectLayer"},
            {2051, nullptr, "DestroyIndirectLayer"},
            {2052, nullptr, "CreateIndirectProducerEndPoint"},
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -74,7 +74,6 @@ add_library(video_core STATIC
    shader/decode/hfma2.cpp
    shader/decode/conversion.cpp
    shader/decode/memory.cpp
-    shader/decode/texture.cpp
    shader/decode/float_set_predicate.cpp
    shader/decode/integer_set_predicate.cpp
    shader/decode/half_set_predicate.cpp
@@ -112,7 +111,9 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
        renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h)
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,36 +33,18 @@ void DmaPusher::DispatchCalls() {
 }

 bool DmaPusher::Step() {
-    if (!ib_enable || dma_pushbuffer.empty()) {
-        // pushbuffer empty and IB empty or nonexistent - nothing to do
-        return false;
-    }
+    if (dma_get != dma_put) {
+        // Push buffer non-empty, read a word
+        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+        ASSERT_MSG(address, "Invalid GPU address");

-    const CommandList& command_list{dma_pushbuffer.front()};
-    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
-    GPUVAddr dma_get = command_list_header.addr;
-    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
-    bool non_main = command_list_header.is_non_main;
+        const CommandHeader command_header{Memory::Read32(*address)};

-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+        dma_get += sizeof(u32);

-    if (command_list_header.size == 0) {
-        return true;
-    }
-
-    // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
-    command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
-
-    for (const CommandHeader& command_header : command_headers) {
+        if (!non_main) {
+            dma_mget = dma_get;
+        }

        // now, see if we're in the middle of a command
        if (dma_state.length_pending) {
@@ -109,11 +91,22 @@ bool DmaPusher::Step() {
                break;
            }
        }
-    }
+    } else if (ib_enable && !dma_pushbuffer.empty()) {
+        // Current pushbuffer empty, but we have more IB entries to read
+        const CommandList& command_list{dma_pushbuffer.front()};
+        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+        dma_get = command_list_header.addr;
+        dma_put = dma_get + command_list_header.size * sizeof(u32);
+        non_main = command_list_header.is_non_main;

-    if (!non_main) {
-        // TODO (degasus): This is dead code, as dma_mget is never read.
-        dma_mget = dma_put;
+        if (dma_pushbuffer_subindex >= command_list.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
+    } else {
+        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
+        return {};
    }

    return true;
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,8 +75,6 @@ private:

    GPU& gpu;

-    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
-
    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer

@@ -91,8 +89,11 @@ private:
    DmaState dma_state{};
    bool dma_increment_once{};

+    GPUVAddr dma_put{};   ///< pushbuffer current end address
+    GPUVAddr dma_get{};   ///< pushbuffer current read address
    GPUVAddr dma_mget{};  ///< main pushbuffer last read address
    bool ib_enable{true}; ///< IB mode enabled
+    bool non_main{};      ///< non-main pushbuffer active
 };

 } // namespace Tegra
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -325,11 +325,11 @@ enum class TextureQueryType : u64 {

 enum class TextureProcessMode : u64 {
    None = 0,
-    LZ = 1,  // Load LOD of zero.
+    LZ = 1,  // Unknown, appears to be the same as none.
    LB = 2,  // Load Bias.
-    LL = 3,  // Load LOD.
-    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
-    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL.
+    LL = 3,  // Load LOD (LevelOfDetail)
+    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
+    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL
 };

 enum class TextureMiscMode : u64 {
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
 };

 enum class IpaInterpMode : u64 {
-    Pass = 0,
-    Multiply = 1,
-    Constant = 2,
+    Linear = 0,
+    Perspective = 1,
+    Flat = 2,
    Sc = 3,
 };

@@ -1446,7 +1446,6 @@ public:
        Flow,
        Synch,
        Memory,
-        Texture,
        FloatSet,
        FloatSetPredicate,
        IntegerSet,
@@ -1577,14 +1576,14 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
-            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
-            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
-            INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
-            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
-            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
-            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
-            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
+            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
+            INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
+            INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
+            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
+            INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
+            INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
+            INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
+            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
            INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,13 +16,6 @@ enum class OutputTopology : u32 {
    TriangleStrip = 7,
 };

-enum class AttributeUse : u8 {
-    Unused = 0,
-    Constant = 1,
-    Perspective = 2,
-    ScreenLinear = 3,
-};
-
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -91,15 +84,9 @@ struct Header {
        } vtg;

        struct {
-            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
-            union {
-                BitField<0, 2, AttributeUse> x;
-                BitField<2, 2, AttributeUse> y;
-                BitField<4, 2, AttributeUse> w;
-                BitField<6, 2, AttributeUse> z;
-                u8 raw;
-            } imap_generic_vector[32];
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
            INSERT_PADDING_BYTES(2);  // ImapColor
            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -116,28 +103,6 @@ struct Header {
                const u32 bit = render_target * 4 + component;
                return omap.target & (1 << bit);
            }
-            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
-                return static_cast<AttributeUse>(
-                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
-            }
-            AttributeUse GetAttributeUse(u32 attribute) const {
-                AttributeUse result = AttributeUse::Unused;
-                for (u32 i = 0; i < 4; i++) {
-                    const auto index = GetAttributeIndexUse(attribute, i);
-                    if (index == AttributeUse::Unused) {
-                        continue;
-                    }
-                    if (result == AttributeUse::Unused || result == index) {
-                        result = index;
-                        continue;
-                    }
-                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
-                    if (index == AttributeUse::Perspective) {
-                        result = index;
-                    }
-                }
-                return result;
-            }
        } ps;
    };

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -1257,11 +1257,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        if (old_params.pixel_format == new_params.pixel_format)
-            FastLayeredCopySurface(old_surface, new_surface);
-        else {
-            AccurateCopySurface(old_surface, new_surface);
-        }
+        FastLayeredCopySurface(old_surface, new_surface);
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,9 +5,7 @@
 #include <array>
 #include <string>
 #include <string_view>
-#include <utility>
 #include <variant>
-#include <vector>

 #include <fmt/format.h>

@@ -22,7 +20,6 @@
 namespace OpenGL::GLShader {

 using Tegra::Shader::Attribute;
-using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
@@ -291,22 +288,34 @@ private:
        code.AddNewLine();
    }

-    std::string GetInputFlags(AttributeUse attribute) {
+    std::string GetInputFlags(const IpaMode& input_mode) {
+        const IpaSampleMode sample_mode = input_mode.sampling_mode;
+        const IpaInterpMode interp_mode = input_mode.interpolation_mode;
        std::string out;

-        switch (attribute) {
-        case AttributeUse::Constant:
+        switch (interp_mode) {
+        case IpaInterpMode::Flat:
            out += "flat ";
            break;
-        case AttributeUse::ScreenLinear:
+        case IpaInterpMode::Linear:
            out += "noperspective ";
            break;
-        case AttributeUse::Perspective:
+        case IpaInterpMode::Perspective:
            // Default, Smooth
            break;
        default:
-            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
-            UNREACHABLE();
+            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
+        }
+        switch (sample_mode) {
+        case IpaSampleMode::Centroid:
+            // It can be implemented with the "centroid " keyword in GLSL
+            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
+            break;
+        case IpaSampleMode::Default:
+            // Default, n/a
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
        }
        return out;
    }
@@ -315,11 +324,16 @@ private:
        const auto& attributes = ir.GetInputAttributes();
        for (const auto element : attributes) {
            const Attribute::Index index = element.first;
            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
                // Skip when it's not a generic attribute
                continue;
            }

+            ASSERT(element.second.size() > 0);
+            UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
+                                 "Multiple input flag modes are not supported in GLSL");
+            const IpaMode& input_mode = *element.second.begin(); // Checked non-empty above
+
            // TODO(bunnei): Use proper number of elements for these
            u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
            if (stage != ShaderStage::Vertex) {
@@ -331,14 +345,8 @@ private:
            if (stage == ShaderStage::Geometry) {
                attr = "gs_" + attr + "[]";
            }
-            std::string suffix;
-            if (stage == ShaderStage::Fragment) {
-                const auto input_mode =
-                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
-                suffix = GetInputFlags(input_mode);
-            }
-            code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
-                         attr + ';');
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " +
+                         GetInputFlags(input_mode) + "in vec4 " + attr + ';');
        }
        if (!attributes.empty())
            code.AddNewLine();
@@ -719,7 +727,7 @@ private:
    }

    std::string GenerateTexture(Operation operation, const std::string& func,
-                                const std::vector<std::pair<Type, Node>>& extras) {
+                                bool is_extra_int = false) {
        constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -740,47 +748,36 @@ private:
            expr += Visit(operation[i]);

            const std::size_t next = i + 1;
-            if (next < count)
+            if (next < count || has_array || has_shadow)
                expr += ", ";
        }
        if (has_array) {
-            expr += ", float(ftoi(" + Visit(meta->array) + "))";
+            expr += "float(ftoi(" + Visit(meta->array) + "))";
        }
        if (has_shadow) {
-            expr += ", " + Visit(meta->depth_compare);
+            if (has_array)
+                expr += ", ";
+            expr += Visit(meta->depth_compare);
        }
        expr += ')';

-        for (const auto& extra_pair : extras) {
-            const auto [type, operand] = extra_pair;
-            if (operand == nullptr) {
-                continue;
-            }
+        for (const Node extra : meta->extras) {
            expr += ", ";
-
-            switch (type) {
-            case Type::Int:
-                if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+            if (is_extra_int) {
+                if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
                    // Inline the string as an immediate integer in GLSL (some extra arguments are
                    // required to be constant)
                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
                } else {
-                    expr += "ftoi(" + Visit(operand) + ')';
+                    expr += "ftoi(" + Visit(extra) + ')';
                }
-                break;
-            case Type::Float:
-                expr += Visit(operand);
-                break;
-            default: {
-                const auto type_int = static_cast<u32>(type);
-                UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
-                expr += '0';
-                break;
-            }
+            } else {
+                expr += Visit(extra);
            }
        }

-        return expr + ')';
+        expr += ')';
+        return expr;
    }

    std::string Assign(Operation operation) {
@@ -1159,7 +1156,7 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+        std::string expr = GenerateTexture(operation, "texture");
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1170,7 +1167,7 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+        std::string expr = GenerateTexture(operation, "textureLod");
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1181,8 +1178,7 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+        return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
               GetSwizzle(meta->element);
    }

@@ -1211,8 +1207,8 @@ private:
        ASSERT(meta);

        if (meta->element < 2) {
-            return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
-                   " * vec2(256))" + GetSwizzle(meta->element) + "))";
+            return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
+                   GetSwizzle(meta->element) + "))";
        }
        return "0";
    }
@@ -1238,9 +1234,9 @@ private:
            else if (next < count)
                expr += ", ";
        }
-        if (meta->lod) {
+        for (std::size_t i = 0; i < meta->extras.size(); ++i) {
            expr += ", ";
-            expr += CastOperand(Visit(meta->lod), Type::Int);
+            expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
        }
        expr += ')';

@@ -1588,4 +1584,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
    return {decompiler.GetResult(), decompiler.GetShaderEntries()};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;

-layout (location = 0) in noperspective vec4 position;
+layout (location = 0) in vec4 position;

 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
    vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
    return {out, program.second};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
    protected_resources.push_back(resource);
 }

-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    if (it != protected_resources.end()) {
-        protected_resources.erase(it);
-    }
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
 }

 VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
 }

 void VKFenceWatch::Wait() {
-    if (!fence) {
+    if (fence == nullptr) {
        return;
    }
    fence->Wait();
    fence->Unprotect(this);
-    fence = nullptr;
 }

 void VKFenceWatch::Watch(VKFence& new_fence) {
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
    void Protect(VKResource* resource);

    /// Removes protection for a resource.
-    void Unprotect(const VKResource* resource);
+    void Unprotect(VKResource* resource);

    /// Retreives the fence.
    operator vk::Fence() const {
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    if (offset + size > buffer_size) {
+        // The request does not fit after the current offset: save the number of used watches
+        // (a set mark is what reports the invalidation) and restart from the buffer's beginning.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark.has_value()) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        for (std::size_t index = 0; index < *invalidation_mark; ++index) {
+            watches[index]->Wait();
+        }
+        invalidation_mark.reset();
+    }
+
+    // Grow the pool once at most one unused watch remains.
+    if (used_watches + 1 >= watches.size()) {
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Watch the fence of the (possibly replaced by the flush) context for this allocation.
+    watches[used_watches]->Watch(exctx.GetFence());
+    ++used_watches;
+    offset += size;
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    watches.reserve(watches.size() + grow_size); // Avoid regrowing on every push below
+    for (std::size_t added = 0; added < grow_size; ++added) {
+        watches.push_back(std::make_unique<VKFenceWatch>());
+    }
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Ensures that "size" bytes of memory are available to the GPU, flushing on invalidation.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates Vulkan buffer handles committing the required memory.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Mapped buffer.
+    VKMemoryCommit commit; ///< Memory commit.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,7 +165,6 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
-        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,6 +17,24 @@ using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::TextureMiscMode;
+using Tegra::Shader::TextureProcessMode;
+using Tegra::Shader::TextureType;
+
+static std::size_t GetCoordCount(TextureType texture_type) {
+    // Number of coordinate components used to address each supported texture type.
+    if (texture_type == TextureType::Texture1D) {
+        return 1;
+    }
+    if (texture_type == TextureType::Texture2D) {
+        return 2;
+    }
+    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
+        return 3;
+    }
+    UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
+    return 0;
+}

 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
@@ -30,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
@@ -229,6 +247,194 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        break;
    }
+    case OpCode::Id::TEX: {
+        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+        }
+
+        const TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        WriteTexInstructionFloat(
+            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
+        break;
+    }
+    case OpCode::Id::TEXS: {
+        const TextureType texture_type{instr.texs.GetTextureType()};
+        const bool is_array{instr.texs.IsArrayTexture()};
+        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.texs.GetTextureProcessMode();
+
+        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
+        }
+
+        const Node4 components =
+            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
+
+        if (instr.texs.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, components);
+        } else {
+            WriteTexsInstructionHalfFloat(bb, instr, components);
+        }
+        break;
+    }
+    case OpCode::Id::TLD4: {
+        ASSERT(instr.tld4.array == 0);
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
+                             "NDV is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
+                             "PTP is not implemented");
+
+        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
+        }
+
+        const auto texture_type = instr.tld4.texture_type.Value();
+        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool is_array = instr.tld4.array != 0;
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
+        break;
+    }
+    case OpCode::Id::TLD4S: {
+        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
+        }
+
+        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_b = GetRegister(instr.gpr20);
+
+        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        std::vector<Node> coords;
+        if (depth_compare) {
+            // Note: TLD4S coordinate encoding works just like TEXS's
+            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+            coords.push_back(op_a);
+            coords.push_back(op_y);
+            coords.push_back(op_b);
+        } else {
+            coords.push_back(op_a);
+            coords.push_back(op_b);
+        }
+        std::vector<Node> extras;
+        extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+
+        const auto& sampler =
+            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+
+        Node4 values;
+        for (u32 element = 0; element < values.size(); ++element) {
+            auto coords_copy = coords;
+            MetaTexture meta{sampler, {}, {}, extras, element};
+            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
+        }
+
+        WriteTexsInstructionFloat(bb, instr, values);
+        break;
+    }
+    case OpCode::Id::TXQ: {
+        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
+        }
+
+        // TODO: The new commits on the texture refactor, change the way samplers work.
+        // Sadly, not all texture instructions specify the type of texture their sampler
+        // uses. This must be fixed at a later instance.
+        const auto& sampler =
+            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+
+        u32 indexer = 0;
+        switch (instr.txq.query_type) {
+        case Tegra::Shader::TextureQueryType::Dimension: {
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
+                }
+                MetaTexture meta{sampler, {}, {}, {}, element};
+                const Node value =
+                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+                SetTemporal(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+            }
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
+                              static_cast<u32>(instr.txq.query_type.Value()));
+        }
+        break;
+    }
+    case OpCode::Id::TMML: {
+        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                             "NDV is not implemented");
+
+        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+        }
+
+        auto texture_type = instr.tmml.texture_type.Value();
+        const bool is_array = instr.tmml.array != 0;
+
+        std::vector<Node> coords;
+
+        // TODO: Add coordinates for different samplers once other texture types are implemented.
+        switch (texture_type) {
+        case TextureType::Texture1D:
+            coords.push_back(GetRegister(instr.gpr8));
+            break;
+        case TextureType::Texture2D:
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
+            // Fallback to interpreting as a 2D texture for now. The sampler is fetched after this
+            // switch so that the type overridden below is the one it gets created with.
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            texture_type = TextureType::Texture2D;
+        }
+        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+        for (u32 element = 0; element < 2; ++element) {
+            auto params = coords;
+            MetaTexture meta{sampler, {}, {}, {}, element};
+            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
+            SetTemporal(bb, element, value);
+        }
+        for (u32 element = 0; element < 2; ++element) {
+            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+        }
+
+        break;
+    }
+    case OpCode::Id::TLDS: {
+        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+        const bool is_array{instr.tlds.IsArrayTexture()};
+
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
+
+        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
+        }
+
+        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
+        break;
+    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }
@@ -236,4 +442,291 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    return pc;
 }

+/// Returns the Sampler entry for a bytecode sampler, registering a new entry on first use.
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
+                                    bool is_array, bool is_shadow) {
+    const auto offset = static_cast<std::size_t>(sampler.index.Value());
+    const auto by_offset = [offset](const Sampler& e) { return e.GetOffset() == offset; };
+
+    const auto itr = std::find_if(used_samplers.begin(), used_samplers.end(), by_offset);
+    if (itr != used_samplers.end()) {
+        // Already registered: every later use has to agree with the recorded description.
+        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
+               itr->IsShadow() == is_shadow);
+        return *itr;
+    }
+
+    // Not seen before: its index is the number of samplers registered so far.
+    const std::size_t next_index = used_samplers.size();
+    const Sampler entry{offset, next_index, type, is_array, is_shadow};
+    return *used_samplers.emplace(entry).first;
+}
+
+/// Writes the enabled components of a TEX-style result to consecutive registers from gpr0.
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
+    // Stage every enabled component in its own temporal first, packed without gaps.
+    u32 written = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        if (instr.tex.IsComponentEnabled(channel)) {
+            SetTemporal(bb, written++, components[channel]);
+        }
+    }
+    // Only after all temporals are set, copy them to the destination registers.
+    for (u32 slot = 0; slot < written; ++slot) {
+        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporal(slot));
+    }
+}
+
+/// Writes the enabled TEXS components: the first two to the gpr0 pair, the rest to the gpr28 pair.
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
+                                         const Node4& components) {
+    // Pack the components selected by the swizzle into temporals 0..count-1.
+    u32 count = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        if (instr.texs.IsComponentEnabled(channel)) {
+            SetTemporal(bb, count++, components[channel]);
+        }
+    }
+
+    for (u32 slot = 0; slot < count; ++slot) {
+        const u32 lane = slot % 2; // position inside the destination register pair
+        if (slot >= 2) {
+            // Third and fourth components need the second destination (gpr28 and gpr28+1).
+            ASSERT(instr.texs.HasTwoDestinations());
+            SetRegister(bb, instr.gpr28.Value() + lane, GetTemporal(slot));
+            continue;
+        }
+        // First and second components land in gpr0 and gpr0+1.
+        SetRegister(bb, instr.gpr0.Value() + lane, GetTemporal(slot));
+    }
+}
+
+/// Writes the enabled TEXS.F16 components, packing two values into each destination register.
+void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
+                                             const Node4& components) {
+    // Collect the enabled components contiguously at the front of a scratch array.
+    Node4 packed;
+    u32 count = 0;
+    for (u32 channel = 0; channel < 4; ++channel) {
+        if (instr.texs.IsComponentEnabled(channel)) {
+            packed[count++] = components[channel];
+        }
+    }
+    if (count == 0)
+        return;
+    // Fill the unused tail with zero immediates so every HPack2 operand below is defined.
+    for (auto slot = packed.begin() + count; slot != packed.end(); ++slot) {
+        *slot = Immediate(0);
+    }
+
+    const Node low_pair = Operation(OperationCode::HPack2, packed[0], packed[1]);
+    if (count > 2) {
+        // Two registers are written: stage both pairs in temporals, then copy them out.
+        SetTemporal(bb, 0, low_pair);
+        SetTemporal(bb, 1, Operation(OperationCode::HPack2, packed[2], packed[3]));
+        SetRegister(bb, instr.gpr0, GetTemporal(0));
+        SetRegister(bb, instr.gpr28, GetTemporal(1));
+    } else {
+        SetRegister(bb, instr.gpr0, low_pair);
+    }
+}
+
+/// Builds the four per-element sampling operations from already decoded texture operands.
+Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
+                               TextureProcessMode process_mode, std::vector<Node> coords,
+                               Node array, Node depth_compare, u32 bias_offset) {
+    // A non-null array / depth_compare node marks the access as array / shadow.
+    const bool is_array = array;
+    const bool is_shadow = depth_compare;
+
+    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
+                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
+                         "This method is not supported.");
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+
+    // GL offers no LOD selection (neither a bias nor an explicit textureLod) for
+    // sampler2DArrayShadow and samplerCubeArrayShadow.
+    const bool flat_or_cube = texture_type == TextureType::Texture2D ||
+                              texture_type == TextureType::TextureCube;
+    const bool gl_lod_supported = !(flat_or_cube && is_array && is_shadow);
+
+    // LZ, LL and LLA are read with TextureLod whenever GL supports it.
+    const bool explicit_lod = process_mode == TextureProcessMode::LZ ||
+                              process_mode == TextureProcessMode::LL ||
+                              process_mode == TextureProcessMode::LLA;
+    const OperationCode read_method =
+        gl_lod_supported && explicit_lod ? OperationCode::TextureLod : OperationCode::Texture;
+
+    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
+
+    // At most one extra operand: a zero LOD for LZ, otherwise the lod/bias value read from
+    // the register bias_offset places after the one named by gpr20.
+    std::vector<Node> extras;
+    if (gl_lod_supported && process_mode == TextureProcessMode::LZ) {
+        extras.push_back(Immediate(0.0f));
+    } else if (gl_lod_supported && process_mode != TextureProcessMode::None) {
+        extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+    }
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto element_coords = coords;
+        MetaTexture meta{sampler, array, depth_compare, extras, element};
+        values[element] = Operation(read_method, meta, std::move(element_coords));
+    }
+
+    return values;
+}
+
+/// Decodes the operand registers of a TEX instruction and builds its sampling operations.
+Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+    // Every process mode except None and LZ reads a lod/bias value from a register.
+    const bool lod_bias_enabled =
+        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);
+
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+
+    // gpr8 names the array index when arrays are used and the coordinates follow it; without
+    // an array the coordinates start at gpr8 itself.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? array_register + 1 : array_register;
+
+    std::vector<Node> coords;
+    for (std::size_t index = 0; index < coord_count; ++index) {
+        coords.push_back(GetRegister(coord_register + index));
+    }
+    if (texture_type == TextureType::Texture1D && depth_compare && !is_array) {
+        // 1D.DC: append a second component, which OpenGL ignores.
+        coords.push_back(Immediate(0.0f));
+    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    // With depth compare, the reference value is in the register named by gpr20, or in the
+    // next one when a lod/bias value is also present.
+    const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+    const Node dc = depth_compare ? GetRegister(depth_register) : Node{};
+
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
+}
+
+/// Decodes the operand registers of a TEXS instruction and builds its sampling operations.
+Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
+                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+    // Every process mode except None and LZ reads a lod/bias value from a register.
+    const bool lod_bias_enabled =
+        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);
+
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+    // gpr8 names the array index when arrays are used; the coordinates follow it.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? array_register + 1 : array_register;
+
+    // The final coordinate comes from gpr20 for arrays, for more than two coordinates, and when
+    // neither lod/bias nor depth compare is used; otherwise it follows the first coordinate.
+    const bool plain_access = !lod_bias_enabled && !depth_compare;
+    const bool last_in_gpr20 = is_array || plain_access || coord_count > 2;
+    const u64 last_coord_register =
+        last_in_gpr20 ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;
+    const u32 bias_offset = coord_count > 2 ? 1 : 0;
+
+    std::vector<Node> coords;
+    for (std::size_t index = 0; index < coord_count; ++index) {
+        const bool is_last = coord_count > 1 && index + 1 == coord_count;
+        coords.push_back(GetRegister(is_last ? last_coord_register : coord_register + index));
+    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // With depth compare, the reference value is in the register named by gpr20, or in the
+    // next one when a lod/bias value is also present.
+    const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+    const Node dc = depth_compare ? GetRegister(depth_register) : Node{};
+
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
+}
+
+/// Builds the four per-element TextureGather operations for a TLD4 instruction.
+Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
+                            bool is_array) {
+    const std::size_t coord_count = GetCoordCount(texture_type);
+
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+    const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        coords.push_back(GetRegister(coord_register + i));
+    }
+
+    // depth_compare only selects the sampler here; no compare node is placed in the meta.
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto coords_copy = coords;
+        // NOTE(review): gpr8 is passed as the array node even when is_array is false - confirm.
+        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
+        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
+    }
+    return values;
+}
+
+/// Builds the four per-element TexelFetch operations for a TLDS instruction.
+Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+    const std::size_t type_coord_count = GetCoordCount(texture_type);
+    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+
+    // gpr8 names the array index when arrays are used; the coordinates then start at gpr20.
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+    // Non-array fetches take their final coordinate from gpr20 when there are more than two
+    // coordinates, or exactly two and no lod; otherwise it follows the first coordinate.
+    const bool wide_access = type_coord_count > 2 || (type_coord_count == 2 && !lod_enabled);
+    const u64 last_coord_register =
+        !is_array && wide_access ? static_cast<u64>(instr.gpr20.Value()) : coord_register + 1;
+
+    std::vector<Node> coords;
+    for (std::size_t index = 0; index < type_coord_count; ++index) {
+        const bool is_last = type_coord_count > 1 && index + 1 == type_coord_count;
+        coords.push_back(GetRegister(is_last ? last_coord_register : coord_register + index));
+    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // The lod operand is read from gpr20 when enabled and is a zero immediate otherwise.
+    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto fetch_coords = coords;
+        MetaTexture meta{sampler, array, {}, {lod}, element};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(fetch_coords));
+    }
+    return values;
+}
+
+/// Validates the operand counts and returns {type coordinate count, total coordinate count}.
+std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
+    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
+    std::size_t max_coords, std::size_t max_inputs) {
+    const std::size_t coord_count = GetCoordCount(texture_type);
+    // The array index and the depth reference each add one to the coordinate total; a lod/bias
+    // value only counts towards the register total checked against max_inputs.
+    std::size_t total = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+    if (total > max_coords || total + (lod_bias_enabled ? 1 : 0) > max_inputs) {
+        UNIMPLEMENTED_MSG("Unsupported Texture operation");
+        total = std::min(total, max_coords);
+    }
+    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
+        ++total; // 1D.DC uses a vec3 in OpenGL; its 2nd component is ignored later
+    }
+    return {coord_count, total};
+}
+
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,18 +135,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        Node value = attr;
-        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
-            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
-            // In theory by setting them as perspective, OpenGL does the perspective correction.
-            // A way must figured to reverse the last step of it.
-            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
-                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
-            }
-        }
-        value = GetSaturatedFloat(value, instr.ipa.saturate);
+        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
@@ -186,4 +175,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -1,534 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <vector>
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureMiscMode;
-using Tegra::Shader::TextureProcessMode;
-using Tegra::Shader::TextureType;
-
-static std::size_t GetCoordCount(TextureType texture_type) {
-    switch (texture_type) {
-    case TextureType::Texture1D:
-        return 1;
-    case TextureType::Texture2D:
-        return 2;
-    case TextureType::Texture3D:
-    case TextureType::TextureCube:
-        return 3;
-    default:
-        UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
-        return 0;
-    }
-}
-
-u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::TEX: {
-        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-
-        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
-        }
-
-        const TextureType texture_type{instr.tex.texture_type};
-        const bool is_array = instr.tex.array != 0;
-        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
-        const auto process_mode = instr.tex.GetTextureProcessMode();
-        WriteTexInstructionFloat(
-            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
-        break;
-    }
-    case OpCode::Id::TEXS: {
-        const TextureType texture_type{instr.texs.GetTextureType()};
-        const bool is_array{instr.texs.IsArrayTexture()};
-        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
-        const auto process_mode = instr.texs.GetTextureProcessMode();
-
-        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
-        }
-
-        const Node4 components =
-            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
-
-        if (instr.texs.fp32_flag) {
-            WriteTexsInstructionFloat(bb, instr, components);
-        } else {
-            WriteTexsInstructionHalfFloat(bb, instr, components);
-        }
-        break;
-    }
-    case OpCode::Id::TLD4: {
-        ASSERT(instr.tld4.array == 0);
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
-                             "NDV is not implemented");
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
-                             "PTP is not implemented");
-
-        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
-        }
-
-        const auto texture_type = instr.tld4.texture_type.Value();
-        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
-        const bool is_array = instr.tld4.array != 0;
-        WriteTexInstructionFloat(bb, instr,
-                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
-        break;
-    }
-    case OpCode::Id::TLD4S: {
-        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
-        }
-
-        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
-        const Node op_a = GetRegister(instr.gpr8);
-        const Node op_b = GetRegister(instr.gpr20);
-
-        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
-        std::vector<Node> coords;
-        if (depth_compare) {
-            // Note: TLD4S coordinate encoding works just like TEXS's
-            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
-            coords.push_back(op_a);
-            coords.push_back(op_y);
-            coords.push_back(op_b);
-        } else {
-            coords.push_back(op_a);
-            coords.push_back(op_b);
-        }
-        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
-
-        const auto& sampler =
-            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
-
-        Node4 values;
-        for (u32 element = 0; element < values.size(); ++element) {
-            auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
-            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
-        }
-
-        WriteTexsInstructionFloat(bb, instr, values);
-        break;
-    }
-    case OpCode::Id::TXQ: {
-        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
-        }
-
-        // TODO: The new commits on the texture refactor, change the way samplers work.
-        // Sadly, not all texture instructions specify the type of texture their sampler
-        // uses. This must be fixed at a later instance.
-        const auto& sampler =
-            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
-
-        u32 indexer = 0;
-        switch (instr.txq.query_type) {
-        case Tegra::Shader::TextureQueryType::Dimension: {
-            for (u32 element = 0; element < 4; ++element) {
-                if (!instr.txq.IsComponentEnabled(element)) {
-                    continue;
-                }
-                MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
-                const Node value =
-                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
-                SetTemporal(bb, indexer++, value);
-            }
-            for (u32 i = 0; i < indexer; ++i) {
-                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
-            }
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
-                              static_cast<u32>(instr.txq.query_type.Value()));
-        }
-        break;
-    }
-    case OpCode::Id::TMML: {
-        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
-                             "NDV is not implemented");
-
-        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
-        }
-
-        auto texture_type = instr.tmml.texture_type.Value();
-        const bool is_array = instr.tmml.array != 0;
-        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
-
-        std::vector<Node> coords;
-
-        // TODO: Add coordinates for different samplers once other texture types are implemented.
-        switch (texture_type) {
-        case TextureType::Texture1D:
-            coords.push_back(GetRegister(instr.gpr8));
-            break;
-        case TextureType::Texture2D:
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
-
-            // Fallback to interpreting as a 2D texture for now
-            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
-            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
-            texture_type = TextureType::Texture2D;
-        }
-
-        for (u32 element = 0; element < 2; ++element) {
-            auto params = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
-            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
-            SetTemporal(bb, element, value);
-        }
-        for (u32 element = 0; element < 2; ++element) {
-            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
-        }
-
-        break;
-    }
-    case OpCode::Id::TLDS: {
-        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
-        const bool is_array{instr.tlds.IsArrayTexture()};
-
-        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
-
-        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
-        }
-
-        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
-    }
-
-    return pc;
-}
-
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
-                                    bool is_array, bool is_shadow) {
-    const auto offset = static_cast<std::size_t>(sampler.index.Value());
-
-    // If this sampler has already been used, return the existing mapping.
-    const auto itr =
-        std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [&](const Sampler& entry) { return entry.GetOffset() == offset; });
-    if (itr != used_samplers.end()) {
-        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-               itr->IsShadow() == is_shadow);
-        return *itr;
-    }
-
-    // Otherwise create a new mapping for this sampler
-    const std::size_t next_index = used_samplers.size();
-    const Sampler entry{offset, next_index, type, is_array, is_shadow};
-    return *used_samplers.emplace(entry).first;
-}
-
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
-    u32 dest_elem = 0;
-    for (u32 elem = 0; elem < 4; ++elem) {
-        if (!instr.tex.IsComponentEnabled(elem)) {
-            // Skip disabled components
-            continue;
-        }
-        SetTemporal(bb, dest_elem++, components[elem]);
-    }
-    // After writing values in temporals, move them to the real registers
-    for (u32 i = 0; i < dest_elem; ++i) {
-        SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
-    }
-}
-
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
-                                         const Node4& components) {
-    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
-    // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
-
-    u32 dest_elem = 0;
-    for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
-            continue;
-        SetTemporal(bb, dest_elem++, components[component]);
-    }
-
-    for (u32 i = 0; i < dest_elem; ++i) {
-        if (i < 2) {
-            // Write the first two swizzle components to gpr0 and gpr0+1
-            SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
-        } else {
-            ASSERT(instr.texs.HasTwoDestinations());
-            // Write the rest of the swizzle components to gpr28 and gpr28+1
-            SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
-        }
-    }
-}
-
-void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
-                                             const Node4& components) {
-    // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
-    // float instruction).
-
-    Node4 values;
-    u32 dest_elem = 0;
-    for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
-            continue;
-        values[dest_elem++] = components[component];
-    }
-    if (dest_elem == 0)
-        return;
-
-    std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
-
-    const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
-    if (dest_elem <= 2) {
-        SetRegister(bb, instr.gpr0, first_value);
-        return;
-    }
-
-    SetTemporal(bb, 0, first_value);
-    SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
-
-    SetRegister(bb, instr.gpr0, GetTemporal(0));
-    SetRegister(bb, instr.gpr28, GetTemporal(1));
-}
-
-Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
-                               TextureProcessMode process_mode, std::vector<Node> coords,
-                               Node array, Node depth_compare, u32 bias_offset) {
-    const bool is_array = array;
-    const bool is_shadow = depth_compare;
-
-    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
-                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
-                         "This method is not supported.");
-
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
-
-    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
-                            process_mode == TextureProcessMode::LL ||
-                            process_mode == TextureProcessMode::LLA;
-
-    // LOD selection (either via bias or explicit textureLod) not supported in GL for
-    // sampler2DArrayShadow and samplerCubeArrayShadow.
-    const bool gl_lod_supported =
-        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
-          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
-
-    const OperationCode read_method =
-        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
-
-    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
-
-    Node bias = {};
-    Node lod = {};
-    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
-        switch (process_mode) {
-        case TextureProcessMode::LZ:
-            lod = Immediate(0.0f);
-            break;
-        case TextureProcessMode::LB:
-            // If present, lod or bias are always stored in the register indexed by the gpr20
-            // field with an offset depending on the usage of the other registers
-            bias = GetRegister(instr.gpr20.Value() + bias_offset);
-            break;
-        case TextureProcessMode::LL:
-            lod = GetRegister(instr.gpr20.Value() + bias_offset);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
-            break;
-        }
-    }
-
-    Node4 values;
-    for (u32 element = 0; element < values.size(); ++element) {
-        auto copy_coords = coords;
-        MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
-        values[element] = Operation(read_method, meta, std::move(copy_coords));
-    }
-
-    return values;
-}
-
-Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
-                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-    const bool lod_bias_enabled =
-        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
-
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
-    // If enabled arrays index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
-    const u64 coord_register = array_register + (is_array ? 1 : 0);
-
-    std::vector<Node> coords;
-    for (std::size_t i = 0; i < coord_count; ++i) {
-        coords.push_back(GetRegister(coord_register + i));
-    }
-    // 1D.DC in OpenGL the 2nd component is ignored.
-    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
-        coords.push_back(Immediate(0.0f));
-    }
-
-    const Node array = is_array ? GetRegister(array_register) : nullptr;
-
-    Node dc{};
-    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
-        // or bias are used
-        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        dc = GetRegister(depth_register);
-    }
-
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
-}
-
-Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
-                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-    const bool lod_bias_enabled =
-        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
-
-    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
-        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
-    // If enabled arrays index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
-    const u64 coord_register = array_register + (is_array ? 1 : 0);
-    const u64 last_coord_register =
-        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
-            ? static_cast<u64>(instr.gpr20.Value())
-            : coord_register + 1;
-    const u32 bias_offset = coord_count > 2 ? 1 : 0;
-
-    std::vector<Node> coords;
-    for (std::size_t i = 0; i < coord_count; ++i) {
-        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
-        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
-    }
-
-    const Node array = is_array ? GetRegister(array_register) : nullptr;
-
-    Node dc{};
-    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
-        // or bias are used
-        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        dc = GetRegister(depth_register);
-    }
-
-    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
-}
-
-Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array) {
-    const std::size_t coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
-
-    // If enabled arrays index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
-    const u64 coord_register = array_register + (is_array ? 1 : 0);
-
-    std::vector<Node> coords;
-    for (size_t i = 0; i < coord_count; ++i)
-        coords.push_back(GetRegister(coord_register + i));
-
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-
-    Node4 values;
-    for (u32 element = 0; element < values.size(); ++element) {
-        auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
-        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
-    }
-
-    return values;
-}
-
-Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
-    const std::size_t type_coord_count = GetCoordCount(texture_type);
-    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
-
-    // If enabled arrays index is always stored in the gpr8 field
-    const u64 array_register = instr.gpr8.Value();
-    // if is array gpr20 is used
-    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
-
-    const u64 last_coord_register =
-        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
-            ? static_cast<u64>(instr.gpr20.Value())
-            : coord_register + 1;
-
-    std::vector<Node> coords;
-    for (std::size_t i = 0; i < type_coord_count; ++i) {
-        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
-        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
-    }
-
-    const Node array = is_array ? GetRegister(array_register) : nullptr;
-    // When lod is used always is in gpr20
-    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
-
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
-
-    Node4 values;
-    for (u32 element = 0; element < values.size(); ++element) {
-        auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
-        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
-    }
-    return values;
-}
-
-std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
-    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
-    std::size_t max_coords, std::size_t max_inputs) {
-    const std::size_t coord_count = GetCoordCount(texture_type);
-
-    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
-    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
-        UNIMPLEMENTED_MSG("Unsupported Texture operation");
-        total_coord_count = std::min(total_coord_count, max_coords);
-    }
-    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
-    total_coord_count +=
-        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
-
-    return {coord_count, total_coord_count};
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -290,9 +290,7 @@ struct MetaTexture {
    const Sampler& sampler;
    Node array{};
    Node depth_compare{};
-    Node bias{};
-    Node lod{};
-    Node component{};
+    std::vector<Node> extras;
    u32 element{};
 };

@@ -616,7 +614,6 @@ private:
    u32 DecodeHfma2(NodeBlock& bb, u32 pc);
    u32 DecodeConversion(NodeBlock& bb, u32 pc);
    u32 DecodeMemory(NodeBlock& bb, u32 pc);
-    u32 DecodeTexture(NodeBlock& bb, u32 pc);
    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                return {node, cursor};
        }
        if (const auto conditional = std::get_if<ConditionalNode>(node)) {
-            const auto& conditional_code = conditional->GetCode();
-            const auto [found, internal_cursor] = FindOperation(
-                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
+            const auto& code = conditional->GetCode();
+            const auto [found, internal_cursor] =
+                FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
            if (found)
                return {found, cursor};
        }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
        return nullptr;
    }
    if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
-        const auto& conditional_code = conditional->GetCode();
-        return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
+        const auto& code = conditional->GetCode();
+        return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
    }
    return nullptr;
 }