const correction

clang
Update configure_input_player.cpp
2020-01-03 10:30:51 +01:00 · 2020-01-03 09:31:54 +01:00 · 2020-01-03 09:11:34 +01:00 · 2020-01-03 08:54:57 +01:00 · 2020-01-01 20:33:33 -05:00 · 2019-12-31 20:37:16 -05:00
61 changed files with 2770 additions and 774 deletions
--- a/.appveyor/UtilityFunctions.ps1
+++ b/.appveyor/UtilityFunctions.ps1
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
-    if ($_ -match "=") {
-        $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
-    }
-}
-popd
-
-function Which ($search_path, $name) {
-    ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
-    ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
-        Which $search_path $_.Value
-    }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
-    $final_deps = @()
-    $deps_to_process = GetDeps $search_path $binary
-    while ($deps_to_process.Count -gt 0) {
-        $current, $deps_to_process = $deps_to_process
-        if ($final_deps -contains $current) { continue }
-
-        # Is this a system dll file?
-        # We use the same algorithm that cmake uses to determine this.
-        if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
-        if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
-        if ($current -match "\\msvc[^\\]+dll") { continue }
-        if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
-        $final_deps += $current
-        $new_deps = GetDeps $search_path $current
-        $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
-    }
-    return $final_deps
-}
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
-  - C:\ProgramData\chocolatey\bin -> appveyor.yml
-  - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
-  # Tell msys2 to add mingw64 to the path
-  MSYSTEM: MINGW64
-  # Tell msys2 to inherit the current directory when starting the shell
-  CHERE_INVOKING: 1
-  matrix:
-    - BUILD_TYPE: msvc
-    - BUILD_TYPE: mingw
-
-platform:
-  - x64
-
-configuration:
-  - Release
-
-install:
-  - git submodule update --init --recursive
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'mingw') {
-          $dependencies = "mingw64/mingw-w64-x86_64-cmake",
-                          "mingw64/mingw-w64-x86_64-qt5",
-                          "mingw64/mingw-w64-x86_64-SDL2"
-          # redirect err to null to prevent warnings from becoming errors
-          # workaround to prevent pacman from failing due to cyclical dependencies
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
-        }
-
-before_build:
-  - mkdir %BUILD_TYPE%_build
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
-        }
-  - cd ..
-
-build_script:
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # https://www.appveyor.com/docs/build-phase
-          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
-        }
-
-after_build:
-  - ps: |
-        $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
-        $GITREV = $(git show -s --format='%h')
-
-        # Find out which kind of release we are producing by tag name
-        if ($env:APPVEYOR_REPO_TAG_NAME) {
-          $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
-        } else {
-          # There is no repo tag - make assumptions
-          $RELEASE_DIST = "head"
-        }
-
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # Where are these spaces coming from? Regardless, let's remove them
-          $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
-          $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
-          # set the build names as env vars so the artifacts can upload them
-          $env:BUILD_ZIP = $MSVC_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
-          $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
-          $BUILD_DIR = ".\msvc_build\bin\Release"
-
-          # Make a debug symbol upload
-          mkdir pdb
-          Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
-          7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
-          rm "$BUILD_DIR\*.pdb"
-
-          mkdir $RELEASE_DIST
-          # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
-          Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
-          rm "$RELEASE_DIST\*.exe"
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item .\license.txt -Destination $RELEASE_DIST
-          Copy-Item .\README.md -Destination $RELEASE_DIST
-          7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MSVC_SEVENZIP $RELEASE_DIST
-        } else {
-          $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
-          # not going to bother adding separate debug symbols for mingw, so just upload a README for it
-          # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
-          $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
-          Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
-          # store the build information in env vars so we can use them as artifacts
-          $env:BUILD_ZIP = $MINGW_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
-          $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
-          $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
-          $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
-          $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
-          mkdir $RELEASE_DIST
-          mkdir $RELEASE_DIST/platforms
-          mkdir $RELEASE_DIST/styles
-          mkdir $RELEASE_DIST/imageformats
-
-          # copy the compiled binaries and other release files to the release folder
-          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
-          # copy the qt windows plugin dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
-          # copy the qt windows vista style dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
-          # copy the qt jpeg imageformat dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
-          # copy all the dll dependencies to the release folder
-          . "./.appveyor/UtilityFunctions.ps1"
-          $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
-          $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\yuzu_cmd.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\imageformats\qjpeg.dll"
-          Write-Host "Detected the following dependencies:"
-          Write-Host $MingwDLLs
-          foreach ($file in $MingwDLLs) {
-            Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
-          }
-
-          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MINGW_SEVENZIP $RELEASE_DIST
-        }
-
-test_script:
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          ctest -VV -C Release
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
-        }
-  - cd ..
-
-artifacts:
-  - path: $(BUILD_ZIP)
-    name: build
-    type: zip
-
-deploy:
-  provider: GitHub
-  release: $(appveyor_repo_tag_name)
-  auth_token:
-    secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
-  artifact: update,build
-  draft: false
-  prerelease: false
-  on:
-    appveyor_repo_tag: true
--- a/externals/boost
+++ b/externals/boost
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,9 +46,16 @@ public:
        ElementPtr* new_ptr = new ElementPtr();
        write_ptr->next.store(new_ptr, std::memory_order_release);
        write_ptr = new_ptr;
-        cv.notify_one();

-        ++size;
+        const size_t previous_size{size++};
+
+        // Acquire the mutex and then immediately release it as a fence.
+        // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
+        // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
+        if (previous_size == 0) {
+            std::lock_guard lock{cv_mutex};
+        }
+        cv.notify_one();
    }

    void Pop() {
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -17,10 +17,10 @@
 #include "core/memory.h"

 namespace Kernel {
-namespace {
+
 // Wake up num_to_wake (or all) threads in a vector.
-void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
-    auto& system = Core::System::GetInstance();
+void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
+                                 s32 num_to_wake) {
    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
    // them all.
    std::size_t last = waiting_threads.size();
@@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
    for (std::size_t i = 0; i < last; i++) {
        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        RemoveThread(waiting_threads[i]);
        waiting_threads[i]->SetArbiterWaitAddress(0);
        waiting_threads[i]->ResumeFromWait();
        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
    }
 }
-} // Anonymous namespace

 AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
 AddressArbiter::~AddressArbiter() = default;
@@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
 ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
+    InsertThread(SharedFrom(current_thread));
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();
    current_thread->WakeAfterDelay(timeout);
@@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    return RESULT_TIMEOUT;
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
+void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
+    RemoveThread(thread);
+    thread->SetArbiterWaitAddress(0);
+}

-    // Retrieve all threads that are waiting for this address.
-    std::vector<std::shared_ptr<Thread>> threads;
-    const auto& scheduler = system.GlobalScheduler();
-    const auto& thread_list = scheduler.GetThreadList();
-
-    for (const auto& thread : thread_list) {
-        if (thread->GetArbiterWaitAddress() == address) {
-            threads.push_back(thread);
+void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread->GetPriority() >= thread->GetPriority()) {
+            thread_list.insert(it, thread);
+            return;
        }
+        ++it;
    }
+    thread_list.push_back(std::move(thread));
+}

-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
+void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread.get() == thread.get()) {
+            thread_list.erase(it);
+            return;
+        }
+        ++it;
+    }
+    UNREACHABLE();
+}

-    return threads;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
+    std::vector<std::shared_ptr<Thread>> result;
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        std::shared_ptr<Thread> current_thread = *it;
+        result.push_back(std::move(current_thread));
+        ++it;
+    }
+    return result;
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,7 +4,9 @@

 #pragma once

+#include <list>
 #include <memory>
+#include <unordered_map>
 #include <vector>

 #include "common/common_types.h"
@@ -48,6 +50,9 @@ public:
    /// Waits on an address with a particular arbitration type.
    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);

+    /// Removes a thread from the container and resets its address arbiter address to 0
+    void HandleWakeupThread(std::shared_ptr<Thread> thread);
+
 private:
    /// Signals an address being waited on.
    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
    // Waits on the given address with a timeout in nanoseconds
    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);

+    /// Wake up num_to_wake (or all) threads in a vector.
+    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
+
+    /// Insert a thread into the address arbiter container
+    void InsertThread(std::shared_ptr<Thread> thread);
+
+    /// Removes a thread from the address arbiter container
+    void RemoveThread(std::shared_ptr<Thread> thread);
+
    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+
+    /// List of threads waiting for an address arbiter
+    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;

    Core::System& system;
 };
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
        }
    }

-    if (thread->GetArbiterWaitAddress() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
-        thread->SetArbiterWaitAddress(0);
+    if (thread->GetStatus() == ThreadStatus::WaitArb) {
+        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
+        address_arbiter.HandleWakeupThread(thread);
    }

    if (resume) {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 }

 /// Signal process wide key
-static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
-                                       s32 target) {
+static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
              condition_variable_addr, target);

@@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
            system.PrepareReschedule(thread->GetProcessorID());
        }
    }
-
-    return RESULT_SUCCESS;
 }

 // Wait for an address (via Address Arbiter)
@@ -1781,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

+static void KernelDebug([[maybe_unused]] Core::System& system,
+                        [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
+                        [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
+static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
+                                   [[maybe_unused]] u32 trace_state) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
 static u64 GetSystemTick(Core::System& system) {
    LOG_TRACE(Kernel_SVC, "called");
@@ -2418,8 +2426,8 @@ static const FunctionDef SVC_Table[] = {
    {0x39, nullptr, "Unknown"},
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
-    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "DumpInfoNew"},
+    {0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
+    {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
    FuncReturn(system, retval);
 }

-template <ResultCode func(Core::System&, u64, s32)>
-void SvcWrap(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
-}
-
 template <ResultCode func(Core::System&, u64, u32)>
 void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) {
    func(system);
 }

+template <void func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)));
+}
+
+template <void func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
+         Param(system, 3));
+}
+
 template <void func(Core::System&, s64)>
 void SvcWrap(Core::System& system) {
    func(system, static_cast<s64>(Param(system, 0)));
 }

+template <void func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
+}
+
 template <void func(Core::System&, u64, u64)>
 void SvcWrap(Core::System& system) {
    func(system, Param(system, 0), Param(system, 1));
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -151,12 +151,16 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/declarations.h
+        renderer_vulkan/fixed_pipeline_state.cpp
+        renderer_vulkan/fixed_pipeline_state.h
        renderer_vulkan/maxwell_to_vk.cpp
        renderer_vulkan/maxwell_to_vk.h
        renderer_vulkan/vk_buffer_cache.cpp
        renderer_vulkan/vk_buffer_cache.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_image.cpp
+        renderer_vulkan/vk_image.h
        renderer_vulkan/vk_memory_manager.cpp
        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_resource_manager.cpp
@@ -167,6 +171,8 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_shader_decompiler.cpp
        renderer_vulkan/vk_shader_decompiler.h
+        renderer_vulkan/vk_staging_buffer_pool.cpp
+        renderer_vulkan/vk_staging_buffer_pool.h
        renderer_vulkan/vk_stream_buffer.cpp
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -88,11 +88,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
        color_mask.A.Assign(1);
    }

-    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
+    // NVN games expect these values to be enabled at boot
+    regs.rasterize_enable = 1;
    regs.rt_separate_frag_data = 1;
-
-    // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
    regs.framebuffer_srgb = 1;
+
    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -310,6 +310,11 @@ public:
            }
        };

+        enum class DepthMode : u32 {
+            MinusOneToOne = 0,
+            ZeroToOne = 1,
+        };
+
        enum class PrimitiveTopology : u32 {
            Points = 0x0,
            Lines = 0x1,
@@ -491,11 +496,6 @@ public:
            INSERT_UNION_PADDING_WORDS(1);
        };

-        enum class DepthMode : u32 {
-            MinusOneToOne = 0,
-            ZeroToOne = 1,
-        };
-
        enum class TessellationPrimitive : u32 {
            Isolines = 0,
            Triangles = 1,
@@ -657,7 +657,11 @@ public:
                std::array<f32, 4> tess_level_outer;
                std::array<f32, 2> tess_level_inner;

-                INSERT_UNION_PADDING_WORDS(0x102);
+                INSERT_UNION_PADDING_WORDS(0x10);
+
+                u32 rasterize_enable;
+
+                INSERT_UNION_PADDING_WORDS(0xF1);

                u32 tfb_enabled;

@@ -676,7 +680,7 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_UNION_PADDING_WORDS(1);
+                DepthMode depth_mode;

                float clear_color[4];
                float clear_depth;
@@ -707,13 +711,15 @@ public:

                u32 color_mask_common;

-                INSERT_UNION_PADDING_WORDS(0x6);
-
-                u32 rt_separate_frag_data;
+                INSERT_UNION_PADDING_WORDS(0x2);

                f32 depth_bounds[2];

-                INSERT_UNION_PADDING_WORDS(0xA);
+                INSERT_UNION_PADDING_WORDS(0x2);
+
+                u32 rt_separate_frag_data;
+
+                INSERT_UNION_PADDING_WORDS(0xC);

                struct {
                    u32 address_high;
@@ -1030,7 +1036,12 @@ public:
                    BitField<4, 1, u32> depth_clamp_far;
                } view_volume_clip_control;

-                INSERT_UNION_PADDING_WORDS(0x21);
+                INSERT_UNION_PADDING_WORDS(0x1F);
+
+                u32 depth_bounds_enable;
+
+                INSERT_UNION_PADDING_WORDS(1);
+
                struct {
                    u32 enable;
                    LogicOperation operation;
@@ -1420,11 +1431,13 @@ ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tess_mode, 0xC8);
 ASSERT_REG_POSITION(tess_level_outer, 0xC9);
 ASSERT_REG_POSITION(tess_level_inner, 0xCD);
+ASSERT_REG_POSITION(rasterize_enable, 0xDF);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 ASSERT_REG_POSITION(viewports, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
@@ -1438,7 +1451,7 @@ ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
-ASSERT_REG_POSITION(depth_bounds, 0x3EC);
+ASSERT_REG_POSITION(depth_bounds, 0x3E7);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1494,6 +1507,7 @@ ASSERT_REG_POSITION(cull, 0x646);
 ASSERT_REG_POSITION(pixel_center_integer, 0x649);
 ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
 ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
 ASSERT_REG_POSITION(logic_op, 0x671);
 ASSERT_REG_POSITION(clear_buffers, 0x674);
 ASSERT_REG_POSITION(color_mask, 0x680);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -384,6 +384,15 @@ enum class IsberdMode : u64 {

 enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };

+enum class MembarType : u64 {
+    CTA = 0,
+    GL = 1,
+    SYS = 2,
+    VC = 3,
+};
+
+enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
+
 enum class HalfType : u64 {
    H0_H1 = 0,
    F32 = 1,
@@ -1042,7 +1051,7 @@ union Instruction {
        BitField<40, 1, R2pMode> mode;
        BitField<41, 2, u64> byte;
        BitField<20, 7, u64> immediate_mask;
-    } r2p;
+    } p2r_r2p;

    union {
        BitField<39, 3, u64> pred39;
@@ -1230,7 +1239,7 @@ union Instruction {
        BitField<35, 1, u64> ndv_flag;
        BitField<49, 1, u64> nodep_flag;
        BitField<50, 1, u64> dc_flag;
-        BitField<54, 2, u64> info;
+        BitField<54, 2, u64> offset_mode;
        BitField<56, 2, u64> component;

        bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1242,9 +1251,9 @@ union Instruction {
            case TextureMiscMode::DC:
                return dc_flag != 0;
            case TextureMiscMode::AOFFI:
-                return info == 1;
+                return offset_mode == 1;
            case TextureMiscMode::PTP:
-                return info == 2;
+                return offset_mode == 2;
            default:
                break;
            }
@@ -1256,7 +1265,7 @@ union Instruction {
        BitField<35, 1, u64> ndv_flag;
        BitField<49, 1, u64> nodep_flag;
        BitField<50, 1, u64> dc_flag;
-        BitField<33, 2, u64> info;
+        BitField<33, 2, u64> offset_mode;
        BitField<37, 2, u64> component;

        bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1268,9 +1277,9 @@ union Instruction {
            case TextureMiscMode::DC:
                return dc_flag != 0;
            case TextureMiscMode::AOFFI:
-                return info == 1;
+                return offset_mode == 1;
            case TextureMiscMode::PTP:
-                return info == 2;
+                return offset_mode == 2;
            default:
                break;
            }
@@ -1283,6 +1292,7 @@ union Instruction {
        BitField<50, 1, u64> dc_flag;
        BitField<51, 1, u64> aoffi_flag;
        BitField<52, 2, u64> component;
+        BitField<55, 1, u64> fp16_flag;

        bool UsesMiscMode(TextureMiscMode mode) const {
            switch (mode) {
@@ -1545,6 +1555,11 @@ union Instruction {
        BitField<47, 2, IsberdShift> shift;
    } isberd;

+    union {
+        BitField<8, 2, MembarType> type;
+        BitField<0, 2, MembarUnknown> unknown;
+    } membar;
+
    union {
        BitField<48, 1, u64> signed_a;
        BitField<38, 1, u64> is_byte_chunk_a;
@@ -1669,6 +1684,7 @@ public:
        IPA,
        OUT_R, // Emit vertex/primitive
        ISBERD,
+        MEMBAR,
        VMAD,
        VSETP,
        FFMA_IMM, // Fused Multiply and Add
@@ -1785,6 +1801,7 @@ public:
        PSET,
        CSETP,
        R2P_IMM,
+        P2R_IMM,
        XMAD_IMM,
        XMAD_CR,
        XMAD_RC,
@@ -1930,7 +1947,7 @@ private:
            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
+            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1957,7 +1974,7 @@ private:
            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
@@ -1969,6 +1986,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -2089,6 +2107,7 @@ private:
            INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
            INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
+            INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
            INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
            INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
            INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -5,6 +5,7 @@
 #include <mutex>

 #include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>

 #include "common/assert.h"
 #include "common/common_types.h"
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstring>
 #include <optional>
 #include <vector>

@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

 Device::Device() : base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
    const bool is_amd = vendor == "ATI Technologies Inc.";
    const bool is_intel = vendor == "Intel";
+    const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;

    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_broken_compute = is_intel;
+    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -271,12 +271,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            case Maxwell::ShaderProgram::Geometry:
                shader_program_manager->UseTrivialGeometryShader();
                break;
+            case Maxwell::ShaderProgram::Fragment:
+                shader_program_manager->UseTrivialFragmentShader();
+                break;
            default:
                break;
            }
            continue;
        }

+        // Currently these stages are not supported in the OpenGL backend.
+        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+        if (program == Maxwell::ShaderProgram::TesselationControl) {
+            continue;
+        } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+            continue;
+        }
+
        Shader shader{shader_cache.GetStageProgram(program)};

        // Stage indices are 0 - 5
@@ -506,6 +517,7 @@ void RasterizerOpenGL::Clear() {
    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);

    SyncViewport(clear_state);
+    SyncRasterizeEnable(clear_state);
    if (regs.clear_flags.scissor) {
        SyncScissorTest(clear_state);
    }
@@ -533,6 +545,7 @@ void RasterizerOpenGL::Clear() {
 void RasterizerOpenGL::DrawPrelude() {
    auto& gpu = system.GPU().Maxwell3D();

+    SyncRasterizeEnable(state);
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
@@ -1028,6 +1041,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        flip_y = !flip_y;
    }
    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+    state.clip_control.depth_mode =
+        regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+            ? GL_ZERO_TO_ONE
+            : GL_NEGATIVE_ONE_TO_ONE;
 }

 void RasterizerOpenGL::SyncClipEnabled(
@@ -1121,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
    }
 }

+void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    current_state.rasterizer_discard = regs.rasterize_enable == 0;
+}
+
 void RasterizerOpenGL::SyncColorMask() {
    auto& maxwell3d = system.GPU().Maxwell3D();
    if (!maxwell3d.dirty.color_mask) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -168,6 +168,9 @@ private:
    /// Syncs the point state to match the guest state
    void SyncPointState();

+    /// Syncs the rasterizer enable state to match the guest state
+    void SyncRasterizeEnable(OpenGLState& current_state);
+
    /// Syncs Color Mask
    void SyncColorMask();

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -112,25 +112,25 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
 }

 /// Describes primitive behavior on geometry shaders
-constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
    switch (primitive_mode) {
    case GL_POINTS:
-        return {"points", "Points", 1};
+        return {"points", 1};
    case GL_LINES:
    case GL_LINE_STRIP:
-        return {"lines", "Lines", 2};
+        return {"lines", 2};
    case GL_LINES_ADJACENCY:
    case GL_LINE_STRIP_ADJACENCY:
-        return {"lines_adjacency", "LinesAdj", 4};
+        return {"lines_adjacency", 4};
    case GL_TRIANGLES:
    case GL_TRIANGLE_STRIP:
    case GL_TRIANGLE_FAN:
-        return {"triangles", "Triangles", 3};
+        return {"triangles", 3};
    case GL_TRIANGLES_ADJACENCY:
    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return {"triangles_adjacency", "TrianglesAdj", 6};
+        return {"triangles_adjacency", 6};
    default:
-        return {"points", "Invalid", 1};
+        return {"points", 1};
    }
 }

@@ -264,29 +264,24 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
                  "#extension GL_NV_shader_thread_group : require\n"
                  "#extension GL_NV_shader_thread_shuffle : require\n";
    }
-    source += '\n';

    if (shader_type == ShaderType::Geometry) {
-        const auto [glsl_topology, debug_name, max_vertices] =
-            GetPrimitiveDescription(variant.primitive_mode);
-
-        source += fmt::format("layout ({}) in;\n\n", glsl_topology);
+        const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
        source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
+        source += fmt::format("layout ({}) in;\n", glsl_topology);
    }
    if (shader_type == ShaderType::Compute) {
+        if (variant.local_memory_size > 0) {
+            source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
+                                  Common::AlignUp(variant.local_memory_size, 4) / 4);
+        }
        source +=
            fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
                        variant.block_x, variant.block_y, variant.block_z);

        if (variant.shared_memory_size > 0) {
-            // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
-            // avoids out of bound stores. Find out why shared memory size is being invalid.
-            source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
-        }
-
-        if (variant.local_memory_size > 0) {
-            source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
-                                  Common::AlignUp(variant.local_memory_size, 4) / 4);
+            // shared_memory_size is described in number of words
+            source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
        }
    }

--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -48,10 +48,10 @@ class ExprDecompiler;

 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };

-struct TextureAoffi {};
+struct TextureOffset {};
 struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
+using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;

 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
    static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -399,6 +399,7 @@ public:
        DeclareConstantBuffers();
        DeclareGlobalMemory();
        DeclareSamplers();
+        DeclareImages();
        DeclarePhysicalAttributeReader();

        code.AddLine("void execute_{}() {{", suffix);
@@ -1076,7 +1077,7 @@ private:
    }

    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
-                                const std::vector<TextureIR>& extras) {
+                                const std::vector<TextureIR>& extras, bool separate_dc = false) {
        constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1089,9 +1090,12 @@ private:
        std::string expr = "texture" + function_suffix;
        if (!meta->aoffi.empty()) {
            expr += "Offset";
+        } else if (!meta->ptp.empty()) {
+            expr += "Offsets";
        }
        expr += '(' + GetSampler(meta->sampler) + ", ";
-        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) +
+                                      (has_shadow && !separate_dc ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
            expr += Visit(operation[i]).AsFloat();
@@ -1104,15 +1108,24 @@ private:
            expr += ", float(" + Visit(meta->array).AsInt() + ')';
        }
        if (has_shadow) {
-            expr += ", " + Visit(meta->depth_compare).AsFloat();
+            if (separate_dc) {
+                expr += "), " + Visit(meta->depth_compare).AsFloat();
+            } else {
+                expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
+            }
+        } else {
+            expr += ')';
        }
-        expr += ')';

        for (const auto& variant : extras) {
            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                expr += GenerateTextureArgument(*argument);
-            } else if (std::holds_alternative<TextureAoffi>(variant)) {
-                expr += GenerateTextureAoffi(meta->aoffi);
+            } else if (std::holds_alternative<TextureOffset>(variant)) {
+                if (!meta->aoffi.empty()) {
+                    expr += GenerateTextureAoffi(meta->aoffi);
+                } else if (!meta->ptp.empty()) {
+                    expr += GenerateTexturePtp(meta->ptp);
+                }
            } else if (std::holds_alternative<TextureDerivates>(variant)) {
                expr += GenerateTextureDerivates(meta->derivates);
            } else {
@@ -1153,6 +1166,20 @@ private:
        return expr;
    }

+    std::string ReadTextureOffset(const Node& value) {
+        if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
+            // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+            // to be constant by the standard).
+            return std::to_string(static_cast<s32>(immediate->GetValue()));
+        } else if (device.HasVariableAoffi()) {
+            // The device supports variable AOFFI, so emit the offset as a runtime value.
+            return Visit(value).AsInt();
+        } else {
+            // Insert 0 on devices not supporting variable AOFFI.
+            return "0";
+        }
+    }
+
    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
        if (aoffi.empty()) {
            return {};
@@ -1163,18 +1190,7 @@ private:
        expr += '(';

        for (std::size_t index = 0; index < aoffi.size(); ++index) {
-            const auto operand{aoffi.at(index)};
-            if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
-                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
-                // to be constant by the standard).
-                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
-            } else if (device.HasVariableAoffi()) {
-                // Avoid using variable AOFFI on unsupported devices.
-                expr += Visit(operand).AsInt();
-            } else {
-                // Insert 0 on devices not supporting variable AOFFI.
-                expr += '0';
-            }
+            expr += ReadTextureOffset(aoffi.at(index));
            if (index + 1 < aoffi.size()) {
                expr += ", ";
            }
@@ -1184,6 +1200,20 @@ private:
        return expr;
    }

+    std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
+        static constexpr std::size_t num_vectors = 4;
+        ASSERT(ptp.size() == num_vectors * 2);
+
+        std::string expr = ", ivec2[](";
+        for (std::size_t vector = 0; vector < num_vectors; ++vector) {
+            const bool has_next = vector + 1 < num_vectors;
+            expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
+                                ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
+        }
+        expr += ')';
+        return expr;
+    }
+
    std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
        if (derivates.empty()) {
            return {};
@@ -1682,7 +1712,7 @@ private:
        ASSERT(meta);

        std::string expr = GenerateTexture(
-            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+            operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1694,7 +1724,7 @@ private:
        ASSERT(meta);

        std::string expr = GenerateTexture(
-            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1702,13 +1732,18 @@ private:
    }

    Expression TextureGather(Operation operation) {
-        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        ASSERT(meta);
+        const auto& meta = std::get<MetaTexture>(operation.GetMeta());

-        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        return {GenerateTexture(operation, "Gather",
-                                {TextureAoffi{}, TextureArgument{type, meta->component}}) +
-                    GetSwizzle(meta->element),
+        const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
+        const bool separate_dc = meta.sampler.IsShadow();
+
+        std::vector<TextureIR> ir;
+        if (meta.sampler.IsShadow()) {
+            ir = {TextureOffset{}};
+        } else {
+            ir = {TextureOffset{}, TextureArgument{type, meta.component}};
+        }
+        return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
                Type::Float};
    }

@@ -1780,7 +1815,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
+        std::string expr =
+            GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
        return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
    }

@@ -1992,6 +2028,11 @@ private:
        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
    }

+    Expression MemoryBarrierGL(Operation) {
+        code.AddLine("memoryBarrier();");
+        return {};
+    }
+
    struct Func final {
        Func() = delete;
        ~Func() = delete;
@@ -2173,6 +2214,8 @@ private:

        &GLSLDecompiler::ThreadId,
        &GLSLDecompiler::ShuffleIndexed,
+
+        &GLSLDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -50,6 +50,10 @@ public:
        current_state.geometry_shader = 0;
    }

+    void UseTrivialFragmentShader() {
+        current_state.fragment_shader = 0;
+    }
+
 private:
    struct PipelineState {
        bool operator==(const PipelineState& rhs) const {
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -182,6 +182,10 @@ void OpenGLState::ApplyCulling() {
    }
 }

+void OpenGLState::ApplyRasterizerDiscard() {
+    Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
+}
+
 void OpenGLState::ApplyColorMask() {
    if (!dirty.color_mask) {
        return;
@@ -411,8 +415,9 @@ void OpenGLState::ApplyAlphaTest() {
 }

 void OpenGLState::ApplyClipControl() {
-    if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
-        glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+    if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
+                  std::tie(clip_control.origin, clip_control.depth_mode))) {
+        glClipControl(clip_control.origin, clip_control.depth_mode);
    }
 }

@@ -454,6 +459,7 @@ void OpenGLState::Apply() {
    ApplyPointSize();
    ApplyFragmentColorClamp();
    ApplyMultisample();
+    ApplyRasterizerDiscard();
    ApplyColorMask();
    ApplyDepthClamp();
    ApplyViewport();
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -48,6 +48,8 @@ public:
        GLuint index = 0;
    } primitive_restart; // GL_PRIMITIVE_RESTART

+    bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
+
    struct ColorMask {
        GLboolean red_enabled = GL_TRUE;
        GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
    };
    std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        color_mask; // GL_COLOR_WRITEMASK
+
    struct {
        bool test_enabled = false; // GL_STENCIL_TEST
        struct {
@@ -150,6 +153,7 @@ public:

    struct {
        GLenum origin = GL_LOWER_LEFT;
+        GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
    } clip_control;

    OpenGLState();
@@ -173,6 +177,7 @@ public:
    void ApplyMultisample();
    void ApplySRgb();
    void ApplyCulling();
+    void ApplyRasterizerDiscard();
    void ApplyColorMask();
    void ApplyDepth();
    void ApplyPrimitiveRestart();
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_POINTS;
    case Maxwell::PrimitiveTopology::Lines:
        return GL_LINES;
+    case Maxwell::PrimitiveTopology::LineLoop:
+        return GL_LINE_LOOP;
    case Maxwell::PrimitiveTopology::LineStrip:
        return GL_LINE_STRIP;
    case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_TRIANGLE_FAN;
    case Maxwell::PrimitiveTopology::Quads:
        return GL_QUADS;
-    default:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
-        UNREACHABLE();
-        return {};
+    case Maxwell::PrimitiveTopology::QuadStrip:
+        return GL_QUAD_STRIP;
+    case Maxwell::PrimitiveTopology::Polygon:
+        return GL_POLYGON;
+    case Maxwell::PrimitiveTopology::LinesAdjacency:
+        return GL_LINES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::LineStripAdjacency:
+        return GL_LINE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+        return GL_TRIANGLES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+        return GL_TRIANGLE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::Patches:
+        return GL_PATCHES;
    }
+    UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+    return GL_POINTS;
 }

 inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,19 +24,21 @@

 namespace OpenGL {

-static const char vertex_shader[] = R"(
-#version 150 core
+namespace {

-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
+constexpr char vertex_shader[] = R"(
+#version 430 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;

 // This is a truncated 3x3 matrix for 2D transformations:
 // The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
 // The third column performs translation.
 // The third row could be used for projection, which we don't need in 2D. It hence is assumed to
 // implicitly be [0, 0, 1]
-uniform mat3x2 modelview_matrix;
+layout (location = 0) uniform mat3x2 modelview_matrix;

 void main() {
    // Multiply input position by the rotscale part of the matrix and then manually translate by
@@ -47,34 +49,29 @@ void main() {
 }
 )";

-static const char fragment_shader[] = R"(
-#version 150 core
+constexpr char fragment_shader[] = R"(
+#version 430 core

-in vec2 frag_tex_coord;
-out vec4 color;
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;

-uniform sampler2D color_texture;
+layout (binding = 0) uniform sampler2D color_texture;

 void main() {
-    // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
-    // support more framebuffer pixel formats.
    color = texture(color_texture, frag_tex_coord);
 }
 )";

-/**
- * Vertex structure that the drawn screen rectangles are composed of.
- */
-struct ScreenRectVertex {
-    ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
-        position[0] = x;
-        position[1] = y;
-        tex_coord[0] = u;
-        tex_coord[1] = v;
-    }
+constexpr GLint PositionLocation = 0;
+constexpr GLint TexCoordLocation = 1;
+constexpr GLint ModelViewMatrixLocation = 0;

-    GLfloat position[2];
-    GLfloat tex_coord[2];
+struct ScreenRectVertex {
+    constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
+        : position{{x, y}}, tex_coord{{u, v}} {}
+
+    std::array<GLfloat, 2> position;
+    std::array<GLfloat, 2> tex_coord;
 };

 /**
@@ -84,18 +81,82 @@ struct ScreenRectVertex {
 * The projection part of the matrix is trivial, hence these operations are represented
 * by a 3x2 matrix.
 */
-static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
+std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
    std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order

    // clang-format off
-    matrix[0] = 2.f / width; matrix[2] = 0.f;           matrix[4] = -1.f;
-    matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] = 1.f;
+    matrix[0] = 2.f / width; matrix[2] =  0.f;          matrix[4] = -1.f;
+    matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] =  1.f;
    // Last matrix row is implicitly assumed to be [0, 0, 1].
    // clang-format on

    return matrix;
 }

+const char* GetSource(GLenum source) {
+    switch (source) {
+    case GL_DEBUG_SOURCE_API:
+        return "API";
+    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
+        return "WINDOW_SYSTEM";
+    case GL_DEBUG_SOURCE_SHADER_COMPILER:
+        return "SHADER_COMPILER";
+    case GL_DEBUG_SOURCE_THIRD_PARTY:
+        return "THIRD_PARTY";
+    case GL_DEBUG_SOURCE_APPLICATION:
+        return "APPLICATION";
+    case GL_DEBUG_SOURCE_OTHER:
+        return "OTHER";
+    default:
+        UNREACHABLE();
+        return "Unknown source";
+    }
+}
+
+const char* GetType(GLenum type) {
+    switch (type) {
+    case GL_DEBUG_TYPE_ERROR:
+        return "ERROR";
+    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
+        return "DEPRECATED_BEHAVIOR";
+    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
+        return "UNDEFINED_BEHAVIOR";
+    case GL_DEBUG_TYPE_PORTABILITY:
+        return "PORTABILITY";
+    case GL_DEBUG_TYPE_PERFORMANCE:
+        return "PERFORMANCE";
+    case GL_DEBUG_TYPE_OTHER:
+        return "OTHER";
+    case GL_DEBUG_TYPE_MARKER:
+        return "MARKER";
+    default:
+        UNREACHABLE();
+        return "Unknown type";
+    }
+}
+
+void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
+                           const GLchar* message, const void* user_param) {
+    const char format[] = "{} {} {}: {}";
+    const char* const str_source = GetSource(source);
+    const char* const str_type = GetType(type);
+
+    switch (severity) {
+    case GL_DEBUG_SEVERITY_HIGH:
+        LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    case GL_DEBUG_SEVERITY_MEDIUM:
+        LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    case GL_DEBUG_SEVERITY_NOTIFICATION:
+    case GL_DEBUG_SEVERITY_LOW:
+        LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    }
+}
+
+} // Anonymous namespace
+
 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
    : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}

@@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    prev_state.Apply();
 }

-/**
- * Loads framebuffer from emulated memory into the active OpenGL texture.
- */
 void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
    // Framebuffer orientation handling
    framebuffer_transform_flags = framebuffer.transform_flags;
@@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }

-/**
- * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
- * be 1x1 but will stretch across whatever it's rendered on.
- */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                const TextureInfo& texture) {
    const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 }

-/**
- * Initializes the OpenGL state and creates persistent objects.
- */
 void RendererOpenGL::InitOpenGLObjects() {
    glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
                 0.0f);
@@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() {
    state.draw.shader_program = shader.handle;
    state.AllDirty();
    state.Apply();
-    uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
-    uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
-    attrib_position = glGetAttribLocation(shader.handle, "vert_position");
-    attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");

    // Generate VBO handle for drawing
    vertex_buffer.Create();
@@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {

    // Attach vertex data to VAO
    glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
+    glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
                              offsetof(ScreenRectVertex, position));
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
+    glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
                              offsetof(ScreenRectVertex, tex_coord));
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
+    glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
+    glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
+    glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
+    glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
    glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
                              sizeof(ScreenRectVertex));

@@ -331,18 +378,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
                  static_cast<f32>(screen_info.texture.height);
    }

-    std::array<ScreenRectVertex, 4> vertices = {{
+    const std::array vertices = {
        ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
        ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
        ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
        ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
-    }};
+    };

    state.textures[0] = screen_info.display_texture;
    state.framebuffer_srgb.enabled = screen_info.display_srgb;
    state.AllDirty();
    state.Apply();
-    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
+    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    // Restore default state
    state.framebuffer_srgb.enabled = false;
@@ -351,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
    state.Apply();
 }

-/**
- * Draws the emulated screens to the emulator window.
- */
 void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    if (renderer_settings.set_background_color) {
        // Update background color before drawing
@@ -367,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    glClear(GL_COLOR_BUFFER_BIT);

    // Set projection matrix
-    std::array<GLfloat, 3 * 2> ortho_matrix =
-        MakeOrthographicMatrix((float)layout.width, (float)layout.height);
-    glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
+    const std::array ortho_matrix =
+        MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+    glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());

-    // Bind texture in Texture Unit 0
-    glActiveTexture(GL_TEXTURE0);
-    glUniform1i(uniform_color_texture, 0);
-
-    DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
-                        (float)screen.GetWidth(), (float)screen.GetHeight());
+    DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
+                        static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
+                        static_cast<float>(screen.GetHeight()));

    m_current_frame++;
 }

-/// Updates the framerate
 void RendererOpenGL::UpdateFramerate() {}

 void RendererOpenGL::CaptureScreenshot() {
@@ -418,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() {
    renderer_settings.screenshot_requested = false;
 }

-static const char* GetSource(GLenum source) {
-#define RET(s)                                                                                     \
-    case GL_DEBUG_SOURCE_##s:                                                                      \
-        return #s
-    switch (source) {
-        RET(API);
-        RET(WINDOW_SYSTEM);
-        RET(SHADER_COMPILER);
-        RET(THIRD_PARTY);
-        RET(APPLICATION);
-        RET(OTHER);
-    default:
-        UNREACHABLE();
-        return "Unknown source";
-    }
-#undef RET
-}
-
-static const char* GetType(GLenum type) {
-#define RET(t)                                                                                     \
-    case GL_DEBUG_TYPE_##t:                                                                        \
-        return #t
-    switch (type) {
-        RET(ERROR);
-        RET(DEPRECATED_BEHAVIOR);
-        RET(UNDEFINED_BEHAVIOR);
-        RET(PORTABILITY);
-        RET(PERFORMANCE);
-        RET(OTHER);
-        RET(MARKER);
-    default:
-        UNREACHABLE();
-        return "Unknown type";
-    }
-#undef RET
-}
-
-static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
-                                  GLsizei length, const GLchar* message, const void* user_param) {
-    const char format[] = "{} {} {}: {}";
-    const char* const str_source = GetSource(source);
-    const char* const str_type = GetType(type);
-
-    switch (severity) {
-    case GL_DEBUG_SEVERITY_HIGH:
-        LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    case GL_DEBUG_SEVERITY_MEDIUM:
-        LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    case GL_DEBUG_SEVERITY_NOTIFICATION:
-    case GL_DEBUG_SEVERITY_LOW:
-        LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    }
-}
-
 bool RendererOpenGL::Init() {
    Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

@@ -495,7 +478,6 @@ bool RendererOpenGL::Init() {
    return true;
 }

-/// Shutdown the renderer
 void RendererOpenGL::ShutDown() {}

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -59,21 +59,31 @@ public:
    void ShutDown() override;

 private:
+    /// Initializes the OpenGL state and creates persistent objects.
    void InitOpenGLObjects();
+
    void AddTelemetryFields();
+
    void CreateRasterizer();

    void ConfigureFramebufferTexture(TextureInfo& texture,
                                     const Tegra::FramebufferConfig& framebuffer);
+
+    /// Draws the emulated screens to the emulator window.
    void DrawScreen(const Layout::FramebufferLayout& layout);
+
    void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
+
+    /// Updates the framerate.
    void UpdateFramerate();

    void CaptureScreenshot();

-    // Loads framebuffer from emulated memory into the display information structure
+    /// Loads framebuffer from emulated memory into the active OpenGL texture.
    void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
-    // Fills active OpenGL texture with the given RGBA color.
+
+    /// Fills active OpenGL texture with the given RGBA color. Since the color is solid, the
+    /// texture can be 1x1 but will stretch across whatever it's rendered on.
    void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                    const TextureInfo& texture);

@@ -94,14 +104,6 @@ private:
    /// OpenGL framebuffer data
    std::vector<u8> gl_framebuffer_data;

-    // Shader uniform location indices
-    GLuint uniform_modelview_matrix;
-    GLuint uniform_color_texture;
-
-    // Shader attribute input indices
-    GLuint attrib_position;
-    GLuint attrib_tex_coord;
-
    /// Used for transforming the framebuffer orientation
    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
    Common::Rectangle<int> framebuffer_crop_rect;
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -0,0 +1,296 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <tuple>
+
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+
+namespace Vulkan {
+
+namespace {
+
+/// Builds the depth/stencil portion of the fixed state from the guest registers.
+/// When two-sided stencil is disabled, the back face reuses the front face configuration.
+constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) {
+    using StencilFace = FixedPipelineState::StencilFace;
+
+    const bool depth_test = regs.depth_test_enable == 1;
+    const bool depth_write = regs.depth_write_enabled == 1;
+    const bool depth_bounds = regs.depth_bounds_enable == 1;
+    const bool stencil_test = regs.stencil_enable == 1;
+
+    const StencilFace front(regs.stencil_front_op_fail, regs.stencil_front_op_zfail,
+                            regs.stencil_front_op_zpass, regs.stencil_front_func_func);
+    const StencilFace back =
+        regs.stencil_two_side_enable
+            ? StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail,
+                          regs.stencil_back_op_zpass, regs.stencil_back_func_func)
+            : front;
+
+    return FixedPipelineState::DepthStencil(depth_test, depth_write, depth_bounds, stencil_test,
+                                            regs.depth_test_func, front, back);
+}
+
+/// Builds the input assembly state. The point size is only kept for point topologies; every
+/// other topology stores 0.0f.
+constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) {
+    const bool draws_points = regs.draw.topology == Maxwell::PrimitiveTopology::Points;
+    const float point_size = draws_points ? regs.point_size : 0.0f;
+    return FixedPipelineState::InputAssembly(regs.draw.topology, regs.primitive_restart.enabled,
+                                             point_size);
+}
+
+/// Builds the blending state of a single render target.
+/// Targets at or beyond rt_control.count, or whose blend enable bit is clear, get a disabled
+/// (Add, One, Zero) configuration that still carries the color write mask.
+constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState(
+    const Maxwell& regs, std::size_t render_target) {
+    using Attachment = FixedPipelineState::BlendingAttachment;
+    using Equation = Maxwell::Blend::Equation;
+    using Factor = Maxwell::Blend::Factor;
+
+    const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target];
+    const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0};
+
+    // The enable bit is only read for targets inside the bound range.
+    const bool in_range = render_target < regs.rt_control.count;
+    if (!in_range || !regs.blend.enable[render_target]) {
+        return Attachment(false, Equation::Add, Factor::One, Factor::Zero, Equation::Add,
+                          Factor::One, Factor::Zero, components);
+    }
+
+    // Shared builder: the common and the per-target register blocks expose the same fields.
+    const auto from_registers = [&components](const auto& src) {
+        return Attachment(true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb,
+                          src.equation_a, src.factor_source_a, src.factor_dest_a, components);
+    };
+    if (regs.independent_blend_enable) {
+        return from_registers(regs.independent_blend[render_target]);
+    }
+    return from_registers(regs.blend);
+}
+
+/// Builds the color blending state: the render target count plus one blending attachment per
+/// render target slot (0 through 7).
+/// NOTE(review): the blend color is passed as the first argument, but the ColorBlending
+/// constructor declared in fixed_pipeline_state.h does not store it.
+constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) {
+    return FixedPipelineState::ColorBlending(
+        {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a},
+        regs.rt_control.count,
+        {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1),
+         GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3),
+         GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5),
+         GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)});
+}
+
+/// Builds the tessellation state from patch_vertices and the tess_mode register fields.
+/// The clockwise flag is true when tess_mode.cw is non-zero.
+constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) {
+    return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim,
+                                            regs.tess_mode.spacing, regs.tess_mode.cw != 0);
+}
+
+// Primitive classes. GetRasterizerState uses them as indices into its local array of polygon
+// offset enable bits (point, line, fill).
+constexpr std::size_t Point = 0;
+constexpr std::size_t Line = 1;
+constexpr std::size_t Polygon = 2;
+// Maps a topology value, used as the array index, to its primitive class. Each entry is
+// annotated with the topology it stands for.
+constexpr std::array PolygonOffsetEnableLUT = {
+    Point,   // Points
+    Line,    // Lines
+    Line,    // LineLoop
+    Line,    // LineStrip
+    Polygon, // Triangles
+    Polygon, // TriangleStrip
+    Polygon, // TriangleFan
+    Polygon, // Quads
+    Polygon, // QuadStrip
+    Polygon, // Polygon
+    Line,    // LinesAdjacency
+    Line,    // LineStripAdjacency
+    Polygon, // TrianglesAdjacency
+    Polygon, // TriangleStripAdjacency
+    Polygon, // Patches
+};
+
+/// Builds the rasterizer state from the guest registers.
+/// Depth bias follows the polygon offset enable bit that matches the primitive class of the
+/// current topology. The front face is swapped when triangle_rast_flip is set and the first
+/// viewport has a positive Y scale.
+constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) {
+    const std::array enabled_lut = {regs.polygon_offset_point_enable,
+                                    regs.polygon_offset_line_enable,
+                                    regs.polygon_offset_fill_enable};
+    const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
+    // The topology is read from a guest register, so it must be range-checked before it is
+    // used as a table index. Values past the table are treated as depth bias disabled.
+    const bool depth_bias_enabled =
+        topology < PolygonOffsetEnableLUT.size() && enabled_lut[PolygonOffsetEnableLUT[topology]];
+
+    Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
+    if (regs.screen_y_control.triangle_rast_flip != 0 &&
+        regs.viewport_transform[0].scale_y > 0.0f) {
+        // Swap the winding; any other front face value is left untouched.
+        if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) {
+            front_face = Maxwell::Cull::FrontFace::ClockWise;
+        } else if (front_face == Maxwell::Cull::FrontFace::ClockWise) {
+            front_face = Maxwell::Cull::FrontFace::CounterClockWise;
+        }
+    }
+
+    const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+    return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
+                                          regs.cull.cull_face, front_face);
+}
+
+} // Anonymous namespace
+
+/// Hashes the binding index, stride and divisor.
+/// The fields are mixed with boost::hash_combine instead of shifting the index by the stride:
+/// a shift count taken from data is undefined behaviour once it reaches the operand width.
+std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept {
+    std::size_t hash = index;
+    boost::hash_combine(hash, stride);
+    boost::hash_combine(hash, divisor);
+    return hash;
+}
+
+/// Two bindings are equal when index, stride and divisor all match.
+bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept {
+    return index == rhs.index && stride == rhs.stride && divisor == rhs.divisor;
+}
+
+/// Hashes the attribute by XOR-ing each field shifted to its own starting bit
+/// (index: 0, buffer: 13, type: 22, size: 31, offset: 36).
+/// NOTE(review): the shift by 36 assumes std::size_t is wider than 32 bits.
+std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept {
+    return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(buffer) << 13) ^
+           (static_cast<std::size_t>(type) << 22) ^ (static_cast<std::size_t>(size) << 31) ^
+           (static_cast<std::size_t>(offset) << 36);
+}
+
+/// Two attributes are equal when index, buffer, type, size and offset all match.
+bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept {
+    return index == rhs.index && buffer == rhs.buffer && type == rhs.type && size == rhs.size &&
+           offset == rhs.offset;
+}
+
+/// Hashes every field that operator== compares: the three stencil actions and the test
+/// function, each shifted to its own starting bit (0, 4, 20 and 36).
+std::size_t FixedPipelineState::StencilFace::Hash() const noexcept {
+    return static_cast<std::size_t>(action_stencil_fail) ^
+           (static_cast<std::size_t>(action_depth_fail) << 4) ^
+           (static_cast<std::size_t>(action_depth_pass) << 20) ^
+           (static_cast<std::size_t>(test_func) << 36);
+}
+
+/// Two stencil faces are equal when all three actions and the test function match.
+bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept {
+    return action_stencil_fail == rhs.action_stencil_fail &&
+           action_depth_fail == rhs.action_depth_fail &&
+           action_depth_pass == rhs.action_depth_pass && test_func == rhs.test_func;
+}
+
+/// Hashes the attachment by XOR-ing the enable flag, the blend equations and factors (shifted
+/// in steps of 5 bits) and the four color write mask components (bits 35 to 38).
+/// NOTE(review): shifts of 32 or more assume std::size_t is wider than 32 bits.
+std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept {
+    return static_cast<std::size_t>(enable) ^ (static_cast<std::size_t>(rgb_equation) << 5) ^
+           (static_cast<std::size_t>(src_rgb_func) << 10) ^
+           (static_cast<std::size_t>(dst_rgb_func) << 15) ^
+           (static_cast<std::size_t>(a_equation) << 20) ^
+           (static_cast<std::size_t>(src_a_func) << 25) ^
+           (static_cast<std::size_t>(dst_a_func) << 30) ^
+           (static_cast<std::size_t>(components[0]) << 35) ^
+           (static_cast<std::size_t>(components[1]) << 36) ^
+           (static_cast<std::size_t>(components[2]) << 37) ^
+           (static_cast<std::size_t>(components[3]) << 38);
+}
+
+/// Two attachments are equal when the enable flag, both equations, all four factors and the
+/// color write mask match.
+bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const
+    noexcept {
+    const bool same_rgb = rgb_equation == rhs.rgb_equation && src_rgb_func == rhs.src_rgb_func &&
+                          dst_rgb_func == rhs.dst_rgb_func;
+    const bool same_alpha = a_equation == rhs.a_equation && src_a_func == rhs.src_a_func &&
+                            dst_a_func == rhs.dst_a_func;
+    return enable == rhs.enable && same_rgb && same_alpha && components == rhs.components;
+}
+
+/// Hashes the vertex input layout: seeds the hash with both element counts, then combines the
+/// hash of every used binding followed by every used attribute.
+/// NOTE(review): the shift by 32 assumes std::size_t is wider than 32 bits.
+std::size_t FixedPipelineState::VertexInput::Hash() const noexcept {
+    std::size_t hash = num_bindings ^ (num_attributes << 32);
+    // Only the first num_bindings / num_attributes entries are hashed.
+    for (std::size_t i = 0; i < num_bindings; ++i) {
+        boost::hash_combine(hash, bindings[i].Hash());
+    }
+    for (std::size_t i = 0; i < num_attributes; ++i) {
+        boost::hash_combine(hash, attributes[i].Hash());
+    }
+    return hash;
+}
+
+/// Two vertex inputs are equal when they use the same number of bindings and attributes and
+/// the used entries match pairwise. Entries past the counts are ignored.
+bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept {
+    if (num_bindings != rhs.num_bindings || num_attributes != rhs.num_attributes) {
+        return false;
+    }
+    return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin()) &&
+           std::equal(attributes.begin(), attributes.begin() + num_attributes,
+                      rhs.attributes.begin());
+}
+
+/// Hashes the topology, the raw bits of point_size and the primitive restart flag.
+std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept {
+    // Copy the float's object representation into an integer so it can be mixed bitwise.
+    // NOTE(review): operator== compares point_size as a float, so 0.0f and -0.0f compare equal
+    // but produce different hashes here - confirm point_size can never be -0.0f.
+    std::size_t point_size_int = 0;
+    std::memcpy(&point_size_int, &point_size, sizeof(point_size));
+    return (static_cast<std::size_t>(topology) << 24) ^ (point_size_int << 32) ^
+           static_cast<std::size_t>(primitive_restart_enable);
+}
+
+/// Two input assembly states are equal when topology, primitive restart and point size match.
+bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept {
+    return topology == rhs.topology &&
+           primitive_restart_enable == rhs.primitive_restart_enable &&
+           point_size == rhs.point_size;
+}
+
+/// Hashes the tessellation state by XOR-ing the patch control point count with the primitive
+/// (bit 6), spacing (bit 8) and clockwise flag (bit 10).
+std::size_t FixedPipelineState::Tessellation::Hash() const noexcept {
+    return static_cast<std::size_t>(patch_control_points) ^
+           (static_cast<std::size_t>(primitive) << 6) ^ (static_cast<std::size_t>(spacing) << 8) ^
+           (static_cast<std::size_t>(clockwise) << 10);
+}
+
+/// Two tessellation states are equal when all four fields match.
+bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept {
+    return patch_control_points == rhs.patch_control_points && primitive == rhs.primitive &&
+           spacing == rhs.spacing && clockwise == rhs.clockwise;
+}
+
+/// Hashes the rasterizer state: the three boolean flags occupy bits 0 to 2, the cull face
+/// starts at bit 24 and the front face at bit 48.
+/// NOTE(review): the shift by 48 assumes std::size_t is wider than 32 bits.
+std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
+    return static_cast<std::size_t>(cull_enable) ^
+           (static_cast<std::size_t>(depth_bias_enable) << 1) ^
+           (static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
+           (static_cast<std::size_t>(cull_face) << 24) ^
+           (static_cast<std::size_t>(front_face) << 48);
+}
+
+/// Two rasterizer states are equal when all flags and both face settings match.
+bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
+    const bool same_flags = cull_enable == rhs.cull_enable &&
+                            depth_bias_enable == rhs.depth_bias_enable &&
+                            ndc_minus_one_to_one == rhs.ndc_minus_one_to_one;
+    return same_flags && cull_face == rhs.cull_face && front_face == rhs.front_face;
+}
+
+/// Hashes the depth/stencil state: the four enable flags occupy bits 0 to 3, the depth test
+/// function starts at bit 4, and the front and back stencil face hashes are then combined in.
+std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(depth_test_enable) ^
+                       (static_cast<std::size_t>(depth_write_enable) << 1) ^
+                       (static_cast<std::size_t>(depth_bounds_enable) << 2) ^
+                       (static_cast<std::size_t>(stencil_enable) << 3) ^
+                       (static_cast<std::size_t>(depth_test_function) << 4);
+    boost::hash_combine(hash, front_stencil.Hash());
+    boost::hash_combine(hash, back_stencil.Hash());
+    return hash;
+}
+
+/// Two depth/stencil states are equal when every flag, the depth test function and both
+/// stencil faces match.
+bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept {
+    const bool same_depth = depth_test_enable == rhs.depth_test_enable &&
+                            depth_write_enable == rhs.depth_write_enable &&
+                            depth_bounds_enable == rhs.depth_bounds_enable &&
+                            depth_test_function == rhs.depth_test_function;
+    const bool same_stencil = stencil_enable == rhs.stencil_enable &&
+                              front_stencil == rhs.front_stencil &&
+                              back_stencil == rhs.back_stencil;
+    return same_depth && same_stencil;
+}
+
+/// Hashes the color blending state: seeds the hash with the attachment count shifted by 13
+/// bits, then combines the hash of every used attachment in order.
+std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept {
+    std::size_t hash = attachments_count << 13;
+    const auto last = attachments.begin() + attachments_count;
+    for (auto it = attachments.begin(); it != last; ++it) {
+        boost::hash_combine(hash, it->Hash());
+    }
+    return hash;
+}
+
+/// Two color blending states are equal when they use the same number of attachments and the
+/// used attachments match pairwise. Attachments past the count are ignored.
+bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept {
+    if (attachments_count != rhs.attachments_count) {
+        return false;
+    }
+    return std::equal(attachments.begin(), attachments.begin() + attachments_count,
+                      rhs.attachments.begin());
+}
+
+/// Combines the hashes of all six sub-states, in declaration order, starting from zero.
+std::size_t FixedPipelineState::Hash() const noexcept {
+    std::size_t hash = 0;
+    // Braced lists are evaluated left to right, so the combination order is fixed.
+    for (const std::size_t part : {vertex_input.Hash(), input_assembly.Hash(),
+                                   tessellation.Hash(), rasterizer.Hash(),
+                                   depth_stencil.Hash(), color_blending.Hash()}) {
+        boost::hash_combine(hash, part);
+    }
+    return hash;
+}
+
+/// Two fixed pipeline states are equal when all six sub-states are equal.
+bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
+    return vertex_input == rhs.vertex_input && input_assembly == rhs.input_assembly &&
+           tessellation == rhs.tessellation && rasterizer == rhs.rasterizer &&
+           depth_stencil == rhs.depth_stencil && color_blending == rhs.color_blending;
+}
+
+/// Builds a FixedPipelineState from the given guest registers.
+/// The input assembly, tessellation, rasterizer, depth/stencil and color blending sub-states
+/// are filled in. vertex_input is not assigned here, so it keeps its default member
+/// initializers (zero bindings and zero attributes).
+FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
+    FixedPipelineState fixed_state;
+    fixed_state.input_assembly = GetInputAssemblyState(regs);
+    fixed_state.tessellation = GetTessellationState(regs);
+    fixed_state.rasterizer = GetRasterizerState(regs);
+    fixed_state.depth_stencil = GetDepthStencilState(regs);
+    fixed_state.color_blending = GetColorBlendingState(regs);
+    return fixed_state;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -0,0 +1,282 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+
+#include "common/common_types.h"
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+
+namespace Vulkan {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Optimize this structure.
+
+struct FixedPipelineState {
+    using PixelFormat = VideoCore::Surface::PixelFormat;
+
+    struct VertexBinding {
+        constexpr VertexBinding(u32 index, u32 stride, u32 divisor)
+            : index{index}, stride{stride}, divisor{divisor} {}
+        VertexBinding() = default;
+
+        u32 index;
+        u32 stride;
+        u32 divisor;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const VertexBinding& rhs) const noexcept;
+
+        bool operator!=(const VertexBinding& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct VertexAttribute {
+        constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
+                                  Maxwell::VertexAttribute::Size size, u32 offset)
+            : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
+        VertexAttribute() = default;
+
+        u32 index;
+        u32 buffer;
+        Maxwell::VertexAttribute::Type type;
+        Maxwell::VertexAttribute::Size size;
+        u32 offset;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const VertexAttribute& rhs) const noexcept;
+
+        bool operator!=(const VertexAttribute& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct StencilFace {
+        constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
+                              Maxwell::StencilOp action_depth_fail,
+                              Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func)
+            : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail},
+              action_depth_pass{action_depth_pass}, test_func{test_func} {}
+        StencilFace() = default;
+
+        Maxwell::StencilOp action_stencil_fail;
+        Maxwell::StencilOp action_depth_fail;
+        Maxwell::StencilOp action_depth_pass;
+        Maxwell::ComparisonOp test_func;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const StencilFace& rhs) const noexcept;
+
+        bool operator!=(const StencilFace& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct BlendingAttachment {
+        constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation,
+                                     Maxwell::Blend::Factor src_rgb_func,
+                                     Maxwell::Blend::Factor dst_rgb_func,
+                                     Maxwell::Blend::Equation a_equation,
+                                     Maxwell::Blend::Factor src_a_func,
+                                     Maxwell::Blend::Factor dst_a_func,
+                                     std::array<bool, 4> components)
+            : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func},
+              dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func},
+              dst_a_func{dst_a_func}, components{components} {}
+        BlendingAttachment() = default;
+
+        bool enable;
+        Maxwell::Blend::Equation rgb_equation;
+        Maxwell::Blend::Factor src_rgb_func;
+        Maxwell::Blend::Factor dst_rgb_func;
+        Maxwell::Blend::Equation a_equation;
+        Maxwell::Blend::Factor src_a_func;
+        Maxwell::Blend::Factor dst_a_func;
+        std::array<bool, 4> components;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const BlendingAttachment& rhs) const noexcept;
+
+        bool operator!=(const BlendingAttachment& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct VertexInput {
+        std::size_t num_bindings = 0;
+        std::size_t num_attributes = 0;
+        std::array<VertexBinding, Maxwell::NumVertexArrays> bindings;
+        std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const VertexInput& rhs) const noexcept;
+
+        bool operator!=(const VertexInput& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct InputAssembly {
+        constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable,
+                                float point_size)
+            : topology{topology}, primitive_restart_enable{primitive_restart_enable},
+              point_size{point_size} {}
+        InputAssembly() = default;
+
+        Maxwell::PrimitiveTopology topology;
+        bool primitive_restart_enable;
+        float point_size;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const InputAssembly& rhs) const noexcept;
+
+        bool operator!=(const InputAssembly& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct Tessellation {
+        constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive,
+                               Maxwell::TessellationSpacing spacing, bool clockwise)
+            : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing},
+              clockwise{clockwise} {}
+        Tessellation() = default;
+
+        u32 patch_control_points;
+        Maxwell::TessellationPrimitive primitive;
+        Maxwell::TessellationSpacing spacing;
+        bool clockwise;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const Tessellation& rhs) const noexcept;
+
+        bool operator!=(const Tessellation& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct Rasterizer {
+        constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
+                             Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
+            : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
+              ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
+                                                                                    front_face} {}
+        Rasterizer() = default;
+
+        bool cull_enable;
+        bool depth_bias_enable;
+        bool ndc_minus_one_to_one;
+        Maxwell::Cull::CullFace cull_face;
+        Maxwell::Cull::FrontFace front_face;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const Rasterizer& rhs) const noexcept;
+
+        bool operator!=(const Rasterizer& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct DepthStencil {
+        constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable,
+                               bool depth_bounds_enable, bool stencil_enable,
+                               Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil,
+                               StencilFace back_stencil)
+            : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable},
+              depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable},
+              depth_test_function{depth_test_function}, front_stencil{front_stencil},
+              back_stencil{back_stencil} {}
+        DepthStencil() = default;
+
+        bool depth_test_enable;
+        bool depth_write_enable;
+        bool depth_bounds_enable;
+        bool stencil_enable;
+        Maxwell::ComparisonOp depth_test_function;
+        StencilFace front_stencil;
+        StencilFace back_stencil;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const DepthStencil& rhs) const noexcept;
+
+        bool operator!=(const DepthStencil& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    /// Blending state for all render targets: how many attachments are in use plus the
+    /// per-attachment blending configuration.
+    struct ColorBlending {
+        // NOTE(review): blend_constants is accepted but never stored; only attachments_count
+        // and attachments are initialized. Confirm the blend constants are intentionally
+        // left out of this state.
+        constexpr ColorBlending(
+            std::array<float, 4> blend_constants, std::size_t attachments_count,
+            std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments)
+            : attachments_count{attachments_count}, attachments{attachments} {}
+        ColorBlending() = default;
+
+        /// Number of leading entries of attachments that take part in Hash() and operator==.
+        std::size_t attachments_count;
+        std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const ColorBlending& rhs) const noexcept;
+
+        bool operator!=(const ColorBlending& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const FixedPipelineState& rhs) const noexcept;
+
+    bool operator!=(const FixedPipelineState& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    VertexInput vertex_input;
+    InputAssembly input_assembly;
+    Tessellation tessellation;
+    Rasterizer rasterizer;
+    DepthStencil depth_stencil;
+    ColorBlending color_blending;
+};
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
+
+FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
+
+} // namespace Vulkan
+
+namespace std {
+
+/// std::hash specialization that forwards to FixedPipelineState::Hash(), so the state can be
+/// used as a key in the standard unordered containers.
+template <>
+struct hash<Vulkan::FixedPipelineState> {
+    std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept {
+        return k.Hash();
+    }
+};
+
+} // namespace std
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,8 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
    return {};
 }

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter) {
    switch (wrap_mode) {
    case Tegra::Texture::WrapMode::Wrap:
        return vk::SamplerAddressMode::eRepeat;
@@ -55,10 +56,15 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    case Tegra::Texture::WrapMode::Border:
        return vk::SamplerAddressMode::eClampToBorder;
    case Tegra::Texture::WrapMode::Clamp:
-        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
-        // eClampToBorder to get the border color of the texture, and then sample the edge to
-        // manually mix them. However the shader part of this is not yet implemented.
-        return vk::SamplerAddressMode::eClampToBorder;
+        // TODO(Rodrigo): Emulate GL_CLAMP properly
+        switch (filter) {
+        case Tegra::Texture::TextureFilter::Nearest:
+            return vk::SamplerAddressMode::eClampToEdge;
+        case Tegra::Texture::TextureFilter::Linear:
+            return vk::SamplerAddressMode::eClampToBorder;
+        }
+        UNREACHABLE();
+        return vk::SamplerAddressMode::eClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
        return vk::SamplerAddressMode::eMirrorClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
@@ -96,106 +102,140 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar

 } // namespace Sampler

+namespace {
+
+enum : u32 { Attachable = 1, Storage = 2 }; // Bit flags OR-ed into FormatTuple::usage
+
 struct FormatTuple {
    vk::Format format; ///< Vulkan format
-    bool attachable;   ///< True when this format can be used as an attachment
-};
-
-static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {vk::Format::eA8B8G8R8UnormPack32, true},    // ABGR8U
-    {vk::Format::eUndefined, false},             // ABGR8S
-    {vk::Format::eUndefined, false},             // ABGR8UI
-    {vk::Format::eB5G6R5UnormPack16, false},     // B5G6R5U
-    {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
-    {vk::Format::eUndefined, false},             // A1B5G5R5U
-    {vk::Format::eR8Unorm, true},                // R8U
-    {vk::Format::eUndefined, false},             // R8UI
-    {vk::Format::eUndefined, false},             // RGBA16F
-    {vk::Format::eUndefined, false},             // RGBA16U
-    {vk::Format::eUndefined, false},             // RGBA16UI
-    {vk::Format::eUndefined, false},             // R11FG11FB10F
-    {vk::Format::eUndefined, false},             // RGBA32UI
-    {vk::Format::eBc1RgbaUnormBlock, false},     // DXT1
-    {vk::Format::eBc2UnormBlock, false},         // DXT23
-    {vk::Format::eBc3UnormBlock, false},         // DXT45
-    {vk::Format::eBc4UnormBlock, false},         // DXN1
-    {vk::Format::eUndefined, false},             // DXN2UNORM
-    {vk::Format::eUndefined, false},             // DXN2SNORM
-    {vk::Format::eUndefined, false},             // BC7U
-    {vk::Format::eUndefined, false},             // BC6H_UF16
-    {vk::Format::eUndefined, false},             // BC6H_SF16
-    {vk::Format::eUndefined, false},             // ASTC_2D_4X4
-    {vk::Format::eUndefined, false},             // BGRA8
-    {vk::Format::eUndefined, false},             // RGBA32F
-    {vk::Format::eUndefined, false},             // RG32F
-    {vk::Format::eUndefined, false},             // R32F
-    {vk::Format::eUndefined, false},             // R16F
-    {vk::Format::eUndefined, false},             // R16U
-    {vk::Format::eUndefined, false},             // R16S
-    {vk::Format::eUndefined, false},             // R16UI
-    {vk::Format::eUndefined, false},             // R16I
-    {vk::Format::eUndefined, false},             // RG16
-    {vk::Format::eUndefined, false},             // RG16F
-    {vk::Format::eUndefined, false},             // RG16UI
-    {vk::Format::eUndefined, false},             // RG16I
-    {vk::Format::eUndefined, false},             // RG16S
-    {vk::Format::eUndefined, false},             // RGB32F
-    {vk::Format::eA8B8G8R8SrgbPack32, true},     // RGBA8_SRGB
-    {vk::Format::eUndefined, false},             // RG8U
-    {vk::Format::eUndefined, false},             // RG8S
-    {vk::Format::eUndefined, false},             // RG32UI
-    {vk::Format::eUndefined, false},             // RGBX16F
-    {vk::Format::eUndefined, false},             // R32UI
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X8
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X5
-    {vk::Format::eUndefined, false},             // ASTC_2D_5X4
-
-    // Compressed sRGB formats
-    {vk::Format::eUndefined, false}, // BGRA8_SRGB
-    {vk::Format::eUndefined, false}, // DXT1_SRGB
-    {vk::Format::eUndefined, false}, // DXT23_SRGB
-    {vk::Format::eUndefined, false}, // DXT45_SRGB
-    {vk::Format::eUndefined, false}, // BC7U_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
+    int usage;         ///< Describes image format usage
+} constexpr tex_format_tuples[] = {
+    {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage},    // ABGR8U
+    {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage},    // ABGR8S
+    {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage},     // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, {}},                        // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
+    {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage},    // A1B5G5R5U (flipped with swizzle)
+    {vk::Format::eR8Unorm, Attachable | Storage},                // R8U
+    {vk::Format::eR8Uint, Attachable | Storage},                 // R8UI
+    {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage},     // RGBA16F
+    {vk::Format::eR16G16B16A16Unorm, Attachable | Storage},      // RGBA16U
+    {vk::Format::eR16G16B16A16Uint, Attachable | Storage},       // RGBA16UI
+    {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage},  // R11FG11FB10F
+    {vk::Format::eR32G32B32A32Uint, Attachable | Storage},       // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, {}},                        // DXT1
+    {vk::Format::eBc2UnormBlock, {}},                            // DXT23
+    {vk::Format::eBc3UnormBlock, {}},                            // DXT45
+    {vk::Format::eBc4UnormBlock, {}},                            // DXN1
+    {vk::Format::eBc5UnormBlock, {}},                            // DXN2UNORM
+    {vk::Format::eBc5SnormBlock, {}},                            // DXN2SNORM
+    {vk::Format::eBc7UnormBlock, {}},                            // BC7U
+    {vk::Format::eBc6HUfloatBlock, {}},                          // BC6H_UF16
+    {vk::Format::eBc6HSfloatBlock, {}},                          // BC6H_SF16
+    {vk::Format::eAstc4x4UnormBlock, {}},                        // ASTC_2D_4X4
+    {vk::Format::eB8G8R8A8Unorm, {}},                            // BGRA8
+    {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage},     // RGBA32F
+    {vk::Format::eR32G32Sfloat, Attachable | Storage},           // RG32F
+    {vk::Format::eR32Sfloat, Attachable | Storage},              // R32F
+    {vk::Format::eR16Sfloat, Attachable | Storage},              // R16F
+    {vk::Format::eR16Unorm, Attachable | Storage},               // R16U
+    {vk::Format::eUndefined, {}},                                // R16S
+    {vk::Format::eUndefined, {}},                                // R16UI
+    {vk::Format::eUndefined, {}},                                // R16I
+    {vk::Format::eR16G16Unorm, Attachable | Storage},            // RG16
+    {vk::Format::eR16G16Sfloat, Attachable | Storage},           // RG16F
+    {vk::Format::eUndefined, {}},                                // RG16UI
+    {vk::Format::eUndefined, {}},                                // RG16I
+    {vk::Format::eR16G16Snorm, Attachable | Storage},            // RG16S
+    {vk::Format::eUndefined, {}},                                // RGB32F
+    {vk::Format::eR8G8B8A8Srgb, Attachable},                     // RGBA8_SRGB
+    {vk::Format::eR8G8Unorm, Attachable | Storage},              // RG8U
+    {vk::Format::eR8G8Snorm, Attachable | Storage},              // RG8S
+    {vk::Format::eR32G32Uint, Attachable | Storage},             // RG32UI
+    {vk::Format::eUndefined, {}},                                // RGBX16F
+    {vk::Format::eR32Uint, Attachable | Storage},                // R32UI
+    {vk::Format::eAstc8x8UnormBlock, {}},                        // ASTC_2D_8X8
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_8X5
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_5X4
+    {vk::Format::eUndefined, {}},                                // BGRA8_SRGB
+    {vk::Format::eBc1RgbaSrgbBlock, {}},                         // DXT1_SRGB
+    {vk::Format::eUndefined, {}},                                // DXT23_SRGB
+    {vk::Format::eBc3SrgbBlock, {}},                             // DXT45_SRGB
+    {vk::Format::eBc7SrgbBlock, {}},                             // BC7U_SRGB
+    {vk::Format::eR4G4B4A4UnormPack16, Attachable},              // R4G4B4A4U
+    {vk::Format::eAstc4x4SrgbBlock, {}},                         // ASTC_2D_4X4_SRGB
+    {vk::Format::eAstc8x8SrgbBlock, {}},                         // ASTC_2D_8X8_SRGB
+    {vk::Format::eAstc8x5SrgbBlock, {}},                         // ASTC_2D_8X5_SRGB
+    {vk::Format::eAstc5x4SrgbBlock, {}},                         // ASTC_2D_5X4_SRGB
+    {vk::Format::eAstc5x5UnormBlock, {}},                        // ASTC_2D_5X5
+    {vk::Format::eAstc5x5SrgbBlock, {}},                         // ASTC_2D_5X5_SRGB
+    {vk::Format::eAstc10x8UnormBlock, {}},                       // ASTC_2D_10X8
+    {vk::Format::eAstc10x8SrgbBlock, {}},                        // ASTC_2D_10X8_SRGB
+    {vk::Format::eAstc6x6UnormBlock, {}},                        // ASTC_2D_6X6
+    {vk::Format::eAstc6x6SrgbBlock, {}},                         // ASTC_2D_6X6_SRGB
+    {vk::Format::eAstc10x10UnormBlock, {}},                      // ASTC_2D_10X10
+    {vk::Format::eAstc10x10SrgbBlock, {}},                       // ASTC_2D_10X10_SRGB
+    {vk::Format::eAstc12x12UnormBlock, {}},                      // ASTC_2D_12X12
+    {vk::Format::eAstc12x12SrgbBlock, {}},                       // ASTC_2D_12X12_SRGB
+    {vk::Format::eAstc8x6UnormBlock, {}},                        // ASTC_2D_8X6
+    {vk::Format::eAstc8x6SrgbBlock, {}},                         // ASTC_2D_8X6_SRGB
+    {vk::Format::eAstc6x5UnormBlock, {}},                        // ASTC_2D_6X5
+    {vk::Format::eAstc6x5SrgbBlock, {}},                         // ASTC_2D_6X5_SRGB
+    {vk::Format::eE5B9G9R9UfloatPack32, {}},                     // E5B9G9R9F

    // Depth formats
-    {vk::Format::eD32Sfloat, true}, // Z32F
-    {vk::Format::eD16Unorm, true},  // Z16
+    {vk::Format::eD32Sfloat, Attachable}, // Z32F
+    {vk::Format::eD16Unorm, Attachable},  // Z16

    // DepthStencil formats
-    {vk::Format::eD24UnormS8Uint, true}, // Z24S8
-    {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
-    {vk::Format::eUndefined, false},     // Z32FS8
-}};
+    {vk::Format::eD24UnormS8Uint, Attachable},  // Z24S8
+    {vk::Format::eD24UnormS8Uint, Attachable},  // S8Z24 (emulated)
+    {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8
+};
+static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);

-static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+constexpr bool IsZetaFormat(PixelFormat pixel_format) {
    return pixel_format >= PixelFormat::MaxColorFormat &&
           pixel_format < PixelFormat::MaxDepthStencilFormat;
 }

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format) {
-    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+} // Anonymous namespace

-    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
-    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
-                         "Unimplemented texture format with pixel format={}",
-                         static_cast<u32>(pixel_format));
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));

-    auto usage = vk::FormatFeatureFlagBits::eSampledImage |
-                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
-    if (tuple.attachable) {
-        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
-                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; // Copy: format may change
+    if (tuple.format == vk::Format::eUndefined) {
+        UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
+                          static_cast<u32>(pixel_format));
+        return {vk::Format::eA8B8G8R8UnormPack32, true, true}; // Placeholder for missing entries
    }
-    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+
+    // Use ABGR8 on hardware that doesn't support ASTC natively
+    if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
+        tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
+                           ? vk::Format::eA8B8G8R8SrgbPack32
+                           : vk::Format::eA8B8G8R8UnormPack32;
+    }
+    const bool attachable = tuple.usage & Attachable;
+    const bool storage = tuple.usage & Storage;
+
+    vk::FormatFeatureFlags usage; // Features requested from GetSupportedFormat
+    if (format_type == FormatType::Buffer) { // Buffers only request texel-buffer features
+        usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer |
+                vk::FormatFeatureFlagBits::eUniformTexelBuffer;
+    } else {
+        usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst |
+                vk::FormatFeatureFlagBits::eTransferSrc;
+        if (attachable) { // Depth/stencil formats attach as depth-stencil, the rest as color
+            usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                                : vk::FormatFeatureFlagBits::eColorAttachment;
+        }
+        if (storage) {
+            usage |= vk::FormatFeatureFlagBits::eStorageImage;
+        }
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
 }

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
@@ -215,7 +255,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
    return {};
 }

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology) {
    switch (topology) {
    case Maxwell::PrimitiveTopology::Points:
        return vk::PrimitiveTopology::ePointList;
@@ -227,6 +268,13 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return vk::PrimitiveTopology::eTriangleList;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return vk::PrimitiveTopology::eTriangleStrip;
+    case Maxwell::PrimitiveTopology::TriangleFan:
+        return vk::PrimitiveTopology::eTriangleFan;
+    case Maxwell::PrimitiveTopology::Quads:
+        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
+        return vk::PrimitiveTopology::eTriangleList;
+    case Maxwell::PrimitiveTopology::Patches:
+        return vk::PrimitiveTopology::ePatchList;
    default:
        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
        return {};
@@ -236,37 +284,113 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
    switch (type) {
    case Maxwell::VertexAttribute::Type::SignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+            return vk::Format::eA2B10G10R10SnormPack32;
+        default:
+            break;
+        }
        break;
    case Maxwell::VertexAttribute::Type::UnsignedNorm:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Unorm;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return vk::Format::eR8G8B8A8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Unorm;
        default:
            break;
        }
        break;
    case Maxwell::VertexAttribute::Type::SignedInt:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sint;
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Sint;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sint;
+        default:
+            break;
+        }
        break;
    case Maxwell::VertexAttribute::Type::UnsignedInt:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Uint;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Uint;
        default:
            break;
        }
+        break;
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uscaled;
+        default:
+            break;
+        }
+        break;
    case Maxwell::VertexAttribute::Type::SignedScaled:
        break;
    case Maxwell::VertexAttribute::Type::Float:
        switch (size) {
-        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
-            return vk::Format::eR32G32B32A32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32_32:
-            return vk::Format::eR32G32B32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32:
-            return vk::Format::eR32G32Sfloat;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sfloat;
        default:
            break;
        }
@@ -308,11 +432,14 @@ vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
    return {};
 }

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
    switch (index_format) {
    case Maxwell::IndexFormat::UnsignedByte:
-        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
-        return vk::IndexType::eUint16;
+        if (!device.IsExtIndexTypeUint8Supported()) {
+            UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
+            return vk::IndexType::eUint16;
+        }
+        return vk::IndexType::eUint8EXT;
    case Maxwell::IndexFormat::UnsignedShort:
        return vk::IndexType::eUint16;
    case Maxwell::IndexFormat::UnsignedInt:
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -4,7 +4,6 @@

 #pragma once

-#include <utility>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -23,24 +22,31 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);

 vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter);

 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

 } // namespace Sampler

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format);
+struct FormatInfo {
+    vk::Format format; ///< Vulkan format to use for the requested pixel format
+    bool attachable;   ///< True when this format can be used as an attachment
+    bool storage;      ///< True when this format can be used as a storage image
+};
+
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology);

 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

 vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);

 vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/renderer_vulkan/shaders/blit.frag
@@ -0,0 +1,24 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 frag_tex_coord; // Texture coordinate input (location 0)
+
+layout (location = 0) out vec4 color; // Fragment color output
+
+layout (binding = 1) uniform sampler2D color_texture; // Image that is sampled
+
+void main() {
+    color = texture(color_texture, frag_tex_coord); // Output the sampled texel unmodified
+}
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/renderer_vulkan/shaders/blit.vert
@@ -0,0 +1,28 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 vert_position;  // 2D vertex position
+layout (location = 1) in vec2 vert_tex_coord; // Per-vertex texture coordinate
+
+layout (location = 0) out vec2 frag_tex_coord; // Texture coordinate handed to the next stage
+
+layout (set = 0, binding = 0) uniform MatrixBlock {
+    mat4 modelview_matrix; // Transform applied to every vertex position
+};
+
+void main() {
+    gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); // Extend with z = 0, w = 1
+    frag_tex_coord = vert_tex_coord; // Forwarded unchanged
+}
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (local_size_x = 1024) in; // 1024 invocations per workgroup, one quad per invocation
+
+layout (std430, set = 0, binding = 0) buffer OutputBuffer {
+    uint output_indexes[]; // Receives 6 indices per quad
+};
+
+layout (push_constant) uniform PushConstants {
+    uint first; // Offset added to every generated index
+};
+
+void main() {
+    uint primitive = gl_GlobalInvocationID.x; // Quad handled by this invocation
+    if (primitive * 6 >= output_indexes.length()) { // Skip invocations past the end of the buffer
+        return; // NOTE(review): only the first of the 6 writes is checked; assumes length % 6 == 0
+    }
+
+    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); // Two triangles: (0, 1, 2) and (0, 2, 3)
+    for (uint vertex = 0; vertex < 6; ++vertex) {
+        uint index = first + primitive * 4 + quad_map[vertex]; // 4 source vertices per quad
+        output_indexes[primitive * 6 + vertex] = index;
+    }
+}
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/renderer_vulkan/shaders/uint8.comp
@@ -0,0 +1,33 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+
+layout (local_size_x = 1024) in; // 1024 invocations per workgroup, one index per invocation
+
+layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
+    uint8_t input_indexes[]; // Source 8-bit indices
+};
+
+layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
+    uint16_t output_indexes[]; // Destination 16-bit indices
+};
+
+void main() {
+    uint id = gl_GlobalInvocationID.x;
+    if (id < input_indexes.length()) { // Ignore invocations past the last input index
+        output_indexes[id] = uint16_t(input_indexes[id]); // Widen each index from 8 to 16 bits
+    }
+}
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -3,12 +3,15 @@
 // Refer to the license.txt file included.

 #include <bitset>
+#include <chrono>
 #include <cstdlib>
 #include <optional>
 #include <set>
 #include <string_view>
+#include <thread>
 #include <vector>
 #include "common/assert.h"
+#include "core/settings.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"

@@ -201,6 +204,22 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
    return wanted_format;
 }

+void VKDevice::ReportLoss() const {
+    LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+
+    // Sleep for one second so the critical log entry above has time to flush
+    std::this_thread::sleep_for(std::chrono::seconds{1});
+
+    if (!nv_device_diagnostic_checkpoints) {
+        return; // Checkpoint data is only queried when the NV checkpoints extension flag is set
+    }
+
+    [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld);
+    // Debugging aid: break here in debug builds (or with optimizations disabled) to inspect the
+    // last graphics pipeline to be executed. In a debugger, evaluate the expression:
+    // *(VKGraphicsPipeline*)data[0]
+}
+
 bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
                                      const vk::DispatchLoaderDynamic& dldi) const {
    // Disable for now to avoid converting ASTC twice.
@@ -381,6 +400,8 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
             VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
        Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
             false);
+        Test(extension, nv_device_diagnostic_checkpoints,
+             VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
    }

    if (khr_shader_float16_int8) {
@@ -464,6 +485,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
 std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+                                        vk::Format::eA8B8G8R8UintPack32,
                                        vk::Format::eA8B8G8R8SnormPack32,
                                        vk::Format::eA8B8G8R8SrgbPack32,
                                        vk::Format::eB5G6R5UnormPack16,
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -39,6 +39,9 @@ public:
    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const;

+    /// Reports a device loss.
+    void ReportLoss() const;
+
    /// Returns the dispatch loader with direct function pointers of the device.
    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
        return dld;
@@ -159,6 +162,11 @@ public:
        return ext_shader_viewport_index_layer;
    }

+    /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
+    bool IsNvDeviceDiagnosticCheckpoints() const {
+        return nv_device_diagnostic_checkpoints;
+    }
+
    /// Returns the vendor name reported from Vulkan.
    std::string_view GetVendorName() const {
        return vendor_name;
@@ -218,6 +226,7 @@ private:
    bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
    bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
    bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool nv_device_diagnostic_checkpoints{};   ///< Support for VK_NV_device_diagnostic_checkpoints.

    // Telemetry parameters
    std::string vendor_name;                      ///< Device's driver name.
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -0,0 +1,106 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+/// Creates the Vulkan image described by image_ci and sets up per-subresource tracking.
+/// One barrier slot and one state entry are allocated for every (layer, level) pair; each state
+/// entry starts with empty access flags and image_ci.initialLayout.
+VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
+                 const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask)
+    : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
+      image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
+    UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
+                         "Queue family tracking is not implemented");
+
+    const auto dev = device.GetLogical();
+    image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader());
+
+    // Total number of tracked subresources (array layers times mip levels).
+    const u32 num_ranges = image_num_layers * image_num_levels;
+    barriers.resize(num_ranges);
+    subrange_states.resize(num_ranges, {{}, image_ci.initialLayout});
+}
+
+VKImage::~VKImage() = default;
+
+/// Moves the requested layers and mip levels to new_access/new_layout, updates the tracked
+/// state and records the matching pipeline barrier through the scheduler. Returns without
+/// recording anything when HasChanged reports no difference.
+/// NOTE(review): new_stage_mask is not read here; the recorded barrier uses eAllCommands for
+/// both stage masks (see the TODO in the lambda).
+void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                         vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                         vk::ImageLayout new_layout) {
+    if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
+        return;
+    }
+
+    // cursor counts the barriers written below; only that many are submitted.
+    std::size_t cursor = 0;
+    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
+        for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
+            const u32 layer = base_layer + layer_it;
+            const u32 level = base_level + level_it;
+            auto& state = GetSubrangeState(layer, level);
+            // One barrier per (layer, level), going from the tracked state to the new one.
+            barriers[cursor] = vk::ImageMemoryBarrier(
+                state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED,
+                VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1});
+            state.access = new_access;
+            state.layout = new_layout;
+        }
+    }
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    // The lambda captures the whole barrier pool by value (a copy), plus the used count.
+    scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) {
+        // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
+        constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands;
+        cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr,
+                               static_cast<u32>(cursor), barriers.data(), dld);
+    });
+}
+
+/// Returns true when at least one subresource in the requested range is tracked with access
+/// flags or a layout different from new_access/new_layout.
+/// Side effect: a query that does not cover the whole image marks the image as diverged, which
+/// disables the single-entry fast path for all later queries.
+bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                         vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept {
+    const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
+                               base_level == 0 && num_levels == image_num_levels;
+    if (!is_full_range) {
+        state_diverged = true;
+    }
+
+    const auto differs = [new_access, new_layout](const SubrangeState& state) {
+        return state.access != new_access || state.layout != new_layout;
+    };
+
+    if (!state_diverged) {
+        // Only full-range transitions have happened so far, so every subresource shares the
+        // state of entry (0, 0). That single entry answers the query either way; there is no
+        // need to scan the whole image when nothing changed.
+        return differs(GetSubrangeState(0, 0));
+    }
+
+    for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
+        for (u32 level_it = 0; level_it < num_levels; ++level_it) {
+            if (differs(GetSubrangeState(base_layer + layer_it, base_level + level_it))) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void VKImage::CreatePresentView() {
+    // Presentation needs a 2D view; it covers the first mip level of the first array layer.
+    const vk::ImageSubresourceRange subresource_range(aspect_mask, 0, 1, 0, 1);
+    const vk::ImageViewCreateInfo view_create_info({}, *image, vk::ImageViewType::e2D, format,
+                                                   {}, subresource_range);
+    present_view = device.GetLogical().createImageViewUnique(view_create_info, nullptr,
+                                                             device.GetDispatchLoader());
+}
+
+/// Returns the tracked state of one subresource. Entries are stored layer-major: all mip levels
+/// of layer 0 first, then all mip levels of layer 1, and so on.
+VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
+    // Widen before multiplying so the index is computed in std::size_t rather than in u32.
+    return subrange_states[static_cast<std::size_t>(layer) * image_num_levels +
+                           static_cast<std::size_t>(level)];
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ b/src/video_core/renderer_vulkan/vk_image.h
@@ -0,0 +1,84 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKScheduler;
+
+/// Owns a Vulkan image and tracks the access flags and layout of each (layer, level)
+/// subresource so that transitions are only recorded when something actually changes.
+class VKImage {
+public:
+    explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
+                     const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask);
+    ~VKImage();
+
+    /// Records an image transition through the scheduler and updates the tracked state of the
+    /// image.
+    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                    vk::ImageLayout new_layout);
+
+    /// Returns a view compatible with presentation, the image has to be 2D.
+    /// The view is created on first use and cached afterwards.
+    vk::ImageView GetPresentView() {
+        if (!present_view) {
+            CreatePresentView();
+        }
+        return *present_view;
+    }
+
+    /// Returns the Vulkan image handle.
+    vk::Image GetHandle() const {
+        return *image;
+    }
+
+    /// Returns the Vulkan format for this image.
+    vk::Format GetFormat() const {
+        return format;
+    }
+
+    /// Returns the Vulkan aspect mask.
+    vk::ImageAspectFlags GetAspectMask() const {
+        return aspect_mask;
+    }
+
+private:
+    struct SubrangeState final {
+        vk::AccessFlags access{};                             ///< Current access bits.
+        vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout.
+    };
+
+    /// Returns true when any subresource in the range differs from the requested access/layout.
+    bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept;
+
+    /// Creates a presentation view.
+    void CreatePresentView();
+
+    /// Returns the subrange state for a layer and level.
+    SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
+
+    const VKDevice& device; ///< Device handler.
+    VKScheduler& scheduler; ///< Device scheduler.
+
+    const vk::Format format;                ///< Vulkan format.
+    const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
+    const u32 image_num_layers;             ///< Number of layers.
+    const u32 image_num_levels;             ///< Number of mipmap levels.
+
+    UniqueImage image;            ///< Image handle.
+    UniqueImageView present_view; ///< Image view compatible with presentation.
+
+    std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers.
+    std::vector<SubrangeState> subrange_states;   ///< Current subrange state.
+
+    bool state_diverged = false; ///< True when subresources mismatch in layout.
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -72,12 +72,22 @@ VKFence::VKFence(const VKDevice& device, UniqueFence handle)
 VKFence::~VKFence() = default;

 void VKFence::Wait() {
+    static constexpr u64 timeout = std::numeric_limits<u64>::max();
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+    switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) {
+    case vk::Result::eSuccess:
+        return;
+    case vk::Result::eErrorDeviceLost:
+        device.ReportLoss();
+        [[fallthrough]];
+    default:
+        vk::throwResultException(result, "vk::waitForFences");
+    }
 }

 void VKFence::Release() {
+    ASSERT(is_owned);
    is_owned = false;
 }

@@ -133,8 +143,32 @@ void VKFence::Unprotect(VKResource* resource) {
    protected_resources.erase(it);
 }

+void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
+    // Rewrite every registration of the old address so it points at the new one.
+    for (auto& entry : protected_resources) {
+        if (entry == old_resource) {
+            entry = new_resource;
+        }
+    }
+}
+
 VKFenceWatch::VKFenceWatch() = default;

+VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
+    Watch(initial_fence);
+}
+
+/// Takes over the fence watched by rhs (leaving rhs free) and re-registers this object with it.
+VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept
+    : fence{std::exchange(rhs.fence, nullptr)} {
+    if (fence != nullptr) {
+        fence->RedirectProtection(&rhs, this);
+    }
+}
+
+/// Move-assigns the watch: drops whatever fence this object was watching, takes over the fence
+/// watched by rhs (leaving rhs free) and re-registers this object with it.
+VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
+    if (this == &rhs) {
+        return *this;
+    }
+    if (fence) {
+        // Without this, the previously watched fence would keep a registration for this object
+        // even though it now watches a different fence (or none).
+        fence->Unprotect(this);
+    }
+    fence = std::exchange(rhs.fence, nullptr);
+    if (fence) {
+        fence->RedirectProtection(&rhs, this);
+    }
+    return *this;
+}
+
 VKFenceWatch::~VKFenceWatch() {
    if (fence) {
        fence->Unprotect(this);
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -65,6 +65,9 @@ public:
    /// Removes protection for a resource.
    void Unprotect(VKResource* resource);

+    /// Redirects one protected resource to a new address.
+    void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
+
    /// Retreives the fence.
    operator vk::Fence() const {
        return *handle;
@@ -97,8 +100,13 @@ private:
 class VKFenceWatch final : public VKResource {
 public:
    explicit VKFenceWatch();
+    VKFenceWatch(VKFence& initial_fence);
+    VKFenceWatch(VKFenceWatch&&) noexcept;
+    VKFenceWatch(const VKFenceWatch&) = delete;
    ~VKFenceWatch() override;

+    VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
+
    /// Waits for the fence to be released.
    void Wait();

@@ -116,6 +124,14 @@ public:

    void OnFenceRemoval(VKFence* signaling_fence) override;

+    /**
+     * Do not use it paired with Watch. Use TryWatch instead.
+     * Returns true when the watch is in use, i.e. it currently has a fence assigned.
+     */
+    bool IsUsed() const {
+        return fence != nullptr;
+    }
+
 private:
    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
 };
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,10 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
-        max_anisotropy, tsc.depth_compare_enabled,
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+        has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
        unnormalized_coords);
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@

 namespace Vulkan {

-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
-    : device{device}, resource_manager{resource_manager} {
-    next_fence = &resource_manager.CommitFence();
-    AllocateNewContext();
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+/// Executes every recorded command in recording order, destroying each one after it runs, and
+/// then resets the chunk so it can be recorded into again.
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    auto command = first;
+    while (command != nullptr) {
+        // Fetch the successor before the current command is destroyed.
+        auto next = command->GetNext();
+        command->Execute(cmdbuf, dld);
+        // Commands are placement-constructed inside the chunk storage, so only the destructor
+        // is invoked; no memory is released here.
+        command->~Command();
+        command = next;
+    }
+
+    command_offset = 0;
+    first = nullptr;
+    last = nullptr;
 }

-VKScheduler::~VKScheduler() = default;
+/// Commits the first fence, prepares the first command chunk and execution context, and only
+/// then starts the worker thread that executes dispatched chunks.
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager}, next_fence{
+                                                              &resource_manager.CommitFence()} {
+    AcquireNewChunk();
+    AllocateNewContext();
+    worker_thread = std::thread(&VKScheduler::WorkerThread, this);
+}
+
+/// Asks the worker thread to stop and waits for it to exit.
+VKScheduler::~VKScheduler() {
+    {
+        // The worker evaluates `quit` while holding this mutex. Writing the flag under the same
+        // mutex removes the data race and guarantees the worker cannot check the flag, miss the
+        // notification below and then block forever, which would hang join().
+        std::unique_lock lock{mutex};
+        quit = true;
+    }
+    cv.notify_all();
+    worker_thread.join();
+}

 void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

 void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    current_fence->Wait();
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

+/// Dispatches the pending chunk and spins until the worker thread has drained the chunk queue.
+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    DispatchWork();
+
+    bool finished = false;
+    do {
+        cv.notify_all();
+        // WorkerThread holds this mutex whenever it is not blocked in cv.wait, so acquiring it
+        // here means the worker is not in the middle of executing a chunk.
+        std::unique_lock lock{mutex};
+        finished = chunk_queue.Empty();
+    } while (!finished);
+}
+
+void VKScheduler::DispatchWork() {
+    // Only chunks that actually contain commands are handed to the worker.
+    if (!chunk->Empty()) {
+        chunk_queue.Push(std::move(chunk));
+        cv.notify_all();
+        AcquireNewChunk();
+    }
+}
+
+/// Makes renderpass_bi the active renderpass. Does nothing when an equal renderpass is already
+/// tracked; otherwise records a command that ends the previous renderpass (if there was one)
+/// and begins the new one with inline subpass contents.
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    if (state.renderpass && renderpass_bi == *state.renderpass) {
+        return;
+    }
+    // Sample this before the tracked renderpass is overwritten below.
+    const bool end_renderpass = state.renderpass.has_value();
+    state.renderpass = renderpass_bi;
+    Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+        if (end_renderpass) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+    EndRenderPass();
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    // Skip the bind when this pipeline is already the tracked one.
+    if (state.graphics_pipeline != pipeline) {
+        state.graphics_pipeline = pipeline;
+        Record([pipeline](auto cmdbuf, auto& dld) {
+            cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+        });
+    }
+}
+
+void VKScheduler::WorkerThread() {
+    // The mutex stays locked for the whole loop except while blocked inside cv.wait.
+    std::unique_lock lock{mutex};
+    for (;;) {
+        cv.wait(lock, [this] { return quit || !chunk_queue.Empty(); });
+        if (quit) {
+            return;
+        }
+        auto pending_chunk = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        pending_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        // Executed chunks are handed back for reuse.
+        chunk_reserve.Push(std::move(pending_chunk));
+    }
+}
+
 void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    EndPendingOperations();
+    InvalidateState();
+    WaitWorker();
+
+    std::unique_lock lock{mutex};
+
+    const auto queue = device.GetGraphicsQueue();
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

-    const auto queue = device.GetGraphicsQueue();
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
                                     &semaphore);
-    queue.submit({submit_info}, *current_fence, dld);
+    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
 }

 void VKScheduler::AllocateNewContext() {
+    std::unique_lock lock{mutex};
    current_fence = next_fence;
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+                         device.GetDispatchLoader());
+}
+
+/// Forgets the tracked graphics pipeline and clears every "already set" dynamic-state flag.
+/// The tracked renderpass is not touched here.
+void VKScheduler::InvalidateState() {
+    state.graphics_pipeline = nullptr;
+    state.viewports = false;
+    state.scissors = false;
+    state.depth_bias = false;
+    state.blend_constants = false;
+    state.depth_bounds = false;
+    state.stencil_values = false;
+}
+
+void VKScheduler::EndPendingOperations() {
+    EndRenderPass();
+}
+
+void VKScheduler::EndRenderPass() {
+    // Nothing to end unless a renderpass is currently tracked.
+    if (state.renderpass.has_value()) {
+        state.renderpass.reset();
+        Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+    }
+}
+
+/// Makes `chunk` point at an empty chunk: a recycled one from chunk_reserve when available,
+/// otherwise a freshly allocated one.
+void VKScheduler::AcquireNewChunk() {
+    if (chunk_reserve.Empty()) {
+        chunk = std::make_unique<CommandChunk>();
+        return;
+    }
+    chunk = std::move(chunk_reserve.Front());
+    chunk_reserve.Pop();
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@

 #pragma once

+#include <array>
+#include <condition_variable>
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/declarations.h"

 namespace Vulkan {
@@ -30,23 +37,6 @@ private:
    VKFence* const& fence;
 };

-class VKCommandBufferView {
-public:
-    VKCommandBufferView() = default;
-    VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
-
-    const vk::CommandBuffer* operator->() const noexcept {
-        return &cmdbuf;
-    }
-
-    operator vk::CommandBuffer() const noexcept {
-        return cmdbuf;
-    }
-
-private:
-    const vk::CommandBuffer& cmdbuf;
-};
-
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
@@ -54,32 +44,190 @@ public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

-    /// Gets a reference to the current fence.
-    VKFenceView GetFence() const {
-        return current_fence;
-    }
-
-    /// Gets a reference to the current command buffer.
-    VKCommandBufferView GetCommandBuffer() const {
-        return current_cmdbuf;
-    }
-
    /// Sends the current execution context to the GPU.
    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);

+    /// Waits for the worker thread to finish executing everything. After this function returns it's
+    /// safe to touch worker resources.
+    void WaitWorker();
+
+    /// Sends currently recorded work to the worker thread.
+    void DispatchWork();
+
+    /// Requests to begin a renderpass.
+    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+    /// Requests the current execution context to be able to execute operations only allowed outside
+    /// of a renderpass.
+    void RequestOutsideRenderPassOperationContext();
+
+    /// Binds a pipeline to the current execution context.
+    void BindGraphicsPipeline(vk::Pipeline pipeline);
+
+    /// Returns true when viewports have been set in the current command buffer.
+    bool TouchViewports() {
+        return std::exchange(state.viewports, true);
+    }
+
+    /// Returns true when scissors have been set in the current command buffer.
+    bool TouchScissors() {
+        return std::exchange(state.scissors, true);
+    }
+
+    /// Returns true when depth bias have been set in the current command buffer.
+    bool TouchDepthBias() {
+        return std::exchange(state.depth_bias, true);
+    }
+
+    /// Returns true when blend constants have been set in the current command buffer.
+    bool TouchBlendConstants() {
+        return std::exchange(state.blend_constants, true);
+    }
+
+    /// Returns true when depth bounds have been set in the current command buffer.
+    bool TouchDepthBounds() {
+        return std::exchange(state.depth_bounds, true);
+    }
+
+    /// Returns true when stencil values have been set in the current command buffer.
+    bool TouchStencilValues() {
+        return std::exchange(state.stencil_values, true);
+    }
+
+    /// Records a command to be executed on the worker thread. When the current chunk has no room
+    /// left, it is dispatched and the command is recorded into the newly acquired chunk.
+    template <typename T>
+    void Record(T&& command) {
+        if (chunk->Record(command)) {
+            return;
+        }
+        DispatchWork();
+        // CommandChunk::Record statically asserts that a single command fits in a chunk, so the
+        // result of recording into the fresh chunk is deliberately ignored.
+        (void)chunk->Record(command);
+    }
+
+    /// Gets a reference to the current fence.
+    VKFenceView GetFence() const {
+        return current_fence;
+    }
+
 private:
+    /// Type-erased recorded command. Commands form a singly linked list through GetNext/SetNext.
+    class Command {
+    public:
+        virtual ~Command() = default;
+
+        /// Runs the command against the given command buffer.
+        virtual void Execute(vk::CommandBuffer cmdbuf,
+                             const vk::DispatchLoaderDynamic& dld) const = 0;
+
+        /// Returns the command recorded after this one, or nullptr when this is the last one.
+        Command* GetNext() const {
+            return next;
+        }
+
+        /// Links the command recorded after this one.
+        void SetNext(Command* next_) {
+            next = next_;
+        }
+
+    private:
+        Command* next = nullptr;
+    };
+
+    /// Command that stores a callable of type T and invokes it as command(cmdbuf, dld).
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        /// Takes ownership of the callable by moving it into this object.
+        explicit TypedCommand(T&& command) : command{std::move(command)} {}
+        ~TypedCommand() override = default;
+
+        // Moving is disabled.
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    /// Fixed-size arena of recorded commands. Commands are placement-constructed back to back
+    /// in `data` and linked in recording order.
+    class CommandChunk final {
+    public:
+        /// Executes and destroys every recorded command, then resets the chunk.
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        /// Moves `command` into the chunk storage.
+        /// Returns false, leaving `command` untouched, when the chunk has no room left for it.
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+            static_assert(alignof(FuncType) <= alignof(std::max_align_t),
+                          "Lambda is over-aligned");
+
+            // The storage is aligned for any fundamental type, so rounding the offset up to the
+            // alignment of this command type yields a correctly aligned address. Constructing
+            // the object at an unaligned address would be undefined behaviour.
+            constexpr std::size_t alignment_mask = alignof(FuncType) - 1;
+            const std::size_t aligned_offset = (command_offset + alignment_mask) & ~alignment_mask;
+            if (aligned_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* const current_last = last;
+
+            last = new (data.data() + aligned_offset) FuncType(std::move(command));
+
+            if (current_last) {
+                current_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset = aligned_offset + sizeof(FuncType);
+            return true;
+        }
+
+        /// Returns true when no command has been recorded since the last reset.
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr;
+        Command* last = nullptr;
+
+        std::size_t command_offset = 0;
+        alignas(std::max_align_t) std::array<u8, 0x8000> data{};
+    };
+
+    void WorkerThread();
+
    void SubmitExecution(vk::Semaphore semaphore);

    void AllocateNewContext();

+    void InvalidateState();
+
+    void EndPendingOperations();
+
+    void EndRenderPass();
+
+    void AcquireNewChunk();
+
    const VKDevice& device;
    VKResourceManager& resource_manager;
    vk::CommandBuffer current_cmdbuf;
    VKFence* current_fence = nullptr;
    VKFence* next_fence = nullptr;
+
+    struct State {
+        std::optional<vk::RenderPassBeginInfo> renderpass;
+        vk::Pipeline graphics_pipeline;
+        bool viewports = false;
+        bool scissors = false;
+        bool depth_bias = false;
+        bool blend_constants = false;
+        bool depth_bounds = false;
+        bool stencil_values = false;
+    } state;
+
+    std::unique_ptr<CommandChunk> chunk;
+    std::thread worker_thread;
+
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool quit = false;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -543,7 +543,7 @@ private:
        }

        for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
-            if (!IsRenderTargetUsed(rt)) {
+            if (!specialization.enabled_rendertargets[rt]) {
                continue;
            }

@@ -1555,26 +1555,11 @@ private:

    Expression Texture(Operation operation) {
        const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-        UNIMPLEMENTED_IF(!meta.aoffi.empty());

        const bool can_implicit = stage == ShaderType::Fragment;
        const Id sampler = GetTextureSampler(operation);
        const Id coords = GetCoordinates(operation, Type::Float);

-        if (meta.depth_compare) {
-            // Depth sampling
-            UNIMPLEMENTED_IF(meta.bias);
-            const Id dref = AsFloat(Visit(meta.depth_compare));
-            if (can_implicit) {
-                return {OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, {}),
-                        Type::Float};
-            } else {
-                return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref,
-                                                     spv::ImageOperandsMask::Lod, v_float_zero),
-                        Type::Float};
-            }
-        }
-
        std::vector<Id> operands;
        spv::ImageOperandsMask mask{};
        if (meta.bias) {
@@ -1582,13 +1567,36 @@ private:
            operands.push_back(AsFloat(Visit(meta.bias)));
        }

+        if (!can_implicit) {
+            mask = mask | spv::ImageOperandsMask::Lod;
+            operands.push_back(v_float_zero);
+        }
+
+        if (!meta.aoffi.empty()) {
+            mask = mask | spv::ImageOperandsMask::Offset;
+            operands.push_back(GetOffsetCoordinates(operation));
+        }
+
+        if (meta.depth_compare) {
+            // Depth sampling
+            UNIMPLEMENTED_IF(meta.bias);
+            const Id dref = AsFloat(Visit(meta.depth_compare));
+            if (can_implicit) {
+                return {
+                    OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
+            } else {
+                return {
+                    OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
+            }
+        }
+
        Id texture;
        if (can_implicit) {
            texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
        } else {
-            texture = OpImageSampleExplicitLod(t_float4, sampler, coords,
-                                               mask | spv::ImageOperandsMask::Lod, v_float_zero,
-                                               operands);
+            texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
        }
        return GetTextureElement(operation, texture, Type::Float);
    }
@@ -1601,7 +1609,8 @@ private:
        const Id lod = AsFloat(Visit(meta.lod));

        spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
-        std::vector<Id> operands;
+        std::vector<Id> operands{lod};
+
        if (!meta.aoffi.empty()) {
            mask = mask | spv::ImageOperandsMask::Offset;
            operands.push_back(GetOffsetCoordinates(operation));
@@ -1609,11 +1618,10 @@ private:

        if (meta.sampler.IsShadow()) {
            const Id dref = AsFloat(Visit(meta.depth_compare));
-            return {
-                OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, lod, operands),
-                Type::Float};
+            return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
        }
-        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, lod, operands);
+        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
        return GetTextureElement(operation, texture, Type::Float);
    }

@@ -1722,7 +1730,7 @@ private:
        const std::vector grad = {dx, dy};

        static constexpr auto mask = spv::ImageOperandsMask::Grad;
-        const Id texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, grad);
+        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
        return GetTextureElement(operation, texture, Type::Float);
    }

@@ -1833,7 +1841,7 @@ private:
    }

    void PreExit() {
-        if (stage == ShaderType::Vertex) {
+        if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
            const u32 position_index = out_indices.position.value();
            const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
            const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
@@ -1860,12 +1868,18 @@ private:
            // rendertargets/components are skipped in the register assignment.
            u32 current_reg = 0;
            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+                if (!specialization.enabled_rendertargets[rt]) {
+                    // Skip rendertargets that are not enabled
+                    continue;
+                }
                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
                for (u32 component = 0; component < 4; ++component) {
+                    const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component);
                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
-                        OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
-                                SafeGetRegister(current_reg));
+                        OpStore(pointer, SafeGetRegister(current_reg));
                        ++current_reg;
+                    } else {
+                        OpStore(pointer, component == 3 ? v_float_one : v_float_zero);
                    }
                }
            }
@@ -1971,6 +1985,18 @@ private:
        return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
    }

+    /// Emits an OpMemoryBarrier at device scope with acquire-release semantics over uniform,
+    /// workgroup, atomic counter and image memory. The operation argument is unused and an
+    /// empty expression is returned.
+    Expression MemoryBarrierGL(Operation) {
+        const auto scope = spv::Scope::Device;
+        const auto semantics =
+            spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+            spv::MemorySemanticsMask::WorkgroupMemory |
+            spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
+
+        // OpMemoryBarrier takes its scope and semantics as ids of integer constants.
+        OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
+                        Constant(t_uint, static_cast<u32>(semantics)));
+        return {};
+    }
+
    Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
        const Id id = OpVariable(type, storage);
        Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
@@ -1983,15 +2009,6 @@ private:
        return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
    }

-    bool IsRenderTargetUsed(u32 rt) const {
-        for (u32 component = 0; component < 4; ++component) {
-            if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
    template <typename... Args>
    Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
        std::vector<Id> members;
@@ -2374,6 +2391,8 @@ private:

        &SPIRVDecompiler::ThreadId,
        &SPIRVDecompiler::ShuffleIndexed,
+
+        &SPIRVDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

@@ -2538,29 +2557,7 @@ public:
    }

    Id operator()(const ExprCondCode& expr) {
-        const Node cc = decomp.ir.GetConditionCode(expr.cc);
-        Id target;
-
-        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
-            const auto index = pred->GetIndex();
-            switch (index) {
-            case Tegra::Shader::Pred::NeverExecute:
-                target = decomp.v_false;
-                break;
-            case Tegra::Shader::Pred::UnusedIndex:
-                target = decomp.v_true;
-                break;
-            default:
-                target = decomp.predicates.at(index);
-                break;
-            }
-        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
-            target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
-        } else {
-            UNREACHABLE();
-        }
-
-        return decomp.OpLoad(decomp.t_bool, target);
+        return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
    }

    Id operator()(const ExprVar& expr) {
@@ -2575,7 +2572,7 @@ public:
        const Id target = decomp.Constant(decomp.t_uint, expr.value);
        Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
        gpr = decomp.OpBitcast(decomp.t_uint, gpr);
-        return decomp.OpLogicalEqual(decomp.t_uint, gpr, target);
+        return decomp.OpIEqual(decomp.t_bool, gpr, target);
    }

    Id Visit(const Expr& node) {
@@ -2645,11 +2642,11 @@ public:
        const Id loop_label = decomp.OpLabel();
        const Id endloop_label = decomp.OpLabel();
        const Id loop_start_block = decomp.OpLabel();
-        const Id loop_end_block = decomp.OpLabel();
+        const Id loop_continue_block = decomp.OpLabel();
        current_loop_exit = endloop_label;
        decomp.OpBranch(loop_label);
        decomp.AddLabel(loop_label);
-        decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone);
+        decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
        decomp.OpBranch(loop_start_block);
        decomp.AddLabel(loop_start_block);
        ASTNode current = ast.nodes.GetFirst();
@@ -2657,6 +2654,8 @@ public:
            Visit(current);
            current = current->GetNext();
        }
+        decomp.OpBranch(loop_continue_block);
+        decomp.AddLabel(loop_continue_block);
        ExprDecompiler expr_parser{decomp};
        const Id condition = expr_parser.Visit(ast.condition);
        decomp.OpBranchConditional(condition, loop_label, endloop_label);
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -94,6 +94,7 @@ struct Specialization final {
    Maxwell::PrimitiveTopology primitive_topology{};
    std::optional<float> point_size{};
    std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
+    bool ndc_minus_one_to_one{};

    // Tessellation specific
    struct {
@@ -101,6 +102,9 @@ struct Specialization final {
        Maxwell::TessellationSpacing spacing{};
        bool clockwise{};
    } tessellation;
+
+    // Fragment specific
+    std::bitset<8> enabled_rendertargets;
 };
 // Old gcc versions don't consider this trivially copyable.
 // static_assert(std::is_trivially_copyable_v<Specialization>);
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -0,0 +1,144 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+
+namespace Vulkan {
+
+// Bundles a buffer with a watch on `fence` and the epoch in which it was last used.
+VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
+                                                  u64 last_epoch)
+    : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+
+// Takes over the buffer and the fence watch of `rhs`; the epoch is copied.
+VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
+    buffer = std::move(rhs.buffer);
+    watch = std::move(rhs.watch);
+    last_epoch = rhs.last_epoch;
+}
+
+VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
+
+// Move assignment; performs the same member-wise transfer as the move constructor.
+VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
+    StagingBuffer&& rhs) noexcept {
+    buffer = std::move(rhs.buffer);
+    watch = std::move(rhs.watch);
+    last_epoch = rhs.last_epoch;
+    return *this;
+}
+
+// Caches device.IsIntegrated(); GetCache() and TickFrame() branch on the cached value.
+VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
+                                         VKScheduler& scheduler)
+    : device{device}, memory_manager{memory_manager}, scheduler{scheduler},
+      is_device_integrated{device.IsIntegrated()} {}
+
+VKStagingBufferPool::~VKStagingBufferPool() = default;
+
+// Returns a buffer from the size level of `size`: a pooled entry when one can be claimed with the
+// fence returned by scheduler.GetFence(), otherwise a newly created buffer.
+VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
+    if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
+        return *buffer;
+    }
+    return CreateStagingBuffer(size, host_visible);
+}
+
+// Advances the epoch and sweeps the next size level for stale entries, cycling over NumLevels.
+void VKStagingBufferPool::TickFrame() {
+    ++epoch;
+    current_delete_level = (current_delete_level + 1) % NumLevels;
+
+    ReleaseCache(true);
+    // Integrated devices route every request to the host cache (see GetCache), so the second
+    // sweep is only needed when a separate device cache is in use.
+    if (!is_device_integrated) {
+        ReleaseCache(false);
+    }
+}
+
+// Scans the size level of `size` for an entry whose watch accepts scheduler.GetFence().
+// A claimed entry gets its last-use epoch refreshed; nullptr is returned when none is found.
+VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
+    for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
+        if (entry.watch.TryWatch(scheduler.GetFence())) {
+            entry.last_epoch = epoch;
+            return entry.buffer.get();
+        }
+    }
+    return nullptr;
+}
+
+// Creates a buffer of 1 << Log2Ceil64(size) bytes, commits memory for it and appends it to the
+// matching size level, watched by the fence returned by scheduler.GetFence().
+VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
+    const auto usage =
+        vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
+        vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
+    const u32 log2 = Common::Log2Ceil64(size);
+    const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+    const auto dev = device.GetLogical();
+    auto buffer = std::make_unique<VKBuffer>();
+    buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader());
+    buffer->commit = memory_manager.Commit(*buffer->handle, host_visible);
+
+    auto& entries = GetCache(host_visible)[log2].entries;
+    return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
+}
+
+// Selects the cache for a request. Integrated devices use the host cache for everything.
+VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
+    return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
+}
+
+// Sweeps the level selected by current_delete_level in the cache chosen by `host_visible`.
+void VKStagingBufferPool::ReleaseCache(bool host_visible) {
+    // ReleaseLevel reports the number of bytes it freed; nothing here consumes that value.
+    ReleaseLevel(GetCache(host_visible), current_delete_level);
+}
+
+// Examines up to deletions_per_tick entries of level `log2`, starting at the level's delete_index,
+// and erases those last used more than epochs_to_destroy epochs ago whose watch is not in use.
+// Returns the erased entry count multiplied by the level's buffer size (1 << log2).
+u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
+    static constexpr u64 epochs_to_destroy = 180;
+    static constexpr std::size_t deletions_per_tick = 16;
+
+    auto& staging = cache[log2];
+    auto& entries = staging.entries;
+    const std::size_t old_size = entries.size();
+
+    const auto is_deleteable = [this](const auto& entry) {
+        return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+    };
+    const std::size_t begin_offset = staging.delete_index;
+    const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
+    const auto begin = std::begin(entries) + begin_offset;
+    const auto end = std::begin(entries) + end_offset;
+    entries.erase(std::remove_if(begin, end, is_deleteable), end);
+
+    const std::size_t new_size = entries.size();
+    // Move the sweep window forward for the next call; wrap around once it passes the end.
+    staging.delete_index += deletions_per_tick;
+    if (staging.delete_index >= new_size) {
+        staging.delete_index = 0;
+    }
+
+    return (1ULL << log2) * (old_size - new_size);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -0,0 +1,93 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <climits>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFenceWatch;
+class VKScheduler;
+
+/// Buffer handle paired with the memory commit that backs it.
+struct VKBuffer final {
+    UniqueBuffer handle;
+    VKMemoryCommit commit;
+};
+
+/// Pool of reusable buffers grouped into power-of-two size levels.
+class VKStagingBufferPool final {
+public:
+    explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
+                                 VKScheduler& scheduler);
+    ~VKStagingBufferPool();
+
+    /// Returns a buffer from the size level of `size`, reusing a pooled one when possible.
+    VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
+
+    /// Advances the pool's epoch and releases stale buffers of one size level.
+    void TickFrame();
+
+private:
+    /// Pool entry: the buffer, the fence watch guarding its reuse and its last-use epoch.
+    struct StagingBuffer final {
+        explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
+        StagingBuffer(StagingBuffer&& rhs) noexcept;
+        StagingBuffer(const StagingBuffer&) = delete;
+        ~StagingBuffer();
+
+        StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+
+        std::unique_ptr<VKBuffer> buffer;
+        VKFenceWatch watch;
+        u64 last_epoch = 0;
+    };
+
+    /// Entries of one size level plus the position where the next release sweep starts.
+    struct StagingBuffers final {
+        std::vector<StagingBuffer> entries;
+        std::size_t delete_index = 0;
+    };
+
+    /// One level per bit of std::size_t.
+    static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
+    using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
+
+    VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
+
+    VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
+
+    StagingBuffersCache& GetCache(bool host_visible);
+
+    void ReleaseCache(bool host_visible);
+
+    u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
+
+    const VKDevice& device;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    const bool is_device_integrated;
+
+    StagingBuffersCache host_staging_buffers;
+    StagingBuffersCache device_staging_buffers;
+
+    u64 epoch = 0;
+
+    std::size_t current_delete_level = 0;
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C:
    case OpCode::Id::I2F_IMM: {
-        UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

-        Node value = [&]() {
+        Node value = [&] {
            switch (opcode->get().GetId()) {
            case OpCode::Id::I2F_R:
                return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                return Immediate(0);
            }
        }();
+
        const bool input_signed = instr.conversion.is_input_signed;
+
+        if (instr.conversion.src_size == Register::Size::Byte) {
+            const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+            if (offset > 0) {
+                value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+                                        std::move(value), Immediate(offset));
+            }
+        } else {
+            UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+        }
+
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -22,7 +22,23 @@ using Tegra::Shader::Register;

 namespace {

-u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+// Number of consecutive registers an LDG load of the given type fills.
+u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+    using Tegra::Shader::UniformType;
+    if (uniform_type == UniformType::UnsignedByte || uniform_type == UniformType::Single) {
+        return 1;
+    }
+    if (uniform_type == UniformType::Double) {
+        return 2;
+    }
+    if (uniform_type == UniformType::Quad || uniform_type == UniformType::UnsignedQuad) {
+        return 4;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+    return 1;
+}
+
+u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
        return 1;
@@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, false);

-        const u32 count = GetUniformTypeElementsCount(type);
+        const u32 count = GetLdgMemorySize(type);
        if (!real_address_base || !base_address) {
            // Tracking failed, load zeroes.
            for (u32 i = 0; i < count; ++i) {
@@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {

        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
-            const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
-            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
+            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+
+            if (type == Tegra::Shader::UniformType::UnsignedByte) {
+                // To handle unaligned loads, take the byte index from the two low bits of the
+                // address and extract that byte from the loaded uint32.
+                Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
+                byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+
+                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
+                                 Immediate(8));
+            }

            SetTemporary(bb, i, gmem);
        }
+
        for (u32 i = 0; i < count; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
@@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            break;
        }

-        const u32 count = GetUniformTypeElementsCount(type);
+        const u32 count = GetStgMemorySize(type);
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -257,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
+    case OpCode::Id::MEMBAR: {
+        UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
+        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+        bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+        break;
+    }
    case OpCode::Id::DEPBAR: {
        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
        break;
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -13,37 +13,65 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

+namespace {
+constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7;
+}
+
 u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

-    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
+    UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr);

-    const Node apply_mask = [&]() {
+    const Node apply_mask = [&] {
        switch (opcode->get().GetId()) {
        case OpCode::Id::R2P_IMM:
-            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+        case OpCode::Id::P2R_IMM:
+            return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
        default:
            UNREACHABLE();
-            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+            return Immediate(0);
        }
    }();
-    const Node mask = GetRegister(instr.gpr8);
-    const auto offset = static_cast<u32>(instr.r2p.byte) * 8;

-    constexpr u32 programmable_preds = 7;
-    for (u64 pred = 0; pred < programmable_preds; ++pred) {
-        const auto shift = static_cast<u32>(pred);
+    const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;

-        const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
-        const Node condition =
-            Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::R2P_IMM: {
+        const Node mask = GetRegister(instr.gpr8);

-        const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
-        const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
+            const auto shift = static_cast<u32>(pred);

-        const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
-        bb.push_back(Conditional(condition, {code}));
+            const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
+            const Node condition =
+                Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
+
+            const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
+            const Node value =
+                Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
+
+            const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
+            bb.push_back(Conditional(condition, {code}));
+        }
+        break;
+    }
+    case OpCode::Id::P2R_IMM: {
+        Node value = Immediate(0);
+        for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) {
+            Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred),
+                                 Immediate(0));
+            value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit));
+        }
+        value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask);
+        value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8);
+
+        SetRegister(bb, instr.gpr0, std::move(value));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
+        break;
    }

    return pc;
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -89,56 +89,70 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        [[fallthrough]];
    }
    case OpCode::Id::TLD4: {
-        ASSERT(instr.tld4.array == 0);
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
-        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
-                             "PTP is not implemented");
-
        const auto texture_type = instr.tld4.texture_type.Value();
        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
-        WriteTexInstructionFloat(
-            bb, instr,
-            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
+        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
+                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
+                                             is_ptp, is_bindless));
        break;
    }
    case OpCode::Id::TLD4S: {
-        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
-                             "AOFFI is not implemented");
-
-        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+        constexpr std::size_t num_coords = 2;
+        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        std::vector<Node> coords;
-        if (depth_compare) {
+        std::vector<Node> aoffi;
+        Node depth_compare;
+        if (is_depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
-            coords.push_back(op_b);
+            if (is_aoffi) {
+                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+                depth_compare = GetRegister(instr.gpr20.Value() + 1);
+            } else {
+                depth_compare = op_b;
+            }
        } else {
+            // There's no depth compare
            coords.push_back(op_a);
-            coords.push_back(op_b);
+            if (is_aoffi) {
+                coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+            } else {
+                coords.push_back(op_b);
+            }
        }
        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));

-        const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
-        const auto& sampler = GetSampler(instr.sampler, info);
+        const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
+        const Sampler& sampler = *GetSampler(instr.sampler, info);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

-        WriteTexsInstructionFloat(bb, instr, values, true);
+        if (instr.tld4s.fp16_flag) {
+            WriteTexsInstructionHalfFloat(bb, instr, values, true);
+        } else {
+            WriteTexsInstructionFloat(bb, instr, values, true);
+        }
        break;
    }
    case OpCode::Id::TXD_B:
@@ -154,9 +168,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        const auto texture_type = instr.txd.texture_type.Value();
        const auto coord_count = GetCoordCount(texture_type);

-        const auto& sampler = is_bindless
-                                  ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
-                                  : GetSampler(instr.sampler, {{texture_type, false, false}});
+        const Sampler* sampler = is_bindless
+                                     ? GetBindlessSampler(base_reg, {{texture_type, false, false}})
+                                     : GetSampler(instr.sampler, {{texture_type, false, false}});
+        Node4 values;
+        if (sampler == nullptr) {
+            for (u32 element = 0; element < values.size(); ++element) {
+                values[element] = Immediate(0);
+            }
+            WriteTexInstructionFloat(bb, instr, values);
+            break;
+        }
        if (is_bindless) {
            base_reg++;
        }
@@ -170,9 +192,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
            derivates.push_back(GetRegister(derivate_reg + derivate + 1));
        }

-        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
-            MetaTexture meta{sampler, {}, {}, {}, derivates, {}, {}, {}, element};
+            MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
            values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
        }

@@ -187,9 +208,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        // TODO: The new commits on the texture refactor, change the way samplers work.
        // Sadly, not all texture instructions specify the type of texture their sampler
        // uses. This must be fixed at a later instance.
-        const auto& sampler =
+        const Sampler* sampler =
            is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);

+        if (sampler == nullptr) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 4; ++element) {
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
+                }
+                const Node value = Immediate(0);
+                SetTemporary(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+
        u32 indexer = 0;
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
@@ -197,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
-                MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta,
                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -223,9 +259,24 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {

        auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
-        const auto& sampler =
+        const Sampler* sampler =
            is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);

+        if (sampler == nullptr) {
+            u32 indexer = 0;
+            for (u32 element = 0; element < 2; ++element) {
+                if (!instr.tmml.IsComponentEnabled(element)) {
+                    continue;
+                }
+                const Node value = Immediate(0);
+                SetTemporary(bb, indexer++, value);
+            }
+            for (u32 i = 0; i < indexer; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+            }
+            break;
+        }
+
        std::vector<Node> coords;

        // TODO: Add coordinates for different samplers once other texture types are implemented.
@@ -251,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                continue;
            }
            auto params = coords;
-            MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporary(bb, indexer++, value);
        }
@@ -307,7 +358,7 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
                       sampler->is_buffer != 0};
 }

-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
                                    std::optional<SamplerInfo> sampler_info) {
    const auto offset = static_cast<u32>(sampler.index.Value());
    const auto info = GetSamplerInfo(sampler_info, offset);
@@ -319,21 +370,24 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
    if (it != used_samplers.end()) {
        ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
               it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
-        return *it;
+        return &*it;
    }

    // Otherwise create a new mapping for this sampler
    const auto next_index = static_cast<u32>(used_samplers.size());
-    return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
-                                      info.is_buffer);
+    return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
+                                       info.is_buffer);
 }

-const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                            std::optional<SamplerInfo> sampler_info) {
    const Node sampler_register = GetRegister(reg);
    const auto [base_sampler, buffer, offset] =
        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
    ASSERT(base_sampler != nullptr);
+    if (base_sampler == nullptr) {
+        return nullptr;
+    }

    const auto info = GetSamplerInfo(sampler_info, offset, buffer);

@@ -346,13 +400,13 @@ const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
    if (it != used_samplers.end()) {
        ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
               it->IsShadow() == info.is_shadow);
-        return *it;
+        return &*it;
    }

    // Otherwise create a new mapping for this sampler
    const auto next_index = static_cast<u32>(used_samplers.size());
-    return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                      info.is_shadow, info.is_buffer);
+    return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+                                       info.is_shadow, info.is_buffer);
 }

 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -395,14 +449,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const
 }

 void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
-                                             const Node4& components) {
+                                             const Node4& components, bool ignore_mask) {
    // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
    // float instruction).

    Node4 values;
    u32 dest_elem = 0;
    for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
+        if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
            continue;
        values[dest_elem++] = components[component];
    }
@@ -438,8 +492,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                         "This method is not supported.");

    const SamplerInfo info{texture_type, is_array, is_shadow, false};
-    const auto& sampler =
+    const Sampler* sampler =
        is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+    Node4 values;
+    if (sampler == nullptr) {
+        for (u32 element = 0; element < values.size(); ++element) {
+            values[element] = Immediate(0);
+        }
+        return values;
+    }

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
@@ -478,10 +539,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
        }
    }

-    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto copy_coords = coords;
-        MetaTexture meta{sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
+        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

@@ -578,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 }

 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array, bool is_aoffi, bool is_bindless) {
+                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
+    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
+
    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled arrays index is always stored in the gpr8 field
@@ -594,15 +656,25 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    u64 parameter_register = instr.gpr20.Value();

    const SamplerInfo info{texture_type, is_array, depth_compare, false};
-    const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
-                                      : GetSampler(instr.sampler, info);
-
-    std::vector<Node> aoffi;
-    if (is_aoffi) {
-        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+                                         : GetSampler(instr.sampler, info);
+    Node4 values;
+    if (sampler == nullptr) {
+        for (u32 element = 0; element < values.size(); ++element) {
+            values[element] = Immediate(0);
+        }
+        return values;
    }

-    Node dc{};
+    std::vector<Node> aoffi, ptp;
+    if (is_aoffi) {
+        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+    } else if (is_ptp) {
+        ptp = GetPtpCoordinates(
+            {GetRegister(parameter_register++), GetRegister(parameter_register++)});
+    }
+
+    Node dc;
    if (depth_compare) {
        dc = GetRegister(parameter_register++);
    }
@@ -610,11 +682,10 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
                                       : Immediate(static_cast<u32>(instr.tld4.component));

-    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
-                         element};
+        MetaTexture meta{
+            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

@@ -642,12 +713,12 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
    // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};

-    const auto& sampler = GetSampler(instr.sampler);
+    const auto& sampler = *GetSampler(instr.sampler);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }

@@ -655,7 +726,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
 }

 Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
-    const auto& sampler = GetSampler(instr.sampler);
+    const Sampler& sampler = *GetSampler(instr.sampler);

    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
@@ -680,19 +751,24 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

-    // Fill empty entries from the guest sampler.
+    // Fill empty entries from the guest sampler
    const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
    if (type_coord_count != entry_coord_count) {
        LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
-    }
-    for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
-        coords.push_back(GetRegister(Register::ZeroIndex));
+
+        // When the size is higher we insert zeroes
+        for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+            coords.push_back(GetRegister(Register::ZeroIndex));
+        }
+
+        // Then we ensure the size matches the number of entries (dropping unused values)
+        coords.resize(entry_coord_count);
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
@@ -757,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
    return aoffi;
 }

+std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
+    static constexpr u32 num_entries = 8;
+    // Eight entries, four per register, each a 6-bit two's complement value at an 8-bit stride.
+    std::vector<Node> ptp;
+    ptp.reserve(num_entries);
+    // When TrackImmediate resolves both registers, the entries are folded into immediates below.
+    const auto global_size = static_cast<s64>(global_code.size());
+    const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
+    const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
+    if (!low || !high) { // Otherwise emit IR that extracts and sign-extends them
+        for (u32 entry = 0; entry < num_entries; ++entry) {
+            const u32 reg = entry / 4;
+            const u32 offset = entry % 4;
+            const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
+            const Node condition =
+                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
+            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
+            ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
+        }
+        return ptp;
+    }
+    // Both halves are known: unpack and sign-extend every entry immediately.
+    const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
+    for (u32 entry = 0; entry < num_entries; ++entry) {
+        s32 value = (immediate >> (entry * 8)) & 0b111111;
+        if (value >= 32) {
+            value -= 64;
+        }
+        ptp.push_back(Immediate(value));
+    }
+
+    return ptp;
+}
+
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -189,6 +189,8 @@ enum class OperationCode {
    ThreadId,       /// () -> uint
    ShuffleIndexed, /// (uint value, uint index) -> uint

+    MemoryBarrierGL, /// () -> void
+
    Amount,
 };

@@ -372,6 +374,7 @@ struct MetaTexture {
    Node array;
    Node depth_compare;
    std::vector<Node> aoffi;
+    std::vector<Node> ptp;
    std::vector<Node> derivates;
    Node bias;
    Node lod;
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -313,11 +313,11 @@ private:
                               std::optional<u32> buffer = std::nullopt);

    /// Accesses a texture sampler
-    const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
+    const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler,
                              std::optional<SamplerInfo> sampler_info = std::nullopt);

    /// Accesses a texture sampler for a bindless texture.
-    const Sampler& GetBindlessSampler(Tegra::Shader::Register reg,
+    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
                                      std::optional<SamplerInfo> sampler_info = std::nullopt);

    /// Accesses an image.
@@ -338,7 +338,7 @@ private:
    void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components, bool ignore_mask = false);
    void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                       const Node4& components);
+                                       const Node4& components, bool ignore_mask = false);

    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -350,7 +350,8 @@ private:
                      bool is_array);

    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
+                      bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
+                      bool is_bindless);

    Node4 GetTldCode(Tegra::Shader::Instruction instr);

@@ -363,6 +364,8 @@ private:

    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);

+    std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
+
    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const {
    }
 }

+u32 SurfaceParams::GetBlockSize() const {
+    const u32 x = 64U << block_width; // 64 and 8 are the x and y extents of a single GOB
+    const u32 y = 8U << block_height;
+    const u32 z = 1U << block_depth;
+    return x * y * z; // Product of the three extents scaled by the block dimensions
+}
+
+std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
+    const u32 x_pixels = 64U / GetBytesPerPixel(); // Pixels that fit in one 64-byte GOB row
+    const u32 x = x_pixels << block_width;
+    const u32 y = 8U << block_height;
+    return {x, y}; // {width, height} of one block
+}
+
+std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
+    const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
+    const u32 block_size = GetBlockSize();
+    const u32 block_index = offset / block_size; // Which block the offset falls in
+    const u32 gob_offset = offset % block_size;
+    const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GetGOBSize());
+    const u32 x_gob_pixels = 64U / GetBytesPerPixel();
+    const u32 x_block_pixels = x_gob_pixels << block_width;
+    const u32 y_block_pixels = 8U << block_height;
+    const u32 z_block_pixels = 1U << block_depth;
+    const u32 x_blocks = div_ceil(width, x_block_pixels); // Blocks needed to cover the width
+    const u32 y_blocks = div_ceil(height, y_block_pixels);
+    const u32 z_blocks = div_ceil(depth, z_block_pixels);
+    const u32 base_x = block_index % x_blocks; // Block position; x varies fastest, then y, then z
+    const u32 base_y = (block_index / x_blocks) % y_blocks;
+    const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
+    u32 x = base_x * x_block_pixels; // Coordinates of the start of that block
+    u32 y = base_y * y_block_pixels;
+    u32 z = base_z * z_block_pixels;
+    z += gob_index >> block_height; // Inside a block, GOBs advance along y first, then along z
+    y += (gob_index * 8U) % y_block_pixels;
+    return {x, y, z}; // Note: x is left at the start of the block
+}
+
 } // namespace VideoCommon
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <utility>
+
 #include "common/alignment.h"
 #include "common/bit_util.h"
 #include "common/cityhash.h"
@@ -136,6 +138,15 @@ public:

    std::size_t GetConvertedMipmapSize(u32 level) const;

+    /// Returns the size in bytes of one Tegra block of this texture in guest memory layout.
+    u32 GetBlockSize() const;
+
+    /// Returns the width and height, in pixels, of a single block.
+    std::pair<u32, u32> GetBlockXY() const;
+
+    /// Converts a memory offset inside the texture into the x, y, z coordinates it maps to.
+    std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
+
    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const {
        return GetLayerSize(false, false);
@@ -269,7 +280,8 @@ private:

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
-        return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
+        return GetLayerSize(as_host_size, uncompressed) *
+               (layer_only ? 1U : (is_layered ? depth : 1U));
    }

    /// Returns the size of a layer
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -615,6 +615,86 @@ private:
        return {{new_surface, new_surface->GetMainView()}};
    }

+    /**
+     * Takes care of managing 3D textures and their slices. Does HLE methods for reconstructing the
+     * 3D textures within the GPU if possible. Falls back to LLE when it isn't possible to use any
+     * of the HLE methods.
+     *
+     * @param overlaps          The overlapping surfaces registered in the cache.
+     * @param params            The parameters on the new surface.
+     * @param gpu_addr          The starting address of the new surface.
+     * @param cache_addr        The starting address of the new surface on physical memory.
+     * @param preserve_contents Whether the new surface is loaded from memory or left blank.
+     * @return The surface and its view when handled here, std::nullopt to let the caller decide.
+     */
+    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
+                                                               const SurfaceParams& params,
+                                                               const GPUVAddr gpu_addr,
+                                                               const CacheAddr cache_addr,
+                                                               bool preserve_contents) {
+        if (params.target == SurfaceTarget::Texture3D) {
+            bool failed = false; // Set when an overlap can't be merged into the 3D surface
+            if (params.num_levels > 1) {
+                // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
+                return std::nullopt;
+            }
+            TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+            bool modified = false;
+            for (auto& surface : overlaps) {
+                const SurfaceParams& src_params = surface->GetSurfaceParams();
+                if (src_params.target != SurfaceTarget::Texture2D) {
+                    failed = true;
+                    break;
+                }
+                if (src_params.height != params.height) {
+                    failed = true;
+                    break;
+                }
+                if (src_params.block_depth != params.block_depth ||
+                    src_params.block_height != params.block_height) {
+                    failed = true;
+                    break;
+                }
+                const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
+                const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); // Only z is used below
+                modified |= surface->IsModified();
+                const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
+                                             1);
+                ImageCopy(surface, new_surface, copy_params);
+            }
+            if (failed) {
+                return std::nullopt;
+            }
+            for (const auto& surface : overlaps) { // Every overlap was copied: drop the originals
+                Unregister(surface);
+            }
+            new_surface->MarkAsModified(modified, Tick());
+            Register(new_surface);
+            auto view = new_surface->GetMainView();
+            return {{std::move(new_surface), view}};
+        } else { // Not a 3D target: reuse a full match at this address or create a new surface
+            for (const auto& surface : overlaps) {
+                if (!surface->MatchTarget(params.target)) {
+                    if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
+                        if (Settings::values.use_accurate_gpu_emulation) {
+                            return std::nullopt;
+                        }
+                        Unregister(surface);
+                        return InitializeSurface(gpu_addr, params, preserve_contents);
+                    }
+                    return std::nullopt;
+                }
+                if (surface->GetCacheAddr() != cache_addr) {
+                    continue;
+                }
+                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
+                    return {{surface, surface->GetMainView()}};
+                }
+            }
+            return InitializeSurface(gpu_addr, params, preserve_contents);
+        }
+    }
+
    /**
     * Gets the starting address and parameters of a candidate surface and tries
     * to find a matching surface within the cache. This is done in 3 big steps:
@@ -687,6 +767,15 @@ private:
            }
        }

+        // Check if it's a 3D texture
+        if (params.block_depth > 0) {
+            auto surface =
+                Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
+            if (surface) {
+                return *surface;
+            }
+        }
+
        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -12,6 +12,10 @@ namespace Tegra::Texture {

 // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
 // an small rect of (64/bytes_per_pixel)X8.
+inline std::size_t GetGOBSize() {
+    return 512; // 64 bytes in x times 8 rows in y; equals 1 << GetGOBSizeShift()
+}
+
 inline std::size_t GetGOBSizeShift() {
    return 9;
 }
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -236,6 +236,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
        widget->setVisible(false);

    analog_map_stick = {ui->buttonLStickAnalog, ui->buttonRStickAnalog};
+    analog_map_deadzone = {ui->sliderLStickDeadzone, ui->sliderRStickDeadzone};
+    analog_map_deadzone_label = {ui->labelLStickDeadzone, ui->labelRStickDeadzone};

    for (int button_id = 0; button_id < Settings::NativeButton::NumButtons; button_id++) {
        auto* const button = button_map[button_id];
@@ -326,6 +328,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
                    InputCommon::Polling::DeviceType::Analog);
            }
        });
+        connect(analog_map_deadzone[analog_id], &QSlider::valueChanged, [=] {
+            const float deadzone = analog_map_deadzone[analog_id]->value() / 100.0f;
+            analog_map_deadzone_label[analog_id]->setText(tr("Deadzone: %1").arg(deadzone));
+            analogs_param[analog_id].Set("deadzone", deadzone);
+        });
    }

    connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
@@ -484,7 +491,7 @@ void ConfigureInputPlayer::ClearAll() {
                continue;
            }

-            analogs_param[analog_id].Erase(analog_sub_buttons[sub_button_id]);
+            analogs_param[analog_id].Clear();
        }
    }

@@ -508,6 +515,23 @@ void ConfigureInputPlayer::UpdateButtonLabels() {
                AnalogToText(analogs_param[analog_id], analog_sub_buttons[sub_button_id]));
        }
        analog_map_stick[analog_id]->setText(tr("Set Analog Stick"));
+
+        auto& param = analogs_param[analog_id];
+        auto* const analog_deadzone_slider = analog_map_deadzone[analog_id];
+        auto* const analog_deadzone_label = analog_map_deadzone_label[analog_id];
+
+        if (param.Has("engine") && param.Get("engine", "") == "sdl") {
+            if (!param.Has("deadzone")) {
+                param.Set("deadzone", 0.1f);
+            }
+
+            analog_deadzone_slider->setValue(static_cast<int>(param.Get("deadzone", 0.1f) * 100));
+            analog_deadzone_slider->setVisible(true);
+            analog_deadzone_label->setVisible(true);
+        } else {
+            analog_deadzone_slider->setVisible(false);
+            analog_deadzone_label->setVisible(false);
+        }
    }
 }

--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -97,6 +97,8 @@ private:
    /// Analog inputs are also represented each with a single button, used to configure with an
    /// actual analog stick
    std::array<QPushButton*, Settings::NativeAnalog::NumAnalogs> analog_map_stick;
+    std::array<QSlider*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone;
+    std::array<QLabel*, Settings::NativeAnalog::NumAnalogs> analog_map_deadzone_label;

    static const std::array<std::string, ANALOG_SUB_BUTTONS_NUM> analog_sub_buttons;

--- a/src/yuzu/configuration/configure_input_player.ui
+++ b/src/yuzu/configuration/configure_input_player.ui
@@ -170,6 +170,44 @@
          </item>
         </layout>
        </item>
+        <item row="4" column="0" colspan="2">
+         <layout class="QVBoxLayout" name="sliderRStickDeadzoneVerticalLayout">
+          <item>
+           <layout class="QHBoxLayout" name="sliderRStickDeadzoneHorizontalLayout">
+            <item>
+             <widget class="QLabel" name="labelRStickDeadzone">
+              <property name="text">
+               <string>Deadzone: 0</string>
+              </property>
+              <property name="alignment">
+               <enum>Qt::AlignHCenter</enum>
+              </property>
+             </widget>
+            </item>
+           </layout>
+          </item>
+          <item>
+           <widget class="QSlider" name="sliderRStickDeadzone">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </item>
+        <item row="5" column="0">
+         <spacer name="RStick_verticalSpacer">
+          <property name="orientation">
+           <enum>Qt::Vertical</enum>
+          </property>
+          <property name="sizeHint" stdset="0">
+           <size>
+            <width>0</width>
+            <height>0</height>
+           </size>
+          </property>
+         </spacer>
+        </item>
       </layout>
      </widget>
     </item>
@@ -745,6 +783,47 @@
          </item>
         </layout>
        </item>
+        <item row="5" column="1" colspan="2">
+         <layout class="QVBoxLayout" name="sliderLStickDeadzoneVerticalLayout">
+          <property name="sizeConstraint">
+           <enum>QLayout::SetDefaultConstraint</enum>
+          </property>
+          <item>
+           <layout class="QHBoxLayout" name="sliderLStickDeadzoneHorizontalLayout">
+            <item>
+             <widget class="QLabel" name="labelLStickDeadzone">
+              <property name="text">
+               <string>Deadzone: 0</string>
+              </property>
+              <property name="alignment">
+               <enum>Qt::AlignHCenter</enum>
+              </property>
+             </widget>
+            </item>
+           </layout>
+          </item>
+          <item>
+           <widget class="QSlider" name="sliderLStickDeadzone">
+            <property name="orientation">
+             <enum>Qt::Horizontal</enum>
+            </property>
+           </widget>
+          </item>
+         </layout>
+        </item>
+        <item row="6" column="1">
+         <spacer name="LStick_verticalSpacer">
+          <property name="orientation">
+           <enum>Qt::Vertical</enum>
+          </property>
+          <property name="sizeHint" stdset="0">
+           <size>
+            <width>0</width>
+            <height>0</height>
+           </size>
+          </property>
+         </spacer>
+        </item>
       </layout>
      </widget>
     </item>
Author	SHA1	Message	Date
CJBok	2fa9a96309	const correction	2020-01-03 10:30:51 +01:00
CJBok	90f9c830ca	clang	2020-01-03 09:31:54 +01:00
CJBok	351e3fb72e	Update configure_input_player.cpp	2020-01-03 09:11:34 +01:00
CJBok	4a566b9828	Added deadzone controls for sdl engine at input settings	2020-01-03 08:54:57 +01:00
bunnei	ae0e481677	Merge pull request #3243 from ReinUsesLisp/topologies maxwell_to_gl: Implement missing primitive topologies	2020-01-01 20:33:33 -05:00
bunnei	028b2718ed	Merge pull request #3239 from ReinUsesLisp/p2r shader/p2r: Implement P2R Pr	2019-12-31 20:37:16 -05:00
Fernando Sahmkow	7bd447355f	Merge pull request #3248 from ReinUsesLisp/vk-image vk_image: Add an image object abstraction	2019-12-30 14:25:14 -04:00
Rodrigo Locatti	4cbb363d3f	vk_image: Avoid unnecesary equals	2019-12-30 13:28:23 -03:00
Fernando Sahmkow	287d5921cf	Merge pull request #3249 from ReinUsesLisp/vk-staging-buffer-pool vk_staging_buffer_pool: Add a staging pool for temporary operations	2019-12-30 12:25:59 -04:00
Rodrigo Locatti	f2c61bbe13	vk_staging_buffer_pool: Initialize last epoch to zero	2019-12-29 19:19:43 -03:00
Fernando Sahmkow	f846e3d6d0	Merge pull request #3250 from ReinUsesLisp/empty-fragment gl_rasterizer: Allow rendering without fragment shader	2019-12-28 14:33:53 -04:00
bunnei	8a76f816a4	Merge pull request #3228 from ReinUsesLisp/ptp shader/texture: Implement AOFFI and PTP for TLD4 and TLD4S	2019-12-26 21:43:44 -05:00
ReinUsesLisp	5b989f189f	gl_rasterizer: Allow rendering without fragment shader Rendering without a fragment shader is usually used in depth-only passes.	2019-12-26 16:38:49 -03:00
ReinUsesLisp	3813af2f3c	vk_staging_buffer_pool: Add a staging pool for temporary operations The job of this abstraction is to provide staging buffers for temporary operations. Think of image uploads or buffer uploads to device memory. It automatically deletes unused buffers.	2019-12-25 18:12:17 -03:00
ReinUsesLisp	c83bf7cd1e	vk_image: Add an image object abstraction This object's job is to contain an image and manage its transitions. Since Nvidia hardware doesn't know what a transition is but Vulkan requires them anyway, we have to state track image subresources individually. To avoid the overhead of tracking each subresource in images with many subresources (think of cubemap arrays with several mipmaps), this commit tracks when subresources have diverged. As long as this doesn't happen we can check the state of the first subresource (that will be shared with all subresources) and update accordingly. Image transitions are deferred to the scheduler command buffer.	2019-12-25 18:00:16 -03:00
Fernando Sahmkow	5619d24377	Merge pull request #3244 from ReinUsesLisp/vk-fps fixed_pipeline_state: Define structure and loaders	2019-12-25 14:31:29 -04:00
bunnei	4af569ee47	Merge pull request #3236 from ReinUsesLisp/rasterize-enable gl_rasterizer: Implement RASTERIZE_ENABLE	2019-12-24 22:54:10 -05:00
ReinUsesLisp	b9e3f5eb36	fixed_pipeline_state: Define symetric operator!= and mark as noexcept Marks as noexcept Hash, operator== and operator!= for consistency.	2019-12-24 18:24:08 -03:00
ReinUsesLisp	4a3026b16b	fixed_pipeline_state: Define structure and loaders The intention behind this hasheable structure is to describe the state of fixed function pipeline state that gets compiled to a single graphics pipeline state object. This is all dynamic state in OpenGL but Vulkan wants it in an immutable state, even if hardware can edit it freely. In this commit the structure is defined in an optimized state (it uses booleans, has paddings and many data entries that can be packed to single integers). This is intentional as an initial implementation that is easier to debug, implement and review. It will be optimized in later stages, or it might change if Vulkan gets more dynamic states.	2019-12-22 22:59:11 -03:00
ReinUsesLisp	5770418fb3	maxwell_3d: Add depth bounds registers	2019-12-22 22:55:06 -03:00
ReinUsesLisp	91d35559e5	maxwell_to_gl: Implement missing primitive topologies Many of these topologies are exclusively available in OpenGL.	2019-12-22 22:33:01 -03:00
bunnei	e976d0e924	Merge pull request #3241 from ReinUsesLisp/gl-shader-cache gl_shader_cache: Style changes	2019-12-22 16:23:46 -05:00
bunnei	1e76655f83	Merge pull request #3238 from ReinUsesLisp/vk-resource-manager vk_resource_manager: Catch device losses and other changes	2019-12-22 15:57:16 -05:00
bunnei	0f3ac9cfeb	Merge pull request #3203 from FernandoS27/tex-cache-fixes Texture Cache: Add HLE methods for building 3D textures	2019-12-22 14:25:13 -05:00
Fernando Sahmkow	3dc585d011	Merge pull request #3237 from ReinUsesLisp/vk-shader-decompiler vk_shader_decompiler: Misc changes	2019-12-22 12:36:56 -04:00
Fernando Sahmkow	218ee18417	Texture Cache: Improve documentation	2019-12-22 12:29:23 -04:00
Fernando Sahmkow	a3916588b6	Texture Cache: Address Feedback	2019-12-22 12:24:34 -04:00
Fernando Sahmkow	51c9e98677	Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios. This commit adds a series of HLE methods for handling 3D textures in general. This helps games that generate 3D textures on every frame and may reduce loading times for certain games.	2019-12-22 12:24:34 -04:00
Fernando Sahmkow	aea978e037	Merge pull request #3230 from ReinUsesLisp/vk-emu-shaders renderer_vulkan/shader: Add helper GLSL shaders	2019-12-22 11:23:09 -04:00
Fernando Sahmkow	27efcc15e9	Merge pull request #3240 from ReinUsesLisp/decomp-cond-code vk_shader_decompiler: Use Visit instead of reimplementing it	2019-12-22 11:20:55 -04:00
bunnei	16dcfacbfc	Merge pull request #3235 from ReinUsesLisp/ldg-u8 shader/memory: Implement LDG.U8 and unaligned U8 loads	2019-12-21 22:50:28 -05:00
ReinUsesLisp	1e16023d60	gl_shader_cache: Update commentary for shared memory Remove false commentary. Not dividing by 4 the size of shared memory is not a hack; it describes the number of integers, not bytes. While we are at it sort the generated code to put preprocessor lines on the top.	2019-12-20 22:51:21 -03:00
ReinUsesLisp	486c6a5316	gl_shader_cache: Remove unused entry in GetPrimitiveDescription	2019-12-20 22:49:30 -03:00
ReinUsesLisp	af93909c9c	vk_shader_decompiler: Use Visit instead of reimplementing it ExprCondCode visit implements the generic Visit. Use this instead of that one. As an intended side effect this fixes unwritten memory usages in cases when a negation of a condition code is used.	2019-12-20 21:36:25 -03:00
ReinUsesLisp	38d3a48873	shader/p2r: Implement P2R Pr P2R dumps predicate or condition codes state to a register. This is useful for unit testing.	2019-12-20 18:02:41 -03:00
ReinUsesLisp	cf27b59493	shader/r2p: Refactor P2R to support P2R	2019-12-20 17:55:42 -03:00
bunnei	7be65c6a68	Merge pull request #3234 from ReinUsesLisp/i2f-u8-selector shader/conversion: Implement byte selector in I2F	2019-12-19 22:36:26 -05:00
bunnei	6d55b14cc0	Merge pull request #3233 from ReinUsesLisp/mismatch-sizes shader/texture: Properly shrink unused entries in size mismatches	2019-12-19 20:40:27 -05:00
ReinUsesLisp	e41da22c8d	vk_resource_manager: Add entry to VKFence to test its usage	2019-12-19 16:31:34 -03:00
ReinUsesLisp	ec983a2451	vk_reosurce_manager: Add assert for releasing fences Notify the programmer when a request to release a fence is invalid because the fence is already free.	2019-12-19 16:31:34 -03:00
ReinUsesLisp	6ddffa010a	vk_resource_manager: Implement VKFenceWatch move constructor This allows us to put VKFenceWatch inside a std::vector without storing it in heap. On move we have to signal the fences where the new protected resource is, adding some overhead.	2019-12-19 16:31:34 -03:00
ReinUsesLisp	54747d60bc	vk_device: Add entry to catch device losses VK_NV_device_diagnostic_checkpoints allows us to push data to a Vulkan queue and then query it even after a device loss. This allows us to push the current pipeline object and see what was the call that killed the device.	2019-12-19 16:31:33 -03:00
ReinUsesLisp	2a63b3bdb9	vk_shader_decompiler: Fix full decompilation When full decompilation was enabled, labels were not being inserted and instructions were misused. Fix these bugs.	2019-12-19 16:24:45 -03:00
ReinUsesLisp	de918ebeb0	vk_shader_decompiler: Skip NDC correction when it is native Avoid changing gl_Position when the NDC used by the game is [0, 1] (Vulkan's native).	2019-12-19 16:24:45 -03:00
ReinUsesLisp	485c21eac3	vk_shader_decompiler: Normalize output fragment attachments Some games write from fragment shaders to an unexistant framebuffer attachment or they don't write to one when it exists in the framebuffer. Fix this by skipping writes or adding zeroes.	2019-12-19 16:24:45 -03:00
bunnei	1eb4a95d2b	Merge pull request #3232 from ReinUsesLisp/gl-decompiler-images gl_shader_decompiler: Add missing DeclareImages	2019-12-19 11:32:47 -05:00
bunnei	253aa52351	Merge pull request #3231 from ReinUsesLisp/tld4s-encoding shader_bytecode: Fix TLD4S encoding	2019-12-19 11:32:25 -05:00
ReinUsesLisp	f4a25f854c	vk_device: Add query for RGBA8Uint	2019-12-19 02:08:29 -03:00
ReinUsesLisp	abb33d4aec	vk_shader_decompiler: Update sirit and implement Texture AOFFI	2019-12-19 01:42:13 -03:00
bunnei	d53cf05513	Merge pull request #3221 from ReinUsesLisp/vk-scheduler vk_scheduler: Delegate commands to a worker thread and state track	2019-12-18 22:04:08 -05:00
ReinUsesLisp	da0aa4da6b	gl_rasterizer: Implement RASTERIZE_ENABLE RASTERIZE_ENABLE is the opposite of GL_RASTERIZER_DISCARD. Implement it naturally using this. NVN games expect rasterize to be enabled by default, reflect that in our initial GPU state.	2019-12-18 19:28:23 -03:00
ReinUsesLisp	ae8d4b6c0c	shader/memory: Implement LDG.U8 and unaligned U8 loads LDG can load single bytes instead of full integers or packs of integers. These have the advantage of loading bytes that are not aligned to 4 bytes. To emulate these this commit gets the byte being referenced (by doing "address & 3" and then using that to extract the byte from the loaded integer: result = bitfieldExtract(loaded_integer, (address % 4) * 8, 8)	2019-12-18 01:21:46 -03:00
ReinUsesLisp	a7d6bd1ef1	shader/conversion: Implement byte selector in I2F I2F's byte selector is used to choose what bytes to convert to float. e.g. if the input is 0xaabbccdd and the selector is ".B3" it will convert 0xaa. The default (when it's not shown in nvdisasm) is ".B0", in that example the default would convert 0xdd to float.	2019-12-18 00:41:22 -03:00
bunnei	c053269017	Merge pull request #3227 from amilajack/patch-1 delete appveyor config	2019-12-17 21:49:22 -05:00
ReinUsesLisp	15a753b9a5	shader/texture: Properly shrink unused entries in size mismatches When a image format mismatches we were inserting zeroes to the texture itself. This was not handling cases were the mismatch uses less coordinates than the guest shader code. Address that by resizing the vector.	2019-12-17 23:38:10 -03:00
ReinUsesLisp	e438079b50	gl_shader_decompiler: Add missing DeclareImages	2019-12-17 23:34:15 -03:00
ReinUsesLisp	8b26b4228b	shader_bytecode: Fix TLD4S encoding	2019-12-17 23:32:10 -03:00
bunnei	8825b88a45	Merge pull request #3173 from yuzu-emu/bunnei-spscqueue common: SPSCQueue: Notify after incrementing queue size.	2019-12-17 14:11:20 -05:00
Amila Welihinda	8a23c32cf0	delete .appeveyor dir	2019-12-17 00:20:34 -08:00
bunnei	67b8ecc73e	common: SPSCQueue: Notify after incrementing queue size.	2019-12-16 20:39:53 -05:00
ReinUsesLisp	b52297767e	renderer_vulkan/shader: Add helper GLSL shaders These shaders are used to specify code that is not dynamically generated in the Vulkan backend. Instead of packing it inside the build system, it's manually built and copied to the C++ file to avoid adding unnecessary build time dependencies. quad_array should be dropped in the future since it can be emulated with a memory pool generated from the CPU.	2019-12-16 17:59:08 -03:00
bunnei	65b1b05e05	Merge pull request #3182 from ReinUsesLisp/renderer-opengl renderer_opengl: Miscellaneous clean ups	2019-12-16 13:01:04 -05:00
ReinUsesLisp	e09c1fbc1f	shader/texture: Implement TLD4.PTP	2019-12-16 04:09:24 -03:00
ReinUsesLisp	844e4a297b	shader/texture: Enable arrayed TLD4	2019-12-16 02:37:21 -03:00
ReinUsesLisp	a87c85eba2	gl_shader_decompiler: Rename "sepparate" to "separate"	2019-12-16 02:12:51 -03:00
ReinUsesLisp	3d2c44848b	shader/texture: Implement AOFFI for TLD4S	2019-12-16 02:06:42 -03:00
ReinUsesLisp	3d9fff82c0	shader/texture: Remove unnecesary parenthesis	2019-12-16 01:52:33 -03:00
Rodrigo Locatti	eac075692b	Merge pull request #3219 from FernandoS27/fix-bindless Corrections and fixes to TLD4S & bindless samplers failing	2019-12-16 01:26:11 -03:00
Amila Welihinda	0471eb6dc7	delete appveyor config	2019-12-15 11:16:39 -08:00
bunnei	3d51153611	Merge pull request #3222 from ReinUsesLisp/maxwell-to-vk maxwell_to_vk: Use VK_EXT_index_type_uint8 and misc changes	2019-12-14 22:30:12 -05:00
bunnei	ccda77c8c4	Merge pull request #3224 from bunnei/boost-ext-update externals: Update boost-ext to include safe_numerics.	2019-12-14 16:13:47 -05:00
bunnei	035ec7d9de	Merge pull request #3213 from ReinUsesLisp/intel-mesa gl_device: Enable compute shaders for Intel Mesa drivers	2019-12-14 16:04:31 -05:00
bunnei	285705b5f4	externals: Update boost-ext to include safe_numerics. - This is useful to me for an upcoming change.	2019-12-14 03:04:42 -05:00
bunnei	2b650543c6	Merge pull request #3212 from ReinUsesLisp/fix-smem-lmem gl_shader_cache: Add missing new-line on emitted GLSL	2019-12-13 21:35:29 -05:00
ReinUsesLisp	e3ea583893	maxwell_to_vk: Improve image format table and add more formats A1B5G5R5 uses A1R5G5B5. This is flipped with image view swizzles; flushing is still not properly implemented on Vulkan for this particular format.	2019-12-13 03:12:29 -03:00
ReinUsesLisp	f27b21077d	maxwell_to_vk: Implement more vertex formats	2019-12-13 03:12:28 -03:00
ReinUsesLisp	8db8631d81	maxwell_to_vk: Implement more primitive topologies Add an extra argument to query device capabilities in the future. The intention behind this is to use native quads, quad strips, line loops and polygons if these are released for Vulkan.	2019-12-13 03:12:28 -03:00
ReinUsesLisp	15513f0801	maxwell_to_vk: Approach GL_CLAMP closer to the GL spec The OpenGL spec defines GL_CLAMP's formula similarly to CLAMP_TO_EDGE and CLAMP_TO_BORDER depending on the filter mode used. It doesn't exactly behave like this, but it's the closest we can get with what Vulkan offers without emulating it by injecting shader code.	2019-12-13 03:12:28 -03:00
ReinUsesLisp	f845df8651	maxwell_to_vk: Use VK_EXT_index_type_uint8 when available	2019-12-13 02:37:23 -03:00
ReinUsesLisp	2df9a2dcaf	vk_scheduler: Delegate commands to a worker thread and state track Introduce a worker thread approach for delegating Vulkan work derived from dxvk's approach. https://github.com/doitsujin/dxvk Now that the scheduler is what handles all Vulkan work related to command streaming, store state tracking in itself. This way we can know when to reupload Vulkan dynamic state to the queue (since this one is invalidated between command buffers unlike NVN). We can also store the renderpass state and graphics pipeline bound to avoid redundant binds and renderpass begins/ends.	2019-12-13 02:24:48 -03:00
bunnei	6d0d79109b	Merge pull request #3214 from lioncash/svc-func kernel/svc: Amend function signature of SignalProcessWideKey	2019-12-12 21:32:36 -05:00
bunnei	8fc49a83b6	Merge pull request #3217 from jhol/fix-boost-include Added missing include	2019-12-11 22:21:24 -05:00
Fernando Sahmkow	c0ee0aa1a8	Shader_IR: Correct TLD4S Depth Compare.	2019-12-11 19:53:17 -04:00
Fernando Sahmkow	af89723fa3	Shader_Ir: Correct TLD4S encoding and implement f16 flag.	2019-12-11 19:53:17 -04:00
Fernando Sahmkow	84a158c977	Gl_Shader_compiler: Correct Depth Compare for Texture Gather operations.	2019-12-11 19:53:16 -04:00
Fernando Sahmkow	271a3264f3	Shader_Ir: default failed tracks on bindless samplers to null values.	2019-12-11 19:53:16 -04:00
Fernando Sahmkow	900b2e5cae	Merge pull request #3218 from FernandoS27/tess-gl Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are unimplemented	2019-12-11 17:50:09 -04:00
Fernando Sahmkow	1d2ba3cc97	Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are un implemented. This commit ensures the OGL backend does not execute tesselation shader stages as they are currently unimplemented.	2019-12-11 15:41:26 -04:00
bunnei	1a66cde175	Merge pull request #3210 from ReinUsesLisp/memory-barrier shader: Implement MEMBAR.GL	2019-12-11 14:24:39 -05:00
Joel Holdsworth	e9faa1617c	Added missing include	2019-12-11 18:11:49 +00:00
Fernando Sahmkow	22c6b9fab2	Kernel: Correct behavior of Address Arbiter threads. (#3165 ) * Kernel: Correct behavior of Address Arbiter threads. This corrects arbitration threads to behave just like in Horizon OS. They are added into a container and released according to what priority they had when added. Horizon OS does not reorder them if their priority changes. * Kernel: Address Feedback.	2019-12-11 10:55:38 -05:00
Lioncash	30e365e4fc	kernel/svc: Correct function signature of SignalProcessWideKey This function doesn't actually return a result code, so we can amend the signature of it to match.	2019-12-11 07:13:27 -05:00
ReinUsesLisp	f564eaebed	gl_device: Enable compute shaders for Intel Mesa drivers Previously we naively checked for "Intel" in GL_VENDOR, but this includes both Intel's proprietary driver and the mesa driver. Re-enable compute shaders for mesa.	2019-12-11 00:00:30 -03:00
ReinUsesLisp	48e16c4c49	gl_shader_cache: Add missing new-line on emitted GLSL Add missing new-line. This caused shaders using local memory and shared memory to inject a preprocessor GLSL line after an expression (resulting in invalid code). It looked like this: shared uint smem[8];#define LOCAL_MEMORY_SIZE 16 It should look like this (addressed by this commit): shared uint smem[8]; \#define LOCAL_MEMORY_SIZE 16	2019-12-10 23:52:51 -03:00
bunnei	34f8881d3e	Merge pull request #3201 from lioncash/dump kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew	2019-12-10 21:48:37 -05:00
Rodrigo Locatti	c8db7d1399	Merge pull request #3211 from FernandoS27/depth-mode Maxwell3D: Implement Depth Mode.	2019-12-10 21:20:52 -03:00
Fernando Sahmkow	7ffb672f61	Maxwell3D: Implement Depth Mode. This commit finishes adding depth mode that was reverted before due to other unresolved issues.	2019-12-10 19:51:46 -04:00
ReinUsesLisp	425a254fa2	shader: Implement MEMBAR.GL Implement using memoryBarrier in GLSL and OpMemoryBarrier on SPIR-V.	2019-12-10 16:45:03 -03:00
Fernando Sahmkow	6edadef96d	Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler vk_shader_decompiler: Add tessellation and misc changes	2019-12-10 08:01:41 -04:00
Lioncash	67b8265bd6	kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew These are fairly trivial to implement, we can just do nothing. This also provides a spot for us to potentially dump out any relevant info in the future (e.g. for debugging purposes with homebrew, etc). While we're at it, we can also correct the names of both of these supervisor calls.	2019-12-07 22:01:17 -05:00
ReinUsesLisp	e6a0a30334	renderer_opengl: Make ScreenRectVertex's constructor constexpr	2019-11-28 20:36:02 -03:00
ReinUsesLisp	dee7844443	renderer_opengl: Remove C casts	2019-11-28 20:28:27 -03:00
ReinUsesLisp	3a44faff11	renderer_opengl: Use explicit binding for presentation shaders	2019-11-28 20:25:56 -03:00
ReinUsesLisp	75cc501d52	renderer_opengl: Drop macros for message decorations	2019-11-28 20:15:25 -03:00
ReinUsesLisp	056f049b26	renderer_opengl: Move static definitions to anonymous namespace	2019-11-28 20:14:40 -03:00
ReinUsesLisp	4589582eaf	renderer_opengl: Move commentaries to header file	2019-11-28 20:11:03 -03:00