vk_blit_screen: Address feedback

vk_blit_screen: Initial implementation
This abstraction takes care of presenting accelerated and non-accelerated or "framebuffer" images to the Vulkan swapchain.
2020-01-20 18:43:11 -03:00 · 2020-01-19 21:12:43 -03:00 · 2020-01-19 18:09:01 -05:00 · 2020-01-19 15:58:14 -05:00 · 2020-01-19 15:57:14 -05:00 · 2020-01-19 15:56:49 -05:00
138 changed files with 8611 additions and 2044 deletions
--- a/.appveyor/UtilityFunctions.ps1
+++ b/.appveyor/UtilityFunctions.ps1
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
-    if ($_ -match "=") {
-        $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
-    }
-}
-popd
-
-function Which ($search_path, $name) {
-    ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
-    ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
-        Which $search_path $_.Value
-    }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
-    $final_deps = @()
-    $deps_to_process = GetDeps $search_path $binary
-    while ($deps_to_process.Count -gt 0) {
-        $current, $deps_to_process = $deps_to_process
-        if ($final_deps -contains $current) { continue }
-
-        # Is this a system dll file?
-        # We use the same algorithm that cmake uses to determine this.
-        if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
-        if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
-        if ($current -match "\\msvc[^\\]+dll") { continue }
-        if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
-        $final_deps += $current
-        $new_deps = GetDeps $search_path $current
-        $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
-    }
-    return $final_deps
-}
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -5,6 +5,10 @@ function(get_timestamp _var)
 endfunction()

 list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
+
+# Find the package here with the known path so that the GetGit commands can find it as well
+find_package(Git QUIET PATHS "${GIT_EXECUTABLE}")
+
 # generate git/build information
 include(GetGitRevisionDescription)
 get_git_head_revision(GIT_REF_SPEC GIT_REV)
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
-  - C:\ProgramData\chocolatey\bin -> appveyor.yml
-  - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
-  # Tell msys2 to add mingw64 to the path
-  MSYSTEM: MINGW64
-  # Tell msys2 to inherit the current directory when starting the shell
-  CHERE_INVOKING: 1
-  matrix:
-    - BUILD_TYPE: msvc
-    - BUILD_TYPE: mingw
-
-platform:
-  - x64
-
-configuration:
-  - Release
-
-install:
-  - git submodule update --init --recursive
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'mingw') {
-          $dependencies = "mingw64/mingw-w64-x86_64-cmake",
-                          "mingw64/mingw-w64-x86_64-qt5",
-                          "mingw64/mingw-w64-x86_64-SDL2"
-          # redirect err to null to prevent warnings from becoming errors
-          # workaround to prevent pacman from failing due to cyclical dependencies
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
-        }
-
-before_build:
-  - mkdir %BUILD_TYPE%_build
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
-        }
-  - cd ..
-
-build_script:
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # https://www.appveyor.com/docs/build-phase
-          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
-        }
-
-after_build:
-  - ps: |
-        $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
-        $GITREV = $(git show -s --format='%h')
-
-        # Find out which kind of release we are producing by tag name
-        if ($env:APPVEYOR_REPO_TAG_NAME) {
-          $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
-        } else {
-          # There is no repo tag - make assumptions
-          $RELEASE_DIST = "head"
-        }
-
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # Where are these spaces coming from? Regardless, let's remove them
-          $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
-          $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
-          # set the build names as env vars so the artifacts can upload them
-          $env:BUILD_ZIP = $MSVC_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
-          $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
-          $BUILD_DIR = ".\msvc_build\bin\Release"
-
-          # Make a debug symbol upload
-          mkdir pdb
-          Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
-          7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
-          rm "$BUILD_DIR\*.pdb"
-
-          mkdir $RELEASE_DIST
-          # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
-          Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
-          rm "$RELEASE_DIST\*.exe"
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item .\license.txt -Destination $RELEASE_DIST
-          Copy-Item .\README.md -Destination $RELEASE_DIST
-          7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MSVC_SEVENZIP $RELEASE_DIST
-        } else {
-          $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
-          # not going to bother adding separate debug symbols for mingw, so just upload a README for it
-          # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
-          $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
-          Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
-          # store the build information in env vars so we can use them as artifacts
-          $env:BUILD_ZIP = $MINGW_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
-          $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
-          $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
-          $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
-          $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
-          mkdir $RELEASE_DIST
-          mkdir $RELEASE_DIST/platforms
-          mkdir $RELEASE_DIST/styles
-          mkdir $RELEASE_DIST/imageformats
-
-          # copy the compiled binaries and other release files to the release folder
-          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
-          # copy the qt windows plugin dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
-          # copy the qt windows vista style dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
-          # copy the qt jpeg imageformat dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
-          # copy all the dll dependencies to the release folder
-          . "./.appveyor/UtilityFunctions.ps1"
-          $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
-          $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\yuzu_cmd.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\imageformats\qjpeg.dll"
-          Write-Host "Detected the following dependencies:"
-          Write-Host $MingwDLLs
-          foreach ($file in $MingwDLLs) {
-            Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
-          }
-
-          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MINGW_SEVENZIP $RELEASE_DIST
-        }
-
-test_script:
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          ctest -VV -C Release
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
-        }
-  - cd ..
-
-artifacts:
-  - path: $(BUILD_ZIP)
-    name: build
-    type: zip
-
-deploy:
-  provider: GitHub
-  release: $(appveyor_repo_tag_name)
-  auth_token:
-    secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
-  artifact: update,build
-  draft: false
-  prerelease: false
-  on:
-    appveyor_repo_tag: true
--- a/externals/boost
+++ b/externals/boost
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,10 @@ endif ()
 if (DEFINED ENV{DISPLAYVERSION})
  set(DISPLAY_VERSION $ENV{DISPLAYVERSION})
 endif ()
+
+# Pass the path to git to the GenerateSCMRev.cmake as well
+find_package(Git QUIET)
+
 add_custom_command(OUTPUT scm_rev.cpp
    COMMAND ${CMAKE_COMMAND}
      -DSRC_DIR="${CMAKE_SOURCE_DIR}"
@@ -23,6 +27,7 @@ add_custom_command(OUTPUT scm_rev.cpp
      -DTITLE_BAR_FORMAT_RUNNING="${TITLE_BAR_FORMAT_RUNNING}"
      -DBUILD_TAG="${BUILD_TAG}"
      -DBUILD_ID="${DISPLAY_VERSION}"
+      -DGIT_EXECUTABLE="${GIT_EXECUTABLE}"
      -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
    DEPENDS
      # WARNING! It was too much work to try and make a common location for this list,
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
 template class Field<const char*>;
 template class Field<std::chrono::microseconds>;

-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
-    switch (vendor) {
-    case Common::CPUVendor::INTEL:
-        return "Intel";
-    case Common::CPUVendor::AMD:
-        return "Amd";
-    case Common::CPUVendor::OTHER:
-        return "Other";
-    }
-    UNREACHABLE();
-}
-#endif
-
 void AppendBuildInfo(FieldCollection& fc) {
    const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
    fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
    fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
    fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,9 +46,16 @@ public:
        ElementPtr* new_ptr = new ElementPtr();
        write_ptr->next.store(new_ptr, std::memory_order_release);
        write_ptr = new_ptr;
-        cv.notify_one();

-        ++size;
+        const size_t previous_size{size++};
+
+        // Acquire the mutex and then immediately release it as a fence.
+        // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
+        // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
+        if (previous_size == 0) {
+            std::lock_guard lock{cv_mutex};
+        }
+        cv.notify_one();
    }

    void Pop() {
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
 // Refer to the license.txt file included.

 #include <cstring>
-#include <string>
-#include <thread>
 #include "common/common_types.h"
 #include "common/x64/cpu_detect.h"

@@ -51,8 +49,6 @@ namespace Common {
 static CPUCaps Detect() {
    CPUCaps caps = {};

-    caps.num_cores = std::thread::hardware_concurrency();
-
    // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
    // yuzu at all anyway

@@ -70,12 +66,6 @@ static CPUCaps Detect() {
    __cpuid(cpu_id, 0x80000000);

    u32 max_ex_fn = cpu_id[0];
-    if (!strcmp(caps.brand_string, "GenuineIntel"))
-        caps.vendor = CPUVendor::INTEL;
-    else if (!strcmp(caps.brand_string, "AuthenticAMD"))
-        caps.vendor = CPUVendor::AMD;
-    else
-        caps.vendor = CPUVendor::OTHER;

    // Set reasonable default brand string even if brand string not available
    strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
            caps.sse4_1 = true;
        if ((cpu_id[2] >> 20) & 1)
            caps.sse4_2 = true;
-        if ((cpu_id[2] >> 22) & 1)
-            caps.movbe = true;
        if ((cpu_id[2] >> 25) & 1)
            caps.aes = true;

-        if ((cpu_id[3] >> 24) & 1) {
-            caps.fxsave_fxrstor = true;
-        }
-
        // AVX support requires 3 separate checks:
        //  - Is the AVX bit set in CPUID?
        //  - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
        }
    }

-    caps.flush_to_zero = caps.sse;
-
    if (max_ex_fn >= 0x80000004) {
        // Extract CPU model string
        __cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
    if (max_ex_fn >= 0x80000001) {
        // Check for more features
        __cpuid(cpu_id, 0x80000001);
-        if (cpu_id[2] & 1)
-            caps.lahf_sahf_64 = true;
-        if ((cpu_id[2] >> 5) & 1)
-            caps.lzcnt = true;
        if ((cpu_id[2] >> 16) & 1)
            caps.fma4 = true;
-        if ((cpu_id[3] >> 29) & 1)
-            caps.long_mode = true;
    }

    return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
    return caps;
 }

-std::string GetCPUCapsString() {
-    auto caps = GetCPUCaps();
-
-    std::string sum(caps.cpu_string);
-    sum += " (";
-    sum += caps.brand_string;
-    sum += ")";
-
-    if (caps.sse)
-        sum += ", SSE";
-    if (caps.sse2) {
-        sum += ", SSE2";
-        if (!caps.flush_to_zero)
-            sum += " (without DAZ)";
-    }
-
-    if (caps.sse3)
-        sum += ", SSE3";
-    if (caps.ssse3)
-        sum += ", SSSE3";
-    if (caps.sse4_1)
-        sum += ", SSE4.1";
-    if (caps.sse4_2)
-        sum += ", SSE4.2";
-    if (caps.avx)
-        sum += ", AVX";
-    if (caps.avx2)
-        sum += ", AVX2";
-    if (caps.bmi1)
-        sum += ", BMI1";
-    if (caps.bmi2)
-        sum += ", BMI2";
-    if (caps.fma)
-        sum += ", FMA";
-    if (caps.aes)
-        sum += ", AES";
-    if (caps.movbe)
-        sum += ", MOVBE";
-    if (caps.long_mode)
-        sum += ", 64-bit support";
-
-    return sum;
-}
-
 } // namespace Common
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@

 #pragma once

-#include <string>
-
 namespace Common {

-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
-    INTEL,
-    AMD,
-    OTHER,
-};
-
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
-    CPUVendor vendor;
    char cpu_string[0x21];
    char brand_string[0x41];
-    int num_cores;
    bool sse;
    bool sse2;
    bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
    bool fma;
    bool fma4;
    bool aes;
-
-    // Support for the FXSAVE and FXRSTOR instructions
-    bool fxsave_fxrstor;
-
-    bool movbe;
-
-    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
-    // are automatically set to (signed) zero.
-    bool flush_to_zero;
-
-    // Support for LAHF and SAHF instructions in 64-bit mode
-    bool lahf_sahf_64;
-
-    bool long_mode;
 };

 /**
@@ -57,10 +32,4 @@ struct CPUCaps {
 */
 const CPUCaps& GetCPUCaps();

-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
 } // namespace Common
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
    config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
    config.page_table_address_space_bits = address_space_bits;
    config.silently_mirror_page_table = false;
+    config.absolute_offset_page_table = true;

    // Multi-process state
    config.processor_id = core_index;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -46,7 +46,6 @@
 #include "core/settings.h"
 #include "core/telemetry_session.h"
 #include "core/tools/freezer.h"
-#include "video_core/debug_utils/debug_utils.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"

@@ -341,7 +340,6 @@ struct System::Impl {
    std::unique_ptr<Loader::AppLoader> app_loader;
    std::unique_ptr<VideoCore::RendererBase> renderer;
    std::unique_ptr<Tegra::GPU> gpu_core;
-    std::shared_ptr<Tegra::DebugContext> debug_context;
    std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
    Memory::Memory memory;
    CpuCoreManager cpu_core_manager;
@@ -580,14 +578,6 @@ Loader::AppLoader& System::GetAppLoader() const {
    return *impl->app_loader;
 }

-void System::SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
-    impl->debug_context = std::move(context);
-}
-
-Tegra::DebugContext* System::GetGPUDebugContext() const {
-    return impl->debug_context.get();
-}
-
 void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
    impl->virtual_filesystem = std::move(vfs);
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -307,10 +307,6 @@ public:
    Service::SM::ServiceManager& ServiceManager();
    const Service::SM::ServiceManager& ServiceManager() const;

-    void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context);
-
-    Tegra::DebugContext* GetGPUDebugContext() const;
-
    void SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs);

    std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -17,10 +17,10 @@
 #include "core/memory.h"

 namespace Kernel {
-namespace {
+
 // Wake up num_to_wake (or all) threads in a vector.
-void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
-    auto& system = Core::System::GetInstance();
+void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
+                                 s32 num_to_wake) {
    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
    // them all.
    std::size_t last = waiting_threads.size();
@@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
    for (std::size_t i = 0; i < last; i++) {
        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        RemoveThread(waiting_threads[i]);
        waiting_threads[i]->SetArbiterWaitAddress(0);
        waiting_threads[i]->ResumeFromWait();
        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
    }
 }
-} // Anonymous namespace

 AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
 AddressArbiter::~AddressArbiter() = default;
@@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
 ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
+    InsertThread(SharedFrom(current_thread));
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();
    current_thread->WakeAfterDelay(timeout);
@@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    return RESULT_TIMEOUT;
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
+void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
+    RemoveThread(thread);
+    thread->SetArbiterWaitAddress(0);
+}

-    // Retrieve all threads that are waiting for this address.
-    std::vector<std::shared_ptr<Thread>> threads;
-    const auto& scheduler = system.GlobalScheduler();
-    const auto& thread_list = scheduler.GetThreadList();
-
-    for (const auto& thread : thread_list) {
-        if (thread->GetArbiterWaitAddress() == address) {
-            threads.push_back(thread);
+void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread->GetPriority() >= thread->GetPriority()) {
+            thread_list.insert(it, thread);
+            return;
        }
+        ++it;
    }
+    thread_list.push_back(std::move(thread));
+}

-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
+void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread.get() == thread.get()) {
+            thread_list.erase(it);
+            return;
+        }
+        ++it;
+    }
+    UNREACHABLE();
+}

-    return threads;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
+    std::vector<std::shared_ptr<Thread>> result;
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        std::shared_ptr<Thread> current_thread = *it;
+        result.push_back(std::move(current_thread));
+        ++it;
+    }
+    return result;
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,7 +4,9 @@

 #pragma once

+#include <list>
 #include <memory>
+#include <unordered_map>
 #include <vector>

 #include "common/common_types.h"
@@ -48,6 +50,9 @@ public:
    /// Waits on an address with a particular arbitration type.
    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);

+    /// Removes a thread from the container and resets its address arbiter address to 0
+    void HandleWakeupThread(std::shared_ptr<Thread> thread);
+
 private:
    /// Signals an address being waited on.
    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
    // Waits on the given address with a timeout in nanoseconds
    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);

+    /// Wake up num_to_wake (or all) threads in a vector.
+    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
+
+    /// Insert a thread into the address arbiter container
+    void InsertThread(std::shared_ptr<Thread> thread);
+
+    /// Removes a thread from the address arbiter container
+    void RemoveThread(std::shared_ptr<Thread> thread);
+
    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+
+    /// List of threads waiting for an address arbiter
+    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;

    Core::System& system;
 };
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
        }
    }

-    if (thread->GetArbiterWaitAddress() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
-        thread->SetArbiterWaitAddress(0);
+    if (thread->GetStatus() == ThreadStatus::WaitArb) {
+        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
+        address_arbiter.HandleWakeupThread(thread);
    }

    if (resume) {
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
 // - Second to ensure all host backing memory used is aligned to 256 bytes due
 // to strict alignment restrictions on GPU memory.

-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+    using PhysicalMemoryVector::PhysicalMemoryVector;
+};

 } // namespace Kernel
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }

 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
+    code_memory_size += module_.memory.size();
+
    const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));

    const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
    MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
    MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
-
-    code_memory_size += module_.memory.size();
 }

 Process::Process(Core::System& system)
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 }

 /// Signal process wide key
-static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
-                                       s32 target) {
+static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
              condition_variable_addr, target);

@@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
            system.PrepareReschedule(thread->GetProcessorID());
        }
    }
-
-    return RESULT_SUCCESS;
 }

 // Wait for an address (via Address Arbiter)
@@ -1781,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

+static void KernelDebug([[maybe_unused]] Core::System& system,
+                        [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
+                        [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
+static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
+                                   [[maybe_unused]] u32 trace_state) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
 static u64 GetSystemTick(Core::System& system) {
    LOG_TRACE(Kernel_SVC, "called");
@@ -2418,8 +2426,8 @@ static const FunctionDef SVC_Table[] = {
    {0x39, nullptr, "Unknown"},
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
-    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "DumpInfoNew"},
+    {0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
+    {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
    FuncReturn(system, retval);
 }

-template <ResultCode func(Core::System&, u64, s32)>
-void SvcWrap(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
-}
-
 template <ResultCode func(Core::System&, u64, u32)>
 void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) {
    func(system);
 }

+template <void func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)));
+}
+
+template <void func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
+         Param(system, 3));
+}
+
 template <void func(Core::System&, s64)>
 void SvcWrap(Core::System& system) {
    func(system, static_cast<s64>(Param(system, 0)));
 }

+template <void func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
+}
+
 template <void func(Core::System&, u64, u64)>
 void SvcWrap(Core::System& system) {
    func(system, Param(system, 0), Param(system, 1));
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
    // If necessary, expand backing vector to cover new heap extents in
    // the case of allocating. Otherwise, shrink the backing memory,
    // if a smaller heap has been requested.
-    const u64 old_heap_size = GetCurrentHeapSize();
-    if (size > old_heap_size) {
-        const u64 alloc_size = size - old_heap_size;
-
-        heap_memory->insert(heap_memory->end(), alloc_size, 0);
-        RefreshMemoryBlockMappings(heap_memory.get());
-    } else if (size < old_heap_size) {
-        heap_memory->resize(size);
-        heap_memory->shrink_to_fit();
-
-        RefreshMemoryBlockMappings(heap_memory.get());
-    }
+    heap_memory->resize(size);
+    heap_memory->shrink_to_fit();
+    RefreshMemoryBlockMappings(heap_memory.get());

    heap_end = heap_region_base + size;
    ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
    // Always merge allocated memory blocks, even when they don't share the same backing block.
    if (left.type == VMAType::AllocatedMemoryBlock &&
        (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
-        const auto right_begin = right.backing_block->begin() + right.offset;
-        const auto right_end = right_begin + right.size;

        // Check if we can save work.
        if (left.offset == 0 && left.size == left.backing_block->size()) {
            // Fast case: left is an entire backing block.
-            left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+            left.backing_block->resize(left.size + right.size);
+            std::memcpy(left.backing_block->data() + left.size,
+                        right.backing_block->data() + right.offset, right.size);
        } else {
            // Slow case: make a new memory block for left and right.
-            const auto left_begin = left.backing_block->begin() + left.offset;
-            const auto left_end = left_begin + left.size;
-            const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
-            const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
            auto new_memory = std::make_shared<PhysicalMemory>();
-            new_memory->reserve(left_size + right_size);
-            new_memory->insert(new_memory->end(), left_begin, left_end);
-            new_memory->insert(new_memory->end(), right_begin, right_end);
+            new_memory->resize(left.size + right.size);
+            std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+            std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+                        right.size);

            left.backing_block = std::move(new_memory);
            left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
        memory.UnmapRegion(page_table, vma.base, vma.size);
        break;
    case VMAType::AllocatedMemoryBlock:
-        memory.MapMemoryRegion(page_table, vma.base, vma.size,
-                               vma.backing_block->data() + vma.offset);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
        break;
    case VMAType::BackingMemory:
        memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -9,6 +9,7 @@
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nifm/nifm.h"
 #include "core/hle/service/service.h"
+#include "core/settings.h"

 namespace Service::NIFM {

@@ -86,7 +87,12 @@ private:

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
-        rb.PushEnum(RequestState::Connected);
+
+        if (Settings::values.bcat_backend == "none") {
+            rb.PushEnum(RequestState::NotSubmitted);
+        } else {
+            rb.PushEnum(RequestState::Connected);
+        }
    }

    void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
    }
    void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_NIFM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
-        rb.Push<u8>(1);
+        if (Settings::values.bcat_backend == "none") {
+            rb.Push<u8>(0);
+        } else {
+            rb.Push<u8>(1);
+        }
    }
    Core::System& system;
 };
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -104,10 +104,12 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)

        ASSERT(object->status == nvmap::Object::Status::Allocated);

-        u64 size = static_cast<u64>(entry.pages) << 0x10;
+        const u64 size = static_cast<u64>(entry.pages) << 0x10;
        ASSERT(size <= object->size);
+        const u64 map_offset = static_cast<u64>(entry.map_offset) << 0x10;

-        GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
+        const GPUVAddr returned =
+            gpu.MemoryManager().MapBufferEx(object->addr + map_offset, offset, size);
        ASSERT(returned == offset);
    }
    std::memcpy(output.data(), entries.data(), output.size());
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -62,7 +62,7 @@ private:
        u16_le flags;
        u16_le kind;
        u32_le nvmap_handle;
-        INSERT_PADDING_WORDS(1);
+        u32_le map_offset;
        u32_le offset;
        u32_le pages;
    };
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
    return layer_id;
 }

+void NVFlinger::CloseLayer(u64 layer_id) {
+    for (auto& display : displays) {
+        display.CloseLayer(layer_id);
+    }
+}
+
 std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
    const auto* const layer = FindLayer(display_id, layer_id);

@@ -192,7 +198,7 @@ void NVFlinger::Compose() {

        const auto& igbp_buffer = buffer->get().igbp_buffer;

-        const auto& gpu = system.GPU();
+        auto& gpu = system.GPU();
        const auto& multi_fence = buffer->get().multi_fence;
        for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
            const auto& fence = multi_fence.fences[fence_id];
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -54,6 +54,9 @@ public:
    /// If an invalid display ID is specified, then an empty optional is returned.
    std::optional<u64> CreateLayer(u64 display_id);

+    /// Closes a layer on all displays for the given layer ID.
+    void CloseLayer(u64 layer_id);
+
    /// Finds the buffer queue ID of the specified layer in the specified display.
    ///
    /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
 Display::~Display() = default;

 Layer& Display::GetLayer(std::size_t index) {
-    return layers.at(index);
+    return *layers.at(index);
 }

 const Layer& Display::GetLayer(std::size_t index) const {
-    return layers.at(index);
+    return *layers.at(index);
 }

 std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
    // TODO(Subv): Support more than 1 layer.
    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");

-    layers.emplace_back(id, buffer_queue);
+    layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
+}
+
+void Display::CloseLayer(u64 id) {
+    layers.erase(
+        std::remove_if(layers.begin(), layers.end(),
+                       [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
+        layers.end());
 }

 Layer* Display::FindLayer(u64 id) {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });

    if (itr == layers.end()) {
        return nullptr;
    }

-    return &*itr;
+    return itr->get();
 }

 const Layer* Display::FindLayer(u64 id) const {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+    const auto itr =
+        std::find_if(layers.begin(), layers.end(),
+                     [id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });

    if (itr == layers.end()) {
        return nullptr;
    }

-    return &*itr;
+    return itr->get();
 }

 } // namespace Service::VI
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <memory>
 #include <string>
 #include <vector>

@@ -69,6 +70,12 @@ public:
    ///
    void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);

+    /// Closes and removes a layer from this display with the given ID.
+    ///
+    /// @param id           The ID assigned to the layer to close.
+    ///
+    void CloseLayer(u64 id);
+
    /// Attempts to find a layer with the given ID.
    ///
    /// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
    u64 id;
    std::string name;

-    std::vector<Layer> layers;
+    std::vector<std::shared_ptr<Layer>> layers;
    Kernel::EventPair vsync_event;
 };

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -1066,6 +1066,18 @@ private:
        rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
    }

+    void CloseLayer(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto layer_id{rp.Pop<u64>()};
+
+        LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
+
+        nv_flinger->CloseLayer(layer_id);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
    void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};
        const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
        {1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
        {1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
        {2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
-        {2021, nullptr, "CloseLayer"},
+        {2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
        {2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
        {2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
        {2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
            codeset_segment->addr = segment_addr;
            codeset_segment->size = aligned_size;

-            memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+            std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+                        p->p_filesz);
            current_image_position += aligned_size;
        }
    }
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <cstring>
 #include "core/file_sys/kernel_executable.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
        segment.addr = offset;
        segment.offset = offset;
        segment.size = PageAlignSize(static_cast<u32>(data.size()));
-        program_image.resize(offset);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(offset + data.size());
+        std::memcpy(program_image.data() + offset, data.data(), data.size());
    };

    load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <cinttypes>
+#include <cstring>
 #include <vector>

 #include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
        if (nso_header.IsSegmentCompressed(i)) {
            data = DecompressSegment(data, nso_header.segments[i]);
        }
-        program_image.resize(nso_header.segments[i].location);
-        program_image.insert(program_image.end(), data.begin(), data.end());
+        program_image.resize(nso_header.segments[i].location + data.size());
+        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+                    data.size());
        codeset.segments[i].addr = nso_header.segments[i].location;
        codeset.segments[i].offset = nso_header.segments[i].location;
        codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
        std::vector<u8> pi_header;
        pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
                         reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
-        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
-                         program_image.end());
+        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+                         program_image.data() + program_image.size());

        pi_header = pm->PatchNSO(pi_header, file.GetName());

-        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
    }

    // Apply cheats if they exist and the program has a valid title ID
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
        system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
    }

+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset) {
+        MapMemoryRegion(page_table, base, size, memory.data() + offset);
+    }
+
    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
        ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
        ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -146,7 +152,7 @@ struct Memory::Impl {
    u8* GetPointer(const VAddr vaddr) {
        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
        if (page_pointer != nullptr) {
-            return page_pointer + (vaddr & PAGE_MASK);
+            return page_pointer + vaddr;
        }

        if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +235,8 @@ struct Memory::Impl {
            case Common::PageType::Memory: {
                DEBUG_ASSERT(page_table.pointers[page_index]);

-                const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
                std::memcpy(dest_buffer, src_ptr, copy_amount);
                break;
            }
@@ -276,7 +283,8 @@ struct Memory::Impl {
            case Common::PageType::Memory: {
                DEBUG_ASSERT(page_table.pointers[page_index]);

-                u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
                std::memcpy(dest_ptr, src_buffer, copy_amount);
                break;
            }
@@ -322,7 +330,8 @@ struct Memory::Impl {
            case Common::PageType::Memory: {
                DEBUG_ASSERT(page_table.pointers[page_index]);

-                u8* dest_ptr = page_table.pointers[page_index] + page_offset;
+                u8* dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
                std::memset(dest_ptr, 0, copy_amount);
                break;
            }
@@ -368,7 +377,8 @@ struct Memory::Impl {
            }
            case Common::PageType::Memory: {
                DEBUG_ASSERT(page_table.pointers[page_index]);
-                const u8* src_ptr = page_table.pointers[page_index] + page_offset;
+                const u8* src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
                WriteBlock(process, dest_addr, src_ptr, copy_amount);
                break;
            }
@@ -446,7 +456,8 @@ struct Memory::Impl {
                        page_type = Common::PageType::Unmapped;
                    } else {
                        page_type = Common::PageType::Memory;
-                        current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
+                        current_page_table->pointers[vaddr >> PAGE_BITS] =
+                            pointer - (vaddr & ~PAGE_MASK);
                    }
                    break;
                }
@@ -493,7 +504,9 @@ struct Memory::Impl {
                      memory);
        } else {
            while (base != end) {
-                page_table.pointers[base] = memory;
+                page_table.pointers[base] = memory - (base << PAGE_BITS);
+                ASSERT_MSG(page_table.pointers[base],
+                           "memory mapping base yield a nullptr within the table");

                base += 1;
                memory += PAGE_SIZE;
@@ -518,7 +531,7 @@ struct Memory::Impl {
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
            T value;
-            std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
+            std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
            return value;
        }

@@ -559,7 +572,7 @@ struct Memory::Impl {
        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
        if (page_pointer != nullptr) {
            // NOTE: Avoid adding any extra logic to this fast-path block
-            std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
+            std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
            return;
        }

@@ -594,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
    impl->SetCurrentPageTable(process);
 }

+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                             Kernel::PhysicalMemory& memory, VAddr offset) {
+    impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
 void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
    impl->MapMemoryRegion(page_table, base, size, target);
 }
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
 }

 namespace Kernel {
+class PhysicalMemory;
 class Process;
-}
+} // namespace Kernel

 namespace Memory {

@@ -65,6 +66,19 @@ public:
     */
    void SetCurrentPageTable(Kernel::Process& process);

+    /**
+     * Maps a physical buffer onto a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to start mapping at. Must be page-aligned.
+     * @param size       The amount of bytes to map. Must be page-aligned.
+     * @param memory     Physical buffer with the memory backing the mapping. Must be of length
+     *                   at least `size + offset`.
+     * @param offset     The offset within the physical memory. Must be page-aligned.
+     */
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                         Kernel::PhysicalMemory& memory, VAddr offset);
+
    /**
     * Maps an allocated buffer onto a region of the emulated process address space.
     *
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -4,8 +4,6 @@ add_library(video_core STATIC
    buffer_cache/map_interval.h
    dma_pusher.cpp
    dma_pusher.h
-    debug_utils/debug_utils.cpp
-    debug_utils/debug_utils.h
    engines/const_buffer_engine_interface.h
    engines/const_buffer_info.h
    engines/engine_upload.cpp
@@ -151,14 +149,35 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/declarations.h
+        renderer_vulkan/fixed_pipeline_state.cpp
+        renderer_vulkan/fixed_pipeline_state.h
        renderer_vulkan/maxwell_to_vk.cpp
        renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/renderer_vulkan.h
+        renderer_vulkan/vk_blit_screen.cpp
+        renderer_vulkan/vk_blit_screen.h
        renderer_vulkan/vk_buffer_cache.cpp
        renderer_vulkan/vk_buffer_cache.h
+        renderer_vulkan/vk_compute_pass.cpp
+        renderer_vulkan/vk_compute_pass.h
+        renderer_vulkan/vk_compute_pipeline.cpp
+        renderer_vulkan/vk_compute_pipeline.h
+        renderer_vulkan/vk_descriptor_pool.cpp
+        renderer_vulkan/vk_descriptor_pool.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_graphics_pipeline.cpp
+        renderer_vulkan/vk_graphics_pipeline.h
+        renderer_vulkan/vk_image.cpp
+        renderer_vulkan/vk_image.h
        renderer_vulkan/vk_memory_manager.cpp
        renderer_vulkan/vk_memory_manager.h
+        renderer_vulkan/vk_pipeline_cache.cpp
+        renderer_vulkan/vk_pipeline_cache.h
+        renderer_vulkan/vk_rasterizer.cpp
+        renderer_vulkan/vk_rasterizer.h
+        renderer_vulkan/vk_renderpass_cache.cpp
+        renderer_vulkan/vk_renderpass_cache.h
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
        renderer_vulkan/vk_sampler_cache.cpp
@@ -167,10 +186,19 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_shader_decompiler.cpp
        renderer_vulkan/vk_shader_decompiler.h
+        renderer_vulkan/vk_shader_util.cpp
+        renderer_vulkan/vk_shader_util.h
+        renderer_vulkan/vk_staging_buffer_pool.cpp
+        renderer_vulkan/vk_staging_buffer_pool.h
        renderer_vulkan/vk_stream_buffer.cpp
        renderer_vulkan/vk_stream_buffer.h
        renderer_vulkan/vk_swapchain.cpp
-        renderer_vulkan/vk_swapchain.h)
+        renderer_vulkan/vk_swapchain.h
+        renderer_vulkan/vk_texture_cache.cpp
+        renderer_vulkan/vk_texture_cache.h
+        renderer_vulkan/vk_update_descriptor.cpp
+        renderer_vulkan/vk_update_descriptor.h
+    )

    target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -1,49 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#include <mutex>
-
-#include "video_core/debug_utils/debug_utils.h"
-
-namespace Tegra {
-
-void DebugContext::DoOnEvent(Event event, void* data) {
-    {
-        std::unique_lock lock{breakpoint_mutex};
-
-        // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will
-        // show on debug widgets
-
-        // TODO: Should stop the CPU thread here once we multithread emulation.
-
-        active_breakpoint = event;
-        at_breakpoint = true;
-
-        // Tell all observers that we hit a breakpoint
-        for (auto& breakpoint_observer : breakpoint_observers) {
-            breakpoint_observer->OnMaxwellBreakPointHit(event, data);
-        }
-
-        // Wait until another thread tells us to Resume()
-        resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; });
-    }
-}
-
-void DebugContext::Resume() {
-    {
-        std::lock_guard lock{breakpoint_mutex};
-
-        // Tell all observers that we are about to resume
-        for (auto& breakpoint_observer : breakpoint_observers) {
-            breakpoint_observer->OnMaxwellResume();
-        }
-
-        // Resume the waiting thread (i.e. OnEvent())
-        at_breakpoint = false;
-    }
-
-    resume_from_breakpoint.notify_one();
-}
-
-} // namespace Tegra
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -1,157 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <condition_variable>
-#include <list>
-#include <memory>
-#include <mutex>
-
-namespace Tegra {
-
-class DebugContext {
-public:
-    enum class Event {
-        FirstEvent = 0,
-
-        MaxwellCommandLoaded = FirstEvent,
-        MaxwellCommandProcessed,
-        IncomingPrimitiveBatch,
-        FinishedPrimitiveBatch,
-
-        NumEvents
-    };
-
-    /**
-     * Inherit from this class to be notified of events registered to some debug context.
-     * Most importantly this is used for our debugger GUI.
-     *
-     * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods.
-     * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state
-     * access
-     * @todo Evaluate an alternative interface, in which there is only one managing observer and
-     * multiple child observers running (by design) on the same thread.
-     */
-    class BreakPointObserver {
-    public:
-        /// Constructs the object such that it observes events of the given DebugContext.
-        explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context)
-            : context_weak(debug_context) {
-            std::unique_lock lock{debug_context->breakpoint_mutex};
-            debug_context->breakpoint_observers.push_back(this);
-        }
-
-        virtual ~BreakPointObserver() {
-            auto context = context_weak.lock();
-            if (context) {
-                {
-                    std::unique_lock lock{context->breakpoint_mutex};
-                    context->breakpoint_observers.remove(this);
-                }
-
-                // If we are the last observer to be destroyed, tell the debugger context that
-                // it is free to continue. In particular, this is required for a proper yuzu
-                // shutdown, when the emulation thread is waiting at a breakpoint.
-                if (context->breakpoint_observers.empty())
-                    context->Resume();
-            }
-        }
-
-        /**
-         * Action to perform when a breakpoint was reached.
-         * @param event Type of event which triggered the breakpoint
-         * @param data Optional data pointer (if unused, this is a nullptr)
-         * @note This function will perform nothing unless it is overridden in the child class.
-         */
-        virtual void OnMaxwellBreakPointHit(Event event, void* data) {}
-
-        /**
-         * Action to perform when emulation is resumed from a breakpoint.
-         * @note This function will perform nothing unless it is overridden in the child class.
-         */
-        virtual void OnMaxwellResume() {}
-
-    protected:
-        /**
-         * Weak context pointer. This need not be valid, so when requesting a shared_ptr via
-         * context_weak.lock(), always compare the result against nullptr.
-         */
-        std::weak_ptr<DebugContext> context_weak;
-    };
-
-    /**
-     * Simple structure defining a breakpoint state
-     */
-    struct BreakPoint {
-        bool enabled = false;
-    };
-
-    /**
-     * Static constructor used to create a shared_ptr of a DebugContext.
-     */
-    static std::shared_ptr<DebugContext> Construct() {
-        return std::shared_ptr<DebugContext>(new DebugContext);
-    }
-
-    /**
-     * Used by the emulation core when a given event has happened. If a breakpoint has been set
-     * for this event, OnEvent calls the event handlers of the registered breakpoint observers.
-     * The current thread then is halted until Resume() is called from another thread (or until
-     * emulation is stopped).
-     * @param event Event which has happened
-     * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until
-     * Resume() is called.
-     */
-    void OnEvent(Event event, void* data) {
-        // This check is left in the header to allow the compiler to inline it.
-        if (!breakpoints[(int)event].enabled)
-            return;
-        // For the rest of event handling, call a separate function.
-        DoOnEvent(event, data);
-    }
-
-    void DoOnEvent(Event event, void* data);
-
-    /**
-     * Resume from the current breakpoint.
-     * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock.
-     * Calling from any other thread is safe.
-     */
-    void Resume();
-
-    /**
-     * Delete all set breakpoints and resume emulation.
-     */
-    void ClearBreakpoints() {
-        for (auto& bp : breakpoints) {
-            bp.enabled = false;
-        }
-        Resume();
-    }
-
-    // TODO: Evaluate if access to these members should be hidden behind a public interface.
-    std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints;
-    Event active_breakpoint{};
-    bool at_breakpoint = false;
-
-private:
-    /**
-     * Private default constructor to make sure people always construct this through Construct()
-     * instead.
-     */
-    DebugContext() = default;
-
-    /// Mutex protecting current breakpoint state and the observer list.
-    std::mutex breakpoint_mutex;
-
-    /// Used by OnEvent to wait for resumption.
-    std::condition_variable resume_from_breakpoint;
-
-    /// List of registered observers
-    std::list<BreakPointObserver*> breakpoint_observers;
-};
-
-} // namespace Tegra
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,7 +7,6 @@
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
@@ -88,11 +87,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
        color_mask.A.Assign(1);
    }

-    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
+    // NVN games expect these values to be enabled at boot
+    regs.rasterize_enable = 1;
    regs.rt_separate_frag_data = 1;
-
-    // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
    regs.framebuffer_srgb = 1;
+
    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
@@ -273,8 +272,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
 }

 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
-    auto debug_context = system.GetGPUDebugContext();
-
    const u32 method = method_call.method;

    if (method == cb_data_state.current) {
@@ -315,10 +312,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid Maxwell3D register, increase the size of the Regs structure");

-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
-    }
-
    if (regs.reg_array[method] != method_call.argument) {
        regs.reg_array[method] = method_call.argument;
        const std::size_t dirty_reg = dirty_pointers[method];
@@ -424,10 +417,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    default:
        break;
    }
-
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
-    }
 }

 void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
@@ -485,12 +474,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
    ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);

-    auto debug_context = system.GetGPUDebugContext();
-
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-    }
-
    // Both instance configuration registers can not be set at the same time.
    ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
               "Illegal combination of instancing parameters");
@@ -500,10 +483,6 @@ void Maxwell3D::FlushMMEInlineDraw() {
        rasterizer.DrawMultiBatch(is_indexed);
    }

-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-    }
-
    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
    // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
    // it's possible that it is incorrect and that there is some other register used to specify the
@@ -650,12 +629,6 @@ void Maxwell3D::DrawArrays() {
              regs.vertex_buffer.count);
    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");

-    auto debug_context = system.GetGPUDebugContext();
-
-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-    }
-
    // Both instance configuration registers can not be set at the same time.
    ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
               "Illegal combination of instancing parameters");
@@ -673,10 +646,6 @@ void Maxwell3D::DrawArrays() {
        rasterizer.DrawBatch(is_indexed);
    }

-    if (debug_context) {
-        debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
-    }
-
    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
    // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
    // it's possible that it is incorrect and that there is some other register used to specify the
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -310,6 +310,11 @@ public:
            }
        };

+        enum class DepthMode : u32 {
+            MinusOneToOne = 0,
+            ZeroToOne = 1,
+        };
+
        enum class PrimitiveTopology : u32 {
            Points = 0x0,
            Lines = 0x1,
@@ -491,11 +496,6 @@ public:
            INSERT_UNION_PADDING_WORDS(1);
        };

-        enum class DepthMode : u32 {
-            MinusOneToOne = 0,
-            ZeroToOne = 1,
-        };
-
        enum class TessellationPrimitive : u32 {
            Isolines = 0,
            Triangles = 1,
@@ -657,7 +657,11 @@ public:
                std::array<f32, 4> tess_level_outer;
                std::array<f32, 2> tess_level_inner;

-                INSERT_UNION_PADDING_WORDS(0x102);
+                INSERT_UNION_PADDING_WORDS(0x10);
+
+                u32 rasterize_enable;
+
+                INSERT_UNION_PADDING_WORDS(0xF1);

                u32 tfb_enabled;

@@ -676,7 +680,7 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_UNION_PADDING_WORDS(1);
+                DepthMode depth_mode;

                float clear_color[4];
                float clear_depth;
@@ -707,13 +711,15 @@ public:

                u32 color_mask_common;

-                INSERT_UNION_PADDING_WORDS(0x6);
-
-                u32 rt_separate_frag_data;
+                INSERT_UNION_PADDING_WORDS(0x2);

                f32 depth_bounds[2];

-                INSERT_UNION_PADDING_WORDS(0xA);
+                INSERT_UNION_PADDING_WORDS(0x2);
+
+                u32 rt_separate_frag_data;
+
+                INSERT_UNION_PADDING_WORDS(0xC);

                struct {
                    u32 address_high;
@@ -1012,7 +1018,14 @@ public:
                    }
                } instanced_arrays;

-                INSERT_UNION_PADDING_WORDS(0x6);
+                INSERT_UNION_PADDING_WORDS(0x4);
+
+                union {
+                    BitField<0, 1, u32> enable;
+                    BitField<4, 8, u32> unk4;
+                } vp_point_size;
+
+                INSERT_UNION_PADDING_WORDS(1);

                Cull cull;

@@ -1030,7 +1043,12 @@ public:
                    BitField<4, 1, u32> depth_clamp_far;
                } view_volume_clip_control;

-                INSERT_UNION_PADDING_WORDS(0x21);
+                INSERT_UNION_PADDING_WORDS(0x1F);
+
+                u32 depth_bounds_enable;
+
+                INSERT_UNION_PADDING_WORDS(1);
+
                struct {
                    u32 enable;
                    LogicOperation operation;
@@ -1260,8 +1278,6 @@ public:

    } dirty{};

-    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
-
    /// Reads a register value located at the input method address
    u32 GetRegisterValue(u32 method) const;

@@ -1356,6 +1372,8 @@ private:

    bool execute_on{true};

+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+
    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

@@ -1420,11 +1438,13 @@ ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tess_mode, 0xC8);
 ASSERT_REG_POSITION(tess_level_outer, 0xC9);
 ASSERT_REG_POSITION(tess_level_inner, 0xCD);
+ASSERT_REG_POSITION(rasterize_enable, 0xDF);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 ASSERT_REG_POSITION(viewports, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
@@ -1438,7 +1458,7 @@ ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
-ASSERT_REG_POSITION(depth_bounds, 0x3EC);
+ASSERT_REG_POSITION(depth_bounds, 0x3E7);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1490,10 +1510,12 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
 ASSERT_REG_POSITION(cull, 0x646);
 ASSERT_REG_POSITION(pixel_center_integer, 0x649);
 ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
 ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
 ASSERT_REG_POSITION(logic_op, 0x671);
 ASSERT_REG_POSITION(clear_buffers, 0x674);
 ASSERT_REG_POSITION(color_mask, 0x680);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
    Trunc = 11,
 };

+enum class AtomicOp : u64 {
+    Add = 0,
+    Min = 1,
+    Max = 2,
+    Inc = 3,
+    Dec = 4,
+    And = 5,
+    Or = 6,
+    Xor = 7,
+    Exch = 8,
+};
+
 enum class UniformType : u64 {
    UnsignedByte = 0,
    SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
    Bits128 = 6,
 };

+enum class AtomicType : u64 {
+    U32 = 0,
+    S32 = 1,
+    U64 = 2,
+    S64 = 3,
+};
+
 enum class IMinMaxExchange : u64 {
    None = 0,
    XLo = 1,
@@ -384,6 +403,15 @@ enum class IsberdMode : u64 {

 enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };

+enum class MembarType : u64 {
+    CTA = 0,
+    GL = 1,
+    SYS = 2,
+    VC = 3,
+};
+
+enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
+
 enum class HalfType : u64 {
    H0_H1 = 0,
    F32 = 1,
@@ -929,6 +957,16 @@ union Instruction {
        BitField<46, 2, u64> cache_mode;
    } stg;

+    union {
+        BitField<52, 4, AtomicOp> operation;
+        BitField<28, 2, AtomicType> type;
+        BitField<30, 22, s64> offset;
+
+        s32 GetImmediateOffset() const {
+            return static_cast<s32>(offset << 2);
+        }
+    } atoms;
+
    union {
        BitField<32, 1, PhysicalAttributeDirection> direction;
        BitField<47, 3, AttributeSize> size;
@@ -1042,7 +1080,7 @@ union Instruction {
        BitField<40, 1, R2pMode> mode;
        BitField<41, 2, u64> byte;
        BitField<20, 7, u64> immediate_mask;
-    } r2p;
+    } p2r_r2p;

    union {
        BitField<39, 3, u64> pred39;
@@ -1230,7 +1268,7 @@ union Instruction {
        BitField<35, 1, u64> ndv_flag;
        BitField<49, 1, u64> nodep_flag;
        BitField<50, 1, u64> dc_flag;
-        BitField<54, 2, u64> info;
+        BitField<54, 2, u64> offset_mode;
        BitField<56, 2, u64> component;

        bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1242,9 +1280,9 @@ union Instruction {
            case TextureMiscMode::DC:
                return dc_flag != 0;
            case TextureMiscMode::AOFFI:
-                return info == 1;
+                return offset_mode == 1;
            case TextureMiscMode::PTP:
-                return info == 2;
+                return offset_mode == 2;
            default:
                break;
            }
@@ -1256,7 +1294,7 @@ union Instruction {
        BitField<35, 1, u64> ndv_flag;
        BitField<49, 1, u64> nodep_flag;
        BitField<50, 1, u64> dc_flag;
-        BitField<33, 2, u64> info;
+        BitField<33, 2, u64> offset_mode;
        BitField<37, 2, u64> component;

        bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1268,9 +1306,9 @@ union Instruction {
            case TextureMiscMode::DC:
                return dc_flag != 0;
            case TextureMiscMode::AOFFI:
-                return info == 1;
+                return offset_mode == 1;
            case TextureMiscMode::PTP:
-                return info == 2;
+                return offset_mode == 2;
            default:
                break;
            }
@@ -1283,6 +1321,7 @@ union Instruction {
        BitField<50, 1, u64> dc_flag;
        BitField<51, 1, u64> aoffi_flag;
        BitField<52, 2, u64> component;
+        BitField<55, 1, u64> fp16_flag;

        bool UsesMiscMode(TextureMiscMode mode) const {
            switch (mode) {
@@ -1545,6 +1584,11 @@ union Instruction {
        BitField<47, 2, IsberdShift> shift;
    } isberd;

+    union {
+        BitField<8, 2, MembarType> type;
+        BitField<0, 2, MembarUnknown> unknown;
+    } membar;
+
    union {
        BitField<48, 1, u64> signed_a;
        BitField<38, 1, u64> is_byte_chunk_a;
@@ -1644,9 +1688,10 @@ public:
        ST_A,
        ST_L,
        ST_S,
-        ST,   // Store in generic memory
-        STG,  // Store in global memory
-        AL2P, // Transforms attribute memory into physical memory
+        ST,    // Store in generic memory
+        STG,   // Store in global memory
+        ATOMS, // Atomic operation on shared memory
+        AL2P,  // Transforms attribute memory into physical memory
        TEX,
        TEX_B,  // Texture Load Bindless
        TXQ,    // Texture Query
@@ -1669,6 +1714,7 @@ public:
        IPA,
        OUT_R, // Emit vertex/primitive
        ISBERD,
+        MEMBAR,
        VMAD,
        VSETP,
        FFMA_IMM, // Fused Multiply and Add
@@ -1785,6 +1831,7 @@ public:
        PSET,
        CSETP,
        R2P_IMM,
+        P2R_IMM,
        XMAD_IMM,
        XMAD_CR,
        XMAD_RC,
@@ -1930,7 +1977,7 @@ private:
            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
+            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1947,6 +1994,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
@@ -1957,7 +2005,7 @@ private:
            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
@@ -1969,6 +2017,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -2089,6 +2138,7 @@ private:
            INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
            INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
            INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
+            INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
            INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
            INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
            INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
    return *dma_pusher;
 }

-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
    // Synced GPU, is always in sync
    if (!is_async) {
        return;
    }
    MICROPROFILE_SCOPE(GPU_wait);
-    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
-    }
+    std::unique_lock lock{sync_mutex};
+    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
 }

 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
    syncpoints[syncpoint_id]++;
    std::lock_guard lock{sync_mutex};
+    sync_cv.notify_all();
    if (!syncpt_interrupts[syncpoint_id].empty()) {
        u32 value = syncpoints[syncpoint_id].load();
        auto it = syncpt_interrupts[syncpoint_id].begin();
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <atomic>
+#include <condition_variable>
 #include <list>
 #include <memory>
 #include <mutex>
@@ -181,7 +182,7 @@ public:
    virtual void WaitIdle() const = 0;

    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) const;
+    void WaitFence(u32 syncpoint_id, u32 value);

    void IncrementSyncPoint(u32 syncpoint_id);

@@ -312,6 +313,8 @@ private:

    std::mutex sync_mutex;

+    std::condition_variable sync_cv;
+
    const bool is_async;
 };

--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -5,6 +5,7 @@
 #include <mutex>

 #include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>

 #include "common/assert.h"
 #include "common/common_types.h"
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstring>
 #include <optional>
 #include <vector>

@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

 Device::Device() : base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
    const bool is_amd = vendor == "ATI Technologies Inc.";
    const bool is_intel = vendor == "Intel";
+    const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;

    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_broken_compute = is_intel;
+    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -271,12 +271,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            case Maxwell::ShaderProgram::Geometry:
                shader_program_manager->UseTrivialGeometryShader();
                break;
+            case Maxwell::ShaderProgram::Fragment:
+                shader_program_manager->UseTrivialFragmentShader();
+                break;
            default:
                break;
            }
            continue;
        }

+        // Currently these stages are not supported in the OpenGL backend.
+        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+        if (program == Maxwell::ShaderProgram::TesselationControl) {
+            continue;
+        } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+            continue;
+        }
+
        Shader shader{shader_cache.GetStageProgram(program)};

        // Stage indices are 0 - 5
@@ -506,6 +517,7 @@ void RasterizerOpenGL::Clear() {
    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);

    SyncViewport(clear_state);
+    SyncRasterizeEnable(clear_state);
    if (regs.clear_flags.scissor) {
        SyncScissorTest(clear_state);
    }
@@ -533,6 +545,7 @@ void RasterizerOpenGL::Clear() {
 void RasterizerOpenGL::DrawPrelude() {
    auto& gpu = system.GPU().Maxwell3D();

+    SyncRasterizeEnable(state);
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
@@ -1028,6 +1041,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        flip_y = !flip_y;
    }
    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+    state.clip_control.depth_mode =
+        regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+            ? GL_ZERO_TO_ONE
+            : GL_NEGATIVE_ONE_TO_ONE;
 }

 void RasterizerOpenGL::SyncClipEnabled(
@@ -1121,6 +1138,11 @@ void RasterizerOpenGL::SyncStencilTestState() {
    }
 }

+void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    current_state.rasterizer_discard = regs.rasterize_enable == 0;
+}
+
 void RasterizerOpenGL::SyncColorMask() {
    auto& maxwell3d = system.GPU().Maxwell3D();
    if (!maxwell3d.dirty.color_mask) {
@@ -1250,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
    const auto& regs = system.GPU().Maxwell3D().regs;
    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
    // in OpenGL).
+    state.point.program_control = regs.vp_point_size.enable != 0;
    state.point.size = std::max(1.0f, regs.point_size);
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -168,6 +168,9 @@ private:
    /// Syncs the point state to match the guest state
    void SyncPointState();

+    /// Syncs the rasterizer enable state to match the guest state
+    void SyncRasterizeEnable(OpenGLState& current_state);
+
    /// Syncs Color Mask
    void SyncColorMask();

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;

 namespace {

-// One UBO is always reserved for emulation values on staged shaders
-constexpr u32 STAGE_RESERVED_UBOS = 1;
-
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;

@@ -112,25 +109,25 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
 }

 /// Describes primitive behavior on geometry shaders
-constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
    switch (primitive_mode) {
    case GL_POINTS:
-        return {"points", "Points", 1};
+        return {"points", 1};
    case GL_LINES:
    case GL_LINE_STRIP:
-        return {"lines", "Lines", 2};
+        return {"lines", 2};
    case GL_LINES_ADJACENCY:
    case GL_LINE_STRIP_ADJACENCY:
-        return {"lines_adjacency", "LinesAdj", 4};
+        return {"lines_adjacency", 4};
    case GL_TRIANGLES:
    case GL_TRIANGLE_STRIP:
    case GL_TRIANGLE_FAN:
-        return {"triangles", "Triangles", 3};
+        return {"triangles", 3};
    case GL_TRIANGLES_ADJACENCY:
    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return {"triangles_adjacency", "TrianglesAdj", 6};
+        return {"triangles_adjacency", 6};
    default:
-        return {"points", "Invalid", 1};
+        return {"points", 1};
    }
 }

@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
    if (!code_b.empty()) {
        ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
    }
-    const auto entries = GLShader::GetEntries(ir);

    std::string source = fmt::format(R"(// {}
 #version 430 core
@@ -264,29 +260,24 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
                  "#extension GL_NV_shader_thread_group : require\n"
                  "#extension GL_NV_shader_thread_shuffle : require\n";
    }
-    source += '\n';

    if (shader_type == ShaderType::Geometry) {
-        const auto [glsl_topology, debug_name, max_vertices] =
-            GetPrimitiveDescription(variant.primitive_mode);
-
-        source += fmt::format("layout ({}) in;\n\n", glsl_topology);
+        const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
        source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
+        source += fmt::format("layout ({}) in;\n", glsl_topology);
    }
    if (shader_type == ShaderType::Compute) {
+        if (variant.local_memory_size > 0) {
+            source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
+                                  Common::AlignUp(variant.local_memory_size, 4) / 4);
+        }
        source +=
            fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
                        variant.block_x, variant.block_y, variant.block_z);

        if (variant.shared_memory_size > 0) {
-            // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
-            // avoids out of bound stores. Find out why shared memory size is being invalid.
-            source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
-        }
-
-        if (variant.local_memory_size > 0) {
-            source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
-                                  Common::AlignUp(variant.local_memory_size, 4) / 4);
+            // shared_memory_size is described in number of words
+            source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
        }
    }

@@ -319,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {

 CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
                           GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
-    : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
-      device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
-      shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
+    : RasterizerCacheObject{params.host_ptr}, system{params.system},
+      disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+      unique_identifier{params.unique_identifier}, shader_type{shader_type},
+      entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
    if (!params.precompiled_variants) {
        return;
    }
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -48,10 +48,10 @@ class ExprDecompiler;

 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };

-struct TextureAoffi {};
+struct TextureOffset {};
 struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
+using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;

 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
    static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -399,6 +399,7 @@ public:
        DeclareConstantBuffers();
        DeclareGlobalMemory();
        DeclareSamplers();
+        DeclareImages();
        DeclarePhysicalAttributeReader();

        code.AddLine("void execute_{}() {{", suffix);
@@ -750,6 +751,9 @@ private:

    Expression Visit(const Node& node) {
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
+            if (const auto amend_index = operation->GetAmendIndex()) {
+                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+            }
            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
            if (operation_index >= operation_decompilers.size()) {
                UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -871,6 +875,9 @@ private:
        }

        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+            if (const auto amend_index = conditional->GetAmendIndex()) {
+                Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
+            }
            // It's invalid to call conditional on nested nodes, use an operation instead
            code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
            ++code.scope;
@@ -1076,7 +1083,7 @@ private:
    }

    std::string GenerateTexture(Operation operation, const std::string& function_suffix,
-                                const std::vector<TextureIR>& extras) {
+                                const std::vector<TextureIR>& extras, bool separate_dc = false) {
        constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1089,9 +1096,12 @@ private:
        std::string expr = "texture" + function_suffix;
        if (!meta->aoffi.empty()) {
            expr += "Offset";
+        } else if (!meta->ptp.empty()) {
+            expr += "Offsets";
        }
        expr += '(' + GetSampler(meta->sampler) + ", ";
-        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) +
+                                      (has_shadow && !separate_dc ? 1 : 0) - 1);
        expr += '(';
        for (std::size_t i = 0; i < count; ++i) {
            expr += Visit(operation[i]).AsFloat();
@@ -1104,15 +1114,24 @@ private:
            expr += ", float(" + Visit(meta->array).AsInt() + ')';
        }
        if (has_shadow) {
-            expr += ", " + Visit(meta->depth_compare).AsFloat();
+            if (separate_dc) {
+                expr += "), " + Visit(meta->depth_compare).AsFloat();
+            } else {
+                expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
+            }
+        } else {
+            expr += ')';
        }
-        expr += ')';

        for (const auto& variant : extras) {
            if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                expr += GenerateTextureArgument(*argument);
-            } else if (std::holds_alternative<TextureAoffi>(variant)) {
-                expr += GenerateTextureAoffi(meta->aoffi);
+            } else if (std::holds_alternative<TextureOffset>(variant)) {
+                if (!meta->aoffi.empty()) {
+                    expr += GenerateTextureAoffi(meta->aoffi);
+                } else if (!meta->ptp.empty()) {
+                    expr += GenerateTexturePtp(meta->ptp);
+                }
            } else if (std::holds_alternative<TextureDerivates>(variant)) {
                expr += GenerateTextureDerivates(meta->derivates);
            } else {
@@ -1153,6 +1172,20 @@ private:
        return expr;
    }

+    std::string ReadTextureOffset(const Node& value) { // Returns one offset as GLSL text.
+        if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
+            // Emit the immediate as a signed integer literal in GLSL (AOFFI arguments are
+            // required to be constant by the standard).
+            return std::to_string(static_cast<s32>(immediate->GetValue()));
+        } else if (device.HasVariableAoffi()) {
+            // Non-constant offsets are only emitted when the device supports variable AOFFI.
+            return Visit(value).AsInt();
+        } else {
+            // Insert 0 on devices not supporting variable AOFFI.
+            return "0";
+        }
+    }
+
    std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
        if (aoffi.empty()) {
            return {};
@@ -1163,18 +1196,7 @@ private:
        expr += '(';

        for (std::size_t index = 0; index < aoffi.size(); ++index) {
-            const auto operand{aoffi.at(index)};
-            if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
-                // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
-                // to be constant by the standard).
-                expr += std::to_string(static_cast<s32>(immediate->GetValue()));
-            } else if (device.HasVariableAoffi()) {
-                // Avoid using variable AOFFI on unsupported devices.
-                expr += Visit(operand).AsInt();
-            } else {
-                // Insert 0 on devices not supporting variable AOFFI.
-                expr += '0';
-            }
+            expr += ReadTextureOffset(aoffi.at(index));
            if (index + 1 < aoffi.size()) {
                expr += ", ";
            }
@@ -1184,6 +1206,20 @@ private:
        return expr;
    }

+    std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
+        static constexpr std::size_t num_vectors = 4; // Number of ivec2 offsets emitted.
+        ASSERT(ptp.size() == num_vectors * 2); // Two nodes are consumed per ivec2.
+
+        std::string expr = ", ivec2[]("; // Starts with ", " and opens an ivec2 array.
+        for (std::size_t vector = 0; vector < num_vectors; ++vector) {
+            const bool has_next = vector + 1 < num_vectors; // Separator after all but the last.
+            expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
+                                ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
+        }
+        expr += ')';
+        return expr;
+    }
+
    std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
        if (derivates.empty()) {
            return {};
@@ -1682,7 +1718,7 @@ private:
        ASSERT(meta);

        std::string expr = GenerateTexture(
-            operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+            operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1694,7 +1730,7 @@ private:
        ASSERT(meta);

        std::string expr = GenerateTexture(
-            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
        if (meta->sampler.IsShadow()) {
            expr = "vec4(" + expr + ')';
        }
@@ -1702,13 +1738,18 @@ private:
    }

    Expression TextureGather(Operation operation) {
-        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        ASSERT(meta);
+        const auto& meta = std::get<MetaTexture>(operation.GetMeta());

-        const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
-        return {GenerateTexture(operation, "Gather",
-                                {TextureAoffi{}, TextureArgument{type, meta->component}}) +
-                    GetSwizzle(meta->element),
+        const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
+        const bool separate_dc = meta.sampler.IsShadow();
+
+        std::vector<TextureIR> ir;
+        if (meta.sampler.IsShadow()) {
+            ir = {TextureOffset{}};
+        } else {
+            ir = {TextureOffset{}, TextureArgument{type, meta.component}};
+        }
+        return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
                Type::Float};
    }

@@ -1780,7 +1821,8 @@ private:
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

-        std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
+        std::string expr =
+            GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
        return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
    }

@@ -1814,6 +1856,16 @@ private:
                Type::Uint};
    }

+    template <const std::string_view& opname, Type type>
+    Expression Atomic(Operation operation) { // Emits "atomic<opname>(smem[addr >> 2], value)".
+        ASSERT(stage == ShaderType::Compute); // Only expected in compute shaders.
+        auto& smem = std::get<SmemNode>(*operation[0]); // Operand 0 must hold an SmemNode.
+
+        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+                            Visit(operation[1]).As(type)),
+                type}; // The expression is typed like the value operand.
+    }
+
    Expression Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
        UNIMPLEMENTED_IF(!target);
@@ -1992,6 +2044,11 @@ private:
        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
    }

+    Expression MemoryBarrierGL(Operation) { // The operation argument is not inspected.
+        code.AddLine("memoryBarrier();"); // Added as its own line of generated code.
+        return {}; // Returns a default-constructed Expression.
+    }
+
    struct Func final {
        Func() = delete;
        ~Func() = delete;
@@ -2147,6 +2204,8 @@ private:
        &GLSLDecompiler::AtomicImage<Func::Xor>,
        &GLSLDecompiler::AtomicImage<Func::Exchange>,

+        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
@@ -2173,6 +2232,8 @@ private:

        &GLSLDecompiler::ThreadId,
        &GLSLDecompiler::ShuffleIndexed,
+
+        &GLSLDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

@@ -2264,7 +2325,7 @@ public:
    explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}

    void operator()(const ExprAnd& expr) {
-        inner += "( ";
+        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " && ";
        std::visit(*this, *expr.operand2);
@@ -2272,7 +2333,7 @@ public:
    }

    void operator()(const ExprOr& expr) {
-        inner += "( ";
+        inner += '(';
        std::visit(*this, *expr.operand1);
        inner += " || ";
        std::visit(*this, *expr.operand2);
@@ -2290,28 +2351,7 @@ public:
    }

    void operator()(const ExprCondCode& expr) {
-        const Node cc = decomp.ir.GetConditionCode(expr.cc);
-        std::string target;
-
-        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
-            const auto index = pred->GetIndex();
-            switch (index) {
-            case Tegra::Shader::Pred::NeverExecute:
-                target = "false";
-                break;
-            case Tegra::Shader::Pred::UnusedIndex:
-                target = "true";
-                break;
-            default:
-                target = decomp.GetPredicate(index);
-                break;
-            }
-        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
-            target = decomp.GetInternalFlag(flag->GetFlag());
-        } else {
-            UNREACHABLE();
-        }
-        inner += target;
+        inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
    }

    void operator()(const ExprVar& expr) {
@@ -2323,8 +2363,7 @@ public:
    }

    void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
-        inner +=
-            "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
+        inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
    }

    const std::string& GetResult() const {
@@ -2332,8 +2371,8 @@ public:
    }

 private:
-    std::string inner;
    GLSLDecompiler& decomp;
+    std::string inner;
 };

 class ASTDecompiler {
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -50,6 +50,10 @@ public:
        current_state.geometry_shader = 0;
    }

+    void UseTrivialFragmentShader() {
+        current_state.fragment_shader = 0; // NOTE(review): 0 presumably means "none" - confirm.
+    }
+
 private:
    struct PipelineState {
        bool operator==(const PipelineState& rhs) const {
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
 }

 void OpenGLState::ApplyPointSize() {
+    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
    if (UpdateValue(cur_state.point.size, point.size)) {
        glPointSize(point.size);
    }
@@ -182,6 +183,10 @@ void OpenGLState::ApplyCulling() {
    }
 }

+void OpenGLState::ApplyRasterizerDiscard() { // Applies the rasterizer_discard flag via Enable().
+    Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
+}
+
 void OpenGLState::ApplyColorMask() {
    if (!dirty.color_mask) {
        return;
@@ -411,8 +416,9 @@ void OpenGLState::ApplyAlphaTest() {
 }

 void OpenGLState::ApplyClipControl() {
-    if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
-        glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+    if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
+                  std::tie(clip_control.origin, clip_control.depth_mode))) {
+        glClipControl(clip_control.origin, clip_control.depth_mode);
    }
 }

@@ -454,6 +460,7 @@ void OpenGLState::Apply() {
    ApplyPointSize();
    ApplyFragmentColorClamp();
    ApplyMultisample();
+    ApplyRasterizerDiscard();
    ApplyColorMask();
    ApplyDepthClamp();
    ApplyViewport();
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -48,6 +48,8 @@ public:
        GLuint index = 0;
    } primitive_restart; // GL_PRIMITIVE_RESTART

+    bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
+
    struct ColorMask {
        GLboolean red_enabled = GL_TRUE;
        GLboolean green_enabled = GL_TRUE;
@@ -56,6 +58,7 @@ public:
    };
    std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        color_mask; // GL_COLOR_WRITEMASK
+
    struct {
        bool test_enabled = false; // GL_STENCIL_TEST
        struct {
@@ -128,7 +131,8 @@ public:
    std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;

    struct {
-        float size = 1.0f; // GL_POINT_SIZE
+        bool program_control = false; // GL_PROGRAM_POINT_SIZE
+        GLfloat size = 1.0f;          // GL_POINT_SIZE
    } point;

    struct {
@@ -150,6 +154,7 @@ public:

    struct {
        GLenum origin = GL_LOWER_LEFT;
+        GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
    } clip_control;

    OpenGLState();
@@ -173,6 +178,7 @@ public:
    void ApplyMultisample();
    void ApplySRgb();
    void ApplyCulling();
+    void ApplyRasterizerDiscard();
    void ApplyColorMask();
    void ApplyDepth();
    void ApplyPrimitiveRestart();
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {

 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                        // ABGR8U
-    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                                            // ABGR8S
+    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                                      // ABGR8S
    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},                         // ABGR8UI
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                        // B5G6R5U
    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},                  // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
    {GL_RGB32F, GL_RGB, GL_FLOAT, false},                                           // RGB32F
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                 // RGBA8_SRGB
    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                                       // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, false},                                                // RG8S
+    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                                          // RG8S
    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI
-    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false},                                   // RGBX16F
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F
    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+        u8* const mip_data = staging_buffer.data() + mip_offset;
+        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
        if (is_compressed) {
-            glGetCompressedTextureImage(texture.handle, level,
-                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                                        staging_buffer.data() + mip_offset);
+            glGetCompressedTextureImage(texture.handle, level, size, mip_data);
        } else {
-            glGetTextureImage(texture.handle, level, format, type,
-                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
-                              staging_buffer.data() + mip_offset);
+            glGetTextureImage(texture.handle, level, format, type, size, mip_data);
        }
    }
 }
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_POINTS;
    case Maxwell::PrimitiveTopology::Lines:
        return GL_LINES;
+    case Maxwell::PrimitiveTopology::LineLoop:
+        return GL_LINE_LOOP;
    case Maxwell::PrimitiveTopology::LineStrip:
        return GL_LINE_STRIP;
    case Maxwell::PrimitiveTopology::Triangles:
@@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_TRIANGLE_FAN;
    case Maxwell::PrimitiveTopology::Quads:
        return GL_QUADS;
-    default:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
-        UNREACHABLE();
-        return {};
+    case Maxwell::PrimitiveTopology::QuadStrip:
+        return GL_QUAD_STRIP;
+    case Maxwell::PrimitiveTopology::Polygon:
+        return GL_POLYGON;
+    case Maxwell::PrimitiveTopology::LinesAdjacency:
+        return GL_LINES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::LineStripAdjacency:
+        return GL_LINE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+        return GL_TRIANGLES_ADJACENCY;
+    case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+        return GL_TRIANGLE_STRIP_ADJACENCY;
+    case Maxwell::PrimitiveTopology::Patches:
+        return GL_PATCHES;
    }
+    UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology));
+    return GL_POINTS;
 }

 inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,19 +24,21 @@

 namespace OpenGL {

-static const char vertex_shader[] = R"(
-#version 150 core
+namespace {

-in vec2 vert_position;
-in vec2 vert_tex_coord;
-out vec2 frag_tex_coord;
+constexpr char vertex_shader[] = R"(
+#version 430 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;

 // This is a truncated 3x3 matrix for 2D transformations:
 // The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
 // The third column performs translation.
 // The third row could be used for projection, which we don't need in 2D. It hence is assumed to
 // implicitly be [0, 0, 1]
-uniform mat3x2 modelview_matrix;
+layout (location = 0) uniform mat3x2 modelview_matrix;

 void main() {
    // Multiply input position by the rotscale part of the matrix and then manually translate by
@@ -47,34 +49,29 @@ void main() {
 }
 )";

-static const char fragment_shader[] = R"(
-#version 150 core
+constexpr char fragment_shader[] = R"(
+#version 430 core

-in vec2 frag_tex_coord;
-out vec4 color;
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;

-uniform sampler2D color_texture;
+layout (binding = 0) uniform sampler2D color_texture;

 void main() {
-    // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
-    // support more framebuffer pixel formats.
    color = texture(color_texture, frag_tex_coord);
 }
 )";

-/**
- * Vertex structure that the drawn screen rectangles are composed of.
- */
-struct ScreenRectVertex {
-    ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) {
-        position[0] = x;
-        position[1] = y;
-        tex_coord[0] = u;
-        tex_coord[1] = v;
-    }
+constexpr GLint PositionLocation = 0;
+constexpr GLint TexCoordLocation = 1;
+constexpr GLint ModelViewMatrixLocation = 0;

-    GLfloat position[2];
-    GLfloat tex_coord[2];
+struct ScreenRectVertex {
+    constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
+        : position{{x, y}}, tex_coord{{u, v}} {}
+
+    std::array<GLfloat, 2> position;
+    std::array<GLfloat, 2> tex_coord;
 };

 /**
@@ -84,18 +81,82 @@ struct ScreenRectVertex {
 * The projection part of the matrix is trivial, hence these operations are represented
 * by a 3x2 matrix.
 */
-static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
+std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) {
    std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order

    // clang-format off
-    matrix[0] = 2.f / width; matrix[2] = 0.f;           matrix[4] = -1.f;
-    matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] = 1.f;
+    matrix[0] = 2.f / width; matrix[2] =  0.f;          matrix[4] = -1.f;
+    matrix[1] = 0.f;         matrix[3] = -2.f / height; matrix[5] =  1.f;
    // Last matrix row is implicitly assumed to be [0, 0, 1].
    // clang-format on

    return matrix;
 }

+const char* GetSource(GLenum source) { // Maps a GL_DEBUG_SOURCE_* enum to a printable name.
+    switch (source) {
+    case GL_DEBUG_SOURCE_API:
+        return "API";
+    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
+        return "WINDOW_SYSTEM";
+    case GL_DEBUG_SOURCE_SHADER_COMPILER:
+        return "SHADER_COMPILER";
+    case GL_DEBUG_SOURCE_THIRD_PARTY:
+        return "THIRD_PARTY";
+    case GL_DEBUG_SOURCE_APPLICATION:
+        return "APPLICATION";
+    case GL_DEBUG_SOURCE_OTHER:
+        return "OTHER";
+    default: // Any value without a case above.
+        UNREACHABLE();
+        return "Unknown source";
+    }
+}
+
+const char* GetType(GLenum type) { // Maps a GL_DEBUG_TYPE_* enum to a printable name.
+    switch (type) {
+    case GL_DEBUG_TYPE_ERROR:
+        return "ERROR";
+    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
+        return "DEPRECATED_BEHAVIOR";
+    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
+        return "UNDEFINED_BEHAVIOR";
+    case GL_DEBUG_TYPE_PORTABILITY:
+        return "PORTABILITY";
+    case GL_DEBUG_TYPE_PERFORMANCE:
+        return "PERFORMANCE";
+    case GL_DEBUG_TYPE_OTHER:
+        return "OTHER";
+    case GL_DEBUG_TYPE_MARKER:
+        return "MARKER";
+    default: // Any value without a case above.
+        UNREACHABLE();
+        return "Unknown type";
+    }
+}
+
+void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
+                           const GLchar* message, const void* user_param) {
+    const char format[] = "{} {} {}: {}"; // Filled as: source, type, id, message.
+    const char* const str_source = GetSource(source);
+    const char* const str_type = GetType(type);
+
+    switch (severity) { // Logs the GL debug message at a level chosen by severity.
+    case GL_DEBUG_SEVERITY_HIGH:
+        LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    case GL_DEBUG_SEVERITY_MEDIUM:
+        LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    case GL_DEBUG_SEVERITY_NOTIFICATION:
+    case GL_DEBUG_SEVERITY_LOW:
+        LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
+        break;
+    } // No default case: any other severity value logs nothing.
+}
+
+} // Anonymous namespace
+
 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
    : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}

@@ -138,9 +199,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    prev_state.Apply();
 }

-/**
- * Loads framebuffer from emulated memory into the active OpenGL texture.
- */
 void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
    // Framebuffer orientation handling
    framebuffer_transform_flags = framebuffer.transform_flags;
@@ -181,19 +239,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }

-/**
- * Fills active OpenGL texture with the given RGB color. Since the color is solid, the texture can
- * be 1x1 but will stretch across whatever it's rendered on.
- */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                const TextureInfo& texture) {
    const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 }

-/**
- * Initializes the OpenGL state and creates persistent objects.
- */
 void RendererOpenGL::InitOpenGLObjects() {
    glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
                 0.0f);
@@ -203,10 +254,6 @@ void RendererOpenGL::InitOpenGLObjects() {
    state.draw.shader_program = shader.handle;
    state.AllDirty();
    state.Apply();
-    uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
-    uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
-    attrib_position = glGetAttribLocation(shader.handle, "vert_position");
-    attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");

    // Generate VBO handle for drawing
    vertex_buffer.Create();
@@ -217,14 +264,14 @@ void RendererOpenGL::InitOpenGLObjects() {

    // Attach vertex data to VAO
    glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
+    glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
                              offsetof(ScreenRectVertex, position));
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
+    glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
                              offsetof(ScreenRectVertex, tex_coord));
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
+    glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
+    glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
+    glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
+    glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
    glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
                              sizeof(ScreenRectVertex));

@@ -331,18 +378,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
                  static_cast<f32>(screen_info.texture.height);
    }

-    std::array<ScreenRectVertex, 4> vertices = {{
+    const std::array vertices = {
        ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
        ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
        ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
        ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
-    }};
+    };

    state.textures[0] = screen_info.display_texture;
    state.framebuffer_srgb.enabled = screen_info.display_srgb;
    state.AllDirty();
    state.Apply();
-    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
+    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    // Restore default state
    state.framebuffer_srgb.enabled = false;
@@ -351,9 +398,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
    state.Apply();
 }

-/**
- * Draws the emulated screens to the emulator window.
- */
 void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    if (renderer_settings.set_background_color) {
        // Update background color before drawing
@@ -367,21 +411,17 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    glClear(GL_COLOR_BUFFER_BIT);

    // Set projection matrix
-    std::array<GLfloat, 3 * 2> ortho_matrix =
-        MakeOrthographicMatrix((float)layout.width, (float)layout.height);
-    glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
+    const std::array ortho_matrix =
+        MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+    glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());

-    // Bind texture in Texture Unit 0
-    glActiveTexture(GL_TEXTURE0);
-    glUniform1i(uniform_color_texture, 0);
-
-    DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top,
-                        (float)screen.GetWidth(), (float)screen.GetHeight());
+    DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
+                        static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
+                        static_cast<float>(screen.GetHeight()));

    m_current_frame++;
 }

-/// Updates the framerate
 void RendererOpenGL::UpdateFramerate() {}

 void RendererOpenGL::CaptureScreenshot() {
@@ -418,63 +458,6 @@ void RendererOpenGL::CaptureScreenshot() {
    renderer_settings.screenshot_requested = false;
 }

-static const char* GetSource(GLenum source) {
-#define RET(s)                                                                                     \
-    case GL_DEBUG_SOURCE_##s:                                                                      \
-        return #s
-    switch (source) {
-        RET(API);
-        RET(WINDOW_SYSTEM);
-        RET(SHADER_COMPILER);
-        RET(THIRD_PARTY);
-        RET(APPLICATION);
-        RET(OTHER);
-    default:
-        UNREACHABLE();
-        return "Unknown source";
-    }
-#undef RET
-}
-
-static const char* GetType(GLenum type) {
-#define RET(t)                                                                                     \
-    case GL_DEBUG_TYPE_##t:                                                                        \
-        return #t
-    switch (type) {
-        RET(ERROR);
-        RET(DEPRECATED_BEHAVIOR);
-        RET(UNDEFINED_BEHAVIOR);
-        RET(PORTABILITY);
-        RET(PERFORMANCE);
-        RET(OTHER);
-        RET(MARKER);
-    default:
-        UNREACHABLE();
-        return "Unknown type";
-    }
-#undef RET
-}
-
-static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
-                                  GLsizei length, const GLchar* message, const void* user_param) {
-    const char format[] = "{} {} {}: {}";
-    const char* const str_source = GetSource(source);
-    const char* const str_type = GetType(type);
-
-    switch (severity) {
-    case GL_DEBUG_SEVERITY_HIGH:
-        LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    case GL_DEBUG_SEVERITY_MEDIUM:
-        LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    case GL_DEBUG_SEVERITY_NOTIFICATION:
-    case GL_DEBUG_SEVERITY_LOW:
-        LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message);
-        break;
-    }
-}
-
 bool RendererOpenGL::Init() {
    Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

@@ -495,7 +478,6 @@ bool RendererOpenGL::Init() {
    return true;
 }

-/// Shutdown the renderer
 void RendererOpenGL::ShutDown() {}

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -59,21 +59,31 @@ public:
    void ShutDown() override;

 private:
+    /// Initializes the OpenGL state and creates persistent objects.
    void InitOpenGLObjects();
+
    void AddTelemetryFields();
+
    void CreateRasterizer();

    void ConfigureFramebufferTexture(TextureInfo& texture,
                                     const Tegra::FramebufferConfig& framebuffer);
+
+    /// Draws the emulated screens to the emulator window.
    void DrawScreen(const Layout::FramebufferLayout& layout);
+
    void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
+
+    /// Updates the framerate.
    void UpdateFramerate();

    void CaptureScreenshot();

-    // Loads framebuffer from emulated memory into the display information structure
+    /// Loads framebuffer from emulated memory into the active OpenGL texture.
    void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
-    // Fills active OpenGL texture with the given RGBA color.
+
+    /// Fills active OpenGL texture with the given RGBA color. Since the color is solid, the texture
+    /// can be 1x1 but will stretch across whatever it's rendered on.
    void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                    const TextureInfo& texture);

@@ -94,14 +104,6 @@ private:
    /// OpenGL framebuffer data
    std::vector<u8> gl_framebuffer_data;

-    // Shader uniform location indices
-    GLuint uniform_modelview_matrix;
-    GLuint uniform_color_texture;
-
-    // Shader attribute input indices
-    GLuint attrib_position;
-    GLuint attrib_tex_coord;
-
    /// Used for transforming the framebuffer orientation
    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
    Common::Rectangle<int> framebuffer_crop_rect;
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -6,16 +6,20 @@
 #include <vector>

 #include <fmt/format.h>
-
 #include <glad/glad.h>

-#include "common/assert.h"
 #include "common/common_types.h"
-#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/utils.h"

 namespace OpenGL {

+/// Record stored by VertexArrayPushBuffer. It is defined in this translation unit so the header
+/// only needs a forward declaration. Every field is value-initialized by default.
+struct VertexArrayPushBuffer::Entry {
+    GLuint binding_index{};
+    const GLuint* buffer{}; ///< Pointer to a buffer handle; the handle is not copied here
+    GLintptr offset{};
+    GLsizei stride{};
+};
+
 VertexArrayPushBuffer::VertexArrayPushBuffer() = default;

 VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
@@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() {
    }
 }

+/// Record stored by BindBuffersRangePushBuffer. It is defined in this translation unit so the
+/// header only needs a forward declaration. Fields are value-initialized by default so a
+/// default-constructed entry never carries indeterminate values, matching
+/// VertexArrayPushBuffer::Entry.
+struct BindBuffersRangePushBuffer::Entry {
+    GLuint binding{};
+    const GLuint* buffer{}; ///< Pointer to a buffer handle; the handle is not copied here
+    GLintptr offset{};
+    GLsizeiptr size{};
+};
+
 BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}

 BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -26,12 +26,7 @@ public:
    void Bind();

 private:
-    struct Entry {
-        GLuint binding_index{};
-        const GLuint* buffer{};
-        GLintptr offset{};
-        GLsizei stride{};
-    };
+    struct Entry;

    GLuint vao{};
    const GLuint* index_buffer{};
@@ -50,12 +45,7 @@ public:
    void Bind();

 private:
-    struct Entry {
-        GLuint binding;
-        const GLuint* buffer;
-        GLintptr offset;
-        GLsizeiptr size;
-    };
+    struct Entry;

    GLenum target;
    std::vector<Entry> entries;
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -0,0 +1,302 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <tuple>
+
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+
+namespace Vulkan {
+
+namespace {
+
+/// Builds the depth/stencil part of the fixed pipeline state from the Maxwell registers.
+/// When two-sided stencil is disabled, the back face reuses the front face configuration.
+constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) {
+    using StencilFace = FixedPipelineState::StencilFace;
+
+    const StencilFace front(regs.stencil_front_op_fail, regs.stencil_front_op_zfail,
+                            regs.stencil_front_op_zpass, regs.stencil_front_func_func);
+    const StencilFace back =
+        regs.stencil_two_side_enable
+            ? StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail,
+                          regs.stencil_back_op_zpass, regs.stencil_back_func_func)
+            : front;
+
+    // The enable registers are treated as set only when they hold exactly 1.
+    const bool depth_test = regs.depth_test_enable == 1;
+    const bool depth_write = regs.depth_write_enabled == 1;
+    const bool depth_bounds = regs.depth_bounds_enable == 1;
+    const bool stencil_test = regs.stencil_enable == 1;
+
+    return FixedPipelineState::DepthStencil(depth_test, depth_write, depth_bounds, stencil_test,
+                                            regs.depth_test_func, front, back);
+}
+
+/// Builds the input assembly state. The point size is only recorded when drawing points; every
+/// other topology stores 0.0f.
+constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) {
+    const bool is_points = regs.draw.topology == Maxwell::PrimitiveTopology::Points;
+    const float point_size = is_points ? regs.point_size : 0.0f;
+    return FixedPipelineState::InputAssembly(regs.draw.topology, regs.primitive_restart.enabled,
+                                             point_size);
+}
+
+/// Builds the blending state of a single render target.
+/// The color write mask is always taken from the registers. The blend equation and factors fall
+/// back to a disabled "One/Zero, Add" configuration when the render target is beyond the active
+/// count or has blending disabled.
+constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState(
+    const Maxwell& regs, std::size_t render_target) {
+    using Attachment = FixedPipelineState::BlendingAttachment;
+    using Equation = Maxwell::Blend::Equation;
+    using Factor = Maxwell::Blend::Factor;
+
+    // With a common color mask every render target uses the mask of the first one.
+    const std::size_t mask_index = regs.color_mask_common ? 0 : render_target;
+    const auto& mask = regs.color_mask[mask_index];
+    const std::array<bool, 4> components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0};
+
+    // The enable flag is only read for render targets that are in use.
+    const bool in_use = render_target < regs.rt_control.count;
+    if (!in_use || !regs.blend.enable[render_target]) {
+        return Attachment(false, Equation::Add, Factor::One, Factor::Zero, Equation::Add,
+                          Factor::One, Factor::Zero, components);
+    }
+
+    if (regs.independent_blend_enable) {
+        // Each render target has its own equations and factors.
+        const auto& blend = regs.independent_blend[render_target];
+        return Attachment(true, blend.equation_rgb, blend.factor_source_rgb,
+                          blend.factor_dest_rgb, blend.equation_a, blend.factor_source_a,
+                          blend.factor_dest_a, components);
+    }
+
+    // All render targets share the global blend configuration.
+    const auto& blend = regs.blend;
+    return Attachment(true, blend.equation_rgb, blend.factor_source_rgb, blend.factor_dest_rgb,
+                      blend.equation_a, blend.factor_source_a, blend.factor_dest_a, components);
+}
+
+/// Builds the color blending state: the blend constants, the number of active render targets
+/// and the per-attachment blending state of every render target slot.
+constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) {
+    const std::array<float, 4> blend_constants = {regs.blend_color.r, regs.blend_color.g,
+                                                  regs.blend_color.b, regs.blend_color.a};
+    const std::array<FixedPipelineState::BlendingAttachment, Maxwell::NumRenderTargets>
+        attachments = {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1),
+                       GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3),
+                       GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5),
+                       GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)};
+    return FixedPipelineState::ColorBlending(blend_constants, regs.rt_control.count, attachments);
+}
+
+/// Builds the tessellation state from the patch size and the tessellation mode register.
+constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) {
+    const auto& mode = regs.tess_mode;
+    const bool clockwise = mode.cw != 0;
+    return FixedPipelineState::Tessellation(regs.patch_vertices, mode.prim, mode.spacing,
+                                            clockwise);
+}
+
+// Primitive classes used to pick a polygon offset enable flag. The values are indices into the
+// {point, line, fill} enable array built in GetRasterizerState.
+constexpr std::size_t Point = 0;
+constexpr std::size_t Line = 1;
+constexpr std::size_t Polygon = 2;
+
+// Maps a primitive topology (used as an index, in the order listed in the trailing comments)
+// to its primitive class. GetRasterizerState indexes this table with the raw topology value
+// without a bounds check, so it must have one entry per topology value.
+constexpr std::array PolygonOffsetEnableLUT = {
+    Point,   // Points
+    Line,    // Lines
+    Line,    // LineLoop
+    Line,    // LineStrip
+    Polygon, // Triangles
+    Polygon, // TriangleStrip
+    Polygon, // TriangleFan
+    Polygon, // Quads
+    Polygon, // QuadStrip
+    Polygon, // Polygon
+    Line,    // LinesAdjacency
+    Line,    // LineStripAdjacency
+    Polygon, // TrianglesAdjacency
+    Polygon, // TriangleStripAdjacency
+    Polygon, // Patches
+};
+
+/// Builds the rasterizer state: culling, depth bias, depth clamp, NDC depth convention and the
+/// front face winding, which is swapped when the triangle rasterization flip applies.
+constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) {
+    using FrontFace = Maxwell::Cull::FrontFace;
+
+    // Pick the polygon offset enable flag that matches the class of the drawn primitive.
+    const std::array offset_enables = {regs.polygon_offset_point_enable,
+                                       regs.polygon_offset_line_enable,
+                                       regs.polygon_offset_fill_enable};
+    const auto topology_index = static_cast<std::size_t>(regs.draw.topology.Value());
+    const bool depth_bias_enabled = offset_enables[PolygonOffsetEnableLUT[topology_index]];
+
+    // Clamping is enabled when either the near or the far plane requests it.
+    const auto& clip = regs.view_volume_clip_control;
+    const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
+
+    const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+
+    const bool flip_winding = regs.screen_y_control.triangle_rast_flip != 0 &&
+                              regs.viewport_transform[0].scale_y > 0.0f;
+    FrontFace front_face = regs.cull.front_face;
+    if (flip_winding) {
+        switch (front_face) {
+        case FrontFace::CounterClockWise:
+            front_face = FrontFace::ClockWise;
+            break;
+        case FrontFace::ClockWise:
+            front_face = FrontFace::CounterClockWise;
+            break;
+        default:
+            // Any other value is passed through untouched.
+            break;
+        }
+    }
+
+    return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
+                                          depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
+                                          front_face);
+}
+
+} // Anonymous namespace
+
+/// Hashes the binding by packing index, stride and divisor at fixed bit offsets.
+/// Bindings that compare equal always produce the same hash.
+std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept {
+    // The shift amounts are constants on purpose: a data-dependent shift (for example by the
+    // stride) is undefined behavior once the amount reaches the width of the shifted type.
+    return static_cast<std::size_t>(index) ^ (static_cast<std::size_t>(stride) << 8) ^
+           (static_cast<std::size_t>(divisor) << 24);
+}
+
+/// Two bindings are equal when index, stride and divisor all match.
+bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept {
+    return index == rhs.index && stride == rhs.stride && divisor == rhs.divisor;
+}
+
+/// Hashes the attribute by XOR-ing every field shifted to its own bit offset.
+std::size_t FixedPipelineState::VertexAttribute::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(index);
+    hash ^= static_cast<std::size_t>(buffer) << 13;
+    hash ^= static_cast<std::size_t>(type) << 22;
+    hash ^= static_cast<std::size_t>(size) << 31;
+    hash ^= static_cast<std::size_t>(offset) << 36;
+    return hash;
+}
+
+/// Two attributes are equal when index, buffer, type, size and offset all match.
+bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept {
+    return index == rhs.index && buffer == rhs.buffer && type == rhs.type && size == rhs.size &&
+           offset == rhs.offset;
+}
+
+/// Hashes the stencil face from all four fields that take part in operator==:
+/// the three stencil actions and the test function, each at its own bit offset.
+std::size_t FixedPipelineState::StencilFace::Hash() const noexcept {
+    return static_cast<std::size_t>(action_stencil_fail) ^
+           (static_cast<std::size_t>(action_depth_fail) << 4) ^
+           (static_cast<std::size_t>(action_depth_pass) << 20) ^
+           (static_cast<std::size_t>(test_func) << 36);
+}
+
+/// Two stencil faces are equal when the three actions and the test function all match.
+bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept {
+    return action_stencil_fail == rhs.action_stencil_fail &&
+           action_depth_fail == rhs.action_depth_fail &&
+           action_depth_pass == rhs.action_depth_pass && test_func == rhs.test_func;
+}
+
+/// Hashes the attachment: the enable flag, both equations and the four factors are spaced five
+/// bits apart, followed by one bit per color write mask component starting at bit 35.
+std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(enable);
+    hash ^= static_cast<std::size_t>(rgb_equation) << 5;
+    hash ^= static_cast<std::size_t>(src_rgb_func) << 10;
+    hash ^= static_cast<std::size_t>(dst_rgb_func) << 15;
+    hash ^= static_cast<std::size_t>(a_equation) << 20;
+    hash ^= static_cast<std::size_t>(src_a_func) << 25;
+    hash ^= static_cast<std::size_t>(dst_a_func) << 30;
+    for (std::size_t i = 0; i < components.size(); ++i) {
+        hash ^= static_cast<std::size_t>(components[i]) << (35 + i);
+    }
+    return hash;
+}
+
+/// Two attachments are equal when the enable flag, equations, factors and the color write mask
+/// all match.
+bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const
+    noexcept {
+    return enable == rhs.enable && rgb_equation == rhs.rgb_equation &&
+           src_rgb_func == rhs.src_rgb_func && dst_rgb_func == rhs.dst_rgb_func &&
+           a_equation == rhs.a_equation && src_a_func == rhs.src_a_func &&
+           dst_a_func == rhs.dst_a_func && components == rhs.components;
+}
+
+/// Hashes the vertex input: seeds the hash with both counts, then combines the hash of every
+/// used binding followed by every used attribute. Unused array slots are ignored.
+std::size_t FixedPipelineState::VertexInput::Hash() const noexcept {
+    std::size_t hash = num_bindings ^ (num_attributes << 32);
+
+    const auto bindings_end = bindings.begin() + num_bindings;
+    for (auto it = bindings.begin(); it != bindings_end; ++it) {
+        boost::hash_combine(hash, it->Hash());
+    }
+
+    const auto attributes_end = attributes.begin() + num_attributes;
+    for (auto it = attributes.begin(); it != attributes_end; ++it) {
+        boost::hash_combine(hash, it->Hash());
+    }
+    return hash;
+}
+
+/// Two vertex inputs are equal when they use the same number of bindings and attributes and
+/// every used entry matches. Unused array slots are ignored.
+bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept {
+    if (num_bindings != rhs.num_bindings || num_attributes != rhs.num_attributes) {
+        return false;
+    }
+    const bool same_bindings =
+        std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin());
+    return same_bindings && std::equal(attributes.begin(), attributes.begin() + num_attributes,
+                                       rhs.attributes.begin());
+}
+
+/// Hashes the input assembly state from the topology, the raw bit pattern of the point size and
+/// the primitive restart flag.
+std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept {
+    // Copy the float into an integer of the same size. Copying it into a wider integer would
+    // place the bits in a byte-order dependent position before the shift below.
+    static_assert(sizeof(u32) == sizeof(point_size));
+    u32 point_size_bits = 0;
+    std::memcpy(&point_size_bits, &point_size, sizeof(point_size_bits));
+
+    return (static_cast<std::size_t>(topology) << 24) ^
+           (static_cast<std::size_t>(point_size_bits) << 32) ^
+           static_cast<std::size_t>(primitive_restart_enable);
+}
+
+/// Two input assembly states are equal when topology, primitive restart and point size match.
+bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept {
+    return topology == rhs.topology && primitive_restart_enable == rhs.primitive_restart_enable &&
+           point_size == rhs.point_size;
+}
+
+/// Hashes the tessellation state by XOR-ing every field shifted to its own bit offset.
+std::size_t FixedPipelineState::Tessellation::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(patch_control_points);
+    hash ^= static_cast<std::size_t>(primitive) << 6;
+    hash ^= static_cast<std::size_t>(spacing) << 8;
+    hash ^= static_cast<std::size_t>(clockwise) << 10;
+    return hash;
+}
+
+/// Two tessellation states are equal when patch size, primitive, spacing and winding match.
+bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept {
+    return patch_control_points == rhs.patch_control_points && primitive == rhs.primitive &&
+           spacing == rhs.spacing && clockwise == rhs.clockwise;
+}
+
+/// Hashes the rasterizer state: the four boolean flags occupy bits 0-3, the cull face starts at
+/// bit 24 and the front face at bit 48.
+std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(cull_enable);
+    hash ^= static_cast<std::size_t>(depth_bias_enable) << 1;
+    hash ^= static_cast<std::size_t>(depth_clamp_enable) << 2;
+    hash ^= static_cast<std::size_t>(ndc_minus_one_to_one) << 3;
+    hash ^= static_cast<std::size_t>(cull_face) << 24;
+    hash ^= static_cast<std::size_t>(front_face) << 48;
+    return hash;
+}
+
+/// Two rasterizer states are equal when every flag, the cull face and the front face match.
+bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
+    return cull_enable == rhs.cull_enable && depth_bias_enable == rhs.depth_bias_enable &&
+           depth_clamp_enable == rhs.depth_clamp_enable &&
+           ndc_minus_one_to_one == rhs.ndc_minus_one_to_one && cull_face == rhs.cull_face &&
+           front_face == rhs.front_face;
+}
+
+/// Hashes the depth/stencil state: the flags and the depth test function are packed into the
+/// seed, then the front and back stencil face hashes are combined into it, in that order.
+std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {
+    std::size_t hash = static_cast<std::size_t>(depth_test_enable);
+    hash ^= static_cast<std::size_t>(depth_write_enable) << 1;
+    hash ^= static_cast<std::size_t>(depth_bounds_enable) << 2;
+    hash ^= static_cast<std::size_t>(stencil_enable) << 3;
+    hash ^= static_cast<std::size_t>(depth_test_function) << 4;
+
+    boost::hash_combine(hash, front_stencil.Hash());
+    boost::hash_combine(hash, back_stencil.Hash());
+    return hash;
+}
+
+/// Two depth/stencil states are equal when every flag, the depth test function and both
+/// stencil faces match.
+bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept {
+    return depth_test_enable == rhs.depth_test_enable &&
+           depth_write_enable == rhs.depth_write_enable &&
+           depth_bounds_enable == rhs.depth_bounds_enable &&
+           depth_test_function == rhs.depth_test_function &&
+           stencil_enable == rhs.stencil_enable && front_stencil == rhs.front_stencil &&
+           back_stencil == rhs.back_stencil;
+}
+
+/// Hashes the color blending state: seeds the hash with the attachment count and combines the
+/// hash of every used attachment. Unused attachment slots are ignored.
+std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept {
+    std::size_t hash = attachments_count << 13;
+    const auto used_end = attachments.begin() + attachments_count;
+    for (auto it = attachments.begin(); it != used_end; ++it) {
+        boost::hash_combine(hash, it->Hash());
+    }
+    return hash;
+}
+
+/// Two color blending states are equal when they use the same number of attachments and every
+/// used attachment matches. Unused attachment slots are ignored.
+bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept {
+    if (attachments_count != rhs.attachments_count) {
+        return false;
+    }
+    return std::equal(attachments.begin(), attachments.begin() + attachments_count,
+                      rhs.attachments.begin());
+}
+
+/// Hashes the whole fixed pipeline state by combining the hash of every sub-state, starting
+/// from a zero seed, in declaration order.
+std::size_t FixedPipelineState::Hash() const noexcept {
+    const std::array<std::size_t, 6> parts = {
+        vertex_input.Hash(), input_assembly.Hash(), tessellation.Hash(),
+        rasterizer.Hash(),   depth_stencil.Hash(),  color_blending.Hash(),
+    };
+
+    std::size_t hash = 0;
+    for (const std::size_t part : parts) {
+        boost::hash_combine(hash, part);
+    }
+    return hash;
+}
+
+/// Two fixed pipeline states are equal when every sub-state compares equal.
+bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
+    return vertex_input == rhs.vertex_input && input_assembly == rhs.input_assembly &&
+           tessellation == rhs.tessellation && rasterizer == rhs.rasterizer &&
+           depth_stencil == rhs.depth_stencil && color_blending == rhs.color_blending;
+}
+
+/// Builds a FixedPipelineState from the Maxwell registers.
+/// Only the register-derived sub-states are filled in; vertex_input is not assigned here.
+FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
+    FixedPipelineState state;
+    // Each sub-state is derived independently from the registers.
+    state.color_blending = GetColorBlendingState(regs);
+    state.depth_stencil = GetDepthStencilState(regs);
+    state.rasterizer = GetRasterizerState(regs);
+    state.tessellation = GetTessellationState(regs);
+    state.input_assembly = GetInputAssemblyState(regs);
+    return state;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -0,0 +1,284 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+
+#include "common/common_types.h"
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
+
+namespace Vulkan {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Optimize this structure.
+
+struct FixedPipelineState {
+    using PixelFormat = VideoCore::Surface::PixelFormat;
+
+    struct VertexBinding {
+        constexpr VertexBinding(u32 index, u32 stride, u32 divisor)
+            : index{index}, stride{stride}, divisor{divisor} {}
+        VertexBinding() = default;
+
+        u32 index;
+        u32 stride;
+        u32 divisor;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const VertexBinding& rhs) const noexcept;
+
+        bool operator!=(const VertexBinding& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct VertexAttribute {
+        constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type,
+                                  Maxwell::VertexAttribute::Size size, u32 offset)
+            : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {}
+        VertexAttribute() = default;
+
+        u32 index;
+        u32 buffer;
+        Maxwell::VertexAttribute::Type type;
+        Maxwell::VertexAttribute::Size size;
+        u32 offset;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const VertexAttribute& rhs) const noexcept;
+
+        bool operator!=(const VertexAttribute& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct StencilFace {
+        constexpr StencilFace(Maxwell::StencilOp action_stencil_fail,
+                              Maxwell::StencilOp action_depth_fail,
+                              Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func)
+            : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail},
+              action_depth_pass{action_depth_pass}, test_func{test_func} {}
+        StencilFace() = default;
+
+        Maxwell::StencilOp action_stencil_fail;
+        Maxwell::StencilOp action_depth_fail;
+        Maxwell::StencilOp action_depth_pass;
+        Maxwell::ComparisonOp test_func;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const StencilFace& rhs) const noexcept;
+
+        bool operator!=(const StencilFace& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct BlendingAttachment {
+        constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation,
+                                     Maxwell::Blend::Factor src_rgb_func,
+                                     Maxwell::Blend::Factor dst_rgb_func,
+                                     Maxwell::Blend::Equation a_equation,
+                                     Maxwell::Blend::Factor src_a_func,
+                                     Maxwell::Blend::Factor dst_a_func,
+                                     std::array<bool, 4> components)
+            : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func},
+              dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func},
+              dst_a_func{dst_a_func}, components{components} {}
+        BlendingAttachment() = default;
+
+        bool enable;
+        Maxwell::Blend::Equation rgb_equation;
+        Maxwell::Blend::Factor src_rgb_func;
+        Maxwell::Blend::Factor dst_rgb_func;
+        Maxwell::Blend::Equation a_equation;
+        Maxwell::Blend::Factor src_a_func;
+        Maxwell::Blend::Factor dst_a_func;
+        std::array<bool, 4> components;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const BlendingAttachment& rhs) const noexcept;
+
+        bool operator!=(const BlendingAttachment& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    /// Vertex bindings and attributes used by a pipeline.
+    /// Only the first num_bindings / num_attributes entries of the arrays take part in
+    /// hashing and comparison; the remaining slots are ignored.
+    struct VertexInput {
+        std::size_t num_bindings = 0;   ///< Number of used entries at the front of bindings
+        std::size_t num_attributes = 0; ///< Number of used entries at the front of attributes
+        std::array<VertexBinding, Maxwell::NumVertexArrays> bindings;
+        std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+
+        /// Hash of the counts and of every used binding and attribute.
+        std::size_t Hash() const noexcept;
+
+        /// Compares the counts and every used binding and attribute.
+        bool operator==(const VertexInput& rhs) const noexcept;
+
+        bool operator!=(const VertexInput& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct InputAssembly {
+        constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable,
+                                float point_size)
+            : topology{topology}, primitive_restart_enable{primitive_restart_enable},
+              point_size{point_size} {}
+        InputAssembly() = default;
+
+        Maxwell::PrimitiveTopology topology;
+        bool primitive_restart_enable;
+        float point_size;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const InputAssembly& rhs) const noexcept;
+
+        bool operator!=(const InputAssembly& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct Tessellation {
+        constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive,
+                               Maxwell::TessellationSpacing spacing, bool clockwise)
+            : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing},
+              clockwise{clockwise} {}
+        Tessellation() = default;
+
+        u32 patch_control_points;
+        Maxwell::TessellationPrimitive primitive;
+        Maxwell::TessellationSpacing spacing;
+        bool clockwise;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const Tessellation& rhs) const noexcept;
+
+        bool operator!=(const Tessellation& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct Rasterizer {
+        constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
+                             bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
+                             Maxwell::Cull::FrontFace front_face)
+            : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
+              depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
+              cull_face{cull_face}, front_face{front_face} {}
+        Rasterizer() = default;
+
+        bool cull_enable;
+        bool depth_bias_enable;
+        bool depth_clamp_enable;
+        bool ndc_minus_one_to_one;
+        Maxwell::Cull::CullFace cull_face;
+        Maxwell::Cull::FrontFace front_face;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const Rasterizer& rhs) const noexcept;
+
+        bool operator!=(const Rasterizer& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    struct DepthStencil {
+        constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable,
+                               bool depth_bounds_enable, bool stencil_enable,
+                               Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil,
+                               StencilFace back_stencil)
+            : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable},
+              depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable},
+              depth_test_function{depth_test_function}, front_stencil{front_stencil},
+              back_stencil{back_stencil} {}
+        DepthStencil() = default;
+
+        bool depth_test_enable;
+        bool depth_write_enable;
+        bool depth_bounds_enable;
+        bool stencil_enable;
+        Maxwell::ComparisonOp depth_test_function;
+        StencilFace front_stencil;
+        StencilFace back_stencil;
+
+        std::size_t Hash() const noexcept;
+
+        bool operator==(const DepthStencil& rhs) const noexcept;
+
+        bool operator!=(const DepthStencil& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    /// Blending state of the color attachments.
+    /// Only the first attachments_count entries of attachments take part in hashing and
+    /// comparison.
+    struct ColorBlending {
+        /// NOTE: blend_constants is accepted but not stored; it is not part of this state.
+        constexpr ColorBlending(
+            std::array<float, 4> blend_constants, std::size_t attachments_count,
+            std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments)
+            : attachments_count{attachments_count}, attachments{attachments} {}
+        ColorBlending() = default;
+
+        std::size_t attachments_count; ///< Number of used entries at the front of attachments
+        std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
+
+        /// Hash of the attachment count and of every used attachment.
+        std::size_t Hash() const noexcept;
+
+        /// Compares the used attachments of both states.
+        bool operator==(const ColorBlending& rhs) const noexcept;
+
+        bool operator!=(const ColorBlending& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+    };
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const FixedPipelineState& rhs) const noexcept;
+
+    bool operator!=(const FixedPipelineState& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    VertexInput vertex_input;
+    InputAssembly input_assembly;
+    Tessellation tessellation;
+    Rasterizer rasterizer;
+    DepthStencil depth_stencil;
+    ColorBlending color_blending;
+};
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexBinding>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexAttribute>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::StencilFace>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::BlendingAttachment>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::VertexInput>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::InputAssembly>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::Tessellation>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::Rasterizer>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::DepthStencil>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState::ColorBlending>);
+static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
+
+FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
+
+} // namespace Vulkan
+
+namespace std {
+
+/// std::hash specialization so FixedPipelineState can be used as a key in unordered
+/// containers. It forwards to FixedPipelineState::Hash.
+template <>
+struct hash<Vulkan::FixedPipelineState> {
+    std::size_t operator()(const Vulkan::FixedPipelineState& state) const noexcept {
+        const std::size_t value = state.Hash();
+        return value;
+    }
+};
+
+} // namespace std
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,8 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
    return {};
 }

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter) {
    switch (wrap_mode) {
    case Tegra::Texture::WrapMode::Wrap:
        return vk::SamplerAddressMode::eRepeat;
@@ -55,10 +56,20 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    case Tegra::Texture::WrapMode::Border:
        return vk::SamplerAddressMode::eClampToBorder;
    case Tegra::Texture::WrapMode::Clamp:
-        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
-        // eClampToBorder to get the border color of the texture, and then sample the edge to
-        // manually mix them. However the shader part of this is not yet implemented.
-        return vk::SamplerAddressMode::eClampToBorder;
+        if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
+            // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
+            // by sending an invalid enumeration.
+            return static_cast<vk::SamplerAddressMode>(0xcafe);
+        }
+        // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
+        switch (filter) {
+        case Tegra::Texture::TextureFilter::Nearest:
+            return vk::SamplerAddressMode::eClampToEdge;
+        case Tegra::Texture::TextureFilter::Linear:
+            return vk::SamplerAddressMode::eClampToBorder;
+        }
+        UNREACHABLE();
+        return vk::SamplerAddressMode::eClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
        return vk::SamplerAddressMode::eMirrorClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
@@ -96,106 +107,140 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar

 } // namespace Sampler

+namespace {
+
+enum : u32 { Attachable = 1, Storage = 2 }; // Bit flags combined into FormatTuple::usage
+
 struct FormatTuple {
    vk::Format format; ///< Vulkan format
-    bool attachable;   ///< True when this format can be used as an attachment
-};
-
-static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {vk::Format::eA8B8G8R8UnormPack32, true},    // ABGR8U
-    {vk::Format::eUndefined, false},             // ABGR8S
-    {vk::Format::eUndefined, false},             // ABGR8UI
-    {vk::Format::eB5G6R5UnormPack16, false},     // B5G6R5U
-    {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
-    {vk::Format::eUndefined, false},             // A1B5G5R5U
-    {vk::Format::eR8Unorm, true},                // R8U
-    {vk::Format::eUndefined, false},             // R8UI
-    {vk::Format::eUndefined, false},             // RGBA16F
-    {vk::Format::eUndefined, false},             // RGBA16U
-    {vk::Format::eUndefined, false},             // RGBA16UI
-    {vk::Format::eUndefined, false},             // R11FG11FB10F
-    {vk::Format::eUndefined, false},             // RGBA32UI
-    {vk::Format::eBc1RgbaUnormBlock, false},     // DXT1
-    {vk::Format::eBc2UnormBlock, false},         // DXT23
-    {vk::Format::eBc3UnormBlock, false},         // DXT45
-    {vk::Format::eBc4UnormBlock, false},         // DXN1
-    {vk::Format::eUndefined, false},             // DXN2UNORM
-    {vk::Format::eUndefined, false},             // DXN2SNORM
-    {vk::Format::eUndefined, false},             // BC7U
-    {vk::Format::eUndefined, false},             // BC6H_UF16
-    {vk::Format::eUndefined, false},             // BC6H_SF16
-    {vk::Format::eUndefined, false},             // ASTC_2D_4X4
-    {vk::Format::eUndefined, false},             // BGRA8
-    {vk::Format::eUndefined, false},             // RGBA32F
-    {vk::Format::eUndefined, false},             // RG32F
-    {vk::Format::eUndefined, false},             // R32F
-    {vk::Format::eUndefined, false},             // R16F
-    {vk::Format::eUndefined, false},             // R16U
-    {vk::Format::eUndefined, false},             // R16S
-    {vk::Format::eUndefined, false},             // R16UI
-    {vk::Format::eUndefined, false},             // R16I
-    {vk::Format::eUndefined, false},             // RG16
-    {vk::Format::eUndefined, false},             // RG16F
-    {vk::Format::eUndefined, false},             // RG16UI
-    {vk::Format::eUndefined, false},             // RG16I
-    {vk::Format::eUndefined, false},             // RG16S
-    {vk::Format::eUndefined, false},             // RGB32F
-    {vk::Format::eA8B8G8R8SrgbPack32, true},     // RGBA8_SRGB
-    {vk::Format::eUndefined, false},             // RG8U
-    {vk::Format::eUndefined, false},             // RG8S
-    {vk::Format::eUndefined, false},             // RG32UI
-    {vk::Format::eUndefined, false},             // RGBX16F
-    {vk::Format::eUndefined, false},             // R32UI
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X8
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X5
-    {vk::Format::eUndefined, false},             // ASTC_2D_5X4
-
-    // Compressed sRGB formats
-    {vk::Format::eUndefined, false}, // BGRA8_SRGB
-    {vk::Format::eUndefined, false}, // DXT1_SRGB
-    {vk::Format::eUndefined, false}, // DXT23_SRGB
-    {vk::Format::eUndefined, false}, // DXT45_SRGB
-    {vk::Format::eUndefined, false}, // BC7U_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
+    int usage;         ///< Bitmask of the Attachable and Storage flags
+} constexpr tex_format_tuples[] = {
+    {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage},    // ABGR8U
+    {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage},    // ABGR8S
+    {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage},     // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, {}},                        // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
+    {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage},    // A1B5G5R5U (flipped with swizzle)
+    {vk::Format::eR8Unorm, Attachable | Storage},                // R8U
+    {vk::Format::eR8Uint, Attachable | Storage},                 // R8UI
+    {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage},     // RGBA16F
+    {vk::Format::eR16G16B16A16Unorm, Attachable | Storage},      // RGBA16U
+    {vk::Format::eR16G16B16A16Uint, Attachable | Storage},       // RGBA16UI
+    {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage},  // R11FG11FB10F
+    {vk::Format::eR32G32B32A32Uint, Attachable | Storage},       // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, {}},                        // DXT1
+    {vk::Format::eBc2UnormBlock, {}},                            // DXT23
+    {vk::Format::eBc3UnormBlock, {}},                            // DXT45
+    {vk::Format::eBc4UnormBlock, {}},                            // DXN1
+    {vk::Format::eBc5UnormBlock, {}},                            // DXN2UNORM
+    {vk::Format::eBc5SnormBlock, {}},                            // DXN2SNORM
+    {vk::Format::eBc7UnormBlock, {}},                            // BC7U
+    {vk::Format::eBc6HUfloatBlock, {}},                          // BC6H_UF16
+    {vk::Format::eBc6HSfloatBlock, {}},                          // BC6H_SF16
+    {vk::Format::eAstc4x4UnormBlock, {}},                        // ASTC_2D_4X4
+    {vk::Format::eB8G8R8A8Unorm, {}},                            // BGRA8
+    {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage},     // RGBA32F
+    {vk::Format::eR32G32Sfloat, Attachable | Storage},           // RG32F
+    {vk::Format::eR32Sfloat, Attachable | Storage},              // R32F
+    {vk::Format::eR16Sfloat, Attachable | Storage},              // R16F
+    {vk::Format::eR16Unorm, Attachable | Storage},               // R16U
+    {vk::Format::eUndefined, {}},                                // R16S
+    {vk::Format::eUndefined, {}},                                // R16UI
+    {vk::Format::eUndefined, {}},                                // R16I
+    {vk::Format::eR16G16Unorm, Attachable | Storage},            // RG16
+    {vk::Format::eR16G16Sfloat, Attachable | Storage},           // RG16F
+    {vk::Format::eUndefined, {}},                                // RG16UI
+    {vk::Format::eUndefined, {}},                                // RG16I
+    {vk::Format::eR16G16Snorm, Attachable | Storage},            // RG16S
+    {vk::Format::eUndefined, {}},                                // RGB32F
+    {vk::Format::eR8G8B8A8Srgb, Attachable},                     // RGBA8_SRGB
+    {vk::Format::eR8G8Unorm, Attachable | Storage},              // RG8U
+    {vk::Format::eR8G8Snorm, Attachable | Storage},              // RG8S
+    {vk::Format::eR32G32Uint, Attachable | Storage},             // RG32UI
+    {vk::Format::eUndefined, {}},                                // RGBX16F
+    {vk::Format::eR32Uint, Attachable | Storage},                // R32UI
+    {vk::Format::eAstc8x8UnormBlock, {}},                        // ASTC_2D_8X8
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_8X5
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_5X4
+    {vk::Format::eUndefined, {}},                                // BGRA8_SRGB
+    {vk::Format::eBc1RgbaSrgbBlock, {}},                         // DXT1_SRGB
+    {vk::Format::eUndefined, {}},                                // DXT23_SRGB
+    {vk::Format::eBc3SrgbBlock, {}},                             // DXT45_SRGB
+    {vk::Format::eBc7SrgbBlock, {}},                             // BC7U_SRGB
+    {vk::Format::eR4G4B4A4UnormPack16, Attachable},              // R4G4B4A4U
+    {vk::Format::eAstc4x4SrgbBlock, {}},                         // ASTC_2D_4X4_SRGB
+    {vk::Format::eAstc8x8SrgbBlock, {}},                         // ASTC_2D_8X8_SRGB
+    {vk::Format::eAstc8x5SrgbBlock, {}},                         // ASTC_2D_8X5_SRGB
+    {vk::Format::eAstc5x4SrgbBlock, {}},                         // ASTC_2D_5X4_SRGB
+    {vk::Format::eAstc5x5UnormBlock, {}},                        // ASTC_2D_5X5
+    {vk::Format::eAstc5x5SrgbBlock, {}},                         // ASTC_2D_5X5_SRGB
+    {vk::Format::eAstc10x8UnormBlock, {}},                       // ASTC_2D_10X8
+    {vk::Format::eAstc10x8SrgbBlock, {}},                        // ASTC_2D_10X8_SRGB
+    {vk::Format::eAstc6x6UnormBlock, {}},                        // ASTC_2D_6X6
+    {vk::Format::eAstc6x6SrgbBlock, {}},                         // ASTC_2D_6X6_SRGB
+    {vk::Format::eAstc10x10UnormBlock, {}},                      // ASTC_2D_10X10
+    {vk::Format::eAstc10x10SrgbBlock, {}},                       // ASTC_2D_10X10_SRGB
+    {vk::Format::eAstc12x12UnormBlock, {}},                      // ASTC_2D_12X12
+    {vk::Format::eAstc12x12SrgbBlock, {}},                       // ASTC_2D_12X12_SRGB
+    {vk::Format::eAstc8x6UnormBlock, {}},                        // ASTC_2D_8X6
+    {vk::Format::eAstc8x6SrgbBlock, {}},                         // ASTC_2D_8X6_SRGB
+    {vk::Format::eAstc6x5UnormBlock, {}},                        // ASTC_2D_6X5
+    {vk::Format::eAstc6x5SrgbBlock, {}},                         // ASTC_2D_6X5_SRGB
+    {vk::Format::eE5B9G9R9UfloatPack32, {}},                     // E5B9G9R9F

    // Depth formats
-    {vk::Format::eD32Sfloat, true}, // Z32F
-    {vk::Format::eD16Unorm, true},  // Z16
+    {vk::Format::eD32Sfloat, Attachable}, // Z32F
+    {vk::Format::eD16Unorm, Attachable},  // Z16

    // DepthStencil formats
-    {vk::Format::eD24UnormS8Uint, true}, // Z24S8
-    {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
-    {vk::Format::eUndefined, false},     // Z32FS8
-}};
+    {vk::Format::eD24UnormS8Uint, Attachable},  // Z24S8
+    {vk::Format::eD24UnormS8Uint, Attachable},  // S8Z24 (emulated)
+    {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8
+};
+static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);

-static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+constexpr bool IsZetaFormat(PixelFormat pixel_format) {
    return pixel_format >= PixelFormat::MaxColorFormat &&
           pixel_format < PixelFormat::MaxDepthStencilFormat;
 }

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format) {
-    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+} // Anonymous namespace

-    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
-    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
-                         "Unimplemented texture format with pixel format={}",
-                         static_cast<u32>(pixel_format));
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));

-    auto usage = vk::FormatFeatureFlagBits::eSampledImage |
-                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
-    if (tuple.attachable) {
-        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
-                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+    if (tuple.format == vk::Format::eUndefined) {
+        UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
+                          static_cast<u32>(pixel_format));
+        return {vk::Format::eA8B8G8R8UnormPack32, true, true}; // ABGR8, attachable, storage
    }
-    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+
+    // Without optimal ASTC support, substitute ABGR8 (sRGB or UNORM to match the pixel format)
+    if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
+        tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
+                           ? vk::Format::eA8B8G8R8SrgbPack32
+                           : vk::Format::eA8B8G8R8UnormPack32;
+    }
+    const bool attachable = tuple.usage & Attachable;
+    const bool storage = tuple.usage & Storage;
+
+    vk::FormatFeatureFlags usage; // Features requested from device.GetSupportedFormat below
+    if (format_type == FormatType::Buffer) {
+        usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer |
+                vk::FormatFeatureFlagBits::eUniformTexelBuffer;
+    } else {
+        usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst |
+                vk::FormatFeatureFlagBits::eTransferSrc;
+        if (attachable) {
+            usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                                : vk::FormatFeatureFlagBits::eColorAttachment;
+        }
+        if (storage) {
+            usage |= vk::FormatFeatureFlagBits::eStorageImage;
+        }
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
 }

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
@@ -215,7 +260,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
    return {};
 }

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology) {
    switch (topology) {
    case Maxwell::PrimitiveTopology::Points:
        return vk::PrimitiveTopology::ePointList;
@@ -227,6 +273,13 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return vk::PrimitiveTopology::eTriangleList;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return vk::PrimitiveTopology::eTriangleStrip;
+    case Maxwell::PrimitiveTopology::TriangleFan:
+        return vk::PrimitiveTopology::eTriangleFan;
+    case Maxwell::PrimitiveTopology::Quads:
+        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT once that extension is released
+        return vk::PrimitiveTopology::eTriangleList; // Quads map to a triangle list until then
+    case Maxwell::PrimitiveTopology::Patches:
+        return vk::PrimitiveTopology::ePatchList;
    default:
        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
        return {};
@@ -236,37 +289,113 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
    switch (type) {
    case Maxwell::VertexAttribute::Type::SignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+            return vk::Format::eA2B10G10R10SnormPack32;
+        default:
+            break;
+        }
        break;
    case Maxwell::VertexAttribute::Type::UnsignedNorm:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Unorm;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return vk::Format::eR8G8B8A8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Unorm;
        default:
            break;
        }
        break;
    case Maxwell::VertexAttribute::Type::SignedInt:
-        break;
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sint;
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Sint;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sint;
+        default:
+            break;
+        }
+        break; // Unhandled signed sizes must not fall through into the UnsignedInt table
    case Maxwell::VertexAttribute::Type::UnsignedInt:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Uint;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Uint;
        default:
            break;
        }
+        break; // Unhandled unsigned sizes must not fall through into the UnsignedScaled table
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uscaled;
+        default:
+            break;
+        }
+        break;
    case Maxwell::VertexAttribute::Type::SignedScaled:
        break;
    case Maxwell::VertexAttribute::Type::Float:
        switch (size) {
-        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
-            return vk::Format::eR32G32B32A32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32_32:
-            return vk::Format::eR32G32B32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32:
-            return vk::Format::eR32G32Sfloat;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sfloat;
        default:
            break;
        }
@@ -308,11 +437,14 @@ vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
    return {};
 }

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
    switch (index_format) {
    case Maxwell::IndexFormat::UnsignedByte:
-        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
-        return vk::IndexType::eUint16;
+        if (!device.IsExtIndexTypeUint8Supported()) {
+            UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
+            return vk::IndexType::eUint16; // Fallback type only; no index data is converted here
+        }
+        return vk::IndexType::eUint8EXT;
    case Maxwell::IndexFormat::UnsignedShort:
        return vk::IndexType::eUint16;
    case Maxwell::IndexFormat::UnsignedInt:
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -4,7 +4,6 @@

 #pragma once

-#include <utility>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -23,24 +22,31 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);

 vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter);

 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

 } // namespace Sampler

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format);
+struct FormatInfo {
+    vk::Format format; ///< Vulkan format chosen for the requested pixel format
+    bool attachable;   ///< True when the format is flagged as usable as an attachment
+    bool storage;      ///< True when the format is flagged as usable as a storage image
+};
+
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology);

 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

 vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);

 vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -0,0 +1,77 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class VKBlitScreen;
+class VKDevice;
+class VKFence;
+class VKMemoryManager;
+class VKResourceManager;
+class VKSwapchain;
+class VKScheduler;
+class VKImage;
+
+/// Plain aggregate holding an image pointer, its dimensions and an sRGB flag.
+struct VKScreenInfo {
+    VKImage* image{};
+    u32 width{};
+    u32 height{};
+    bool is_srgb{};
+};
+
+/// Vulkan implementation of the VideoCore::RendererBase interface.
+class RendererVulkan final : public VideoCore::RendererBase {
+public:
+    explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+    ~RendererVulkan() override;
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+
+    /// Initialize the renderer
+    bool Init() override;
+
+    /// Shutdown the renderer
+    void ShutDown() override;
+
+private:
+    std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
+        const vk::DispatchLoaderDynamic& dldi);
+
+    bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
+
+    void Report() const;
+
+    Core::System& system;
+
+    vk::Instance instance;
+    vk::SurfaceKHR surface;
+
+    VKScreenInfo screen_info;
+
+    // Destroyed in reverse declaration order: blit_screen first, debug_callback last of this group.
+    UniqueDebugUtilsMessengerEXT debug_callback;
+    std::unique_ptr<VKDevice> device;
+    std::unique_ptr<VKSwapchain> swapchain;
+    std::unique_ptr<VKMemoryManager> memory_manager;
+    std::unique_ptr<VKResourceManager> resource_manager;
+    std::unique_ptr<VKScheduler> scheduler;
+    std::unique_ptr<VKBlitScreen> blit_screen;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/renderer_vulkan/shaders/blit.frag
@@ -0,0 +1,24 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 frag_tex_coord; // Matches the location 0 output of blit.vert
+
+layout (location = 0) out vec4 color;
+
+layout (binding = 1) uniform sampler2D color_texture;
+
+void main() {
+    color = texture(color_texture, frag_tex_coord); // Emit the sampled texel unmodified
+}
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/renderer_vulkan/shaders/blit.vert
@@ -0,0 +1,28 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+
+layout (location = 0) out vec2 frag_tex_coord; // Read by blit.frag at location 0
+
+layout (set = 0, binding = 0) uniform MatrixBlock {
+    mat4 modelview_matrix;
+};
+
+void main() {
+    gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); // 2D position, z = 0, w = 1
+    frag_tex_coord = vert_tex_coord; // Pass the texture coordinate through unchanged
+}
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ b/src/video_core/renderer_vulkan/shaders/quad_array.comp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+
+layout (local_size_x = 1024) in;
+
+layout (std430, set = 0, binding = 0) buffer OutputBuffer {
+    uint output_indexes[]; // Receives 6 indices per quad
+};
+
+layout (push_constant) uniform PushConstants {
+    uint first; // Offset added to every generated index
+};
+
+void main() {
+    uint primitive = gl_GlobalInvocationID.x; // One invocation per quad
+    if (primitive * 6 >= output_indexes.length()) { // This quad's first slot is out of range
+        return;
+    }
+
+    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); // Triangles (0,1,2) and (0,2,3)
+    for (uint vertex = 0; vertex < 6; ++vertex) {
+        uint index = first + primitive * 4 + quad_map[vertex]; // 4 source vertices per quad
+        output_indexes[primitive * 6 + vertex] = index;
+    }
+}
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/renderer_vulkan/shaders/uint8.comp
@@ -0,0 +1,33 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+/*
+ * Build instructions:
+ * $ glslangValidator -V $THIS_FILE -o output.spv
+ * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
+ * $ xxd -i optimized.spv
+ *
+ * Then copy that bytecode to the C++ file
+ */
+
+#version 460 core
+#extension GL_EXT_shader_16bit_storage : require
+#extension GL_EXT_shader_8bit_storage : require
+
+layout (local_size_x = 1024) in;
+
+layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
+    uint8_t input_indexes[];
+};
+
+layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
+    uint16_t output_indexes[];
+};
+
+void main() {
+    uint id = gl_GlobalInvocationID.x; // One invocation per index
+    if (id < input_indexes.length()) { // Ignore invocations past the end of the input
+        output_indexes[id] = uint16_t(input_indexes[id]); // Widen the 8-bit index to 16 bits
+    }
+}
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -0,0 +1,627 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/math_util.h"
+
+#include "core/core.h"
+#include "core/frontend/emu_window.h"
+#include "core/memory.h"
+
+#include "video_core/gpu.h"
+#include "video_core/morton.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+#include "video_core/surface.h"
+
+namespace Vulkan {
+
+namespace {
+
+// Generated from the "shaders/" directory; read the build instructions there.
+constexpr u8 blit_vertex_code[] = {
+    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
+    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+    0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
+    0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
+    0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
+    0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
+    0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+    0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
+    0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
+    0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
+    0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
+    0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
+    0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
+    0x38, 0x00, 0x01, 0x00};
+
+constexpr u8 blit_fragment_code[] = {
+    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
+    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+    0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
+    0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+    0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
+    0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
+    0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+
+// Vertex of the presented rectangle: a 2D position followed by a 2D texture coordinate.
+struct ScreenRectVertex {
+    ScreenRectVertex() = default;
+    explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{x, y}, tex_coord{u, v} {}
+
+    std::array<f32, 2> position;
+    std::array<f32, 2> tex_coord;
+
+    // Describes vertex binding 0: per-vertex data with a stride of sizeof(ScreenRectVertex).
+    static vk::VertexInputBindingDescription GetDescription() {
+        constexpr auto input_rate = vk::VertexInputRate::eVertex;
+        return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), input_rate);
+    }
+
+    // Describes attribute locations 0 (position) and 1 (tex_coord), both two 32-bit floats read
+    // from binding 0.
+    static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() {
+        using Attribute = vk::VertexInputAttributeDescription;
+        constexpr auto format = vk::Format::eR32G32Sfloat;
+        const Attribute position_attr(0, 0, format, offsetof(ScreenRectVertex, position));
+        const Attribute tex_coord_attr(1, 0, format, offsetof(ScreenRectVertex, tex_coord));
+        return {position_attr, tex_coord_attr};
+    }
+};
+
+// Builds the 16 floats of a 4x4 matrix with 2/width and 2/height on the diagonal and
+// (-1, -1, 0, 1) as the last four values.
+// NOTE(review): presumably read column-major by the vertex shader, mapping
+// [0, width] x [0, height] to [-1, 1] x [-1, 1] - confirm against the shader source.
+constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
+    const f32 scale_x = 2.f / width;
+    const f32 scale_y = 2.f / height;
+    // clang-format off
+    return { scale_x, 0.f,     0.f, 0.f,
+             0.f,     scale_y, 0.f, 0.f,
+             0.f,     0.f,     1.f, 0.f,
+            -1.f,    -1.f,     0.f, 1.f};
+    // clang-format on
+}
+
+// Returns the number of bytes one pixel of the given framebuffer occupies, derived from its
+// GPU pixel format.
+std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
+    using namespace VideoCore::Surface;
+    const auto pixel_format = PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+    return GetBytesPerPixel(pixel_format);
+}
+
+// Returns stride * height * bytes-per-pixel for the given framebuffer.
+std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
+    const auto stride = static_cast<std::size_t>(framebuffer.stride);
+    const auto height = static_cast<std::size_t>(framebuffer.height);
+    return stride * height * GetBytesPerPixel(framebuffer);
+}
+
+// Maps the guest framebuffer pixel format to the Vulkan format used for the raw images.
+// Formats without a mapping are reported as unimplemented and treated as ABGR8.
+vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) {
+    using GuestFormat = Tegra::FramebufferConfig::PixelFormat;
+
+    switch (framebuffer.pixel_format) {
+    case GuestFormat::ABGR8:
+        return vk::Format::eA8B8G8R8UnormPack32;
+    case GuestFormat::RGB565:
+        return vk::Format::eR5G6B5UnormPack16;
+    default:
+        break;
+    }
+
+    UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+                      static_cast<u32>(framebuffer.pixel_format));
+    return vk::Format::eA8B8G8R8UnormPack32;
+}
+
+} // Anonymous namespace
+
+// Layout of the fixed-size data at the start of the staging buffer. Draw() copies one instance
+// to the beginning of the mapped buffer, and the member offsets are used for the uniform
+// descriptor and the vertex buffer binding, so the member order is part of the contract.
+struct VKBlitScreen::BufferData {
+    // Uniform block bound to descriptor binding 0.
+    struct {
+        std::array<f32, 4 * 4> modelview_matrix;
+    } uniform;
+
+    // The four vertices of the presented rectangle.
+    std::array<ScreenRectVertex, 4> vertices;
+
+    // Unaligned image data goes here
+    // (raw image uploads start at offset sizeof(BufferData), see GetRawImageOffset).
+};
+
+// Stores the collaborators, creates one fence watch per swapchain image and then builds the
+// static and dynamic resources.
+VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                           VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                           VKSwapchain& swapchain, VKScheduler& scheduler,
+                           const VKScreenInfo& screen_info)
+    : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
+      scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
+    // One watch per swapchain image; each is armed in Draw() with the scheduler's fence.
+    watches.resize(image_count);
+    for (auto& watch : watches) {
+        watch = std::make_unique<VKFenceWatch>();
+    }
+
+    CreateStaticResources();
+    CreateDynamicResources();
+}
+
+// Defaulted out of line in the implementation file.
+// NOTE(review): presumably so members of types that are only forward-declared in the header are
+// destroyed where those types are complete - confirm.
+VKBlitScreen::~VKBlitScreen() = default;
+
+// Rebuilds the resources created by CreateDynamicResources() (render pass, framebuffers and
+// graphics pipeline), which depend on the swapchain's format, size and image views.
+void VKBlitScreen::Recreate() {
+    CreateDynamicResources();
+}
+
+// Records the commands that draw a guest framebuffer onto the current swapchain image.
+//
+// framebuffer:     guest framebuffer configuration (address, offset, size, stride, format, ...).
+// use_accelerated: when true, screen_info.image is sampled directly; when false, guest memory is
+//                  deswizzled into the staging buffer and uploaded to this frame's raw image.
+//
+// Returns the scheduler's current fence and the semaphore associated with the current swapchain
+// image.
+std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
+                                                       bool use_accelerated) {
+    // (Re)create the staging buffer and raw images if the framebuffer size changed.
+    RefreshResources(framebuffer);
+
+    // Finish any pending renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    const std::size_t image_index = swapchain.GetImageIndex();
+    watches[image_index]->Watch(scheduler.GetFence());
+
+    VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
+
+    UpdateDescriptorSet(image_index, blit_image->GetPresentView());
+
+    BufferData data;
+    SetUniformData(data, framebuffer);
+    SetVertexData(data, framebuffer);
+
+    // The uniform block and the vertices live at the very start of the staging buffer.
+    auto map = buffer_commit->Map();
+    std::memcpy(map.GetAddress(), &data, sizeof(data));
+
+    if (!use_accelerated) {
+        const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
+
+        const auto pixel_format =
+            VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+        const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
+        const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
+        // Make sure cached GPU writes to the framebuffer reach guest memory before reading it.
+        rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
+
+        // TODO(Rodrigo): Read this from HLE
+        constexpr u32 block_height_log2 = 4;
+        VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
+                                 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
+                                 map.GetAddress() + image_offset, host_ptr);
+
+        blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer,
+                               vk::AccessFlagBits::eTransferWrite,
+                               vk::ImageLayout::eTransferDstOptimal);
+
+        // The deswizzled rows in the staging buffer are framebuffer.stride texels wide, which can
+        // be larger than the visible width. Pass that pitch to the copy so rows are not sheared.
+        // Vulkan only accepts a row length of zero (tightly packed) or one that is at least the
+        // copied width, so fall back to zero otherwise.
+        const u32 row_length = framebuffer.stride >= framebuffer.width ? framebuffer.stride : 0;
+        const vk::BufferImageCopy copy(image_offset, row_length, 0,
+                                       {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0},
+                                       {framebuffer.width, framebuffer.height, 1});
+        scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(),
+                          copy](auto cmdbuf, auto& dld) {
+            cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal,
+                                     {copy}, dld);
+        });
+    }
+    map.Release();
+
+    // The fragment shader samples the image, so move it to a shader-readable layout.
+    blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader,
+                           vk::AccessFlagBits::eShaderRead,
+                           vk::ImageLayout::eShaderReadOnlyOptimal);
+
+    // Everything the recorded lambda needs is captured by value as plain handles.
+    scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
+                      descriptor_set = descriptor_sets[image_index], buffer = *buffer,
+                      size = swapchain.GetSize(), pipeline = *pipeline,
+                      layout = *pipeline_layout](auto cmdbuf, auto& dld) {
+        const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}};
+        const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1,
+                                                    &clear_color);
+
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+        // Viewport and scissor are dynamic states of the pipeline; cover the whole swapchain.
+        cmdbuf.setViewport(
+            0,
+            {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}},
+            dld);
+        cmdbuf.setScissor(0, {{{0, 0}, size}}, dld);
+
+        cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld);
+        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {},
+                                  dld);
+        // Four vertices drawn as a triangle strip form the screen rectangle.
+        cmdbuf.draw(4, 1, 0, 0, dld);
+        cmdbuf.endRenderPass(dld);
+    });
+
+    return {scheduler.GetFence(), *semaphores[image_index]};
+}
+
+// Creates the resources that are built once in the constructor and not rebuilt by Recreate().
+// The order matters: the descriptor sets need the pool and the set layout, and the pipeline
+// layout needs the set layout.
+void VKBlitScreen::CreateStaticResources() {
+    CreateShaders();
+    CreateSemaphores();
+    CreateDescriptorPool();
+    CreateDescriptorSetLayout();
+    CreateDescriptorSets();
+    CreatePipelineLayout();
+    CreateSampler();
+}
+
+// Creates the resources that depend on the swapchain. The render pass comes first because both
+// the framebuffers and the graphics pipeline reference it.
+void VKBlitScreen::CreateDynamicResources() {
+    CreateRenderPass();
+    CreateFramebuffers();
+    CreateGraphicsPipeline();
+}
+
+// Recreates the staging buffer and the raw images when none exist yet or when the framebuffer
+// dimensions differ from the ones they were created with.
+// NOTE(review): only width and height are compared; a change of pixel format or stride with the
+// same dimensions does not trigger a rebuild - confirm this is intended.
+void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
+    const bool same_size = framebuffer.width == raw_width && framebuffer.height == raw_height;
+    const bool up_to_date = same_size && !raw_images.empty();
+    if (up_to_date) {
+        return;
+    }
+
+    raw_width = framebuffer.width;
+    raw_height = framebuffer.height;
+
+    // Drop the old resources before allocating the replacements.
+    ReleaseRawImages();
+    CreateStagingBuffer(framebuffer);
+    CreateRawImages(framebuffer);
+}
+
+// Builds the vertex and fragment shader modules from the SPIR-V byte arrays embedded in this
+// file.
+void VKBlitScreen::CreateShaders() {
+    vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
+    fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
+}
+
+// Creates one semaphore per swapchain image.
+void VKBlitScreen::CreateSemaphores() {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+
+    semaphores.resize(image_count);
+    for (auto& semaphore : semaphores) {
+        semaphore = logical.createSemaphoreUnique({}, nullptr, dispatch);
+    }
+}
+
+// Creates a descriptor pool that can hold one set per swapchain image, each consisting of one
+// uniform buffer and one combined image sampler.
+void VKBlitScreen::CreateDescriptorPool() {
+    const auto count = static_cast<u32>(image_count);
+
+    const std::array<vk::DescriptorPoolSize, 2> sizes{
+        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, count},
+        vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, count}};
+    const vk::DescriptorPoolCreateInfo create_info({}, count, static_cast<u32>(sizes.size()),
+                                                   sizes.data());
+
+    const auto logical = device.GetLogical();
+    descriptor_pool =
+        logical.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader());
+}
+
+// Creates the single-subpass render pass used to draw onto a swapchain image.
+void VKBlitScreen::CreateRenderPass() {
+    // One color attachment in the swapchain format: cleared on load, stored on completion, its
+    // previous contents discarded (undefined initial layout) and left ready for presentation.
+    const vk::AttachmentDescription color_attachment(
+        {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear,
+        vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
+        vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined,
+        vk::ImageLayout::ePresentSrcKHR);
+
+    // Attachment 0 is used as a color attachment during the subpass.
+    const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal);
+
+    // Graphics subpass with no input, resolve, depth-stencil or preserved attachments.
+    const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0,
+                                                     nullptr, 1, &color_attachment_ref, nullptr,
+                                                     nullptr, 0, nullptr);
+
+    // Order the subpass's color attachment reads and writes after earlier color attachment
+    // output performed outside of the render pass.
+    const vk::SubpassDependency dependency(
+        VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput,
+        vk::PipelineStageFlagBits::eColorAttachmentOutput, {},
+        vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {});
+
+    const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description,
+                                                 1, &dependency);
+
+    const auto dev = device.GetLogical();
+    renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader());
+}
+
+// Creates the descriptor set layout: binding 0 is a uniform buffer visible to the vertex stage
+// and binding 1 is a combined image sampler visible to the fragment stage.
+void VKBlitScreen::CreateDescriptorSetLayout() {
+    using Binding = vk::DescriptorSetLayoutBinding;
+
+    const Binding uniform_binding(0, vk::DescriptorType::eUniformBuffer, 1,
+                                  vk::ShaderStageFlagBits::eVertex, nullptr);
+    const Binding sampler_binding(1, vk::DescriptorType::eCombinedImageSampler, 1,
+                                  vk::ShaderStageFlagBits::eFragment, nullptr);
+    const std::array<Binding, 2> bindings{uniform_binding, sampler_binding};
+
+    const vk::DescriptorSetLayoutCreateInfo create_info({}, static_cast<u32>(bindings.size()),
+                                                        bindings.data());
+
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    descriptor_set_layout = logical.createDescriptorSetLayoutUnique(create_info, nullptr, dispatch);
+}
+
+// Allocates one descriptor set per swapchain image from the descriptor pool, all of them using
+// the same set layout.
+void VKBlitScreen::CreateDescriptorSets() {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+
+    // The allocation parameters are the same for every set.
+    const vk::DescriptorSetLayout layout = *descriptor_set_layout;
+    const vk::DescriptorSetAllocateInfo allocate_info(*descriptor_pool, 1, &layout);
+
+    descriptor_sets.resize(image_count);
+    for (auto& descriptor_set : descriptor_sets) {
+        const vk::Result result =
+            logical.allocateDescriptorSets(&allocate_info, &descriptor_set, dispatch);
+        ASSERT(result == vk::Result::eSuccess);
+    }
+}
+
+// Creates a pipeline layout with the single descriptor set layout and no push constant ranges.
+void VKBlitScreen::CreatePipelineLayout() {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+
+    const vk::PipelineLayoutCreateInfo create_info({}, 1, &descriptor_set_layout.get(), 0,
+                                                   nullptr);
+    pipeline_layout = logical.createPipelineLayoutUnique(create_info, nullptr, dispatch);
+}
+
+// Creates the graphics pipeline that draws the screen rectangle. It references the current
+// render pass, so it is rebuilt together with the other dynamic resources.
+void VKBlitScreen::CreateGraphicsPipeline() {
+    // Vertex and fragment stages, both using the "main" entry point.
+    const std::array shader_stages = {
+        vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader,
+                                          "main", nullptr),
+        vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader,
+                                          "main", nullptr)};
+
+    // Vertex input matches the ScreenRectVertex layout.
+    const auto vertex_binding_description = ScreenRectVertex::GetDescription();
+    const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
+    const vk::PipelineVertexInputStateCreateInfo vertex_input(
+        {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()),
+        vertex_attrs_description.data());
+
+    // The rectangle is drawn as a triangle strip without primitive restart.
+    const vk::PipelineInputAssemblyStateCreateInfo input_assembly(
+        {}, vk::PrimitiveTopology::eTriangleStrip, false);
+
+    // Set a dummy viewport, it's going to be replaced by dynamic states.
+    const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
+    const vk::Rect2D scissor({0, 0}, {1, 1});
+
+    const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor);
+
+    // Filled polygons, no culling, no depth bias, line width of 1.
+    const vk::PipelineRasterizationStateCreateInfo rasterizer(
+        {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone,
+        vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f);
+
+    // Single-sampled rendering.
+    const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1,
+                                                               false, 0.0f, nullptr, false, false);
+
+    // Blending is disabled; all four color channels are written.
+    const vk::PipelineColorBlendAttachmentState color_blend_attachment(
+        false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
+        vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
+        vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
+            vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
+
+    // Logic operations are disabled as well.
+    const vk::PipelineColorBlendStateCreateInfo color_blending(
+        {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f});
+
+    // Viewport and scissor are supplied while recording the draw.
+    const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport,
+                                                            vk::DynamicState::eScissor};
+
+    const vk::PipelineDynamicStateCreateInfo dynamic_state(
+        {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
+
+    // No tessellation and no depth-stencil state; the pipeline targets subpass 0 of renderpass.
+    const vk::GraphicsPipelineCreateInfo pipeline_ci(
+        {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input,
+        &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr,
+        &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld);
+}
+
+// Creates the sampler used to read the presented image: linear filtering, clamp-to-border
+// addressing on all axes with an opaque black border, no anisotropy and no compare operation.
+void VKBlitScreen::CreateSampler() {
+    constexpr auto filter = vk::Filter::eLinear;
+    constexpr auto address_mode = vk::SamplerAddressMode::eClampToBorder;
+
+    const vk::SamplerCreateInfo create_info(
+        {}, filter, filter, vk::SamplerMipmapMode::eLinear, address_mode, address_mode,
+        address_mode, 0.0f, false, 0.0f, false, vk::CompareOp::eNever, 0.0f, 0.0f,
+        vk::BorderColor::eFloatOpaqueBlack, false);
+
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    sampler = logical.createSamplerUnique(create_info, nullptr, dispatch);
+}
+
+// Destroys any existing framebuffers and creates one per swapchain image, each wrapping that
+// image's view at the swapchain's size.
+void VKBlitScreen::CreateFramebuffers() {
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    const vk::Extent2D extent{swapchain.GetSize()};
+
+    // Release the previous framebuffers before creating the new ones.
+    framebuffers.clear();
+    framebuffers.resize(image_count);
+
+    for (std::size_t index = 0; index < image_count; ++index) {
+        const vk::ImageView attachment{swapchain.GetImageViewIndex(index)};
+        const vk::FramebufferCreateInfo create_info({}, *renderpass, 1, &attachment, extent.width,
+                                                    extent.height, 1);
+        framebuffers[index] = logical.createFramebufferUnique(create_info, nullptr, dispatch);
+    }
+}
+
+// Waits on the fence watch of every existing raw image and then releases the raw images, their
+// memory commits and the staging buffer with its commit.
+void VKBlitScreen::ReleaseRawImages() {
+    // Only the watches that have a matching raw image are waited on.
+    std::for_each_n(watches.begin(), raw_images.size(),
+                    [](const auto& watch) { watch->Wait(); });
+
+    raw_images.clear();
+    raw_buffer_commits.clear();
+    buffer.reset();
+    buffer_commit.reset();
+}
+
+// Creates the staging buffer sized by CalculateBufferSize() and commits memory for it. The
+// buffer serves as transfer source, vertex buffer and uniform buffer.
+void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
+    const vk::BufferUsageFlags usage = vk::BufferUsageFlagBits::eTransferSrc |
+                                       vk::BufferUsageFlagBits::eVertexBuffer |
+                                       vk::BufferUsageFlagBits::eUniformBuffer;
+    const vk::BufferCreateInfo create_info({}, CalculateBufferSize(framebuffer), usage,
+                                           vk::SharingMode::eExclusive, 0, nullptr);
+
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    buffer = logical.createBufferUnique(create_info, nullptr, dispatch);
+    buffer_commit = memory_manager.Commit(*buffer, true);
+}
+
+// Creates one raw image per swapchain image, sized and formatted after the guest framebuffer,
+// and commits memory for each of them. The images are used as transfer destinations and are
+// sampled when drawing.
+void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
+    // Every raw image shares the same description.
+    const vk::ImageUsageFlags usage =
+        vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled;
+    const vk::ImageCreateInfo create_info(
+        {}, vk::ImageType::e2D, GetFormat(framebuffer), {framebuffer.width, framebuffer.height, 1},
+        1, 1, vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, usage,
+        vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined);
+
+    raw_images.resize(image_count);
+    raw_buffer_commits.resize(image_count);
+
+    for (std::size_t index = 0; index < image_count; ++index) {
+        auto& image = raw_images[index];
+        image = std::make_unique<VKImage>(device, scheduler, create_info,
+                                          vk::ImageAspectFlagBits::eColor);
+        raw_buffer_commits[index] = memory_manager.Commit(image->GetHandle(), false);
+    }
+}
+
+// Points the descriptor set of the given swapchain image at the uniform block in the staging
+// buffer (binding 0) and at the image view to sample (binding 1).
+void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const {
+    const vk::DescriptorSet target_set = descriptor_sets[image_index];
+
+    // Binding 0: the uniform member of BufferData inside the staging buffer.
+    const vk::DescriptorBufferInfo uniform_info(*buffer, offsetof(BufferData, uniform),
+                                                sizeof(BufferData::uniform));
+    const vk::WriteDescriptorSet uniform_write(
+        target_set, 0, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &uniform_info, nullptr);
+
+    // Binding 1: the image, expected in shader-read-only layout when sampled.
+    const vk::DescriptorImageInfo sampled_info(*sampler, image_view,
+                                               vk::ImageLayout::eShaderReadOnlyOptimal);
+    const vk::WriteDescriptorSet sampled_write(target_set, 1, 0, 1,
+                                               vk::DescriptorType::eCombinedImageSampler,
+                                               &sampled_info, nullptr, nullptr);
+
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    logical.updateDescriptorSets({uniform_write, sampled_write}, {}, dispatch);
+}
+
+// Fills the uniform block with the orthographic matrix for the render window's current
+// framebuffer layout. The framebuffer argument is not read.
+void VKBlitScreen::SetUniformData(BufferData& data,
+                                  const Tegra::FramebufferConfig& framebuffer) const {
+    const auto& window_layout = render_window.GetFramebufferLayout();
+    const auto width = static_cast<f32>(window_layout.width);
+    const auto height = static_cast<f32>(window_layout.height);
+    data.uniform.modelview_matrix = MakeOrthographicMatrix(width, height);
+}
+
+// Fills the four vertices of the screen rectangle: positions come from the render window's
+// screen area, texture coordinates from the framebuffer's transform flags and crop rectangle.
+void VKBlitScreen::SetVertexData(BufferData& data,
+                                 const Tegra::FramebufferConfig& framebuffer) const {
+    const auto& framebuffer_transform_flags = framebuffer.transform_flags;
+    const auto& framebuffer_crop_rect = framebuffer.crop_rect;
+
+    // Note the naming below: left/right end up in the second (v) texture coordinate and
+    // top/bottom in the first (u) one.
+    static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
+    auto left = texcoords.left;
+    auto right = texcoords.right;
+
+    switch (framebuffer_transform_flags) {
+    case Tegra::FramebufferConfig::TransformFlags::Unset:
+        break;
+    case Tegra::FramebufferConfig::TransformFlags::FlipV:
+        // Flip the framebuffer vertically
+        left = texcoords.right;
+        right = texcoords.left;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
+                          static_cast<u32>(framebuffer_transform_flags));
+        break;
+    }
+
+    // Only crop rectangles anchored at the origin are handled.
+    UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
+    UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
+
+    // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
+    // (e.g. handheld mode) on a 1920x1080 framebuffer.
+    f32 scale_u = 1.0f;
+    f32 scale_v = 1.0f;
+    // A crop dimension that is not positive leaves the corresponding scale at 1.
+    if (framebuffer_crop_rect.GetWidth() > 0) {
+        scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+                  static_cast<f32>(screen_info.width);
+    }
+    if (framebuffer_crop_rect.GetHeight() > 0) {
+        scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+                  static_cast<f32>(screen_info.height);
+    }
+
+    // Vertex order: top-left, top-right, bottom-left, bottom-right of the screen area, matching
+    // the triangle strip drawn with four vertices.
+    const auto& screen = render_window.GetFramebufferLayout().screen;
+    const auto x = static_cast<f32>(screen.left);
+    const auto y = static_cast<f32>(screen.top);
+    const auto w = static_cast<f32>(screen.GetWidth());
+    const auto h = static_cast<f32>(screen.GetHeight());
+    data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v);
+    data.vertices[1] = ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v);
+    data.vertices[2] = ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v);
+    data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v);
+}
+
+// Returns the staging buffer size: the fixed BufferData header followed by one framebuffer-sized
+// region per swapchain image.
+u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const {
+    const auto images_size = GetSizeInBytes(framebuffer) * image_count;
+    return sizeof(BufferData) + images_size;
+}
+
+// Returns the byte offset inside the staging buffer where the pixel data for the given swapchain
+// image starts; the regions follow the BufferData header back to back.
+u64 VKBlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
+                                    std::size_t image_index) const {
+    const auto image_size = GetSizeInBytes(framebuffer);
+    return static_cast<u64>(sizeof(BufferData)) + image_size * image_index;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -0,0 +1,119 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <tuple>
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Tegra {
+struct FramebufferConfig;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+struct ScreenInfo;
+struct VKScreenInfo; // named by VKBlitScreen's constructor and its screen_info member
+class RasterizerVulkan;
+class VKDevice;
+class VKFence;
+class VKImage;
+class VKScheduler;
+class VKSwapchain;
+/// Presents accelerated and non-accelerated ("framebuffer") images to the Vulkan swapchain.
+class VKBlitScreen final {
+public:
+    explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+                          VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
+                          VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                          VKSwapchain& swapchain, VKScheduler& scheduler,
+                          const VKScreenInfo& screen_info);
+    ~VKBlitScreen();
+
+    void Recreate();
+    // NOTE(review): use_accelerated presumably picks the accelerated vs. raw path - confirm.
+    std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
+                                             bool use_accelerated);
+
+private:
+    struct BufferData;
+
+    void CreateStaticResources();
+    void CreateShaders();
+    void CreateSemaphores();
+    void CreateDescriptorPool();
+    void CreateRenderPass();
+    void CreateDescriptorSetLayout();
+    void CreateDescriptorSets();
+    void CreatePipelineLayout();
+    void CreateGraphicsPipeline();
+    void CreateSampler();
+
+    void CreateDynamicResources();
+    void CreateFramebuffers();
+
+    void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
+    void ReleaseRawImages();
+    void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
+    void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
+
+    void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const;
+    void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
+    void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
+    // Byte layout helpers: one BufferData header followed by image_count raw images.
+    u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
+    u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
+                          std::size_t image_index) const;
+
+    Core::System& system;
+    Core::Frontend::EmuWindow& render_window;
+    VideoCore::RasterizerInterface& rasterizer;
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    VKMemoryManager& memory_manager;
+    VKSwapchain& swapchain;
+    VKScheduler& scheduler;
+    const std::size_t image_count; // multiplier for raw image storage in CalculateBufferSize
+    const VKScreenInfo& screen_info;
+
+    UniqueShaderModule vertex_shader;
+    UniqueShaderModule fragment_shader;
+    UniqueDescriptorPool descriptor_pool;
+    UniqueDescriptorSetLayout descriptor_set_layout;
+    UniquePipelineLayout pipeline_layout;
+    UniquePipeline pipeline;
+    UniqueRenderPass renderpass;
+    std::vector<UniqueFramebuffer> framebuffers;
+    std::vector<vk::DescriptorSet> descriptor_sets;
+    UniqueSampler sampler;
+
+    UniqueBuffer buffer;
+    VKMemoryCommit buffer_commit;
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches;
+
+    std::vector<UniqueSemaphore> semaphores;
+    std::vector<std::unique_ptr<VKImage>> raw_images;
+    std::vector<VKMemoryCommit> raw_buffer_commits;
+    u32 raw_width = 0;
+    u32 raw_height = 0;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -2,124 +2,145 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <algorithm>
 #include <cstring>
 #include <memory>
 #include <optional>
 #include <tuple>

-#include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
-#include "video_core/memory_manager.h"
+#include "common/bit_util.h"
+#include "core/core.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"

 namespace Vulkan {

-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
-                                     std::size_t alignment, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
-      alignment{alignment} {}
+namespace {

-VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
-                             Memory::Memory& cpu_memory_,
-                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
-    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
-                                                                                   cpu_memory_} {
-    const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
-                       vk::BufferUsageFlagBits::eIndexBuffer |
-                       vk::BufferUsageFlagBits::eUniformBuffer;
-    const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
-                        vk::AccessFlagBits::eUniformRead;
-    stream_buffer =
-        std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
-                                         vk::PipelineStageFlagBits::eAllCommands);
-    buffer_handle = stream_buffer->GetBuffer();
+const auto BufferUsage = // usage flags shared by the stream buffer and every cached block
+    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
+// Destination stages of the barrier recorded after each transfer (upload and block copy).
+const auto UploadPipelineStage =
+    vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
+    vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+    vk::PipelineStageFlagBits::eComputeShader;
+// Destination access mask used with UploadPipelineStage for the buffer that was written.
+const auto UploadAccessBarriers =
+    vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
+    vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
+    vk::AccessFlagBits::eIndexRead;
+// Builds the VKStreamBuffer that the VKBufferCache constructor passes to its base class.
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+    return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
 }

+} // Anonymous namespace
+/// Creates a vk::Buffer of size bytes and commits memory for it through memory_manager.
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                                     CacheAddr cache_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cache_addr, size} {
+    // Blocks act as both source and destination of the transfer copies recorded by the cache.
+    const auto usage = BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
+                       vk::BufferUsageFlagBits::eTransferDst;
+    const vk::BufferCreateInfo create_info({}, static_cast<vk::DeviceSize>(size), usage,
+                                           vk::SharingMode::eExclusive, 0, nullptr);
+
+    const auto logical = device.GetLogical();
+    buffer.handle = logical.createBufferUnique(create_info, nullptr, device.GetDispatchLoader());
+    buffer.commit = memory_manager.Commit(*buffer.handle, false);
+}
+
+CachedBufferBlock::~CachedBufferBlock() = default;
+// Passes rasterizer, system and a new stream buffer to the base cache; stores the other refs.
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                             const VKDevice& device, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+    : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
+                                                                   CreateStreamBuffer(device,
+                                                                                      scheduler)},
+      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+                                                                                staging_pool} {}
+
 VKBufferCache::~VKBufferCache() = default;

-u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
-    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
-    // Cache management is a big overhead, so only cache entries with a given size.
-    // TODO: Figure out which size is the best for given games.
-    cache &= size >= 2048;
-
-    u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
-    if (cache) {
-        const auto entry = TryGet(host_ptr);
-        if (entry) {
-            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
-                return entry->GetOffset();
-            }
-            Unregister(entry);
-        }
-    }
-
-    AlignBuffer(alignment);
-    const u64 uploaded_offset = buffer_offset;
-
-    if (host_ptr == nullptr) {
-        return uploaded_offset;
-    }
-
-    std::memcpy(buffer_ptr, host_ptr, size);
-    buffer_ptr += size;
-    buffer_offset += size;
-
-    if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
-                                                         alignment, host_ptr);
-        Register(entry);
-    }
-
-    return uploaded_offset;
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { // new shared block
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
 }

-u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
-    AlignBuffer(alignment);
-    std::memcpy(buffer_ptr, raw_pointer, size);
-    const u64 uploaded_offset = buffer_offset;
-
-    buffer_ptr += size;
-    buffer_offset += size;
-    return uploaded_offset;
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+    return buffer->GetHandle(); // pointer to the vk::Buffer handle held by the block
 }

-std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
-    AlignBuffer(alignment);
-    u8* const uploaded_ptr = buffer_ptr;
-    const u64 uploaded_offset = buffer_offset;
-
-    buffer_ptr += size;
-    buffer_offset += size;
-    return {uploaded_ptr, uploaded_offset};
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { // records a zero fill
+    size = (std::max(size, std::size_t(4)) + 3) & ~std::size_t(3); // fillBuffer: multiple of 4
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
+        cmdbuf.fillBuffer(buffer, 0, size, 0, dld); // zero the first size bytes
+    });
+    return &*empty.handle;
 }

-void VKBufferCache::Reserve(std::size_t max_size) {
-    bool invalidate;
-    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
-    buffer_offset = buffer_offset_base;
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                    const u8* data) { // data -> staging -> buffer at offset
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    std::memcpy(staging.commit->Map(size), data, size); // CPU write into the staging memory

-    if (invalidate) {
-        InvalidateAll();
-    }
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); // staging[0] -> offset
+        cmdbuf.pipelineBarrier( // transfer write -> UploadAccessBarriers on the copied range
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
+                                     offset, size)},
+            {}, dld);
+    });
 }

-void VKBufferCache::Send() {
-    stream_buffer->Send(buffer_offset - buffer_offset_base);
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                      u8* data) { // buffer at offset -> staging -> data
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.pipelineBarrier( // shader writes -> transfer read, recorded ahead of the copy
+            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+                vk::PipelineStageFlagBits::eComputeShader,
+            vk::PipelineStageFlagBits::eTransfer, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
+                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
+            {}, dld);
+        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+    });
+    scheduler.Finish();
+    // NOTE(review): Finish() presumably blocks until the copy above has executed - confirm.
+    std::memcpy(data, staging.commit->Map(size), size);
 }

-void VKBufferCache::AlignBuffer(std::size_t alignment) {
-    // Align the offset, not the mapped pointer
-    const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
-    buffer_ptr += offset_aligned - buffer_offset;
-    buffer_offset = offset_aligned;
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                              std::size_t dst_offset, std::size_t size) { // src -> dst copy
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
+                      dst_offset, size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
+        cmdbuf.pipelineBarrier( // two barriers: the source range, then the destination range
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
+                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
+             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
+                                     dst_offset, size)},
+            {}, dld);
+    });
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,105 +5,74 @@
 #pragma once

 #include <memory>
-#include <tuple>
+#include <unordered_map>
+#include <vector>

 #include "common/common_types.h"
-#include "video_core/gpu.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"

-namespace Memory {
-class Memory;
-}
-
-namespace Tegra {
-class MemoryManager;
+namespace Core {
+class System;
 }

 namespace Vulkan {

 class VKDevice;
-class VKFence;
 class VKMemoryManager;
-class VKStreamBuffer;
+class VKScheduler;

-class CachedBufferEntry final : public RasterizerCacheObject {
+class CachedBufferBlock final : public VideoCommon::BufferBlock { // block backed by a VKBuffer
 public:
-    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
-                               u8* host_ptr);
+    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                               CacheAddr cache_addr, std::size_t size);
+    ~CachedBufferBlock();

-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
-    std::size_t GetSizeInBytes() const override {
-        return size;
-    }
-
-    std::size_t GetSize() const {
-        return size;
-    }
-
-    u64 GetOffset() const {
-        return offset;
-    }
-
-    std::size_t GetAlignment() const {
-        return alignment;
+    const vk::Buffer* GetHandle() const { // address of the vk::Buffer inside buffer.handle
+        return &*buffer.handle;
    }

 private:
-    VAddr cpu_addr{};
-    std::size_t size{};
-    u64 offset{};
-    std::size_t alignment{};
+    VKBuffer buffer; // handle and memory commit, both set by the constructor
 };

-class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+/// Vulkan implementation of the VideoCommon::BufferCache hooks, using CachedBufferBlock blocks.
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
 public:
-    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
-                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                           const VKDevice& device, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
    ~VKBufferCache();

-    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
-    /// allocated.
-    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
-
-    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
-    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
-
-    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
-    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
-
-    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
-    void Reserve(std::size_t max_size);
-
-    /// Ensures that the set data is sent to the device.
-    void Send();
-
-    /// Returns the buffer cache handle.
-    vk::Buffer GetBuffer() const {
-        return buffer_handle;
-    }
+    const vk::Buffer* GetEmptyBuffer(std::size_t size) override; // records a zero fill

 protected:
-    // We do not have to flush this cache as things in it are never modified by us.
-    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+    void WriteBarrier() override {}
+    /// Allocates a CachedBufferBlock of size bytes for cache_addr.
+    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    /// Returns the vk::Buffer handle pointer of a block.
+    const vk::Buffer* ToHandle(const Buffer& buffer) override;
+    /// Records a copy of data into buffer at offset, going through a staging buffer.
+    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                         const u8* data) override;
+    /// Copies size bytes at offset of buffer into data; calls scheduler.Finish() on the way.
+    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                           u8* data) override;
+    /// Records a buffer-to-buffer copy of size bytes from src to dst.
+    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                   std::size_t dst_offset, std::size_t size) override;

 private:
-    void AlignBuffer(std::size_t alignment);
-
-    Tegra::MemoryManager& tegra_memory_manager;
-    Memory::Memory& cpu_memory;
-
-    std::unique_ptr<VKStreamBuffer> stream_buffer;
-    vk::Buffer buffer_handle;
-
-    u8* buffer_ptr = nullptr;
-    u64 buffer_offset = 0;
-    u64 buffer_offset_base = 0;
+    const VKDevice& device;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -0,0 +1,339 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+namespace {
+
+// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
+constexpr u8 quad_array[] = {
+    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
+    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
+    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+    0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+    0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
+    0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+    0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
+    0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
+    0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+    0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
+    0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
+    0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
+    0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
+    0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
+    0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
+    0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
+    0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
+    0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
+    0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+    0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
+    0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+    0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
+    0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
+    0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
+    0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
+    0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
+    0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
+    0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+    0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
+    0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
+    0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
+    0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
+    0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
+    0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
+    0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
+    0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
+    0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
+    0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
+    0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
+    0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
+    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+
+// Uint8 SPIR-V module. Generated from the "shaders/" directory.
+constexpr u8 uint8_pass[] = {
+    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
+    0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
+    0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
+    0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
+    0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
+    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
+    0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
+    0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
+    0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
+    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
+    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
+    0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+    0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
+    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
+    0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
+    0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
+    0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
+    0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
+    0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
+    0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
+    0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
+    0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
+    0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
+    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
+    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
+    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
+    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
+    0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
+    0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
+    0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
+    0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
+    0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
+    0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
+    0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
+    0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
+    0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
+    0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
+    0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
+    0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
+    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+
+} // Anonymous namespace
+
+VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+                             const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                             const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
+                             const std::vector<vk::PushConstantRange> push_constants,
+                             std::size_t code_size, const u8* code) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    // Builds every Vulkan object of the pass, starting with its only descriptor set layout.
+    const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
+        {}, static_cast<u32>(bindings.size()), bindings.data());
+    descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
+    // The pipeline layout exposes that single set plus the caller's push constant ranges.
+    const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
+                                                          static_cast<u32>(push_constants.size()),
+                                                          push_constants.data());
+    layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+    // Without template entries, no update template or descriptor allocator is created.
+    if (!templates.empty()) {
+        const vk::DescriptorUpdateTemplateCreateInfo template_ci(
+            {}, static_cast<u32>(templates.size()), templates.data(),
+            vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+            vk::PipelineBindPoint::eCompute, *layout, 0);
+        descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+        // eCompute above matches the pipeline built below; the spec ignores it for eDescriptorSet.
+        descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
+    }
+    // The create info takes u32 words, so the u8 input is copied into u32 storage first.
+    auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
+    std::memcpy(code_copy.get(), code, code_size);
+    const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
+    module = dev.createShaderModuleUnique(module_ci, nullptr, dld);
+    // The module's "main" entry point is the pipeline's compute stage.
+    const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
+                                                     "main", nullptr);
+    // No pipeline cache and no base pipeline are supplied.
+    const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
+    pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
+}
+
+VKComputePass::~VKComputePass() = default; // Defaulted here rather than in the header.
+
+vk::DescriptorSet VKComputePass::CommitDescriptorSet(
+    VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
+    if (descriptor_template) { // Commit a set, then hand template and set to the update queue.
+        const auto committed = descriptor_allocator->Commit(fence);
+        update_descriptor_queue.Send(*descriptor_template, committed);
+        return committed;
+    }
+    return {}; // No update template was created, so there is no set to commit.
+}
+
+QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
+                             VKDescriptorPool& descriptor_pool,
+                             VKStagingBufferPool& staging_buffer_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue)
+    : VKComputePass(device, descriptor_pool, // One storage buffer (binding 0), one u32 push constant.
+                    {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
+                                                    vk::ShaderStageFlagBits::eCompute, nullptr)},
+                    {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
+                                                       0, sizeof(DescriptorUpdateEntry))},
+                    {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
+                    std::size(quad_array), quad_array), // Shader code is the quad_array byte table.
+      scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
+      update_descriptor_queue{update_descriptor_queue} {}
+
+QuadArrayPass::~QuadArrayPass() = default; // Defaulted here rather than in the header.
+
+std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
+    // Six u32 values are produced per four input vertices; the staging buffer is sized for them.
+    const std::size_t output_size = (num_vertices * 6 / 4) * sizeof(u32);
+    auto& staging = staging_buffer_pool.GetUnusedBuffer(output_size, false);
+    // The staging buffer is the pass's only descriptor (binding 0).
+    update_descriptor_queue.Acquire();
+    update_descriptor_queue.AddBuffer(&*staging.handle, 0, output_size);
+    const auto dset = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    ASSERT(num_vertices % 4 == 0);
+    const u32 quad_count = num_vertices / 4;
+    scheduler.Record([pass_layout = *layout, pass_pipeline = *pipeline, dset, quad_count, first,
+                      output = *staging.handle](auto cmd, auto& dld) {
+        constexpr u32 group_size = 1024;
+        cmd.bindPipeline(vk::PipelineBindPoint::eCompute, pass_pipeline, dld);
+        cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pass_layout, 0, {dset}, {}, dld);
+        cmd.pushConstants(pass_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
+                          dld);
+        cmd.dispatch(Common::AlignUp(quad_count, group_size) / group_size, 1, 1, dld);
+        // NOTE(review): the output looks like index data; confirm eVertexAttributeRead is intended.
+        const vk::BufferMemoryBarrier barrier(
+            vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
+            VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, output, 0,
+            static_cast<vk::DeviceSize>(quad_count) * 6 * sizeof(u32));
+        cmd.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
+                            vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
+    });
+    return {*staging.handle, 0};
+}
+
+Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
+                     VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
+                     VKUpdateDescriptorQueue& update_descriptor_queue)
+    : VKComputePass(device, descriptor_pool, // Two storage buffers, at bindings 0 and 1.
+                    {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
+                                                    vk::ShaderStageFlagBits::eCompute, nullptr),
+                     vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
+                                                    vk::ShaderStageFlagBits::eCompute, nullptr)},
+                    {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
+                                                       0, sizeof(DescriptorUpdateEntry))},
+                    {}, std::size(uint8_pass), uint8_pass), // No push constants; uint8_pass shader.
+      scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
+      update_descriptor_queue{update_descriptor_queue} {}
+
+Uint8Pass::~Uint8Pass() = default; // Defaulted here rather than in the header.
+
+std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
+                                                      u64 src_offset) {
+    const u32 output_size = static_cast<u32>(num_vertices * sizeof(u16));
+    auto& staging = staging_buffer_pool.GetUnusedBuffer(output_size, false);
+    // Source range first (num_vertices bytes), then the staging buffer (one u16 per element).
+    update_descriptor_queue.Acquire();
+    update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
+    update_descriptor_queue.AddBuffer(&*staging.handle, 0, output_size);
+    const auto dset = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+    // NOTE(review): &src_buffer addresses a by-value parameter; confirm it is not kept past here.
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([pass_layout = *layout, pass_pipeline = *pipeline, dset, num_vertices,
+                      output = *staging.handle](auto cmd, auto& dld) {
+        constexpr u32 group_size = 1024;
+        cmd.bindPipeline(vk::PipelineBindPoint::eCompute, pass_pipeline, dld);
+        cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pass_layout, 0, {dset}, {}, dld);
+        cmd.dispatch(Common::AlignUp(num_vertices, group_size) / group_size, 1, 1, dld);
+        // The barrier spans num_vertices u16 values from offset 0 of the staging buffer.
+        const vk::BufferMemoryBarrier barrier(
+            vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
+            VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, output, 0,
+            static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16));
+        cmd.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
+                            vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
+    });
+    return {&*staging.handle, 0};
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -0,0 +1,77 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKStagingBufferPool;
+class VKUpdateDescriptorQueue;
+
+class VKComputePass { // Base class of QuadArrayPass and Uint8Pass.
+public:
+    explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
+                           const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                           const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
+                           const std::vector<vk::PushConstantRange> push_constants, // NOTE(review): by value.
+                           std::size_t code_size, const u8* code); // code_size is in bytes.
+    ~VKComputePass();
+    // Everything below is reachable from the derived passes.
+protected:
+    vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
+                                          VKFence& fence); // Null set when no template exists.
+    // Filled in by the constructor; descriptor_template stays null without template entries.
+    UniqueDescriptorUpdateTemplate descriptor_template;
+    UniquePipelineLayout layout;
+    UniquePipeline pipeline;
+    // Used by the base class only; the allocator is empty when no template was created.
+private:
+    UniqueDescriptorSetLayout descriptor_set_layout;
+    std::optional<DescriptorAllocator> descriptor_allocator;
+    UniqueShaderModule module;
+};
+
+class QuadArrayPass final : public VKComputePass { // Compute pass using the quad_array shader.
+public:
+    explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
+                           VKDescriptorPool& descriptor_pool,
+                           VKStagingBufferPool& staging_buffer_pool,
+                           VKUpdateDescriptorQueue& update_descriptor_queue);
+    ~QuadArrayPass();
+    /// Records the pass (first is sent as a push constant); returns the staging buffer, offset 0.
+    std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first);
+    // Collaborators are held by reference, not owned.
+private:
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_buffer_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+};
+
+class Uint8Pass final : public VKComputePass { // Compute pass using the uint8_pass shader.
+public:
+    explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
+                       VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
+                       VKUpdateDescriptorQueue& update_descriptor_queue);
+    ~Uint8Pass();
+    /// Records the pass over src_buffer at src_offset; returns the staging buffer and offset 0.
+    std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer,
+                                               u64 src_offset);
+    // Collaborators are held by reference, not owned.
+private:
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_buffer_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -0,0 +1,112 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
+                                     VKDescriptorPool& descriptor_pool,
+                                     VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     const SPIRVShader& shader)
+    : device{device}, scheduler{scheduler}, entries{shader.entries}, // entries is stored by value.
+      descriptor_set_layout{CreateDescriptorSetLayout()}, // Reads device and entries.
+      descriptor_allocator{descriptor_pool, *descriptor_set_layout},
+      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
+      descriptor_template{CreateDescriptorUpdateTemplate()}, // Reads both layouts set up above.
+      shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
+
+VKComputePipeline::~VKComputePipeline() = default; // Defaulted here rather than in the header.
+
+vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
+    if (descriptor_template) { // Without a template there is nothing to commit or update.
+        const auto committed = descriptor_allocator.Commit(scheduler.GetFence());
+        update_descriptor_queue.Send(*descriptor_template, committed);
+        return committed;
+    }
+    return {};
+}
+
+UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
+    // Binding indices are handed out sequentially, in the order the groups are appended below.
+    std::vector<vk::DescriptorSetLayoutBinding> layout_bindings;
+    const auto Append = [&layout_bindings](vk::DescriptorType type, std::size_t num_entries) {
+        // TODO(Rodrigo): Maybe make individual bindings here?
+        for (u32 left = static_cast<u32>(num_entries); left != 0; --left) {
+            layout_bindings.emplace_back(static_cast<u32>(layout_bindings.size()), type, 1,
+                                         vk::ShaderStageFlagBits::eCompute, nullptr);
+        }
+    };
+    Append(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+    Append(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+    Append(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+    Append(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+    Append(vk::DescriptorType::eStorageImage, entries.images.size());
+
+    const vk::DescriptorSetLayoutCreateInfo create_info(
+        {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data());
+
+    const auto& dld = device.GetDispatchLoader();
+    const auto dev = device.GetLogical();
+    return dev.createDescriptorSetLayoutUnique(create_info, nullptr, dld);
+}
+
+UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
+    // The layout holds the pipeline's single descriptor set layout and no push constants.
+    const vk::PipelineLayoutCreateInfo ci({}, 1, &*descriptor_set_layout, 0, nullptr);
+    return device.GetLogical().createPipelineLayoutUnique(ci, nullptr, device.GetDispatchLoader());
+}
+
+UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
+    // Builds the update template for this shader's entries; null when it has no descriptors.
+    std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
+    u32 binding = 0;
+    u32 offset = 0;
+    FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
+    if (template_entries.empty()) {
+        // If the shader doesn't use descriptor sets, skip template creation.
+        return UniqueDescriptorUpdateTemplate{};
+    }
+    // eCompute matches this pipeline; the spec only reads the bind point for push descriptors.
+    const vk::DescriptorUpdateTemplateCreateInfo template_ci(
+        {}, static_cast<u32>(template_entries.size()), template_entries.data(),
+        vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+        vk::PipelineBindPoint::eCompute, *layout, DESCRIPTOR_SET);
+
+    const auto& dld = device.GetDispatchLoader();
+    return device.GetLogical().createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+}
+
+UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
+    // The size argument is in bytes, hence the word count is scaled by sizeof(u32).
+    const vk::ShaderModuleCreateInfo ci({}, sizeof(u32) * code.size(), code.data());
+    return device.GetLogical().createShaderModuleUnique(ci, nullptr, device.GetDispatchLoader());
+}
+
+UniquePipeline VKComputePipeline::CreatePipeline() const {
+    // Chained into the stage only when the shader uses warps and the device check passes.
+    vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT required_size;
+    required_size.requiredSubgroupSize = GuestWarpSize;
+    vk::PipelineShaderStageCreateInfo stage({}, vk::ShaderStageFlagBits::eCompute, *shader_module,
+                                            "main", nullptr);
+    if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
+        stage.pNext = &required_size;
+    }
+    const auto& dld = device.GetDispatchLoader();
+    const vk::ComputePipelineCreateInfo pipeline_ci({}, stage, *layout, {}, 0);
+    return device.GetLogical().createComputePipelineUnique({}, pipeline_ci, nullptr, dld);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -0,0 +1,66 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+class VKComputePipeline final {
+public:
+    explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
+                               VKDescriptorPool& descriptor_pool,
+                               VKUpdateDescriptorQueue& update_descriptor_queue,
+                               const SPIRVShader& shader);
+    ~VKComputePipeline();
+
+    vk::DescriptorSet CommitDescriptorSet();
+    /// Returns the Vulkan handle of the compute pipeline.
+    vk::Pipeline GetHandle() const {
+        return *pipeline;
+    }
+    /// Returns the pipeline layout owned by this object.
+    vk::PipelineLayout GetLayout() const {
+        return *layout;
+    }
+    /// Returns the shader entries of this pipeline; const so it can be used on const objects.
+    const ShaderEntries& GetEntries() const {
+        return entries;
+    }
+
+private:
+    UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
+
+    UniquePipelineLayout CreatePipelineLayout() const;
+    /// Creates the update template, or an empty handle when there are no template entries.
+    UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
+    /// Creates a shader module from the passed code words.
+    UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
+    /// Creates the compute pipeline from shader_module and layout with "main" as entry point.
+    UniquePipeline CreatePipeline() const;
+
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    ShaderEntries entries;
+
+    UniqueDescriptorSetLayout descriptor_set_layout;
+    DescriptorAllocator descriptor_allocator;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+    UniquePipelineLayout layout;
+    UniqueDescriptorUpdateTemplate descriptor_template;
+    UniqueShaderModule shader_module;
+    UniquePipeline pipeline;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
+constexpr std::size_t SETS_GROW_RATE = 0x20;
+
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, // Source of sets
+                                         vk::DescriptorSetLayout layout)    // Layout of each set
+    : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+
+DescriptorAllocator::~DescriptorAllocator() = default;
+
+vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
+    return descriptors[CommitResource(fence)].get(); // Set stored in the committed slot
+}
+
+void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
+    for (auto& set : descriptor_pool.AllocateDescriptors(layout, end - begin)) {
+        descriptors.push_back(std::move(set)); // Take ownership of every newly allocated set
+    }
+}
+
+VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
+    : device{device}, active_pool{AllocateNewPool()} {} // Starts with one pool already created
+
+VKDescriptorPool::~VKDescriptorPool() = default;
+
+vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
+    static constexpr u32 max_sets = 0x20000;
+    // Descriptor budget per type, written as a multiple of the set capacity of the pool.
+    static constexpr vk::DescriptorPoolSize type_budgets[] = {
+        {vk::DescriptorType::eUniformBuffer, max_sets * 90},
+        {vk::DescriptorType::eStorageBuffer, max_sets * 60},
+        {vk::DescriptorType::eUniformTexelBuffer, max_sets * 64},
+        {vk::DescriptorType::eCombinedImageSampler, max_sets * 64},
+        {vk::DescriptorType::eStorageImage, max_sets * 40}};
+    const vk::DescriptorPoolCreateInfo pool_ci(
+        vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, max_sets,
+        static_cast<u32>(std::size(type_budgets)), std::data(type_budgets));
+    const auto& dld = device.GetDispatchLoader();
+    auto new_pool = device.GetLogical().createDescriptorPoolUnique(pool_ci, nullptr, dld);
+    return *pools.emplace_back(std::move(new_pool)); // `pools` keeps ownership of the pool
+}
+
+std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
+    vk::DescriptorSetLayout layout, std::size_t count) {
+    // Every requested set uses the same layout, so the handle is replicated `count` times.
+    std::vector layout_copies(count, layout);
+    vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
+                                                layout_copies.data());
+
+    std::vector<vk::DescriptorSet> sets(count);
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld);
+    if (result == vk::Result::eErrorOutOfPoolMemory) {
+        // The active pool is exhausted: switch to a brand new pool and retry exactly once.
+        active_pool = AllocateNewPool();
+        allocate_info.descriptorPool = active_pool;
+        // Keep the retry's result so a failure reports the error that actually ended the call.
+        result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld);
+    }
+    if (result != vk::Result::eSuccess) {
+        vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
+    }
+
+    // Each set is released back to the pool it came from, which is the active one by now.
+    vk::PoolFree deleter(dev, active_pool, dld);
+    std::vector<UniqueDescriptorSet> unique_sets;
+    unique_sets.reserve(count);
+    for (const auto set : sets) {
+        unique_sets.push_back(UniqueDescriptorSet{set, deleter});
+    }
+    return unique_sets;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace Vulkan {
+
+class VKDescriptorPool;
+
+class DescriptorAllocator final : public VKFencedPool {
+public:
+    explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
+    ~DescriptorAllocator() override;
+
+    DescriptorAllocator(const DescriptorAllocator&) = delete;
+    /// Commits a pool slot through CommitResource and returns the descriptor set stored in it.
+    vk::DescriptorSet Commit(VKFence& fence);
+
+protected:
+    void Allocate(std::size_t begin, std::size_t end) override; ///< Appends end - begin new sets.
+
+private:
+    VKDescriptorPool& descriptor_pool;    ///< Pool the sets are requested from.
+    const vk::DescriptorSetLayout layout; ///< Layout shared by every set of this allocator.
+
+    std::vector<UniqueDescriptorSet> descriptors; ///< Owned sets, indexed by committed slot.
+};
+
+class VKDescriptorPool final {
+    friend DescriptorAllocator; // Calls the private AllocateDescriptors.
+
+public:
+    explicit VKDescriptorPool(const VKDevice& device);
+    ~VKDescriptorPool();
+
+private:
+    vk::DescriptorPool AllocateNewPool(); ///< Creates a pool, stores it and returns its handle.
+    /// Allocates count sets of layout, moving on to a new pool if the active one runs out.
+    std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
+                                                         std::size_t count);
+
+    const VKDevice& device;
+    // `pools` has to stay declared before `active_pool`, whose initializer appends to it.
+    std::vector<UniqueDescriptorPool> pools; ///< Every pool created so far.
+    vk::DescriptorPool active_pool;          ///< Pool used for new allocations.
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -3,12 +3,15 @@
 // Refer to the license.txt file included.

 #include <bitset>
+#include <chrono>
 #include <cstdlib>
 #include <optional>
 #include <set>
 #include <string_view>
+#include <thread>
 #include <vector>
 #include "common/assert.h"
+#include "core/settings.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"

@@ -201,6 +204,22 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
    return wanted_format;
 }

+void VKDevice::ReportLoss() const {
+    LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+
+    // Give the log some time to be flushed before doing anything else.
+    constexpr std::chrono::seconds flush_delay{1};
+    std::this_thread::sleep_for(flush_delay);
+
+    if (nv_device_diagnostic_checkpoints) {
+        // Debugging aid: in debug builds (or with optimizations disabled) break after the next
+        // statement and evaluate the following expression to inspect the last graphics pipeline
+        // to be executed:
+        // *(VKGraphicsPipeline*)checkpoints[0]
+        [[maybe_unused]] const std::vector checkpoints = graphics_queue.getCheckpointDataNV(dld);
+    }
+}
+
 bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
                                      const vk::DispatchLoaderDynamic& dldi) const {
    // Disable for now to avoid converting ASTC twice.
@@ -381,6 +400,8 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
             VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
        Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
             false);
+        Test(extension, nv_device_diagnostic_checkpoints,
+             VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
    }

    if (khr_shader_float16_int8) {
@@ -464,6 +485,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
 std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+                                        vk::Format::eA8B8G8R8UintPack32,
                                        vk::Format::eA8B8G8R8SnormPack32,
                                        vk::Format::eA8B8G8R8SrgbPack32,
                                        vk::Format::eB5G6R5UnormPack16,
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -39,6 +39,9 @@ public:
    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const;

+    /// Reports a device loss.
+    void ReportLoss() const;
+
    /// Returns the dispatch loader with direct function pointers of the device.
    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
        return dld;
@@ -159,6 +162,11 @@ public:
        return ext_shader_viewport_index_layer;
    }

+    /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
+    bool IsNvDeviceDiagnosticCheckpoints() const {
+        return nv_device_diagnostic_checkpoints;
+    }
+
    /// Returns the vendor name reported from Vulkan.
    std::string_view GetVendorName() const {
        return vendor_name;
@@ -218,6 +226,7 @@ private:
    bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
    bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
    bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool nv_device_diagnostic_checkpoints{};   ///< Support for VK_NV_device_diagnostic_checkpoints.

    // Telemetry parameters
    std::string vendor_name;                      ///< Device's driver name.
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -0,0 +1,271 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+namespace {
+
+vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
+    const auto convert = [](auto action) { return MaxwellToVK::StencilOp(action); };
+    return vk::StencilOpState(convert(face.action_stencil_fail), convert(face.action_depth_pass),
+                              convert(face.action_depth_fail),
+                              MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
+}
+
+bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
+    // Topologies rejected by this helper; every other topology keeps primitive restart
+    // available.
+    static constexpr std::array blocked = {
+        vk::PrimitiveTopology::ePointList, vk::PrimitiveTopology::eLineList,
+        vk::PrimitiveTopology::eTriangleList, vk::PrimitiveTopology::eLineListWithAdjacency,
+        vk::PrimitiveTopology::eTriangleListWithAdjacency, vk::PrimitiveTopology::ePatchList};
+
+    return std::none_of(blocked.begin(), blocked.end(),
+                        [topology](auto entry) { return entry == topology; });
+}
+
+} // Anonymous namespace
+
+VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
+                                       VKDescriptorPool& descriptor_pool,
+                                       VKUpdateDescriptorQueue& update_descriptor_queue,
+                                       VKRenderPassCache& renderpass_cache,
+                                       const GraphicsPipelineCacheKey& key,
+                                       const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                                       const SPIRVProgram& program)
+    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+      descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, // Needed by later initializers
+      descriptor_allocator{descriptor_pool, *descriptor_set_layout},
+      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
+      descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
+                                                                        program)},
+      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
+                                                                             key.renderpass_params,
+                                                                             program)} {}
+
+VKGraphicsPipeline::~VKGraphicsPipeline() = default;
+
+vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
+    if (descriptor_template) {
+        const vk::DescriptorSet set = descriptor_allocator.Commit(scheduler.GetFence());
+        update_descriptor_queue.Send(*descriptor_template, set);
+        return set;
+    }
+    return {}; // No template was created, so there is no descriptor set to hand out
+}
+
+UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
+    const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
+    // A single set layout holds all the bindings passed by the caller.
+    const auto num_bindings = static_cast<u32>(bindings.size());
+    const vk::DescriptorSetLayoutCreateInfo layout_info({}, num_bindings, bindings.data());
+
+    const auto& dispatch = device.GetDispatchLoader();
+    return device.GetLogical().createDescriptorSetLayoutUnique(layout_info, nullptr, dispatch);
+}
+
+UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
+    // One descriptor set layout and no push constant ranges.
+    const vk::DescriptorSetLayout set_layout = *descriptor_set_layout;
+    const vk::PipelineLayoutCreateInfo layout_info({}, 1, &set_layout, 0, nullptr);
+    const auto& dispatch = device.GetDispatchLoader();
+    return device.GetLogical().createPipelineLayoutUnique(layout_info, nullptr, dispatch);
+}
+
+UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
+    const SPIRVProgram& program) const {
+    // The same binding and offset counters are handed to the call made for every stage.
+    u32 next_binding = 0;
+    u32 next_offset = 0;
+    std::vector<vk::DescriptorUpdateTemplateEntry> update_entries;
+    for (const auto& shader : program) {
+        if (!shader) {
+            continue; // Stage is not present in this program
+        }
+        FillDescriptorUpdateTemplateEntries(device, shader->entries, next_binding, next_offset,
+                                            update_entries);
+    }
+    if (update_entries.empty()) {
+        // No stage produced template entries, so there is no template to create.
+        return UniqueDescriptorUpdateTemplate{};
+    }
+
+    const vk::DescriptorUpdateTemplateCreateInfo template_info(
+        {}, static_cast<u32>(update_entries.size()), update_entries.data(),
+        vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
+        vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
+    const auto& dld = device.GetDispatchLoader();
+    return device.GetLogical().createDescriptorUpdateTemplateUnique(template_info, nullptr, dld);
+}
+
+std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
+    const SPIRVProgram& program) const {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+
+    // Absent stages are skipped, so the result only holds present stages, in stage order.
+    std::vector<UniqueShaderModule> stage_modules;
+    for (const auto& shader : program) {
+        if (shader) {
+            const std::size_t code_bytes = shader->code.size() * sizeof(u32);
+            const vk::ShaderModuleCreateInfo module_info({}, code_bytes, shader->code.data());
+            stage_modules.emplace_back(dev.createShaderModuleUnique(module_info, nullptr, dld));
+        }
+    }
+    return stage_modules;
+}
+
+UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
+                                                  const SPIRVProgram& program) const {
+    const auto& vi = fixed_state.vertex_input;
+    const auto& ia = fixed_state.input_assembly;
+    const auto& ds = fixed_state.depth_stencil;
+    const auto& cd = fixed_state.color_blending;
+    const auto& ts = fixed_state.tessellation;
+    const auto& rs = fixed_state.rasterizer;
+    // Vertex bindings: a non-zero divisor selects per-instance rate and records the divisor.
+    std::vector<vk::VertexInputBindingDescription> vertex_bindings;
+    std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
+    for (std::size_t i = 0; i < vi.num_bindings; ++i) {
+        const auto& binding = vi.bindings[i];
+        const bool instanced = binding.divisor != 0;
+        const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
+        vertex_bindings.emplace_back(binding.index, binding.stride, rate);
+        if (instanced) {
+            vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
+        }
+    }
+    // NOTE(review): program[0] is dereferenced unchecked; presumably stage 0 is always present.
+    std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
+    const auto& input_attributes = program[0]->entries.attributes;
+    for (std::size_t i = 0; i < vi.num_attributes; ++i) {
+        const auto& attribute = vi.attributes[i];
+        if (input_attributes.find(attribute.index) == input_attributes.end()) {
+            // Skip attributes not used by the vertex shaders.
+            continue;
+        }
+        vertex_attributes.emplace_back(attribute.index, attribute.buffer,
+                                       MaxwellToVK::VertexFormat(attribute.type, attribute.size),
+                                       attribute.offset);
+    }
+
+    vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
+        {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
+        static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
+    // The divisor description is chained only when some binding is instanced.
+    const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
+        static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
+    if (!vertex_binding_divisors.empty()) {
+        vertex_input_ci.pNext = &vertex_input_divisor_ci;
+    }
+
+    const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
+    const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
+        {}, primitive_topology,
+        ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
+
+    const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
+    // Null viewport and scissor arrays: both are listed in the dynamic states below.
+    const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
+                                                          Maxwell::NumViewports, nullptr);
+
+    // TODO(Rodrigo): Find out what's the default register value for front face
+    const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
+        {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
+        rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
+        rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
+        rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
+
+    const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
+        {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
+    // With the depth test disabled the compare operation falls back to eAlways.
+    const vk::CompareOp depth_test_compare = ds.depth_test_enable
+                                                 ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
+                                                 : vk::CompareOp::eAlways;
+
+    const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
+        {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
+        ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
+        GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
+    // Only attachments known to both the blend state and the render pass are filled in.
+    std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+    const std::size_t num_attachments =
+        std::min(cd.attachments_count, renderpass_params.color_attachments.size());
+    for (std::size_t i = 0; i < num_attachments; ++i) {
+        constexpr std::array component_table{
+            vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
+            vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
+        const auto& blend = cd.attachments[i];
+
+        vk::ColorComponentFlags color_components{};
+        for (std::size_t j = 0; j < component_table.size(); ++j) {
+            if (blend.components[j])
+                color_components |= component_table[j];
+        }
+
+        cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
+            blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
+            MaxwellToVK::BlendFactor(blend.dst_rgb_func),
+            MaxwellToVK::BlendEquation(blend.rgb_equation),
+            MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
+            MaxwellToVK::BlendEquation(blend.a_equation), color_components);
+    }
+    const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
+                                                                  static_cast<u32>(num_attachments),
+                                                                  cb_attachments.data(), {});
+
+    constexpr std::array dynamic_states = {
+        vk::DynamicState::eViewport,         vk::DynamicState::eScissor,
+        vk::DynamicState::eDepthBias,        vk::DynamicState::eBlendConstants,
+        vk::DynamicState::eDepthBounds,      vk::DynamicState::eStencilCompareMask,
+        vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
+    const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
+        {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
+    // A single structure is shared by every stage that gets the guest warp size enforced.
+    vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
+    subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
+
+    std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
+    std::size_t module_index = 0; // `modules` holds present stages only, hence its own index
+    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+        if (!program[stage]) {
+            continue;
+        }
+        const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
+        const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
+        auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
+                                                    *modules[module_index++], "main", nullptr);
+        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
+            stage_ci.pNext = &subgroup_size_ci;
+        }
+    }
+
+    const vk::GraphicsPipelineCreateInfo create_info(
+        {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
+        &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
+        &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+
+namespace Vulkan {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+struct GraphicsPipelineCacheKey;
+
+class VKDescriptorPool;
+class VKDevice;
+class VKRenderPassCache;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
+
+class VKGraphicsPipeline final {
+public:
+    explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
+                                VKDescriptorPool& descriptor_pool,
+                                VKUpdateDescriptorQueue& update_descriptor_queue,
+                                VKRenderPassCache& renderpass_cache,
+                                const GraphicsPipelineCacheKey& key,
+                                const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                                const SPIRVProgram& program);
+    ~VKGraphicsPipeline();
+    /// Commits a descriptor set and sends its update; returns an empty handle without a template.
+    vk::DescriptorSet CommitDescriptorSet();
+    /// Returns the Vulkan handle of the graphics pipeline.
+    vk::Pipeline GetHandle() const {
+        return *pipeline;
+    }
+    /// Returns the pipeline layout owned by this object.
+    vk::PipelineLayout GetLayout() const {
+        return *layout;
+    }
+    /// Returns the render pass obtained from the render pass cache at construction.
+    vk::RenderPass GetRenderPass() const {
+        return renderpass;
+    }
+
+private:
+    UniqueDescriptorSetLayout CreateDescriptorSetLayout(
+        const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
+
+    UniquePipelineLayout CreatePipelineLayout() const;
+    /// Builds the update template from every present stage; empty handle if there are no entries.
+    UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
+        const SPIRVProgram& program) const;
+    /// Creates one shader module per present stage, in stage order.
+    std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
+    /// Builds the graphics pipeline from the fixed state, render pass parameters and program.
+    UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
+                                  const SPIRVProgram& program) const;
+
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    const FixedPipelineState fixed_state;
+    const u64 hash;
+    // Declaration order matters: the constructor initializes later members from earlier ones.
+    UniqueDescriptorSetLayout descriptor_set_layout;
+    DescriptorAllocator descriptor_allocator;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+    UniquePipelineLayout layout;
+    UniqueDescriptorUpdateTemplate descriptor_template;
+    std::vector<UniqueShaderModule> modules;
+
+    vk::RenderPass renderpass;
+    UniquePipeline pipeline;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -0,0 +1,106 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
+                 const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask)
+    : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
+      image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
+    UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
+                         "Queue family tracking is not implemented");
+
+    image = device.GetLogical().createImageUnique(image_ci, nullptr, device.GetDispatchLoader());
+
+    // One tracked state and one barrier slot per (layer, level) pair of the image.
+    const u32 num_subranges = image_num_layers * image_num_levels;
+    subrange_states.resize(num_subranges, {{}, image_ci.initialLayout});
+    barriers.resize(num_subranges);
+}
+
+VKImage::~VKImage() = default;
+
+void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                         vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                         vk::ImageLayout new_layout) {
+    if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
+        return;
+    }
+
+    // Fill one barrier per subresource at the front of `barriers` and track the new state.
+    std::size_t num_barriers = 0;
+    for (u32 layer_offset = 0; layer_offset < num_layers; ++layer_offset) {
+        const u32 layer = base_layer + layer_offset;
+        for (u32 level_offset = 0; level_offset < num_levels; ++level_offset) {
+            const u32 level = base_level + level_offset;
+            SubrangeState& tracked = GetSubrangeState(layer, level);
+            barriers[num_barriers++] = vk::ImageMemoryBarrier(
+                tracked.access, new_access, tracked.layout, new_layout, VK_QUEUE_FAMILY_IGNORED,
+                VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1});
+            tracked.access = new_access;
+            tracked.layout = new_layout;
+        }
+    }
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([barriers = barriers, num_barriers](auto cmdbuf, auto& dld) {
+        // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
+        constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands;
+        cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr,
+                               static_cast<u32>(num_barriers), barriers.data(), dld);
+    });
+}
+
+bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                         vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept {
+    const auto differs = [new_access, new_layout](const SubrangeState& state) {
+        return state.access != new_access || state.layout != new_layout;
+    };
+
+    // A request that does not cover the whole image flags it as diverged from here on.
+    const bool covers_whole_image = base_layer == 0 && num_layers == image_num_layers &&
+                                    base_level == 0 && num_levels == image_num_levels;
+    if (!covers_whole_image) {
+        state_diverged = true;
+    }
+
+    // While nothing has diverged, the first subrange is tested before the full scan.
+    if (!state_diverged && differs(GetSubrangeState(0, 0))) {
+        return true;
+    }
+
+    for (u32 layer_offset = 0; layer_offset < num_layers; ++layer_offset) {
+        for (u32 level_offset = 0; level_offset < num_levels; ++level_offset) {
+            if (differs(GetSubrangeState(base_layer + layer_offset, base_level + level_offset))) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void VKImage::CreatePresentView() {
+    // Image type has to be 2D to be presented; the view covers the first level and layer.
+    const vk::ImageSubresourceRange first_subresource(aspect_mask, 0, 1, 0, 1);
+    const vk::ImageViewCreateInfo view_info({}, *image, vk::ImageViewType::e2D, format, {},
+                                            first_subresource);
+    const auto& dld = device.GetDispatchLoader();
+    present_view = device.GetLogical().createImageViewUnique(view_info, nullptr, dld);
+}
+
+VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
+    const auto base = layer * image_num_levels; // Index of the first level of the layer
+    return subrange_states[static_cast<std::size_t>(base) + static_cast<std::size_t>(level)];
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ b/src/video_core/renderer_vulkan/vk_image.h
@@ -0,0 +1,84 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKScheduler;
+
+class VKImage {
+public:
+    explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
+                     const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask);
+    ~VKImage();
+
+    /// Records an image transition for the given layers and levels and updates the image state.
+    void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
+                    vk::ImageLayout new_layout);
+
+    /// Returns a view compatible with presentation, creating it on first use; image must be 2D.
+    vk::ImageView GetPresentView() {
+        if (!present_view) {
+            CreatePresentView();
+        }
+        return *present_view;
+    }
+
+    /// Returns the Vulkan image handle.
+    vk::Image GetHandle() const {
+        return *image;
+    }
+
+    /// Returns the Vulkan format for this image.
+    vk::Format GetFormat() const {
+        return format;
+    }
+
+    /// Returns the Vulkan aspect mask.
+    vk::ImageAspectFlags GetAspectMask() const {
+        return aspect_mask;
+    }
+
+private:
+    struct SubrangeState final {
+        vk::AccessFlags access{};                             ///< Current access bits.
+        vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout.
+    };
+
+    bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
+                    vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept;
+
+    /// Creates a presentation view.
+    void CreatePresentView();
+
+    /// Returns the subrange state for a layer and level.
+    SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
+
+    const VKDevice& device; ///< Device handler.
+    VKScheduler& scheduler; ///< Device scheduler.
+
+    const vk::Format format;                ///< Vulkan format.
+    const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
+    const u32 image_num_layers;             ///< Number of layers.
+    const u32 image_num_levels;             ///< Number of mipmap levels.
+
+    UniqueImage image;            ///< Image handle.
+    UniqueImageView present_view; ///< Image view compatible with presentation.
+
+    std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers.
+    std::vector<SubrangeState> subrange_states;   ///< Current subrange state.
+
+    bool state_diverged = false; ///< True when subresource states may differ from each other.
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -6,6 +6,7 @@
 #include <optional>
 #include <tuple>
 #include <vector>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -16,34 +17,32 @@

 namespace Vulkan {

-// TODO(Rodrigo): Fine tune this number
-constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+namespace {
+
+u64 GetAllocationChunkSize(u64 required_size) {
+    static constexpr u64 steps[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; // MiB
+    const auto step = std::lower_bound(std::begin(steps), std::end(steps), required_size);
+    return step == std::end(steps) ? Common::AlignUp(required_size, 256ULL << 20) : *step;
+}
+
+} // Anonymous namespace

 class VKMemoryAllocation final {
 public:
    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
-                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
-        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
-          shifted_type{ShiftType(type)}, is_mappable{properties &
-                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
-        if (is_mappable) {
-            const auto dev = device.GetLogical();
-            const auto& dld = device.GetDispatchLoader();
-            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
-        }
-    }
+                                vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
+        : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
+          shifted_type{ShiftType(type)} {}

    ~VKMemoryAllocation() {
        const auto dev = device.GetLogical();
        const auto& dld = device.GetDispatchLoader();
-        if (is_mappable)
-            dev.unmapMemory(memory, dld);
        dev.free(memory, nullptr, dld);
    }

    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
-        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
-                                        static_cast<u64>(alignment));
+        auto found = TryFindFreeSection(free_iterator, allocation_size,
+                                        static_cast<u64>(commit_size), static_cast<u64>(alignment));
        if (!found) {
            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
                                       static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
                return nullptr;
            }
        }
-        u8* address = is_mappable ? base_address + *found : nullptr;
-        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+        auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
                                                           *found + commit_size);
        commits.push_back(commit.get());

@@ -65,12 +63,10 @@ public:

    void Free(const VKMemoryCommitImpl* commit) {
        ASSERT(commit);
-        const auto it =
-            std::find_if(commits.begin(), commits.end(),
-                         [&](const auto& stored_commit) { return stored_commit == commit; });
+
+        const auto it = std::find(std::begin(commits), std::end(commits), commit);
        if (it == commits.end()) {
-            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
-            UNREACHABLE();
+            UNREACHABLE_MSG("Freeing unallocated commit!");
            return;
        }
        commits.erase(it);
@@ -88,11 +84,11 @@ private:
    }

    /// A memory allocator, it may return a free region between "start" and "end" with the solicited
-    /// requeriments.
+    /// requirements.
    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
-        u64 iterator = start;
-        while (iterator + size < end) {
-            const u64 try_left = Common::AlignUp(iterator, alignment);
+        u64 iterator = Common::AlignUp(start, alignment);
+        while (iterator + size <= end) {
+            const u64 try_left = iterator;
            const u64 try_right = try_left + size;

            bool overlap = false;
@@ -100,7 +96,7 @@ private:
                const auto [commit_left, commit_right] = commit->interval;
                if (try_left < commit_right && commit_left < try_right) {
                    // There's an overlap, continue the search where the overlapping commit ends.
-                    iterator = commit_right;
+                    iterator = Common::AlignUp(commit_right, alignment);
                    overlap = true;
                    break;
                }
@@ -110,6 +106,7 @@ private:
                return try_left;
            }
        }
+
        // No free regions where found, return an empty optional.
        return std::nullopt;
    }
@@ -117,12 +114,8 @@ private:
    const VKDevice& device;                   ///< Vulkan device.
    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
-    const u64 alloc_size;                     ///< Size of this allocation.
+    const u64 allocation_size;                ///< Size of this allocation.
    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
-    const bool is_mappable;                   ///< Whether the allocation is mappable.
-
-    /// Base address of the mapped pointer.
-    u8* base_address{};

    /// Hints where the next free region is likely going to be.
    u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
 };

 VKMemoryManager::VKMemoryManager(const VKDevice& device)
-    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
-      is_memory_unified{GetMemoryUnified(props)} {}
+    : device{device}, properties{device.GetPhysical().getMemoryProperties(
+                          device.GetDispatchLoader())},
+      is_memory_unified{GetMemoryUnified(properties)} {}

 VKMemoryManager::~VKMemoryManager() = default;

-VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
-    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
+                                       bool host_visible) {
+    const u64 chunk_size = GetAllocationChunkSize(requirements.size);

    // When a host visible commit is asked, search for host visible and coherent, otherwise search
    // for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
            : vk::MemoryPropertyFlagBits::eDeviceLocal;

-    const auto TryCommit = [&]() -> VKMemoryCommit {
-        for (auto& alloc : allocs) {
-            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
-                continue;
-
-            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
-                return commit;
-            }
-        }
-        return {};
-    };
-
-    if (auto commit = TryCommit(); commit) {
+    if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
        return commit;
    }

    // Commit has failed, allocate more memory.
-    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
-        // TODO(Rodrigo): Try to use host memory.
-        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
-        UNREACHABLE();
+    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
+        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
+        // Allocation has failed, panic.
+        UNREACHABLE_MSG("Ran out of VRAM!");
+        return {};
    }

    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
    // there's a bug.
-    auto commit = TryCommit();
+    auto commit = TryAllocCommit(requirements, wanted_properties);
    ASSERT(commit);
    return commit;
 }
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
 VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
-    auto commit = Commit(requeriments, host_visible);
+    auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
    dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
 }
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
 VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getImageMemoryRequirements(image, dld);
-    auto commit = Commit(requeriments, host_visible);
+    auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
    dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
    return commit;
 }

 bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
                                  u64 size) {
-    const u32 type = [&]() {
-        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
-            const auto flags = props.memoryTypes[type_index].propertyFlags;
+    const u32 type = [&] {
+        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+            const auto flags = properties.memoryTypes[type_index].propertyFlags;
            if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
                // The type matches in type and in the wanted properties.
                return type_index;
            }
        }
-        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
-        UNREACHABLE();
-        return 0u;
+        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
+        return 0U;
    }();

    const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    // Try to allocate found type.
    const vk::MemoryAllocateInfo memory_ai(size, type);
    vk::DeviceMemory memory;
-    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+    if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
        res != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
        return false;
    }
-    allocs.push_back(
+    allocations.push_back(
        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
    return true;
 }

-/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
-    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
-        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
+                                               vk::MemoryPropertyFlags wanted_properties) {
+    // Walk the existing allocations and take the first compatible one that can fit the commit.
+    for (const auto& candidate : allocations) {
+        if (candidate->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
+            if (auto commit = candidate->Commit(requirements.size, requirements.alignment)) {
+                return commit;
+            }
+        }
+    }
+    return VKMemoryCommit{}; // No existing allocation could serve the request.
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(
+    const vk::PhysicalDeviceMemoryProperties& properties) {
+    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
+        if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
            // Memory is considered unified when heaps are device local only.
            return false;
        }
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
    return true;
 }

-VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
-                                       u8* data, u64 begin, u64 end)
-    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+                                       vk::DeviceMemory memory, u64 begin, u64 end)
+    : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}

 VKMemoryCommitImpl::~VKMemoryCommitImpl() {
    allocation->Free(this);
 }

-u8* VKMemoryCommitImpl::GetData() const {
-    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
-    return data;
+MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
+    // offset_ is relative to this commit, which begins at interval.first in the device memory.
+    const auto& dld = device.GetDispatchLoader();
+    const auto raw = device.GetLogical().mapMemory(memory, interval.first + offset_, size, {}, dld);
+    return MemoryMap{this, reinterpret_cast<u8*>(raw)};
+}
+
+void VKMemoryCommitImpl::Unmap() const {
+    // Only the memory handle is passed, so this unmaps the device memory backing the commit.
+    device.GetLogical().unmapMemory(memory, device.GetDispatchLoader());
+}
+
+MemoryMap VKMemoryCommitImpl::Map() const {
+    return Map(interval.second - interval.first); // Size of the whole commit.
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -12,6 +12,7 @@

 namespace Vulkan {

+class MemoryMap;
 class VKDevice;
 class VKMemoryAllocation;
 class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
 class VKMemoryManager final {
 public:
    explicit VKMemoryManager(const VKDevice& device);
+    VKMemoryManager(const VKMemoryManager&) = delete;
    ~VKMemoryManager();

    /**
     * Commits a memory with the specified requeriments.
-     * @param reqs Requeriments returned from a Vulkan call.
+     * @param requirements Requirements returned from a Vulkan call.
     * @param host_visible Signals the allocator that it *must* use host visible and coherent
-     * memory. When passing false, it will try to allocate device local memory.
+     *                     memory. When passing false, it will try to allocate device local memory.
     * @returns A memory commit.
     */
    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
    /// Allocates a chunk of memory.
    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);

-    /// Returns true if the device uses an unified memory model.
-    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+    /// Tries to allocate a memory commit.
+    VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
+                                  vk::MemoryPropertyFlags wanted_properties);

-    const VKDevice& device;                                  ///< Device handler.
-    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
-    const bool is_memory_unified;                            ///< True if memory model is unified.
-    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+    /// Returns true if the device uses an unified memory model.
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
+
+    const VKDevice& device;                              ///< Device handler.
+    const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+    const bool is_memory_unified;                        ///< True if memory model is unified.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };

 class VKMemoryCommitImpl final {
    friend VKMemoryAllocation;
+    friend MemoryMap;

 public:
-    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
-                                u64 begin, u64 end);
+    explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+                                vk::DeviceMemory memory, u64 begin, u64 end);
    ~VKMemoryCommitImpl();

-    /// Returns the writeable memory map. The commit has to be mappable.
-    u8* GetData() const;
+    /// Maps a memory region and returns a pointer to it.
+    /// It's illegal to have more than one memory map at the same time.
+    MemoryMap Map(u64 size, u64 offset = 0) const;
+
+    /// Maps the whole commit and returns a pointer to it.
+    /// It's illegal to have more than one memory map at the same time.
+    MemoryMap Map() const;

    /// Returns the Vulkan memory handler.
    vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
    }

 private:
+    /// Unmaps memory.
+    void Unmap() const;
+
+    const VKDevice& device;           ///< Vulkan device.
    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
    vk::DeviceMemory memory;          ///< Vulkan device memory handler.
    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+/// Owns a memory map and unmaps it on destruction. NOTE(review): a copy would unmap it twice.
+class MemoryMap final {
+public:
+    explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
+        : commit{commit}, address{address} {}
+
+    ~MemoryMap() {
+        Release();
+    }
+
+    /// Prematurely releases the memory map. Safe to call repeatedly; later calls do nothing.
+    void Release() {
+        if (commit) {
+            commit->Unmap();
+            commit = nullptr;
+        }
+    }
+
+    /// Returns the address of the memory map.
+    u8* GetAddress() const {
+        return address;
+    }
+
+    /// Implicitly converts to the address of the memory map.
+    operator u8*() const {
+        return address;
+    }
+
+private:
+    const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit, null once released.
+    u8* address{};                      ///< Address to the mapped memory.
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -0,0 +1,395 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader/compiler_settings.h"
+
+namespace Vulkan {
+
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
+    VideoCommon::Shader::CompileDepth::FullDecompile};
+
+/// Returns the GPU virtual address where the code of the given shader program begins
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto program_index = static_cast<std::size_t>(program);
+    return regs.code_address.CodeAddress() + regs.shader_config[program_index].offset;
+}
+
+/// Tells whether the instruction at offset is a scheduler instruction rather than a regular one
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Counting from main_offset, every fourth slot (the first of each group of 4) holds a
+    // scheduler instruction.
+    constexpr std::size_t instructions_per_group = 4;
+    return ((offset - main_offset) % instructions_per_group) == 0;
+}
+
+/// Calculates the size, in instructions, of the program stored in the passed code
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    // Encoding of a BRA that jumps to itself; all Nvidia shaders end with one. The mask clears
+    // one bit of the instruction before it is compared against that encoding.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+
+    // Non-compute programs are scanned from offset 10, compute programs from the beginning.
+    const std::size_t start_offset = is_compute ? 0 : 10;
+    const std::size_t code_size = program.size();
+
+    std::size_t offset = start_offset;
+    for (; offset < code_size; ++offset) {
+        if (IsSchedInstruction(offset, start_offset)) {
+            continue; // Scheduler instructions never terminate the program.
+        }
+        const u64 instruction = program[offset];
+        if ((instruction & mask) == self_jumping_branch || instruction == 0) {
+            break; // Stop at the terminating branch or at an all-zero instruction.
+        }
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, code_size);
+}
+
+/// Reads the shader program at the given GPU address and trims it to its calculated size
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(code.begin(), code.end(), 0);
+        return code;
+    });
+    // The whole maximum-length buffer is read; the program end is located afterwards.
+    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
+    code.resize(CalculateProgramSize(code, is_compute));
+    return code;
+}
+
+constexpr std::size_t GetStageFromProgram(std::size_t program) {
+    return program > 0 ? program - 1 : 0; // Programs 0 and 1 both map to stage 0.
+}
+
+constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
+    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+}
+
+ShaderType GetShaderType(Maxwell::ShaderProgram program) {
+    switch (program) {
+    case Maxwell::ShaderProgram::VertexB:
+        return ShaderType::Vertex;
+    case Maxwell::ShaderProgram::TesselationControl:
+        return ShaderType::TesselationControl;
+    case Maxwell::ShaderProgram::TesselationEval:
+        return ShaderType::TesselationEval;
+    case Maxwell::ShaderProgram::Geometry:
+        return ShaderType::Geometry;
+    case Maxwell::ShaderProgram::Fragment:
+        return ShaderType::Fragment;
+    default:
+        UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
+        return ShaderType::Vertex; // Fallback value once the unhandled program was reported.
+    }
+}
+
+u32 FillDescriptorLayout(const ShaderEntries& entries,
+                         std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                         Maxwell::ShaderProgram program_type, u32 base_binding) {
+    const vk::ShaderStageFlags stage_flags =
+        MaxwellToVK::ShaderStage(GetStageFromProgram(program_type));
+    // Appends count bindings of one descriptor type, numbered consecutively.
+    u32 next_binding = base_binding;
+    const auto Append = [&](vk::DescriptorType type, std::size_t count) {
+        for (; count > 0; --count) {
+            bindings.emplace_back(next_binding++, type, 1, stage_flags, nullptr);
+        }
+    };
+    Append(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+    Append(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+    Append(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+    Append(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+    Append(vk::DescriptorType::eStorageImage, entries.images.size());
+    return next_binding; // First binding index not used by this stage.
+}
+
+} // Anonymous namespace
+
+CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
+                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
+                           ProgramCode program_code, u32 main_offset)
+    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
+      program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
+      shader_ir{this->program_code, main_offset, compiler_settings, locker}, // member, not param
+      entries{GenerateShaderEntries(shader_ir)} {}
+
+CachedShader::~CachedShader() = default;
+
+Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
+    Core::System& system, Tegra::Engines::ShaderType stage) {
+    // Only compute shaders are served by the compute engine; every other stage uses Maxwell3D.
+    if (stage != Tegra::Engines::ShaderType::Compute) {
+        return system.GPU().Maxwell3D();
+    }
+    return system.GPU().KeplerCompute();
+}
+
+VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                                 const VKDevice& device, VKScheduler& scheduler,
+                                 VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue)
+    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
+      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
+      renderpass_cache(device) {}
+
+VKPipelineCache::~VKPipelineCache() = default;
+
+std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+    // The last result is reused for as long as the Maxwell3D shaders dirty flag is clear.
+    auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
+    if (!dirty) {
+        return last_shaders;
+    }
+    dirty = false;
+
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& memory_manager = system.GPU().MemoryManager();
+
+    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        // Skip stages that are not enabled; their slot stays empty.
+        if (!regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+
+        const auto program = static_cast<Maxwell::ShaderProgram>(index);
+        const GPUVAddr program_addr = GetShaderAddress(system, program);
+        const auto host_ptr = memory_manager.GetPointer(program_addr);
+        auto shader = TryGet(host_ptr);
+        if (!shader) {
+            // No shader found - create a new one
+            constexpr u32 stage_offset = 10;
+            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+
+            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+            ASSERT(cpu_addr);
+
+            shader = std::make_shared<CachedShader>(system, GetStageFromProgram(program),
+                                                    program_addr, *cpu_addr, host_ptr,
+                                                    std::move(code), stage_offset);
+            Register(shader);
+        }
+        shaders[index] = std::move(shader);
+    }
+    return last_shaders = std::move(shaders);
+}
+
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    if (last_graphics_pipeline && last_graphics_key == key) { // Same key as the previous call.
+        return *last_graphics_pipeline;
+    }
+    last_graphics_key = key;
+
+    const auto [it, inserted] = graphics_cache.try_emplace(key);
+    auto& pipeline = it->second;
+    if (inserted) {
+        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+        const auto [program, bindings] = DecompileShaders(key);
+        pipeline = std::make_unique<VKGraphicsPipeline>(
+            device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key,
+            bindings, program);
+    }
+    return *(last_graphics_pipeline = pipeline.get());
+}
+
+VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    const auto [it, inserted] = compute_cache.try_emplace(key);
+    auto& pipeline = it->second;
+    if (!inserted) {
+        return *pipeline;
+    }
+    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+
+    // Look the kernel up in the shader cache, reading its code from GPU memory on a miss.
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto program_addr = key.shader;
+    const auto host_ptr = memory_manager.GetPointer(program_addr);
+    auto shader = TryGet(host_ptr);
+    if (!shader) {
+        // No shader found - create a new one
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        ASSERT(cpu_addr);
+
+        constexpr u32 kernel_main_offset = 0;
+        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
+                                                program_addr, *cpu_addr, host_ptr, std::move(code),
+                                                kernel_main_offset);
+        Register(shader);
+    }
+
+    Specialization specialization;
+    specialization.workgroup_size = key.workgroup_size;
+    specialization.shared_memory_size = key.shared_memory_size;
+
+    const SPIRVShader spirv_shader{
+        Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
+        shader->GetEntries()};
+    pipeline = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
+                                                   update_descriptor_queue, spirv_shader);
+    return *pipeline;
+}
+
+/// Removes `shader` from the cache and destroys every graphics and compute pipeline whose key
+/// references the shader's GPU address. The scheduler is finished (at most once) before the first
+/// pipeline is destroyed.
+void VKPipelineCache::Unregister(const Shader& shader) {
+    bool finished = false;
+    const auto Finish = [&] {
+        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
+        // flush.
+        if (finished) {
+            return;
+        }
+        finished = true;
+        scheduler.Finish();
+    };
+
+    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
+    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
+        const auto& entry = it->first;
+        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
+            entry.shaders.end()) {
+            ++it;
+            continue;
+        }
+        Finish();
+        if (it->second.get() == last_graphics_pipeline) {
+            // GetGraphicsPipeline caches a raw pointer to the last pipeline it returned. Drop it
+            // before the pipeline is destroyed so the fast path cannot return a dangling pointer.
+            last_graphics_pipeline = nullptr;
+        }
+        it = graphics_cache.erase(it);
+    }
+    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
+        const auto& entry = it->first;
+        if (entry.shader != invalidated_addr) {
+            ++it;
+            continue;
+        }
+        Finish();
+        it = compute_cache.erase(it);
+    }
+
+    RasterizerCache::Unregister(shader);
+}
+
+/// Decompiles every enabled shader program into SPIR-V and collects the descriptor set layout
+/// bindings of those programs.
+/// @param key Pipeline key whose fixed state and render pass parameters specialize the shaders.
+/// @returns The per-stage SPIR-V program paired with the accumulated layout bindings.
+std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
+VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
+    const auto& fixed_state = key.fixed_state;
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto& gpu = system.GPU().Maxwell3D();
+
+    const auto& input_assembly = fixed_state.input_assembly;
+    const auto& tessellation = fixed_state.tessellation;
+
+    Specialization specialization;
+    specialization.primitive_topology = input_assembly.topology;
+    const bool draws_points =
+        specialization.primitive_topology == Maxwell::PrimitiveTopology::Points;
+    if (draws_points) {
+        // The point size is only forwarded for point topologies and must be non-zero there.
+        ASSERT(input_assembly.point_size != 0.0f);
+        specialization.point_size = input_assembly.point_size;
+    }
+    for (std::size_t attribute = 0; attribute < Maxwell::NumVertexAttributes; ++attribute) {
+        specialization.attribute_types[attribute] =
+            fixed_state.vertex_input.attributes[attribute].type;
+    }
+    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+    specialization.tessellation.primitive = tessellation.primitive;
+    specialization.tessellation.spacing = tessellation.spacing;
+    specialization.tessellation.clockwise = tessellation.clockwise;
+    for (const auto& color_attachment : key.renderpass_params.color_attachments) {
+        specialization.enabled_rendertargets.set(color_attachment.index);
+    }
+
+    SPIRVProgram program;
+    std::vector<vk::DescriptorSetLayoutBinding> bindings;
+
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        // Programs that are not enabled contribute nothing.
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
+
+        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        const auto shader = TryGet(host_ptr);
+        ASSERT(shader);
+
+        // Stage indices are 0 - 5: program 0 and program 1 both map to stage 0.
+        std::size_t stage = 0;
+        if (index != 0) {
+            stage = index - 1;
+        }
+        const auto program_type = GetShaderType(program_enum);
+        const auto& entries = shader->GetEntries();
+        program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
+                          entries};
+
+        // Advance the base binding past the descriptors of this program.
+        const u32 previous_binding = specialization.base_binding;
+        specialization.base_binding =
+            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
+        ASSERT(previous_binding + entries.NumBindings() == specialization.base_binding);
+
+        if (program_enum == Maxwell::ShaderProgram::VertexA) {
+            // VertexB was combined with VertexA, so we skip the VertexB iteration
+            ++index;
+        }
+    }
+    return {std::move(program), std::move(bindings)};
+}
+
+/// Appends descriptor update template entries for the resources listed in `entries`.
+/// Resources are emitted in this order: const buffers, global buffers, texel buffers, samplers
+/// and images.
+/// @param device           Device queried for its driver ID.
+/// @param entries          Shader resources to describe.
+/// @param binding          In-out: first binding to use; advanced by the number of resources.
+/// @param offset           In-out: byte offset of the first entry; advanced by the bytes consumed.
+/// @param template_entries Output vector the entries are appended to.
+void FillDescriptorUpdateTemplateEntries(
+    const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
+    std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
+    static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
+
+    const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
+        const u32 count = static_cast<u32>(count_);
+        if (count == 0) {
+            // Nothing to describe; binding and offset stay where they are.
+            return;
+        }
+
+        // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
+        // crash, so those are described one descriptor at a time.
+        const bool one_entry_per_descriptor =
+            descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
+            device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary;
+        if (one_entry_per_descriptor) {
+            for (u32 element = 0; element < count; ++element) {
+                template_entries.emplace_back(binding + element, 0, 1, descriptor_type,
+                                              offset + element * entry_size, entry_size);
+            }
+        } else {
+            template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
+        }
+
+        offset += count * entry_size;
+        binding += count;
+    };
+
+    AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+    AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+    AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+    AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+    AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -0,0 +1,200 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/shader_ir.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Vulkan {
+
+class RasterizerVulkan;
+class VKComputePipeline;
+class VKDescriptorPool;
+class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
+
+class CachedShader;
+using Shader = std::shared_ptr<CachedShader>;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+using ProgramCode = std::vector<u64>;
+
+/// Key identifying a graphics pipeline: the fixed pipeline state, the GPU address stored for each
+/// shader program slot and the render pass parameters.
+struct GraphicsPipelineCacheKey {
+    FixedPipelineState fixed_state;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    RenderPassParams renderpass_params;
+
+    /// Folds the hash of the fixed state, every shader address and the render pass parameters
+    /// into a single value.
+    std::size_t Hash() const noexcept {
+        std::size_t seed = fixed_state.Hash();
+        for (std::size_t slot = 0; slot < shaders.size(); ++slot) {
+            boost::hash_combine(seed, shaders[slot]);
+        }
+        boost::hash_combine(seed, renderpass_params.Hash());
+        return seed;
+    }
+
+    /// Two keys are equal when all three members compare equal.
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return fixed_state == rhs.fixed_state && shaders == rhs.shaders &&
+               renderpass_params == rhs.renderpass_params;
+    }
+};
+
+/// Key identifying a compute pipeline: the shader address plus the shared memory and workgroup
+/// sizes it is specialized for.
+struct ComputePipelineCacheKey {
+    GPUVAddr shader{};
+    u32 shared_memory_size{};
+    std::array<u32, 3> workgroup_size{};
+
+    /// XORs the shader address with shifted copies of the size fields.
+    std::size_t Hash() const noexcept {
+        const auto address_bits = static_cast<std::size_t>(shader);
+        const auto shared_memory_bits = (static_cast<std::size_t>(shared_memory_size) >> 7) << 40;
+        const auto size_x_bits = static_cast<std::size_t>(workgroup_size[0]);
+        const auto size_y_bits = static_cast<std::size_t>(workgroup_size[1]) << 16;
+        const auto size_z_bits = static_cast<std::size_t>(workgroup_size[2]) << 24;
+        return address_bits ^ shared_memory_bits ^ size_x_bits ^ size_y_bits ^ size_z_bits;
+    }
+
+    /// Two keys are equal when all three members compare equal.
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+        return shader == rhs.shader && shared_memory_size == rhs.shared_memory_size &&
+               workgroup_size == rhs.workgroup_size;
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::GraphicsPipelineCacheKey> { // Hasher used by the graphics pipeline map
+    std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
+        return k.Hash(); // Forwards to the key's own Hash()
+    }
+};
+
+template <>
+struct hash<Vulkan::ComputePipelineCacheKey> { // Hasher used by the compute pipeline map
+    std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
+        return k.Hash(); // Forwards to the key's own Hash()
+    }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class CachedShader final : public RasterizerCacheObject { // Shader object stored in VKPipelineCache
+public:
+    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+    ~CachedShader();
+
+    GPUVAddr GetGpuAddr() const { // GPU address of the shader
+        return gpu_addr;
+    }
+
+    VAddr GetCpuAddr() const override { // CPU address of the shader
+        return cpu_addr;
+    }
+
+    std::size_t GetSizeInBytes() const override { // Size of the program code in bytes
+        return program_code.size() * sizeof(u64);
+    }
+
+    VideoCommon::Shader::ShaderIR& GetIR() { // Mutable access to the shader IR
+        return shader_ir;
+    }
+
+    const VideoCommon::Shader::ShaderIR& GetIR() const { // Read-only access to the shader IR
+        return shader_ir;
+    }
+
+    const ShaderEntries& GetEntries() const { // Resource entries stored for this shader
+        return entries;
+    }
+
+private:
+    static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
+                                                                 Tegra::Engines::ShaderType stage);
+
+    GPUVAddr gpu_addr{};
+    VAddr cpu_addr{};
+    ProgramCode program_code; // Vector of 64-bit words (see ProgramCode alias)
+    VideoCommon::Shader::ConstBufferLocker locker; // NOTE(review): declared before shader_ir; keep this order, constructor may depend on it
+    VideoCommon::Shader::ShaderIR shader_ir;
+    ShaderEntries entries;
+};
+
+class VKPipelineCache final : public RasterizerCache<Shader> { // Caches shaders and the pipelines built from them
+public:
+    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                             const VKDevice& device, VKScheduler& scheduler,
+                             VKDescriptorPool& descriptor_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue);
+    ~VKPipelineCache();
+
+    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); // One slot per shader program; also stored in last_shaders
+
+    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); // Builds the pipeline on first use of key
+
+    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); // Builds the pipeline on first use of key
+
+protected:
+    void Unregister(const Shader& shader) override; // Also erases pipelines that reference the shader's GPU address
+
+    void FlushObjectInner(const Shader& object) override {} // Empty body: nothing is done here
+
+private:
+    std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
+        const GraphicsPipelineCacheKey& key);
+
+    Core::System& system;
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    VKDescriptorPool& descriptor_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+
+    VKRenderPassCache renderpass_cache; // Passed to every VKGraphicsPipeline that gets created
+
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; // Result of the latest GetShaders call
+
+    GraphicsPipelineCacheKey last_graphics_key; // Key of the latest GetGraphicsPipeline request
+    VKGraphicsPipeline* last_graphics_pipeline = nullptr; // Non-owning; points into graphics_cache
+
+    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
+        graphics_cache;
+    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
+};
+
+void FillDescriptorUpdateTemplateEntries( // Appends template entries for `entries` to template_entries
+    const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, // binding and offset are advanced
+    std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -0,0 +1,263 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include <boost/container/static_vector.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_accelerated.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Tegra::Engines {
+class Maxwell3D;
+}
+
+namespace Vulkan {
+
+struct VKScreenInfo;
+
+using ImageViewsPack =
+    boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
+
+/// Key identifying a cached framebuffer: the render pass, the framebuffer dimensions and the
+/// image views attached to it.
+struct FramebufferCacheKey {
+    vk::RenderPass renderpass{};
+    u32 width = 0;
+    u32 height = 0;
+    ImageViewsPack views;
+
+    /// Combines the render pass handle, every image view handle, the width and the height.
+    std::size_t Hash() const noexcept {
+        std::size_t seed = 0;
+        boost::hash_combine(seed, static_cast<VkRenderPass>(renderpass));
+        for (std::size_t slot = 0; slot < views.size(); ++slot) {
+            boost::hash_combine(seed, static_cast<VkImageView>(views[slot]));
+        }
+        boost::hash_combine(seed, width);
+        boost::hash_combine(seed, height);
+        return seed;
+    }
+
+    /// Two keys are equal when all four members compare equal.
+    bool operator==(const FramebufferCacheKey& rhs) const noexcept {
+        return renderpass == rhs.renderpass && views == rhs.views && width == rhs.width &&
+               height == rhs.height;
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::FramebufferCacheKey> { // Hasher used by the framebuffer cache map
+    std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
+        return k.Hash(); // Forwards to the key's own Hash()
+    }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class BufferBindings;
+
+struct ImageView { // Pairs a view with a pointer to an image layout
+    View view;
+    vk::ImageLayout* layout = nullptr; // Raw pointer, null by default; NOTE(review): what it points at is set elsewhere, confirm in vk_rasterizer.cpp
+};
+
+/// Vulkan implementation of the rasterizer interface inherited from
+/// VideoCore::RasterizerAccelerated. It holds references to the shared renderer objects and owns
+/// the caches and helper passes declared at the bottom of the class.
+class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
+public:
+    explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+                              VKScreenInfo& screen_info, const VKDevice& device,
+                              VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+                              VKScheduler& scheduler);
+    ~RasterizerVulkan() override;
+
+    // Overrides of the rasterizer interface.
+    bool DrawBatch(bool is_indexed) override;
+    bool DrawMultiBatch(bool is_indexed) override;
+    void Clear() override;
+    void DispatchCompute(GPUVAddr code_addr) override;
+    void FlushAll() override;
+    void FlushRegion(CacheAddr addr, u64 size) override;
+    void InvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushCommands() override;
+    void TickFrame() override;
+    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
+                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                               const Tegra::Engines::Fermi2D::Config& copy_config) override;
+    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
+                           u32 pixel_stride) override;
+
+    /// Maximum supported size that a constbuffer can have in bytes.
+    static constexpr std::size_t MaxConstbufferSize = 0x10000;
+    static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
+                  "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
+
+private:
+    /// Parameters of a single draw call.
+    struct DrawParameters {
+        void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
+
+        u32 base_instance = 0;
+        u32 num_instances = 0;
+        u32 base_vertex = 0;
+        u32 num_vertices = 0;
+        bool is_indexed = false;
+    };
+
+    /// One bit per color render target plus one extra bit.
+    using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
+
+    // NOTE(review): presumably the extra Texceptions bit, reserved for the zeta attachment; this
+    // assumes Maxwell::NumRenderTargets == 8 - confirm.
+    static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
+
+    void Draw(bool is_indexed, bool is_instanced);
+
+    void FlushWork();
+
+    Texceptions UpdateAttachments();
+
+    std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
+
+    /// Setups geometry buffers and state.
+    DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
+                                 bool is_indexed, bool is_instanced);
+
+    /// Setup descriptors in the graphics pipeline.
+    void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+
+    void SetupImageTransitions(Texceptions texceptions,
+                               const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
+                               const View& zeta_attachment);
+
+    void UpdateDynamicStates();
+
+    bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
+
+    void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
+                           BufferBindings& buffer_bindings);
+
+    void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
+
+    /// Setup constant buffers in the graphics pipeline.
+    void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Setup global buffers in the graphics pipeline.
+    void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Setup texel buffers in the graphics pipeline.
+    void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Setup textures in the graphics pipeline.
+    void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+
+    /// Setup images in the graphics pipeline.
+    void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
+
+    /// Setup constant buffers in the compute pipeline.
+    void SetupComputeConstBuffers(const ShaderEntries& entries);
+
+    /// Setup global buffers in the compute pipeline.
+    void SetupComputeGlobalBuffers(const ShaderEntries& entries);
+
+    /// Setup texel buffers in the compute pipeline.
+    void SetupComputeTexelBuffers(const ShaderEntries& entries);
+
+    /// Setup textures in the compute pipeline.
+    void SetupComputeTextures(const ShaderEntries& entries);
+
+    /// Setup images in the compute pipeline.
+    void SetupComputeImages(const ShaderEntries& entries);
+
+    void SetupConstBuffer(const ConstBufferEntry& entry,
+                          const Tegra::Engines::ConstBufferInfo& buffer);
+
+    void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
+
+    void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
+
+    void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
+
+    void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
+
+    void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
+    void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
+    void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
+    void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
+    void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
+    void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
+
+    std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
+
+    std::size_t CalculateComputeStreamBufferSize() const;
+
+    std::size_t CalculateVertexArraysSize() const;
+
+    std::size_t CalculateIndexBufferSize() const;
+
+    std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
+                                         const Tegra::Engines::ConstBufferInfo& buffer) const;
+
+    RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
+
+    // Objects received through the constructor; held by reference, not owned.
+    Core::System& system;
+    Core::Frontend::EmuWindow& render_window;
+    VKScreenInfo& screen_info;
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+
+    // Objects owned by the rasterizer. The declaration order is also the construction order.
+    VKStagingBufferPool staging_pool;
+    VKDescriptorPool descriptor_pool;
+    VKUpdateDescriptorQueue update_descriptor_queue;
+    QuadArrayPass quad_array_pass;
+    Uint8Pass uint8_pass;
+
+    VKTextureCache texture_cache;
+    VKPipelineCache pipeline_cache;
+    VKBufferCache buffer_cache;
+    VKSamplerCache sampler_cache;
+
+    std::array<View, Maxwell::NumRenderTargets> color_attachments;
+    View zeta_attachment;
+
+    std::vector<ImageView> sampled_views;
+    std::vector<ImageView> image_views;
+
+    u32 draw_counter = 0;
+
+    // TODO(Rodrigo): Invalidate on image destruction
+    std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -0,0 +1,100 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <vector>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+
+namespace Vulkan {
+
+VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} // Only stores the device reference
+
+VKRenderPassCache::~VKRenderPassCache() = default; // Members clean themselves up
+
+/// Returns the render pass matching `params`, creating and caching it the first time the
+/// parameters are seen.
+vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
+    const auto [iterator, was_inserted] = cache.try_emplace(params);
+    auto& renderpass = iterator->second;
+    if (was_inserted) {
+        // The slot was just created and is still empty: build the render pass now.
+        renderpass = CreateRenderPass(params);
+    }
+    return *renderpass;
+}
+
+/// Builds a render pass with a single graphics subpass for `params`: one attachment per color
+/// attachment, followed by the zeta attachment when `params.has_zeta` is set. Every attachment is
+/// loaded and stored, and keeps the same layout before and after the pass.
+UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
+    const std::size_t num_color_attachments = params.color_attachments.size();
+
+    std::vector<vk::AttachmentDescription> descriptors;
+    std::vector<vk::AttachmentReference> color_references;
+
+    for (std::size_t rt = 0; rt < num_color_attachments; ++rt) {
+        const auto& attachment = params.color_attachments[rt];
+        const auto format =
+            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
+        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
+                   static_cast<u32>(attachment.pixel_format));
+
+        // Texceptions stay in the general layout instead of the color attachment layout.
+        vk::ImageLayout color_layout = vk::ImageLayout::eColorAttachmentOptimal;
+        if (attachment.is_texception) {
+            color_layout = vk::ImageLayout::eGeneral;
+        }
+
+        // NOTE: eMayAlias is currently set on every color attachment.
+        // TODO(Rodrigo): Set eMayAlias only when it's needed.
+        descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
+                                 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
+                                 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
+                                 vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
+        color_references.emplace_back(static_cast<u32>(rt), color_layout);
+    }
+
+    vk::AttachmentReference zeta_attachment_ref;
+    if (params.has_zeta) {
+        const auto format =
+            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
+        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
+                   static_cast<u32>(params.zeta_pixel_format));
+
+        vk::ImageLayout zeta_layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+        if (params.zeta_texception) {
+            zeta_layout = vk::ImageLayout::eGeneral;
+        }
+
+        descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
+                                 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
+                                 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
+                                 vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
+        // The zeta attachment is described right after all color attachments.
+        zeta_attachment_ref =
+            vk::AttachmentReference(static_cast<u32>(num_color_attachments), zeta_layout);
+    }
+
+    const vk::AttachmentReference* const zeta_reference_ptr =
+        params.has_zeta ? &zeta_attachment_ref : nullptr;
+    const vk::SubpassDescription subpass_description(
+        {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
+        color_references.data(), nullptr, zeta_reference_ptr, 0, nullptr);
+
+    // Accumulate the access and stage masks of the attachment kinds that are in use.
+    vk::AccessFlags access;
+    vk::PipelineStageFlags stage;
+    if (!color_references.empty()) {
+        access |=
+            vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
+        stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
+    }
+    if (params.has_zeta) {
+        access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
+                  vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+        stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
+    }
+
+    const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
+                                                   {});
+
+    const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
+                                               descriptors.data(), 1, &subpass_description, 1,
+                                               &subpass_dependency);
+
+    const auto logical_device = device.GetLogical();
+    const auto& dispatch_loader = device.GetDispatchLoader();
+    return logical_device.createRenderPassUnique(create_info, nullptr, dispatch_loader);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -0,0 +1,97 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <tuple>
+#include <unordered_map>
+
+#include <boost/container/static_vector.hpp>
+#include <boost/functional/hash.hpp>
+
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/surface.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+// TODO(Rodrigo): Optimize this structure for faster hashing
+
+/// Parameters that identify a render pass: the list of color attachments and the description of
+/// the optional zeta attachment.
+struct RenderPassParams {
+    /// Description of one color attachment.
+    struct ColorAttachment {
+        u32 index = 0;
+        VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+        bool is_texception = false;
+
+        /// ORs the pixel format with the texception flag shifted by 6 and the index shifted by 7.
+        std::size_t Hash() const noexcept {
+            const auto format_bits = static_cast<std::size_t>(pixel_format);
+            const auto texception_bit = static_cast<std::size_t>(is_texception) << 6;
+            const auto index_bits = static_cast<std::size_t>(index) << 7;
+            return format_bits | texception_bit | index_bits;
+        }
+
+        /// Two attachments are equal when all three members compare equal.
+        bool operator==(const ColorAttachment& rhs) const noexcept {
+            return index == rhs.index && pixel_format == rhs.pixel_format &&
+                   is_texception == rhs.is_texception;
+        }
+    };
+
+    boost::container::static_vector<ColorAttachment,
+                                    Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
+        color_attachments{};
+    // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
+    VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
+    bool has_zeta = false;
+    bool zeta_texception = false;
+
+    /// Combines the hash of every color attachment with the three zeta members.
+    std::size_t Hash() const noexcept {
+        std::size_t seed = 0;
+        for (std::size_t slot = 0; slot < color_attachments.size(); ++slot) {
+            boost::hash_combine(seed, color_attachments[slot].Hash());
+        }
+        boost::hash_combine(seed, zeta_pixel_format);
+        boost::hash_combine(seed, has_zeta);
+        boost::hash_combine(seed, zeta_texception);
+        return seed;
+    }
+
+    /// Two parameter sets are equal when all four members compare equal.
+    bool operator==(const RenderPassParams& rhs) const {
+        return color_attachments == rhs.color_attachments &&
+               zeta_pixel_format == rhs.zeta_pixel_format && has_zeta == rhs.has_zeta &&
+               zeta_texception == rhs.zeta_texception;
+    }
+};
+
+} // namespace Vulkan
+
+namespace std {
+
+template <>
+struct hash<Vulkan::RenderPassParams> { // Hasher used by the render pass cache map
+    std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
+        return k.Hash(); // Forwards to the params' own Hash()
+    }
+};
+
+} // namespace std
+
+namespace Vulkan {
+
+class VKRenderPassCache final { // Creates render passes on demand and keeps them keyed by their parameters
+public:
+    explicit VKRenderPassCache(const VKDevice& device);
+    ~VKRenderPassCache();
+
+    vk::RenderPass GetRenderPass(const RenderPassParams& params); // Creates the render pass on first use of params
+
+private:
+    UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; // Builds a new render pass; does not touch the cache
+
+    const VKDevice& device; // Held by reference, not owned
+    std::unordered_map<RenderPassParams, UniqueRenderPass> cache; // Owns every render pass handed out
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -72,12 +72,22 @@ VKFence::VKFence(const VKDevice& device, UniqueFence handle)
 VKFence::~VKFence() = default;

 void VKFence::Wait() {
+    static constexpr u64 timeout = std::numeric_limits<u64>::max(); // Largest representable timeout
    const auto dev = device.GetLogical();
    const auto& dld = device.GetDispatchLoader();
-    dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+    switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) {
+    case vk::Result::eSuccess:
+        return;
+    case vk::Result::eErrorDeviceLost:
+        device.ReportLoss(); // Report the loss first, then throw like any other failure
+        [[fallthrough]];
+    default:
+        vk::throwResultException(result, "vk::waitForFences"); // Any non-success result becomes an exception
+    }
 }

 void VKFence::Release() {
+    ASSERT(is_owned); // Releasing a fence that is not owned is a programming error
    is_owned = false;
 }

@@ -133,8 +143,32 @@ void VKFence::Unprotect(VKResource* resource) {
    protected_resources.erase(it);
 }

+/// Rewrites every protected entry equal to old_resource so it points at new_resource instead.
+void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
+    for (auto& entry : protected_resources) {
+        if (entry == old_resource) {
+            entry = new_resource;
+        }
+    }
+}
+
 VKFenceWatch::VKFenceWatch() = default;

+/// Constructs a watch and immediately calls Watch() on the given fence.
+VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
+    Watch(initial_fence);
+}
+
+VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
+    fence = std::exchange(rhs.fence, nullptr);
+    if (fence) {
+        fence->RedirectProtection(&rhs, this);
+    }
+}
+
+/// Move-assigns a watch, taking over the fence (if any) that rhs was watching and leaving rhs
+/// free. Any fence this object was watching beforehand stops protecting it.
+VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
+    if (this == &rhs) {
+        return *this;
+    }
+    // Drop the registration held on the previously watched fence, mirroring the destructor.
+    // Without this the old fence would keep an entry for this object after the pointer is
+    // overwritten below.
+    if (fence) {
+        fence->Unprotect(this);
+    }
+    fence = std::exchange(rhs.fence, nullptr);
+    if (fence) {
+        // The fence still has rhs registered; repoint that entry to this object.
+        fence->RedirectProtection(&rhs, this);
+    }
+    return *this;
+}
+
 VKFenceWatch::~VKFenceWatch() {
    if (fence) {
        fence->Unprotect(this);
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -65,6 +65,9 @@ public:
    /// Removes protection for a resource.
    void Unprotect(VKResource* resource);

+    /// Redirects one protected resource to a new address.
+    void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
+
    /// Retrieves the fence.
    operator vk::Fence() const {
        return *handle;
@@ -97,8 +100,13 @@ private:
 class VKFenceWatch final : public VKResource {
 public:
    explicit VKFenceWatch();
+    VKFenceWatch(VKFence& initial_fence);
+    VKFenceWatch(VKFenceWatch&&) noexcept;
+    VKFenceWatch(const VKFenceWatch&) = delete;
    ~VKFenceWatch() override;

+    VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
+
    /// Waits for the fence to be released.
    void Wait();

@@ -116,6 +124,14 @@ public:

    void OnFenceRemoval(VKFence* signaling_fence) override;

+    /**
+     * Do not use it paired with Watch. Use TryWatch instead.
+     * Returns true when the watch is currently bound to a fence, i.e. when it is not free.
+     */
+    bool IsUsed() const {
+        return fence != nullptr;
+    }
+
 private:
    VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
 };
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,10 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
-        max_anisotropy, tsc.depth_compare_enabled,
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+        has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
        unnormalized_coords);
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@

 namespace Vulkan {

-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
-    : device{device}, resource_manager{resource_manager} {
-    next_fence = &resource_manager.CommitFence();
-    AllocateNewContext();
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+/// Executes every recorded command in recording order against cmdbuf, destroying each command
+/// right after it runs, then resets the chunk so it can be recorded into again.
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    for (Command* current = first; current != nullptr;) {
+        // Read the successor before the node is destroyed.
+        Command* const following = current->GetNext();
+        current->Execute(cmdbuf, dld);
+        current->~Command();
+        current = following;
+    }
+
+    first = nullptr;
+    last = nullptr;
+    command_offset = 0;
 }

-VKScheduler::~VKScheduler() = default;
+/// Commits the first fence, prepares an empty command chunk and an execution context, and then
+/// starts the worker thread running WorkerThread().
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager}, next_fence{
+                                                              &resource_manager.CommitFence()} {
+    AcquireNewChunk();
+    AllocateNewContext();
+    // Started last, after the chunk and the context above have been set up.
+    worker_thread = std::thread(&VKScheduler::WorkerThread, this);
+}
+
+/// Asks the worker thread to stop and joins it.
+VKScheduler::~VKScheduler() {
+    {
+        // The worker reads quit inside its condition-variable predicate while holding the mutex.
+        // Writing it under the same mutex avoids a data race and guarantees the notification
+        // below cannot slip in between the worker's predicate check and its wait.
+        std::unique_lock lock{mutex};
+        quit = true;
+    }
+    cv.notify_all();
+    worker_thread.join();
+}

+// Submits the current execution context, optionally releases its fence, and then allocates a
+// new context. Does not wait on the fence.
 void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

+// Submits the current execution context, waits on its fence, optionally releases the fence,
+// and then allocates a new context.
 void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
    SubmitExecution(semaphore);
    current_fence->Wait();
-    if (release_fence)
+    if (release_fence) {
        current_fence->Release();
+    }
    AllocateNewContext();
 }

+/// Dispatches any pending chunk and then loops until the chunk queue is observed empty while
+/// the scheduler mutex is held.
+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    DispatchWork();
+
+    for (;;) {
+        // Notify on every pass in case the worker is parked on the condition variable.
+        cv.notify_all();
+        std::unique_lock lock{mutex};
+        if (chunk_queue.Empty()) {
+            break;
+        }
+    }
+}
+
+/// Queues the current chunk for the worker thread and acquires a new chunk to record into.
+/// Does nothing when the current chunk holds no commands.
+void VKScheduler::DispatchWork() {
+    if (!chunk->Empty()) {
+        chunk_queue.Push(std::move(chunk));
+        cv.notify_all();
+        AcquireNewChunk();
+    }
+}
+
+/// Makes renderpass_bi the active render pass. When it already matches the tracked one nothing
+/// is recorded; otherwise a command is recorded that ends the previous render pass (if one was
+/// active) and begins the requested one.
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    const bool had_renderpass = state.renderpass.has_value();
+    if (had_renderpass && renderpass_bi == *state.renderpass) {
+        return;
+    }
+
+    state.renderpass = renderpass_bi;
+    Record([renderpass_bi, had_renderpass](auto cmdbuf, auto& dld) {
+        if (had_renderpass) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+/// Ends the tracked render pass, if any, by delegating to EndRenderPass().
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+    EndRenderPass();
+}
+
+/// Records a graphics pipeline bind unless the same pipeline is already tracked as bound.
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    const bool already_bound = state.graphics_pipeline == pipeline;
+    if (!already_bound) {
+        state.graphics_pipeline = pipeline;
+        Record([pipeline](auto cmdbuf, auto& dld) {
+            cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+        });
+    }
+}
+
+/// Worker loop: waits until a chunk is queued or quit is set, executes each queued chunk on the
+/// current command buffer and hands it back to the reserve. Leaves the loop once quit is set.
+void VKScheduler::WorkerThread() {
+    std::unique_lock lock{mutex};
+    for (;;) {
+        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+        if (quit) {
+            break;
+        }
+        auto pending = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        pending->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        chunk_reserve.Push(std::move(pending));
+    }
+}
+
+// Ends pending operations, invalidates tracked state and waits for the worker before ending
+// the current command buffer and submitting it to the graphics queue with the current fence.
 void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    EndPendingOperations();
+    InvalidateState();
+    WaitWorker();
+
+    std::unique_lock lock{mutex};
+
+    const auto queue = device.GetGraphicsQueue();
    const auto& dld = device.GetDispatchLoader();
    current_cmdbuf.end(dld);

-    const auto queue = device.GetGraphicsQueue();
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+    // The semaphore count passed to the submit info is 1 only when a semaphore was provided.
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
                                     &semaphore);
-    queue.submit({submit_info}, *current_fence, dld);
+    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
 }

+// Promotes next_fence to current_fence, commits a new next_fence, and commits and begins a new
+// one-time-submit command buffer tied to the current fence. Runs under the scheduler mutex.
 void VKScheduler::AllocateNewContext() {
+    std::unique_lock lock{mutex};
    current_fence = next_fence;
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    next_fence = &resource_manager.CommitFence();

-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+                         device.GetDispatchLoader());
+}
+
+/// Forgets the tracked graphics pipeline and clears every "already set" dynamic state flag.
+/// The tracked render pass is not touched here.
+void VKScheduler::InvalidateState() {
+    state.graphics_pipeline = nullptr;
+
+    state.viewports = state.scissors = state.depth_bias = false;
+    state.blend_constants = state.depth_bounds = state.stencil_values = false;
+}
+
+/// Ends operations that are still open; currently this only ends the tracked render pass.
+void VKScheduler::EndPendingOperations() {
+    EndRenderPass();
+}
+
+/// When a render pass is tracked as active, clears the tracking and records a command that
+/// ends it. Does nothing otherwise.
+void VKScheduler::EndRenderPass() {
+    if (state.renderpass.has_value()) {
+        state.renderpass.reset();
+        Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+    }
+}
+
+/// Installs a chunk to record into: reuses one from the reserve when available, otherwise
+/// allocates a fresh one.
+void VKScheduler::AcquireNewChunk() {
+    if (!chunk_reserve.Empty()) {
+        chunk = std::move(chunk_reserve.Front());
+        chunk_reserve.Pop();
+    } else {
+        chunk = std::make_unique<CommandChunk>();
+    }
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@

 #pragma once

+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/declarations.h"

 namespace Vulkan {
@@ -30,23 +37,6 @@ private:
    VKFence* const& fence;
 };

-class VKCommandBufferView {
-public:
-    VKCommandBufferView() = default;
-    VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
-
-    const vk::CommandBuffer* operator->() const noexcept {
-        return &cmdbuf;
-    }
-
-    operator vk::CommandBuffer() const noexcept {
-        return cmdbuf;
-    }
-
-private:
-    const vk::CommandBuffer& cmdbuf;
-};
-
 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class VKScheduler {
@@ -54,32 +44,190 @@ public:
    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
    ~VKScheduler();

-    /// Gets a reference to the current fence.
-    VKFenceView GetFence() const {
-        return current_fence;
-    }
-
-    /// Gets a reference to the current command buffer.
-    VKCommandBufferView GetCommandBuffer() const {
-        return current_cmdbuf;
-    }
-
    /// Sends the current execution context to the GPU.
    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete.
    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);

+    /// Waits for the worker thread to finish executing everything. After this function returns it's
+    /// safe to touch worker resources.
+    void WaitWorker();
+
+    /// Sends currently recorded work to the worker thread.
+    void DispatchWork();
+
+    /// Requests to begin a renderpass.
+    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+    /// Requests the current execution context to be able to execute operations only allowed outside
+    /// of a renderpass.
+    void RequestOutsideRenderPassOperationContext();
+
+    /// Binds a pipeline to the current execution context.
+    void BindGraphicsPipeline(vk::Pipeline pipeline);
+
+    /// Marks viewports as set in the current command buffer.
+    /// Returns true when they had already been marked before this call.
+    bool TouchViewports() {
+        const bool previously_set = state.viewports;
+        state.viewports = true;
+        return previously_set;
+    }
+
+    /// Marks scissors as set in the current command buffer.
+    /// Returns true when they had already been marked before this call.
+    bool TouchScissors() {
+        const bool previously_set = state.scissors;
+        state.scissors = true;
+        return previously_set;
+    }
+
+    /// Marks depth bias as set in the current command buffer.
+    /// Returns true when it had already been marked before this call.
+    bool TouchDepthBias() {
+        const bool previously_set = state.depth_bias;
+        state.depth_bias = true;
+        return previously_set;
+    }
+
+    /// Marks blend constants as set in the current command buffer.
+    /// Returns true when they had already been marked before this call.
+    bool TouchBlendConstants() {
+        const bool previously_set = state.blend_constants;
+        state.blend_constants = true;
+        return previously_set;
+    }
+
+    /// Marks depth bounds as set in the current command buffer.
+    /// Returns true when they had already been marked before this call.
+    bool TouchDepthBounds() {
+        const bool previously_set = state.depth_bounds;
+        state.depth_bounds = true;
+        return previously_set;
+    }
+
+    /// Marks stencil values as set in the current command buffer.
+    /// Returns true when they had already been marked before this call.
+    bool TouchStencilValues() {
+        const bool previously_set = state.stencil_values;
+        state.stencil_values = true;
+        return previously_set;
+    }
+
+    /// Records a command to be executed later by the worker thread. When the current chunk
+    /// cannot hold it, the chunk is dispatched and the command is recorded into the next one.
+    template <typename T>
+    void Record(T&& command) {
+        const bool stored = chunk->Record(command);
+        if (!stored) {
+            DispatchWork();
+            (void)chunk->Record(command);
+        }
+    }
+
+    /// Gets a view of the current fence.
+    /// NOTE(review): VKFenceView stores a `VKFence* const&`, so the returned view presumably
+    /// follows later changes of the scheduler's current fence - confirm.
+    VKFenceView GetFence() const {
+        return current_fence;
+    }
+
 private:
+    /// Type-erased recorded command; commands are chained into a singly linked list.
+    class Command {
+    public:
+        virtual ~Command() = default;
+
+        /// Runs the command against the given command buffer.
+        virtual void Execute(vk::CommandBuffer cmdbuf,
+                             const vk::DispatchLoaderDynamic& dld) const = 0;
+
+        /// Returns the command linked after this one, or nullptr when there is none.
+        Command* GetNext() const {
+            return next;
+        }
+
+        /// Links the given command after this one.
+        void SetNext(Command* successor) {
+            next = successor;
+        }
+
+    private:
+        Command* next{};
+    };
+
+    /// Command implementation that stores a callable of type T and invokes it on Execute.
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        /// Takes ownership of the callable by moving it into the command.
+        explicit TypedCommand(T&& command) : command{std::move(command)} {}
+        ~TypedCommand() override = default;
+
+        // Commands are constructed in place and never relocated.
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        /// Invokes the stored callable with the command buffer and dispatch loader.
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    /// Fixed-size buffer that stores recorded commands back to back and links them in
+    /// recording order.
+    class CommandChunk final {
+    public:
+        /// Executes and destroys all recorded commands, then resets the chunk.
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        /// Tries to store the callable in this chunk, moving from it on success.
+        /// Returns false, leaving the callable untouched, when the remaining space is too small.
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+
+            // Not enough room left for this command.
+            if (command_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* current_last = last;
+
+            // Construct the command in place at the current write offset.
+            // NOTE(review): the address is only as aligned as data plus the running offset;
+            // confirm this satisfies alignof(FuncType) for every recorded callable.
+            last = new (data.data() + command_offset) FuncType(std::move(command));
+
+            // Append to the list, or start the list when this is the first command.
+            if (current_last) {
+                current_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset += sizeof(FuncType);
+            return true;
+        }
+
+        /// Returns true when nothing has been recorded since the last reset.
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr; ///< First recorded command, nullptr when the chunk is empty.
+        Command* last = nullptr;  ///< Most recently recorded command.
+
+        std::size_t command_offset = 0; ///< Byte offset in data where the next command goes.
+        std::array<u8, 0x8000> data{};  ///< Storage the commands are constructed into.
+    };
+
+    void WorkerThread();
+
    void SubmitExecution(vk::Semaphore semaphore);

    void AllocateNewContext();

+    void InvalidateState();
+
+    void EndPendingOperations();
+
+    void EndRenderPass();
+
+    void AcquireNewChunk();
+
    const VKDevice& device;
    VKResourceManager& resource_manager;
    vk::CommandBuffer current_cmdbuf;
    VKFence* current_fence = nullptr;
    VKFence* next_fence = nullptr;
+
+    /// State tracked by the scheduler so redundant commands can be skipped.
+    struct State {
+        /// Begin info of the render pass tracked as active, empty when there is none.
+        std::optional<vk::RenderPassBeginInfo> renderpass;
+        /// Graphics pipeline tracked as bound.
+        vk::Pipeline graphics_pipeline;
+        // The flags below record whether the matching Touch* method has been called since the
+        // last InvalidateState().
+        bool viewports = false;
+        bool scissors = false;
+        bool depth_bias = false;
+        bool blend_constants = false;
+        bool depth_bounds = false;
+        bool stencil_values = false;
+    } state;
+
+    std::unique_ptr<CommandChunk> chunk;
+    std::thread worker_thread;
+
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool quit = false;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -543,7 +543,7 @@ private:
        }

        for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
-            if (!IsRenderTargetUsed(rt)) {
+            if (!specialization.enabled_rendertargets[rt]) {
                continue;
            }

@@ -954,6 +954,10 @@ private:

    Expression Visit(const Node& node) {
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
+            if (const auto amend_index = operation->GetAmendIndex()) {
+                [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
+                ASSERT(type == Type::Void);
+            }
            const auto operation_index = static_cast<std::size_t>(operation->GetCode());
            const auto decompiler = operation_decompilers[operation_index];
            if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
        }

        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+            if (const auto amend_index = conditional->GetAmendIndex()) {
+                [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
+                ASSERT(type == Type::Void);
+            }
            // It's invalid to call conditional on nested nodes, use an operation instead
            const Id true_label = OpLabel();
            const Id skip_label = OpLabel();
@@ -1555,26 +1563,11 @@ private:

    Expression Texture(Operation operation) {
        const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-        UNIMPLEMENTED_IF(!meta.aoffi.empty());

        const bool can_implicit = stage == ShaderType::Fragment;
        const Id sampler = GetTextureSampler(operation);
        const Id coords = GetCoordinates(operation, Type::Float);

-        if (meta.depth_compare) {
-            // Depth sampling
-            UNIMPLEMENTED_IF(meta.bias);
-            const Id dref = AsFloat(Visit(meta.depth_compare));
-            if (can_implicit) {
-                return {OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, {}),
-                        Type::Float};
-            } else {
-                return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref,
-                                                     spv::ImageOperandsMask::Lod, v_float_zero),
-                        Type::Float};
-            }
-        }
-
        std::vector<Id> operands;
        spv::ImageOperandsMask mask{};
        if (meta.bias) {
@@ -1582,13 +1575,36 @@ private:
            operands.push_back(AsFloat(Visit(meta.bias)));
        }

+        if (!can_implicit) {
+            mask = mask | spv::ImageOperandsMask::Lod;
+            operands.push_back(v_float_zero);
+        }
+
+        if (!meta.aoffi.empty()) {
+            mask = mask | spv::ImageOperandsMask::Offset;
+            operands.push_back(GetOffsetCoordinates(operation));
+        }
+
+        if (meta.depth_compare) {
+            // Depth sampling
+            UNIMPLEMENTED_IF(meta.bias);
+            const Id dref = AsFloat(Visit(meta.depth_compare));
+            if (can_implicit) {
+                return {
+                    OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
+            } else {
+                return {
+                    OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
+            }
+        }
+
        Id texture;
        if (can_implicit) {
            texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
        } else {
-            texture = OpImageSampleExplicitLod(t_float4, sampler, coords,
-                                               mask | spv::ImageOperandsMask::Lod, v_float_zero,
-                                               operands);
+            texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
        }
        return GetTextureElement(operation, texture, Type::Float);
    }
@@ -1601,7 +1617,8 @@ private:
        const Id lod = AsFloat(Visit(meta.lod));

        spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
-        std::vector<Id> operands;
+        std::vector<Id> operands{lod};
+
        if (!meta.aoffi.empty()) {
            mask = mask | spv::ImageOperandsMask::Offset;
            operands.push_back(GetOffsetCoordinates(operation));
@@ -1609,11 +1626,10 @@ private:

        if (meta.sampler.IsShadow()) {
            const Id dref = AsFloat(Visit(meta.depth_compare));
-            return {
-                OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, lod, operands),
-                Type::Float};
+            return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
+                    Type::Float};
        }
-        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, lod, operands);
+        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
        return GetTextureElement(operation, texture, Type::Float);
    }

@@ -1722,7 +1738,7 @@ private:
        const std::vector grad = {dx, dy};

        static constexpr auto mask = spv::ImageOperandsMask::Grad;
-        const Id texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, grad);
+        const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
        return GetTextureElement(operation, texture, Type::Float);
    }

@@ -1780,6 +1796,11 @@ private:
        return {};
    }

+    /// Placeholder for the unsigned atomic add operation: reports it as unimplemented and
+    /// returns an empty expression.
+    Expression UAtomicAdd(Operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
    Expression Branch(Operation operation) {
        const auto& target = std::get<ImmediateNode>(*operation[0]);
        OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -1833,7 +1854,7 @@ private:
    }

    void PreExit() {
-        if (stage == ShaderType::Vertex) {
+        if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
            const u32 position_index = out_indices.position.value();
            const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
            const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
@@ -1860,12 +1881,18 @@ private:
            // rendertargets/components are skipped in the register assignment.
            u32 current_reg = 0;
            for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+                if (!specialization.enabled_rendertargets[rt]) {
+                    // Skip rendertargets that are not enabled
+                    continue;
+                }
                // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
                for (u32 component = 0; component < 4; ++component) {
+                    const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component);
                    if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
-                        OpStore(AccessElement(t_out_float, frag_colors.at(rt), component),
-                                SafeGetRegister(current_reg));
+                        OpStore(pointer, SafeGetRegister(current_reg));
                        ++current_reg;
+                    } else {
+                        OpStore(pointer, component == 3 ? v_float_one : v_float_zero);
                    }
                }
            }
@@ -1971,6 +1998,18 @@ private:
        return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
    }

+    /// Emits an OpMemoryBarrier at device scope with acquire-release semantics over uniform,
+    /// workgroup, atomic counter and image memory. Produces no value.
+    Expression MemoryBarrierGL(Operation) {
+        const auto device_scope = static_cast<u32>(spv::Scope::Device);
+        const auto memory_semantics = static_cast<u32>(
+            spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
+            spv::MemorySemanticsMask::WorkgroupMemory |
+            spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory);
+
+        OpMemoryBarrier(Constant(t_uint, device_scope), Constant(t_uint, memory_semantics));
+        return {};
+    }
+
    Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
        const Id id = OpVariable(type, storage);
        Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
@@ -1983,15 +2022,6 @@ private:
        return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
    }

-    bool IsRenderTargetUsed(u32 rt) const {
-        for (u32 component = 0; component < 4; ++component) {
-            if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
    template <typename... Args>
    Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
        std::vector<Id> members;
@@ -2348,6 +2378,8 @@ private:
        &SPIRVDecompiler::AtomicImageXor,
        &SPIRVDecompiler::AtomicImageExchange,

+        &SPIRVDecompiler::UAtomicAdd,
+
        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::BranchIndirect,
        &SPIRVDecompiler::PushFlowStack,
@@ -2374,6 +2406,8 @@ private:

        &SPIRVDecompiler::ThreadId,
        &SPIRVDecompiler::ShuffleIndexed,
+
+        &SPIRVDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

@@ -2538,29 +2572,7 @@ public:
    }

+    /// Evaluates a condition code expression: the IR node for the condition code is visited
+    /// through the decompiler and the result is converted to a boolean id.
    Id operator()(const ExprCondCode& expr) {
-        const Node cc = decomp.ir.GetConditionCode(expr.cc);
-        Id target;
-
-        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
-            const auto index = pred->GetIndex();
-            switch (index) {
-            case Tegra::Shader::Pred::NeverExecute:
-                target = decomp.v_false;
-                break;
-            case Tegra::Shader::Pred::UnusedIndex:
-                target = decomp.v_true;
-                break;
-            default:
-                target = decomp.predicates.at(index);
-                break;
-            }
-        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
-            target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
-        } else {
-            UNREACHABLE();
-        }
-
-        return decomp.OpLoad(decomp.t_bool, target);
+        return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
    }

    Id operator()(const ExprVar& expr) {
@@ -2575,7 +2587,7 @@ public:
        const Id target = decomp.Constant(decomp.t_uint, expr.value);
        Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
        gpr = decomp.OpBitcast(decomp.t_uint, gpr);
-        return decomp.OpLogicalEqual(decomp.t_uint, gpr, target);
+        return decomp.OpIEqual(decomp.t_bool, gpr, target);
    }

    Id Visit(const Expr& node) {
@@ -2645,11 +2657,11 @@ public:
        const Id loop_label = decomp.OpLabel();
        const Id endloop_label = decomp.OpLabel();
        const Id loop_start_block = decomp.OpLabel();
-        const Id loop_end_block = decomp.OpLabel();
+        const Id loop_continue_block = decomp.OpLabel();
        current_loop_exit = endloop_label;
        decomp.OpBranch(loop_label);
        decomp.AddLabel(loop_label);
-        decomp.OpLoopMerge(endloop_label, loop_end_block, spv::LoopControlMask::MaskNone);
+        decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
        decomp.OpBranch(loop_start_block);
        decomp.AddLabel(loop_start_block);
        ASTNode current = ast.nodes.GetFirst();
@@ -2657,6 +2669,8 @@ public:
            Visit(current);
            current = current->GetNext();
        }
+        decomp.OpBranch(loop_continue_block);
+        decomp.AddLabel(loop_continue_block);
        ExprDecompiler expr_parser{decomp};
        const Id condition = expr_parser.Visit(ast.condition);
        decomp.OpBranchConditional(condition, loop_label, endloop_label);
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -94,6 +94,7 @@ struct Specialization final {
    Maxwell::PrimitiveTopology primitive_topology{};
    std::optional<float> point_size{};
    std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
+    bool ndc_minus_one_to_one{};

    // Tessellation specific
    struct {
@@ -101,6 +102,9 @@ struct Specialization final {
        Maxwell::TessellationSpacing spacing{};
        bool clockwise{};
    } tessellation;
+
+    // Fragment specific
+    std::bitset<8> enabled_rendertargets;
 };
 // Old gcc versions don't consider this trivially copyable.
 // static_assert(std::is_trivially_copyable_v<Specialization>);
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -0,0 +1,34 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+
+namespace Vulkan {
+
+/// Creates a shader module from code_size bytes of code at code_data.
+/// code_size must be a multiple of sizeof(u32); this is asserted.
+UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
+    ASSERT(code_size % sizeof(u32) == 0);
+
+    // The incoming bytes are copied into a u32 allocation to avoid undefined behavior when the
+    // create info reads them as 32-bit words.
+    const std::size_t word_count = code_size / sizeof(u32);
+    const auto words = std::make_unique<u32[]>(word_count);
+    std::memcpy(words.get(), code_data, code_size);
+
+    const auto logical = device.GetLogical();
+    const auto& dispatch = device.GetDispatchLoader();
+    const vk::ShaderModuleCreateInfo create_info({}, code_size, words.get());
+
+    vk::ShaderModule handle;
+    const vk::Result result = logical.createShaderModule(&create_info, nullptr, &handle, dispatch);
+    if (result != vk::Result::eSuccess) {
+        UNREACHABLE_MSG("Shader module failed to build!");
+    }
+
+    return UniqueShaderModule(handle, vk::ObjectDestroy(logical, nullptr, dispatch));
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -0,0 +1,17 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
+
+} // namespace Vulkan
--- a/Show More
+++ b/Show More