delete .appeveyor dir

delete appveyor config
Merge pull request #3222 from ReinUsesLisp/maxwell-to-vk
2019-12-17 00:20:34 -08:00 · 2019-12-15 11:16:39 -08:00 · 2019-12-14 22:30:12 -05:00 · 2019-12-14 16:13:47 -05:00 · 2019-12-14 16:04:31 -05:00 · 2019-12-14 03:04:42 -05:00
29 changed files with 2161 additions and 1133 deletions
--- a/.appveyor/UtilityFunctions.ps1
+++ b/.appveyor/UtilityFunctions.ps1
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
-    if ($_ -match "=") {
-        $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
-    }
-}
-popd
-
-function Which ($search_path, $name) {
-    ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
-    ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
-        Which $search_path $_.Value
-    }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
-    $final_deps = @()
-    $deps_to_process = GetDeps $search_path $binary
-    while ($deps_to_process.Count -gt 0) {
-        $current, $deps_to_process = $deps_to_process
-        if ($final_deps -contains $current) { continue }
-
-        # Is this a system dll file?
-        # We use the same algorithm that cmake uses to determine this.
-        if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
-        if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
-        if ($current -match "\\msvc[^\\]+dll") { continue }
-        if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
-        $final_deps += $current
-        $new_deps = GetDeps $search_path $current
-        $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
-    }
-    return $final_deps
-}
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
-  - C:\ProgramData\chocolatey\bin -> appveyor.yml
-  - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
-  # Tell msys2 to add mingw64 to the path
-  MSYSTEM: MINGW64
-  # Tell msys2 to inherit the current directory when starting the shell
-  CHERE_INVOKING: 1
-  matrix:
-    - BUILD_TYPE: msvc
-    - BUILD_TYPE: mingw
-
-platform:
-  - x64
-
-configuration:
-  - Release
-
-install:
-  - git submodule update --init --recursive
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'mingw') {
-          $dependencies = "mingw64/mingw-w64-x86_64-cmake",
-                          "mingw64/mingw-w64-x86_64-qt5",
-                          "mingw64/mingw-w64-x86_64-SDL2"
-          # redirect err to null to prevent warnings from becoming errors
-          # workaround to prevent pacman from failing due to cyclical dependencies
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
-        }
-
-before_build:
-  - mkdir %BUILD_TYPE%_build
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
-        }
-  - cd ..
-
-build_script:
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # https://www.appveyor.com/docs/build-phase
-          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
-        }
-
-after_build:
-  - ps: |
-        $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
-        $GITREV = $(git show -s --format='%h')
-
-        # Find out which kind of release we are producing by tag name
-        if ($env:APPVEYOR_REPO_TAG_NAME) {
-          $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
-        } else {
-          # There is no repo tag - make assumptions
-          $RELEASE_DIST = "head"
-        }
-
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # Where are these spaces coming from? Regardless, let's remove them
-          $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
-          $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
-          # set the build names as env vars so the artifacts can upload them
-          $env:BUILD_ZIP = $MSVC_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
-          $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
-          $BUILD_DIR = ".\msvc_build\bin\Release"
-
-          # Make a debug symbol upload
-          mkdir pdb
-          Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
-          7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
-          rm "$BUILD_DIR\*.pdb"
-
-          mkdir $RELEASE_DIST
-          # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
-          Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
-          rm "$RELEASE_DIST\*.exe"
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item .\license.txt -Destination $RELEASE_DIST
-          Copy-Item .\README.md -Destination $RELEASE_DIST
-          7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MSVC_SEVENZIP $RELEASE_DIST
-        } else {
-          $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
-          # not going to bother adding separate debug symbols for mingw, so just upload a README for it
-          # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
-          $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
-          Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
-          # store the build information in env vars so we can use them as artifacts
-          $env:BUILD_ZIP = $MINGW_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
-          $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
-          $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
-          $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
-          $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
-          mkdir $RELEASE_DIST
-          mkdir $RELEASE_DIST/platforms
-          mkdir $RELEASE_DIST/styles
-          mkdir $RELEASE_DIST/imageformats
-
-          # copy the compiled binaries and other release files to the release folder
-          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
-          # copy the qt windows plugin dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
-          # copy the qt windows vista style dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
-          # copy the qt jpeg imageformat dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
-          # copy all the dll dependencies to the release folder
-          . "./.appveyor/UtilityFunctions.ps1"
-          $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
-          $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\yuzu_cmd.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\imageformats\qjpeg.dll"
-          Write-Host "Detected the following dependencies:"
-          Write-Host $MingwDLLs
-          foreach ($file in $MingwDLLs) {
-            Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
-          }
-
-          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MINGW_SEVENZIP $RELEASE_DIST
-        }
-
-test_script:
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          ctest -VV -C Release
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
-        }
-  - cd ..
-
-artifacts:
-  - path: $(BUILD_ZIP)
-    name: build
-    type: zip
-
-deploy:
-  provider: GitHub
-  release: $(appveyor_repo_tag_name)
-  auth_token:
-    secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
-  artifact: update,build
-  draft: false
-  prerelease: false
-  on:
-    appveyor_repo_tag: true
--- a/externals/boost
+++ b/externals/boost
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -17,10 +17,10 @@
 #include "core/memory.h"

 namespace Kernel {
-namespace {
+
 // Wake up num_to_wake (or all) threads in a vector.
-void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
-    auto& system = Core::System::GetInstance();
+void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
+                                 s32 num_to_wake) {
    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
    // them all.
    std::size_t last = waiting_threads.size();
@@ -32,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
    for (std::size_t i = 0; i < last; i++) {
        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        RemoveThread(waiting_threads[i]);
        waiting_threads[i]->SetArbiterWaitAddress(0);
        waiting_threads[i]->ResumeFromWait();
        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
    }
 }
-} // Anonymous namespace

 AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
 AddressArbiter::~AddressArbiter() = default;
@@ -184,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
 ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
+    InsertThread(SharedFrom(current_thread));
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();
    current_thread->WakeAfterDelay(timeout);
@@ -192,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    return RESULT_TIMEOUT;
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
+void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
+    RemoveThread(thread);
+    thread->SetArbiterWaitAddress(0);
+}

-    // Retrieve all threads that are waiting for this address.
-    std::vector<std::shared_ptr<Thread>> threads;
-    const auto& scheduler = system.GlobalScheduler();
-    const auto& thread_list = scheduler.GetThreadList();
-
-    for (const auto& thread : thread_list) {
-        if (thread->GetArbiterWaitAddress() == address) {
-            threads.push_back(thread);
+void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread->GetPriority() >= thread->GetPriority()) {
+            thread_list.insert(it, thread);
+            return;
        }
+        ++it;
    }
+    thread_list.push_back(std::move(thread));
+}

-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
+void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread.get() == thread.get()) {
+            thread_list.erase(it);
+            return;
+        }
+        ++it;
+    }
+    UNREACHABLE();
+}

-    return threads;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
+    std::vector<std::shared_ptr<Thread>> result;
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        std::shared_ptr<Thread> current_thread = *it;
+        result.push_back(std::move(current_thread));
+        ++it;
+    }
+    return result;
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,7 +4,9 @@

 #pragma once

+#include <list>
 #include <memory>
+#include <unordered_map>
 #include <vector>

 #include "common/common_types.h"
@@ -48,6 +50,9 @@ public:
    /// Waits on an address with a particular arbitration type.
    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);

+    /// Removes a thread from the container and resets its address arbiter adress to 0
+    void HandleWakeupThread(std::shared_ptr<Thread> thread);
+
 private:
    /// Signals an address being waited on.
    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
    // Waits on the given address with a timeout in nanoseconds
    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);

+    /// Wake up num_to_wake (or all) threads in a vector.
+    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
+
+    /// Insert a thread into the address arbiter container
+    void InsertThread(std::shared_ptr<Thread> thread);
+
+    /// Removes a thread from the address arbiter container
+    void RemoveThread(std::shared_ptr<Thread> thread);
+
    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+
+    /// List of threads waiting for a address arbiter
+    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;

    Core::System& system;
 };
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -78,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
        }
    }

-    if (thread->GetArbiterWaitAddress() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
-        thread->SetArbiterWaitAddress(0);
+    if (thread->GetStatus() == ThreadStatus::WaitArb) {
+        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
+        address_arbiter.HandleWakeupThread(thread);
    }

    if (resume) {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1650,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 }

 /// Signal process wide key
-static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
-                                       s32 target) {
+static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
              condition_variable_addr, target);

@@ -1726,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
            system.PrepareReschedule(thread->GetProcessorID());
        }
    }
-
-    return RESULT_SUCCESS;
 }

 // Wait for an address (via Address Arbiter)
@@ -1781,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

+static void KernelDebug([[maybe_unused]] Core::System& system,
+                        [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
+                        [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
+static void ChangeKernelTraceState([[maybe_unused]] Core::System& system,
+                                   [[maybe_unused]] u32 trace_state) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
 static u64 GetSystemTick(Core::System& system) {
    LOG_TRACE(Kernel_SVC, "called");
@@ -2418,8 +2426,8 @@ static const FunctionDef SVC_Table[] = {
    {0x39, nullptr, "Unknown"},
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
-    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "DumpInfoNew"},
+    {0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
+    {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
    FuncReturn(system, retval);
 }

-template <ResultCode func(Core::System&, u64, s32)>
-void SvcWrap(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
-}
-
 template <ResultCode func(Core::System&, u64, u32)>
 void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) {
    func(system);
 }

+template <void func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)));
+}
+
+template <void func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
+         Param(system, 3));
+}
+
 template <void func(Core::System&, s64)>
 void SvcWrap(Core::System& system) {
    func(system, static_cast<s64>(Param(system, 0)));
 }

+template <void func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
+}
+
 template <void func(Core::System&, u64, u64)>
 void SvcWrap(Core::System& system) {
    func(system, Param(system, 0), Param(system, 1));
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -310,6 +310,11 @@ public:
            }
        };

+        enum class DepthMode : u32 {
+            MinusOneToOne = 0,
+            ZeroToOne = 1,
+        };
+
        enum class PrimitiveTopology : u32 {
            Points = 0x0,
            Lines = 0x1,
@@ -491,11 +496,6 @@ public:
            INSERT_UNION_PADDING_WORDS(1);
        };

-        enum class DepthMode : u32 {
-            MinusOneToOne = 0,
-            ZeroToOne = 1,
-        };
-
        enum class TessellationPrimitive : u32 {
            Isolines = 0,
            Triangles = 1,
@@ -676,7 +676,7 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_UNION_PADDING_WORDS(1);
+                DepthMode depth_mode;

                float clear_color[4];
                float clear_depth;
@@ -1425,6 +1425,7 @@ ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 ASSERT_REG_POSITION(viewports, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,10 +98,11 @@ union Attribute {
        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
+        BitField<31, 1, u64> patch;
        BitField<47, 3, AttributeSize> size;

        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
+            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
        }
    } fmt20;

@@ -383,6 +384,15 @@ enum class IsberdMode : u64 {

 enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };

+enum class MembarType : u64 {
+    CTA = 0,
+    GL = 1,
+    SYS = 2,
+    VC = 3,
+};
+
+enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
+
 enum class HalfType : u64 {
    H0_H1 = 0,
    F32 = 1,
@@ -1544,6 +1554,11 @@ union Instruction {
        BitField<47, 2, IsberdShift> shift;
    } isberd;

+    union {
+        BitField<8, 2, MembarType> type;
+        BitField<0, 2, MembarUnknown> unknown;
+    } membar;
+
    union {
        BitField<48, 1, u64> signed_a;
        BitField<38, 1, u64> is_byte_chunk_a;
@@ -1668,6 +1683,7 @@ public:
        IPA,
        OUT_R, // Emit vertex/primitive
        ISBERD,
+        MEMBAR,
        VMAD,
        VSETP,
        FFMA_IMM, // Fused Multiply and Add
@@ -1929,7 +1945,7 @@ private:
            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
+            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1968,6 +1984,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -5,6 +5,7 @@
 #include <mutex>

 #include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>

 #include "common/assert.h"
 #include "common/common_types.h"
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstring>
 #include <optional>
 #include <vector>

@@ -134,11 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

 Device::Device() : base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
    const bool is_amd = vendor == "ATI Technologies Inc.";
    const bool is_intel = vendor == "Intel";
+    const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;

    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -152,7 +155,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_variable_aoffi = TestVariableAoffi();
    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_broken_compute = is_intel;
+    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -277,6 +277,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            continue;
        }

+        // Currently this stages are not supported in the OpenGL backend.
+        // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
+        if (program == Maxwell::ShaderProgram::TesselationControl) {
+            continue;
+        } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+            continue;
+        }
+
        Shader shader{shader_cache.GetStageProgram(program)};

        // Stage indices are 0 - 5
@@ -1028,6 +1036,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        flip_y = !flip_y;
    }
    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+    state.clip_control.depth_mode =
+        regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+            ? GL_ZERO_TO_ONE
+            : GL_NEGATIVE_ONE_TO_ONE;
 }

 void RasterizerOpenGL::SyncClipEnabled(
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -281,11 +281,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
        if (variant.shared_memory_size > 0) {
            // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
            // avoids out of bound stores. Find out why shared memory size is being invalid.
-            source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
+            source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
        }

        if (variant.local_memory_size > 0) {
-            source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
+            source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
                                  Common::AlignUp(variant.local_memory_size, 4) / 4);
        }
    }
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1915,6 +1915,10 @@ private:
        return {};
    }

+    Expression InvocationId(Operation operation) {
+        return {"gl_InvocationID", Type::Int};
+    }
+
    Expression YNegate(Operation operation) {
        return {"y_direction", Type::Float};
    }
@@ -1988,6 +1992,11 @@ private:
        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
    }

+    Expression MemoryBarrierGL(Operation) {
+        code.AddLine("memoryBarrier();");
+        return {};
+    }
+
    struct Func final {
        Func() = delete;
        ~Func() = delete;
@@ -2153,6 +2162,7 @@ private:
        &GLSLDecompiler::EmitVertex,
        &GLSLDecompiler::EndPrimitive,

+        &GLSLDecompiler::InvocationId,
        &GLSLDecompiler::YNegate,
        &GLSLDecompiler::LocalInvocationId<0>,
        &GLSLDecompiler::LocalInvocationId<1>,
@@ -2168,6 +2178,8 @@ private:

        &GLSLDecompiler::ThreadId,
        &GLSLDecompiler::ShuffleIndexed,
+
+        &GLSLDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));

--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -411,8 +411,9 @@ void OpenGLState::ApplyAlphaTest() {
 }

 void OpenGLState::ApplyClipControl() {
-    if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
-        glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+    if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
+                  std::tie(clip_control.origin, clip_control.depth_mode))) {
+        glClipControl(clip_control.origin, clip_control.depth_mode);
    }
 }

--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -150,6 +150,7 @@ public:

    struct {
        GLenum origin = GL_LOWER_LEFT;
+        GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
    } clip_control;

    OpenGLState();
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -44,7 +44,8 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
    return {};
 }

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter) {
    switch (wrap_mode) {
    case Tegra::Texture::WrapMode::Wrap:
        return vk::SamplerAddressMode::eRepeat;
@@ -55,10 +56,15 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    case Tegra::Texture::WrapMode::Border:
        return vk::SamplerAddressMode::eClampToBorder;
    case Tegra::Texture::WrapMode::Clamp:
-        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
-        // eClampToBorder to get the border color of the texture, and then sample the edge to
-        // manually mix them. However the shader part of this is not yet implemented.
-        return vk::SamplerAddressMode::eClampToBorder;
+        // TODO(Rodrigo): Emulate GL_CLAMP properly
+        switch (filter) {
+        case Tegra::Texture::TextureFilter::Nearest:
+            return vk::SamplerAddressMode::eClampToEdge;
+        case Tegra::Texture::TextureFilter::Linear:
+            return vk::SamplerAddressMode::eClampToBorder;
+        }
+        UNREACHABLE();
+        return vk::SamplerAddressMode::eClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
        return vk::SamplerAddressMode::eMirrorClampToEdge;
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
@@ -96,106 +102,140 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar

 } // namespace Sampler

+namespace {
+
+enum : u32 { Attachable = 1, Storage = 2 };
+
 struct FormatTuple {
    vk::Format format; ///< Vulkan format
-    bool attachable;   ///< True when this format can be used as an attachment
-};
-
-static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {vk::Format::eA8B8G8R8UnormPack32, true},    // ABGR8U
-    {vk::Format::eUndefined, false},             // ABGR8S
-    {vk::Format::eUndefined, false},             // ABGR8UI
-    {vk::Format::eB5G6R5UnormPack16, false},     // B5G6R5U
-    {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
-    {vk::Format::eUndefined, false},             // A1B5G5R5U
-    {vk::Format::eR8Unorm, true},                // R8U
-    {vk::Format::eUndefined, false},             // R8UI
-    {vk::Format::eUndefined, false},             // RGBA16F
-    {vk::Format::eUndefined, false},             // RGBA16U
-    {vk::Format::eUndefined, false},             // RGBA16UI
-    {vk::Format::eUndefined, false},             // R11FG11FB10F
-    {vk::Format::eUndefined, false},             // RGBA32UI
-    {vk::Format::eBc1RgbaUnormBlock, false},     // DXT1
-    {vk::Format::eBc2UnormBlock, false},         // DXT23
-    {vk::Format::eBc3UnormBlock, false},         // DXT45
-    {vk::Format::eBc4UnormBlock, false},         // DXN1
-    {vk::Format::eUndefined, false},             // DXN2UNORM
-    {vk::Format::eUndefined, false},             // DXN2SNORM
-    {vk::Format::eUndefined, false},             // BC7U
-    {vk::Format::eUndefined, false},             // BC6H_UF16
-    {vk::Format::eUndefined, false},             // BC6H_SF16
-    {vk::Format::eUndefined, false},             // ASTC_2D_4X4
-    {vk::Format::eUndefined, false},             // BGRA8
-    {vk::Format::eUndefined, false},             // RGBA32F
-    {vk::Format::eUndefined, false},             // RG32F
-    {vk::Format::eUndefined, false},             // R32F
-    {vk::Format::eUndefined, false},             // R16F
-    {vk::Format::eUndefined, false},             // R16U
-    {vk::Format::eUndefined, false},             // R16S
-    {vk::Format::eUndefined, false},             // R16UI
-    {vk::Format::eUndefined, false},             // R16I
-    {vk::Format::eUndefined, false},             // RG16
-    {vk::Format::eUndefined, false},             // RG16F
-    {vk::Format::eUndefined, false},             // RG16UI
-    {vk::Format::eUndefined, false},             // RG16I
-    {vk::Format::eUndefined, false},             // RG16S
-    {vk::Format::eUndefined, false},             // RGB32F
-    {vk::Format::eA8B8G8R8SrgbPack32, true},     // RGBA8_SRGB
-    {vk::Format::eUndefined, false},             // RG8U
-    {vk::Format::eUndefined, false},             // RG8S
-    {vk::Format::eUndefined, false},             // RG32UI
-    {vk::Format::eUndefined, false},             // RGBX16F
-    {vk::Format::eUndefined, false},             // R32UI
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X8
-    {vk::Format::eUndefined, false},             // ASTC_2D_8X5
-    {vk::Format::eUndefined, false},             // ASTC_2D_5X4
-
-    // Compressed sRGB formats
-    {vk::Format::eUndefined, false}, // BGRA8_SRGB
-    {vk::Format::eUndefined, false}, // DXT1_SRGB
-    {vk::Format::eUndefined, false}, // DXT23_SRGB
-    {vk::Format::eUndefined, false}, // DXT45_SRGB
-    {vk::Format::eUndefined, false}, // BC7U_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5
-    {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8
-    {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
+    int usage;         ///< Describes image format usage
+} constexpr tex_format_tuples[] = {
+    {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage},    // ABGR8U
+    {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage},    // ABGR8S
+    {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage},     // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, {}},                        // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
+    {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage},    // A1B5G5R5U (flipped with swizzle)
+    {vk::Format::eR8Unorm, Attachable | Storage},                // R8U
+    {vk::Format::eR8Uint, Attachable | Storage},                 // R8UI
+    {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage},     // RGBA16F
+    {vk::Format::eR16G16B16A16Unorm, Attachable | Storage},      // RGBA16U
+    {vk::Format::eR16G16B16A16Uint, Attachable | Storage},       // RGBA16UI
+    {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage},  // R11FG11FB10F
+    {vk::Format::eR32G32B32A32Uint, Attachable | Storage},       // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, {}},                        // DXT1
+    {vk::Format::eBc2UnormBlock, {}},                            // DXT23
+    {vk::Format::eBc3UnormBlock, {}},                            // DXT45
+    {vk::Format::eBc4UnormBlock, {}},                            // DXN1
+    {vk::Format::eBc5UnormBlock, {}},                            // DXN2UNORM
+    {vk::Format::eBc5SnormBlock, {}},                            // DXN2SNORM
+    {vk::Format::eBc7UnormBlock, {}},                            // BC7U
+    {vk::Format::eBc6HUfloatBlock, {}},                          // BC6H_UF16
+    {vk::Format::eBc6HSfloatBlock, {}},                          // BC6H_SF16
+    {vk::Format::eAstc4x4UnormBlock, {}},                        // ASTC_2D_4X4
+    {vk::Format::eB8G8R8A8Unorm, {}},                            // BGRA8
+    {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage},     // RGBA32F
+    {vk::Format::eR32G32Sfloat, Attachable | Storage},           // RG32F
+    {vk::Format::eR32Sfloat, Attachable | Storage},              // R32F
+    {vk::Format::eR16Sfloat, Attachable | Storage},              // R16F
+    {vk::Format::eR16Unorm, Attachable | Storage},               // R16U
+    {vk::Format::eUndefined, {}},                                // R16S
+    {vk::Format::eUndefined, {}},                                // R16UI
+    {vk::Format::eUndefined, {}},                                // R16I
+    {vk::Format::eR16G16Unorm, Attachable | Storage},            // RG16
+    {vk::Format::eR16G16Sfloat, Attachable | Storage},           // RG16F
+    {vk::Format::eUndefined, {}},                                // RG16UI
+    {vk::Format::eUndefined, {}},                                // RG16I
+    {vk::Format::eR16G16Snorm, Attachable | Storage},            // RG16S
+    {vk::Format::eUndefined, {}},                                // RGB32F
+    {vk::Format::eR8G8B8A8Srgb, Attachable},                     // RGBA8_SRGB
+    {vk::Format::eR8G8Unorm, Attachable | Storage},              // RG8U
+    {vk::Format::eR8G8Snorm, Attachable | Storage},              // RG8S
+    {vk::Format::eR32G32Uint, Attachable | Storage},             // RG32UI
+    {vk::Format::eUndefined, {}},                                // RGBX16F
+    {vk::Format::eR32Uint, Attachable | Storage},                // R32UI
+    {vk::Format::eAstc8x8UnormBlock, {}},                        // ASTC_2D_8X8
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_8X5
+    {vk::Format::eUndefined, {}},                                // ASTC_2D_5X4
+    {vk::Format::eUndefined, {}},                                // BGRA8_SRGB
+    {vk::Format::eBc1RgbaSrgbBlock, {}},                         // DXT1_SRGB
+    {vk::Format::eUndefined, {}},                                // DXT23_SRGB
+    {vk::Format::eBc3SrgbBlock, {}},                             // DXT45_SRGB
+    {vk::Format::eBc7SrgbBlock, {}},                             // BC7U_SRGB
+    {vk::Format::eR4G4B4A4UnormPack16, Attachable},              // R4G4B4A4U
+    {vk::Format::eAstc4x4SrgbBlock, {}},                         // ASTC_2D_4X4_SRGB
+    {vk::Format::eAstc8x8SrgbBlock, {}},                         // ASTC_2D_8X8_SRGB
+    {vk::Format::eAstc8x5SrgbBlock, {}},                         // ASTC_2D_8X5_SRGB
+    {vk::Format::eAstc5x4SrgbBlock, {}},                         // ASTC_2D_5X4_SRGB
+    {vk::Format::eAstc5x5UnormBlock, {}},                        // ASTC_2D_5X5
+    {vk::Format::eAstc5x5SrgbBlock, {}},                         // ASTC_2D_5X5_SRGB
+    {vk::Format::eAstc10x8UnormBlock, {}},                       // ASTC_2D_10X8
+    {vk::Format::eAstc10x8SrgbBlock, {}},                        // ASTC_2D_10X8_SRGB
+    {vk::Format::eAstc6x6UnormBlock, {}},                        // ASTC_2D_6X6
+    {vk::Format::eAstc6x6SrgbBlock, {}},                         // ASTC_2D_6X6_SRGB
+    {vk::Format::eAstc10x10UnormBlock, {}},                      // ASTC_2D_10X10
+    {vk::Format::eAstc10x10SrgbBlock, {}},                       // ASTC_2D_10X10_SRGB
+    {vk::Format::eAstc12x12UnormBlock, {}},                      // ASTC_2D_12X12
+    {vk::Format::eAstc12x12SrgbBlock, {}},                       // ASTC_2D_12X12_SRGB
+    {vk::Format::eAstc8x6UnormBlock, {}},                        // ASTC_2D_8X6
+    {vk::Format::eAstc8x6SrgbBlock, {}},                         // ASTC_2D_8X6_SRGB
+    {vk::Format::eAstc6x5UnormBlock, {}},                        // ASTC_2D_6X5
+    {vk::Format::eAstc6x5SrgbBlock, {}},                         // ASTC_2D_6X5_SRGB
+    {vk::Format::eE5B9G9R9UfloatPack32, {}},                     // E5B9G9R9F

    // Depth formats
-    {vk::Format::eD32Sfloat, true}, // Z32F
-    {vk::Format::eD16Unorm, true},  // Z16
+    {vk::Format::eD32Sfloat, Attachable}, // Z32F
+    {vk::Format::eD16Unorm, Attachable},  // Z16

    // DepthStencil formats
-    {vk::Format::eD24UnormS8Uint, true}, // Z24S8
-    {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
-    {vk::Format::eUndefined, false},     // Z32FS8
-}};
+    {vk::Format::eD24UnormS8Uint, Attachable},  // Z24S8
+    {vk::Format::eD24UnormS8Uint, Attachable},  // S8Z24 (emulated)
+    {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8
+};
+static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);

-static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+constexpr bool IsZetaFormat(PixelFormat pixel_format) {
    return pixel_format >= PixelFormat::MaxColorFormat &&
           pixel_format < PixelFormat::MaxDepthStencilFormat;
 }

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format) {
-    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+} // Anonymous namespace

-    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
-    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
-                         "Unimplemented texture format with pixel format={}",
-                         static_cast<u32>(pixel_format));
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));

-    auto usage = vk::FormatFeatureFlagBits::eSampledImage |
-                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
-    if (tuple.attachable) {
-        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
-                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+    if (tuple.format == vk::Format::eUndefined) {
+        UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
+                          static_cast<u32>(pixel_format));
+        return {vk::Format::eA8B8G8R8UnormPack32, true, true};
    }
-    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+
+    // Use ABGR8 on hardware that doesn't support ASTC natively
+    if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
+        tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
+                           ? vk::Format::eA8B8G8R8SrgbPack32
+                           : vk::Format::eA8B8G8R8UnormPack32;
+    }
+    const bool attachable = tuple.usage & Attachable;
+    const bool storage = tuple.usage & Storage;
+
+    vk::FormatFeatureFlags usage;
+    if (format_type == FormatType::Buffer) {
+        usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer |
+                vk::FormatFeatureFlagBits::eUniformTexelBuffer;
+    } else {
+        usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst |
+                vk::FormatFeatureFlagBits::eTransferSrc;
+        if (attachable) {
+            usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                                : vk::FormatFeatureFlagBits::eColorAttachment;
+        }
+        if (storage) {
+            usage |= vk::FormatFeatureFlagBits::eStorageImage;
+        }
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
 }

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
@@ -215,7 +255,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
    return {};
 }

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology) {
    switch (topology) {
    case Maxwell::PrimitiveTopology::Points:
        return vk::PrimitiveTopology::ePointList;
@@ -227,6 +268,13 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return vk::PrimitiveTopology::eTriangleList;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return vk::PrimitiveTopology::eTriangleStrip;
+    case Maxwell::PrimitiveTopology::TriangleFan:
+        return vk::PrimitiveTopology::eTriangleFan;
+    case Maxwell::PrimitiveTopology::Quads:
+        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
+        return vk::PrimitiveTopology::eTriangleList;
+    case Maxwell::PrimitiveTopology::Patches:
+        return vk::PrimitiveTopology::ePatchList;
    default:
        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
        return {};
@@ -236,37 +284,111 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
    switch (type) {
    case Maxwell::VertexAttribute::Type::SignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Snorm;
+        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+            return vk::Format::eA2B10G10R10SnormPack32;
+        default:
+            break;
+        }
        break;
    case Maxwell::VertexAttribute::Type::UnsignedNorm:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Unorm;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return vk::Format::eR8G8B8A8Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Unorm;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Unorm;
        default:
            break;
        }
        break;
    case Maxwell::VertexAttribute::Type::SignedInt:
-        break;
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sint;
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Sint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Sint;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sint;
+        default:
+            break;
+        }
    case Maxwell::VertexAttribute::Type::UnsignedInt:
        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8:
+            return vk::Format::eR8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8:
+            return vk::Format::eR8G8B8Uint;
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Uint;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Uint;
        default:
            break;
        }
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return vk::Format::eR8G8Uscaled;
+        default:
+            break;
+        }
+        break;
    case Maxwell::VertexAttribute::Type::SignedScaled:
        break;
    case Maxwell::VertexAttribute::Type::Float:
        switch (size) {
-        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
-            return vk::Format::eR32G32B32A32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32_32:
-            return vk::Format::eR32G32B32Sfloat;
-        case Maxwell::VertexAttribute::Size::Size_32_32:
-            return vk::Format::eR32G32Sfloat;
        case Maxwell::VertexAttribute::Size::Size_32:
            return vk::Format::eR32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return vk::Format::eR16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return vk::Format::eR16G16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return vk::Format::eR16G16B16Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return vk::Format::eR16G16B16A16Sfloat;
        default:
            break;
        }
@@ -308,11 +430,14 @@ vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
    return {};
 }

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
    switch (index_format) {
    case Maxwell::IndexFormat::UnsignedByte:
-        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
-        return vk::IndexType::eUint16;
+        if (!device.IsExtIndexTypeUint8Supported()) {
+            UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
+            return vk::IndexType::eUint16;
+        }
+        return vk::IndexType::eUint8EXT;
    case Maxwell::IndexFormat::UnsignedShort:
        return vk::IndexType::eUint16;
    case Maxwell::IndexFormat::UnsignedInt:
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -4,7 +4,6 @@

 #pragma once

-#include <utility>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/declarations.h"
@@ -23,24 +22,31 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);

 vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

-vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
+                                Tegra::Texture::TextureFilter filter);

 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

 } // namespace Sampler

-std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format);
+struct FormatInfo {
+    vk::Format format;
+    bool attachable;
+    bool storage;
+};
+
+FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);

 vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);

-vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
+vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device,
+                                        Maxwell::PrimitiveTopology topology);

 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

 vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

-vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);
+vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);

 vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -46,9 +46,10 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        MaxwellToVK::Sampler::Filter(tsc.min_filter),
        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u), MaxwellToVK::Sampler::WrapMode(tsc.wrap_v),
-        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p), tsc.GetLodBias(), has_anisotropy,
-        max_anisotropy, tsc.depth_compare_enabled,
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
+        MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
+        has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
        unnormalized_coords);
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -5,29 +5,28 @@
 #pragma once

 #include <array>
+#include <bitset>
 #include <memory>
 #include <set>
+#include <type_traits>
 #include <utility>
 #include <vector>

-#include <sirit/sirit.h>
-
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/shader/shader_ir.h"

-namespace VideoCommon::Shader {
-class ShaderIR;
-}
-
 namespace Vulkan {
 class VKDevice;
 }

-namespace Vulkan::VKShader {
+namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using TexelBufferEntry = VideoCommon::Shader::Sampler;
 using SamplerEntry = VideoCommon::Shader::Sampler;
+using ImageEntry = VideoCommon::Shader::Image;

 constexpr u32 DESCRIPTOR_SET = 0;

@@ -46,39 +45,74 @@ private:

 class GlobalBufferEntry {
 public:
-    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}

-    u32 GetCbufIndex() const {
+    constexpr u32 GetCbufIndex() const {
        return cbuf_index;
    }

-    u32 GetCbufOffset() const {
+    constexpr u32 GetCbufOffset() const {
        return cbuf_offset;
    }

+    constexpr bool IsWritten() const {
+        return is_written;
+    }
+
 private:
    u32 cbuf_index{};
    u32 cbuf_offset{};
+    bool is_written{};
 };

 struct ShaderEntries {
-    u32 const_buffers_base_binding{};
-    u32 global_buffers_base_binding{};
-    u32 samplers_base_binding{};
+    u32 NumBindings() const {
+        return static_cast<u32>(const_buffers.size() + global_buffers.size() +
+                                texel_buffers.size() + samplers.size() + images.size());
+    }
+
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<TexelBufferEntry> texel_buffers;
    std::vector<SamplerEntry> samplers;
+    std::vector<ImageEntry> images;
    std::set<u32> attributes;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
    std::size_t shader_length{};
-    Sirit::Id entry_function{};
-    std::vector<Sirit::Id> interfaces;
+    bool uses_warps{};
 };

-using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+struct Specialization final {
+    u32 base_binding{};

-DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
-                           Tegra::Engines::ShaderType stage);
+    // Compute specific
+    std::array<u32, 3> workgroup_size{};
+    u32 shared_memory_size{};

-} // namespace Vulkan::VKShader
+    // Graphics specific
+    Maxwell::PrimitiveTopology primitive_topology{};
+    std::optional<float> point_size{};
+    std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
+
+    // Tessellation specific
+    struct {
+        Maxwell::TessellationPrimitive primitive{};
+        Maxwell::TessellationSpacing spacing{};
+        bool clockwise{};
+    } tessellation;
+};
+// Old gcc versions don't consider this trivially copyable.
+// static_assert(std::is_trivially_copyable_v<Specialization>);
+
+struct SPIRVShader {
+    std::vector<u32> code;
+    ShaderEntries entries;
+};
+
+ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
+
+std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+                           Tegra::Engines::ShaderType stage, const Specialization& specialization);
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

 namespace {
+
 u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
@@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
        return 1;
    }
 }
+
 } // Anonymous namespace

 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        u64 next_element = instr.attribute.fmt20.element;
-        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        u64 element = instr.attribute.fmt20.element;
+        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());

-        const auto StoreNextElement = [&](u32 reg_offset) {
-            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
-                                                 next_element, GetRegister(instr.gpr39));
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            Node dest;
+            if (instr.attribute.fmt20.patch) {
+                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+                dest = MakeNode<PatchNode>(offset);
+            } else {
+                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+                                          GetRegister(instr.gpr39));
+            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

-            // Load the next attribute element into the following register. If the element
-            // to load goes beyond the vec4 size, load the first element of the next
-            // attribute.
-            next_element = (next_element + 1) % 4;
-            next_index = next_index + (next_element == 0 ? 1 : 0);
-        };
-
-        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-            StoreNextElement(reg_offset);
+            // Load the next attribute element into the following register. If the element to load
+            // goes beyond the vec4 size, load the first element of the next attribute.
+            element = (element + 1) % 4;
+            index = index + (element == 0 ? 1 : 0);
        }
-
        break;
    }
    case OpCode::Id::ST_L:
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    case OpCode::Id::MOV_SYS: {
        const Node value = [this, instr] {
            switch (instr.sys20) {
+            case SystemVariable::InvocationId:
+                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
@@ -255,6 +257,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
+    case OpCode::Id::MEMBAR: {
+        UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
+        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
+        bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+        break;
+    }
    case OpCode::Id::DEPBAR: {
        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
        break;
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

+    // Signal the backend that this shader uses warp instructions.
+    uses_warps = true;
+
    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -172,6 +172,7 @@ enum class OperationCode {
    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

+    InvocationId,       /// () -> int
    YNegate,            /// () -> float
    LocalInvocationIdX, /// () -> uint
    LocalInvocationIdY, /// () -> uint
@@ -188,6 +189,8 @@ enum class OperationCode {
    ThreadId,       /// () -> uint
    ShuffleIndexed, /// (uint value, uint index) -> uint

+    MemoryBarrierGL, /// () -> void
+
    Amount,
 };

@@ -213,13 +216,14 @@ class PredicateNode;
 class AbufNode;
 class CbufNode;
 class LmemNode;
+class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;

-using NodeData =
-    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
-                 PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
+                              LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
@@ -542,6 +546,19 @@ private:
    u32 element{};
 };

+/// Patch memory (used to communicate tessellation stages).
+class PatchNode final {
+public:
+    explicit PatchNode(u32 offset) : offset{offset} {}
+
+    u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 offset{};
+};
+
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -137,6 +137,10 @@ public:
        return uses_vertex_id;
    }

+    bool UsesWarps() const {
+        return uses_warps;
+    }
+
    bool HasPhysicalAttributes() const {
        return uses_physical_attributes;
    }
@@ -415,6 +419,7 @@ private:
    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
    bool uses_instance_id{};
    bool uses_vertex_id{};
+    bool uses_warps{};

    Tegra::Shader::Header header;
 };
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -7,6 +7,7 @@
 #include <variant>

 #include "common/common_types.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"

 namespace VideoCommon::Shader {
Author	SHA1	Message	Date
Amila Welihinda	8a23c32cf0	delete .appeveyor dir	2019-12-17 00:20:34 -08:00
Amila Welihinda	0471eb6dc7	delete appveyor config	2019-12-15 11:16:39 -08:00
bunnei	3d51153611	Merge pull request #3222 from ReinUsesLisp/maxwell-to-vk maxwell_to_vk: Use VK_EXT_index_type_uint8 and misc changes	2019-12-14 22:30:12 -05:00
bunnei	ccda77c8c4	Merge pull request #3224 from bunnei/boost-ext-update externals: Update boost-ext to include safe_numerics.	2019-12-14 16:13:47 -05:00
bunnei	035ec7d9de	Merge pull request #3213 from ReinUsesLisp/intel-mesa gl_device: Enable compute shaders for Intel Mesa drivers	2019-12-14 16:04:31 -05:00
bunnei	285705b5f4	externals: Update boost-ext to include safe_numerics. - This is useful to me for an upcoming change.	2019-12-14 03:04:42 -05:00
bunnei	2b650543c6	Merge pull request #3212 from ReinUsesLisp/fix-smem-lmem gl_shader_cache: Add missing new-line on emitted GLSL	2019-12-13 21:35:29 -05:00
ReinUsesLisp	e3ea583893	maxwell_to_vk: Improve image format table and add more formats A1B5G5R5 uses A1R5G5B5. This is flipped with image view swizzles; flushing is still not properly implemented on Vulkan for this particular format.	2019-12-13 03:12:29 -03:00
ReinUsesLisp	f27b21077d	maxwell_to_vk: Implement more vertex formats	2019-12-13 03:12:28 -03:00
ReinUsesLisp	8db8631d81	maxwell_to_vk: Implement more primitive topologies Add an extra argument to query device capabilities in the future. The intention behind this is to use native quads, quad strips, line loops and polygons if these are released for Vulkan.	2019-12-13 03:12:28 -03:00
ReinUsesLisp	15513f0801	maxwell_to_vk: Approach GL_CLAMP closer to the GL spec The OpenGL spec defines GL_CLAMP's formula similarly to CLAMP_TO_EDGE and CLAMP_TO_BORDER depending on the filter mode used. It doesn't exactly behave like this, but it's the closest we can get with what Vulkan offers without emulating it by injecting shader code.	2019-12-13 03:12:28 -03:00
ReinUsesLisp	f845df8651	maxwell_to_vk: Use VK_EXT_index_type_uint8 when available	2019-12-13 02:37:23 -03:00
bunnei	6d0d79109b	Merge pull request #3214 from lioncash/svc-func kernel/svc: Amend function signature of SignalProcessWideKey	2019-12-12 21:32:36 -05:00
bunnei	8fc49a83b6	Merge pull request #3217 from jhol/fix-boost-include Added missing include	2019-12-11 22:21:24 -05:00
Fernando Sahmkow	900b2e5cae	Merge pull request #3218 from FernandoS27/tess-gl Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are unimplemented	2019-12-11 17:50:09 -04:00
Fernando Sahmkow	1d2ba3cc97	Gl_Rasterizer: Skip Tesselation Control and Eval stages as they are un implemented. This commit ensures the OGL backend does not execute tesselation shader stages as they are currently unimplemented.	2019-12-11 15:41:26 -04:00
bunnei	1a66cde175	Merge pull request #3210 from ReinUsesLisp/memory-barrier shader: Implement MEMBAR.GL	2019-12-11 14:24:39 -05:00
Joel Holdsworth	e9faa1617c	Added missing include	2019-12-11 18:11:49 +00:00
Fernando Sahmkow	22c6b9fab2	Kernel: Correct behavior of Address Arbiter threads. (#3165 ) * Kernel: Correct behavior of Address Arbiter threads. This corrects arbitration threads to behave just like in Horizon OS. They are added into a container and released according to what priority they had when added. Horizon OS does not reorder them if their priority changes. * Kernel: Address Feedback.	2019-12-11 10:55:38 -05:00
Lioncash	30e365e4fc	kernel/svc: Correct function signature of SignalProcessWideKey This function doesn't actually return a result code, so we can amend the signature of it to match.	2019-12-11 07:13:27 -05:00
ReinUsesLisp	f564eaebed	gl_device: Enable compute shaders for Intel Mesa drivers Previously we naively checked for "Intel" in GL_VENDOR, but this includes both Intel's proprietary driver and the mesa driver. Re-enable compute shaders for mesa.	2019-12-11 00:00:30 -03:00
ReinUsesLisp	48e16c4c49	gl_shader_cache: Add missing new-line on emitted GLSL Add missing new-line. This caused shaders using local memory and shared memory to inject a preprocessor GLSL line after an expression (resulting in invalid code). It looked like this: shared uint smem[8];#define LOCAL_MEMORY_SIZE 16 It should look like this (addressed by this commit): shared uint smem[8]; \#define LOCAL_MEMORY_SIZE 16	2019-12-10 23:52:51 -03:00
bunnei	34f8881d3e	Merge pull request #3201 from lioncash/dump kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew	2019-12-10 21:48:37 -05:00
Rodrigo Locatti	c8db7d1399	Merge pull request #3211 from FernandoS27/depth-mode Maxwell3D: Implement Depth Mode.	2019-12-10 21:20:52 -03:00
Fernando Sahmkow	7ffb672f61	Maxwell3D: Implement Depth Mode. This commit finishes adding depth mode that was reverted before due to other unresolved issues.	2019-12-10 19:51:46 -04:00
ReinUsesLisp	425a254fa2	shader: Implement MEMBAR.GL Implement using memoryBarrier in GLSL and OpMemoryBarrier on SPIR-V.	2019-12-10 16:45:03 -03:00
Fernando Sahmkow	6edadef96d	Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler vk_shader_decompiler: Add tessellation and misc changes	2019-12-10 08:01:41 -04:00
ReinUsesLisp	233ed96a5c	vk_shader_decompiler: Fix build issues on old gcc versions	2019-12-10 01:55:38 -03:00
ReinUsesLisp	d30cf51d7d	vk_shader_decompiler: Reduce YNegate's severity	2019-12-09 23:52:28 -03:00
ReinUsesLisp	0b5b93053d	shader_ir/other: Implement S2R InvocationId	2019-12-09 23:52:28 -03:00
ReinUsesLisp	ecbfa416f0	vk_shader_decompiler: Misc changes Update Sirit and its usage in vk_shader_decompiler. Highlights: - Implement tessellation shaders - Implement geometry shaders - Implement some missing features - Use native half float instructions when available.	2019-12-09 23:51:57 -03:00
ReinUsesLisp	9ad6327fbd	shader: Keep track of shaders using warp instructions	2019-12-09 23:40:41 -03:00
ReinUsesLisp	6233b1db08	shader_ir/memory: Implement patch stores	2019-12-09 23:25:21 -03:00
Fernando Sahmkow	f2458106e6	Merge pull request #3205 from ReinUsesLisp/vk-device vk_device: Misc changes	2019-12-09 20:02:58 -04:00
Lioncash	67b8265bd6	kernel/svc: Provide implementations for svcDumpInfo/svcDumpInfoNew These are fairly trivial to implement, we can just do nothing. This also provides a spot for us to potentially dump out any relevant info in the future (e.g. for debugging purposes with homebrew, etc). While we're at it, we can also correct the names of both of these supervisor calls.	2019-12-07 22:01:17 -05:00