gpu_thread: Handle cache management before DMA commands.

configure_graphics: Disallow changing use_asynchronous_gpu_emulation while running.
gpu: Move flush and invalidate to GPU thread.
2019-01-12 02:41:22 -05:00 · 2019-01-12 01:36:47 -05:00 · 2019-01-12 01:36:47 -05:00 · 2019-01-12 01:36:46 -05:00 · 2019-01-12 01:36:46 -05:00 · 2019-01-12 01:36:46 -05:00
157 changed files with 5348 additions and 8145 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,27 +1,16 @@
 <!--
 Please keep in mind yuzu is EXPERIMENTAL SOFTWARE.

-Please read the FAQ:
-https://yuzu-emu.org/wiki/faq/
+Please read the FAQ: https://yuzu-emu.org/wiki/faq/

-THIS IS NOT A SUPPORT FORUM, FOR SUPPORT GO TO:
-https://community.citra-emu.org/
+When submitting an issue, please do the following:

-If the FAQ does not answer your question, please go to:
-https://community.citra-emu.org/
-
-When submitting an issue, please check the following:
-
- You have read the above.
- You have provided the version (commit hash) of yuzu you are using.
- You have provided sufficient detail for the issue to be reproduced.
- You have provided system specs (if relevant).
- Please also provide:
-  - For any issues, a log file
+- Provide the version (commit hash) of yuzu you are using.
+- Provide sufficient detail for the issue to be reproduced.
+- Provide:
  - For crashes, a backtrace.
  - For graphical issues, comparison screenshots with real hardware.
  - For emulation inaccuracies, a test-case (if able).
-
 -->


--- a/.travis/common/travis-ci.env
+++ b/.travis/common/travis-ci.env
@@ -6,8 +6,6 @@ TRAVIS_BRANCH
 TRAVIS_BUILD_ID
 TRAVIS_BUILD_NUMBER
 TRAVIS_COMMIT
-TRAVIS_COMMIT_RANGE
-TRAVIS_EVENT_TYPE
 TRAVIS_JOB_ID
 TRAVIS_JOB_NUMBER
 TRAVIS_REPO_SLUG
--- a/CMakeModules/CopyYuzuQt5Deps.cmake
+++ b/CMakeModules/CopyYuzuQt5Deps.cmake
@@ -45,8 +45,5 @@ function(copy_yuzu_Qt5_deps target_dir)

    windows_copy_files(yuzu ${Qt5_PLATFORMS_DIR} ${PLATFORMS} qwindows$<$<CONFIG:Debug>:d>.*)
    windows_copy_files(yuzu ${Qt5_STYLES_DIR} ${STYLES} qwindowsvistastyle$<$<CONFIG:Debug>:d>.*)
-    windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS}
-        qjpeg$<$<CONFIG:Debug>:d>.*
-        qgif$<$<CONFIG:Debug>:d>.*
-        )
+    windows_copy_files(yuzu ${Qt5_IMAGEFORMATS_DIR} ${IMAGEFORMATS} qjpeg$<$<CONFIG:Debug>:d>.*)
 endfunction(copy_yuzu_Qt5_deps)
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) {
    }

    // Implementation of a volume slider with a dynamic range of 60 dB
-    const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f;
+    const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f};
    for (auto& sample : samples) {
        sample = static_cast<s16>(sample * volume_scale_factor);
    }
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@@ -53,8 +53,8 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
    const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
    m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);

-    // Place a lower limit of 5% speed. When a game boots up, there will be
-    // many silence samples. These do not need to be timestretched.
+    // Place a lower limit of 5% speed.  When a game boots up, there will be
+    // many silence samples.  These do not need to be timestretched.
    m_stretch_ratio = std::max(m_stretch_ratio, 0.05);
    m_sound_touch.setTempo(m_stretch_ratio);

--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -140,6 +140,8 @@ add_library(core STATIC
    hle/kernel/svc_wrap.h
    hle/kernel/thread.cpp
    hle/kernel/thread.h
+    hle/kernel/timer.cpp
+    hle/kernel/timer.h
    hle/kernel/vm_manager.cpp
    hle/kernel/vm_manager.h
    hle/kernel/wait_object.cpp
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -128,10 +128,12 @@ struct System::Impl {
            return ResultStatus::ErrorVideoCore;
        }

-        gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
+        is_powered_on = true;
+
+        gpu_core = std::make_unique<Tegra::GPU>(*renderer);

        cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
        LOG_DEBUG(Core, "Initialized OK");

        // Reset counters and set time origin to current frame
@@ -443,31 +445,27 @@ std::shared_ptr<FileSys::VfsFilesystem> System::GetFilesystem() const {
    return impl->virtual_filesystem;
 }

-void System::SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet) {
+void System::SetProfileSelector(std::unique_ptr<Core::Frontend::ProfileSelectApplet> applet) {
    impl->profile_selector = std::move(applet);
 }

-const Frontend::ProfileSelectApplet& System::GetProfileSelector() const {
+const Core::Frontend::ProfileSelectApplet& System::GetProfileSelector() const {
    return *impl->profile_selector;
 }

-void System::SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet) {
+void System::SetSoftwareKeyboard(std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> applet) {
    impl->software_keyboard = std::move(applet);
 }

-const Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
+const Core::Frontend::SoftwareKeyboardApplet& System::GetSoftwareKeyboard() const {
    return *impl->software_keyboard;
 }

-void System::SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet) {
+void System::SetWebBrowser(std::unique_ptr<Core::Frontend::WebBrowserApplet> applet) {
    impl->web_browser = std::move(applet);
 }

-Frontend::WebBrowserApplet& System::GetWebBrowser() {
-    return *impl->web_browser;
-}
-
-const Frontend::WebBrowserApplet& System::GetWebBrowser() const {
+const Core::Frontend::WebBrowserApplet& System::GetWebBrowser() const {
    return *impl->web_browser;
 }

--- a/src/core/core.h
+++ b/src/core/core.h
@@ -243,18 +243,17 @@ public:

    std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;

-    void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
+    void SetProfileSelector(std::unique_ptr<Core::Frontend::ProfileSelectApplet> applet);

-    const Frontend::ProfileSelectApplet& GetProfileSelector() const;
+    const Core::Frontend::ProfileSelectApplet& GetProfileSelector() const;

-    void SetSoftwareKeyboard(std::unique_ptr<Frontend::SoftwareKeyboardApplet> applet);
+    void SetSoftwareKeyboard(std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> applet);

-    const Frontend::SoftwareKeyboardApplet& GetSoftwareKeyboard() const;
+    const Core::Frontend::SoftwareKeyboardApplet& GetSoftwareKeyboard() const;

-    void SetWebBrowser(std::unique_ptr<Frontend::WebBrowserApplet> applet);
+    void SetWebBrowser(std::unique_ptr<Core::Frontend::WebBrowserApplet> applet);

-    Frontend::WebBrowserApplet& GetWebBrowser();
-    const Frontend::WebBrowserApplet& GetWebBrowser() const;
+    const Core::Frontend::WebBrowserApplet& GetWebBrowser() const;

 private:
    System();
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -359,8 +359,6 @@ bool NCA::ReadPFS0Section(const NCASectionHeader& section, const NCASectionTable
            dirs.push_back(std::move(npfs));
            if (IsDirectoryExeFS(dirs.back()))
                exefs = dirs.back();
-            else if (IsDirectoryLogoPartition(dirs.back()))
-                logo = dirs.back();
        } else {
            if (has_rights_id)
                status = Loader::ResultStatus::ErrorIncorrectTitlekeyOrTitlekek;
@@ -548,8 +546,4 @@ u64 NCA::GetBaseIVFCOffset() const {
    return ivfc_offset;
 }

-VirtualDir NCA::GetLogoPartition() const {
-    return logo;
-}
-
 } // namespace FileSys
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -74,13 +74,6 @@ inline bool IsDirectoryExeFS(const std::shared_ptr<VfsDirectory>& pfs) {
    return pfs->GetFile("main") != nullptr && pfs->GetFile("main.npdm") != nullptr;
 }

-inline bool IsDirectoryLogoPartition(const VirtualDir& pfs) {
-    // NintendoLogo is the static image in the top left corner while StartupMovie is the animation
-    // in the bottom right corner.
-    return pfs->GetFile("NintendoLogo.png") != nullptr &&
-           pfs->GetFile("StartupMovie.gif") != nullptr;
-}
-
 // An implementation of VfsDirectory that represents a Nintendo Content Archive (NCA) conatiner.
 // After construction, use GetStatus to determine if the file is valid and ready to be used.
 class NCA : public ReadOnlyVfsDirectory {
@@ -109,8 +102,6 @@ public:
    // Returns the base ivfc offset used in BKTR patching.
    u64 GetBaseIVFCOffset() const;

-    VirtualDir GetLogoPartition() const;
-
 private:
    bool CheckSupportedNCA(const NCAHeader& header);
    bool HandlePotentialHeaderDecryption();
@@ -131,7 +122,6 @@ private:

    VirtualFile romfs = nullptr;
    VirtualDir exefs = nullptr;
-    VirtualDir logo = nullptr;
    VirtualFile file;
    VirtualFile bktr_base_romfs;
    u64 ivfc_offset = 0;
--- a/src/core/file_sys/directory.h
+++ b/src/core/file_sys/directory.h
@@ -39,4 +39,28 @@ static_assert(sizeof(Entry) == 0x310, "Directory Entry struct isn't exactly 0x31
 static_assert(offsetof(Entry, type) == 0x304, "Wrong offset for type in Entry.");
 static_assert(offsetof(Entry, file_size) == 0x308, "Wrong offset for file_size in Entry.");

+/// Abstract interface for reading the entries of a directory.
+class DirectoryBackend : NonCopyable {
+public:
+    DirectoryBackend() = default;
+    virtual ~DirectoryBackend() = default;
+
+    /**
+     * List files contained in the directory
+     * @param count Number of entries to return at once in entries
+     * @param entries Buffer to read data into
+     * @return Number of entries listed
+     */
+    virtual u64 Read(u64 count, Entry* entries) = 0;
+
+    /// Returns the number of entries still left to read.
+    virtual u64 GetEntryCount() const = 0;
+
+    /**
+     * Close the directory
+     * @return true if the directory closed correctly
+     */
+    virtual bool Close() const = 0;
+};
+
 } // namespace FileSys
--- a/src/core/frontend/applets/web_browser.cpp
+++ b/src/core/frontend/applets/web_browser.cpp
@@ -13,7 +13,7 @@ DefaultWebBrowserApplet::~DefaultWebBrowserApplet() = default;

 void DefaultWebBrowserApplet::OpenPage(std::string_view filename,
                                       std::function<void()> unpack_romfs_callback,
-                                       std::function<void()> finished_callback) {
+                                       std::function<void()> finished_callback) const {
    LOG_INFO(Service_AM,
             "(STUBBED) called - No suitable web browser implementation found to open website page "
             "at '{}'!",
--- a/src/core/frontend/applets/web_browser.h
+++ b/src/core/frontend/applets/web_browser.h
@@ -14,7 +14,7 @@ public:
    virtual ~WebBrowserApplet();

    virtual void OpenPage(std::string_view url, std::function<void()> unpack_romfs_callback,
-                          std::function<void()> finished_callback) = 0;
+                          std::function<void()> finished_callback) const = 0;
 };

 class DefaultWebBrowserApplet final : public WebBrowserApplet {
@@ -22,7 +22,7 @@ public:
    ~DefaultWebBrowserApplet() override;

    void OpenPage(std::string_view url, std::function<void()> unpack_romfs_callback,
-                  std::function<void()> finished_callback) override;
+                  std::function<void()> finished_callback) const override;
 };

 } // namespace Core::Frontend
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,11 +507,8 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {

    LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
              bp->second.len, bp->second.addr, static_cast<int>(type));
-
-    if (type == BreakpointType::Execute) {
-        Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
-        Core::System::GetInstance().InvalidateCpuInstructionCaches();
-    }
+    Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
+    Core::System::GetInstance().InvalidateCpuInstructionCaches();
    p.erase(addr);
 }

@@ -1060,12 +1057,9 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
    breakpoint.addr = addr;
    breakpoint.len = len;
    Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
-
    static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
-    if (type == BreakpointType::Execute) {
-        Memory::WriteBlock(addr, btrap.data(), btrap.size());
-        Core::System::GetInstance().InvalidateCpuInstructionCaches();
-    }
+    Memory::WriteBlock(addr, btrap.data(), btrap.size());
+    Core::System::GetInstance().InvalidateCpuInstructionCaches();
    p.insert({addr, breakpoint});

    LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -216,11 +216,6 @@ private:

 /// Push ///

-template <>
-inline void ResponseBuilder::Push(s32 value) {
-    cmdbuf[index++] = static_cast<u32>(value);
-}
-
 template <>
 inline void ResponseBuilder::Push(u32 value) {
    cmdbuf[index++] = value;
@@ -239,22 +234,6 @@ inline void ResponseBuilder::Push(ResultCode value) {
    Push<u32>(0);
 }

-template <>
-inline void ResponseBuilder::Push(s8 value) {
-    PushRaw(value);
-}
-
-template <>
-inline void ResponseBuilder::Push(s16 value) {
-    PushRaw(value);
-}
-
-template <>
-inline void ResponseBuilder::Push(s64 value) {
-    Push(static_cast<u32>(value));
-    Push(static_cast<u32>(value >> 32));
-}
-
 template <>
 inline void ResponseBuilder::Push(u8 value) {
    PushRaw(value);
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -18,6 +18,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/timer.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"

@@ -85,12 +86,27 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
    }
 }

+/// The timer callback event, called when a timer is fired.
+/// Looks the timer up by its callback handle and signals it.
+static void TimerCallback(u64 timer_handle, int cycles_late) {
+    const auto handle = static_cast<Handle>(timer_handle);
+    const auto& kernel = Core::System::GetInstance().Kernel();
+    if (const auto timer = kernel.RetrieveTimerFromCallbackHandleTable(handle)) {
+        timer->Signal(cycles_late);
+        return;
+    }
+
+    // No timer is registered under this handle, so there is nothing to signal.
+    LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
+}
+
 struct KernelCore::Impl {
    void Initialize(KernelCore& kernel) {
        Shutdown();

        InitializeSystemResourceLimit(kernel);
        InitializeThreads();
+        InitializeTimers();
    }

    void Shutdown() {
@@ -106,6 +122,9 @@ struct KernelCore::Impl {
        thread_wakeup_callback_handle_table.Clear();
        thread_wakeup_event_type = nullptr;

+        timer_callback_handle_table.Clear();
+        timer_callback_event_type = nullptr;
+
        named_ports.clear();
    }

@@ -127,6 +146,11 @@ struct KernelCore::Impl {
            CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
    }

+    void InitializeTimers() {
+        timer_callback_handle_table.Clear();
+        timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
+    }
+
    std::atomic<u32> next_object_id{0};
    std::atomic<u64> next_process_id{Process::ProcessIDMin};
    std::atomic<u64> next_thread_id{1};
@@ -137,6 +161,12 @@ struct KernelCore::Impl {

    SharedPtr<ResourceLimit> system_resource_limit;

+    /// The event type of the generic timer callback event
+    CoreTiming::EventType* timer_callback_event_type = nullptr;
+    // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future,
+    // allowing us to simply use a pool index or similar.
+    Kernel::HandleTable timer_callback_handle_table;
+
    CoreTiming::EventType* thread_wakeup_event_type = nullptr;
    // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
    // allowing us to simply use a pool index or similar.
@@ -168,6 +198,10 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle
    return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
 }

+SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const {
+    return impl->timer_callback_handle_table.Get<Timer>(handle);
+}
+
 void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
    impl->process_list.push_back(std::move(process));
 }
@@ -213,10 +247,18 @@ u64 KernelCore::CreateNewProcessID() {
    return impl->next_process_id++;
 }

+ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) {
+    return impl->timer_callback_handle_table.Create(timer);
+}
+
 CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
    return impl->thread_wakeup_event_type;
 }

+CoreTiming::EventType* KernelCore::TimerCallbackEventType() const {
+    return impl->timer_callback_event_type;
+}
+
 Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
    return impl->thread_wakeup_callback_handle_table;
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -22,6 +22,7 @@ class HandleTable;
 class Process;
 class ResourceLimit;
 class Thread;
+class Timer;

 /// Represents a single instance of the kernel.
 class KernelCore {
@@ -50,6 +51,9 @@ public:
    /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
    SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;

+    /// Retrieves a shared pointer to a Timer instance within the timer callback handle table.
+    SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const;
+
    /// Adds the given shared pointer to an internal list of active processes.
    void AppendNewProcess(SharedPtr<Process> process);

@@ -78,6 +82,7 @@ private:
    friend class Object;
    friend class Process;
    friend class Thread;
+    friend class Timer;

    /// Creates a new object ID, incrementing the internal object ID counter.
    u32 CreateNewObjectID();
@@ -88,9 +93,15 @@ private:
    /// Creates a new thread ID, incrementing the internal thread ID counter.
    u64 CreateNewThreadID();

+    /// Creates a timer callback handle for the given timer.
+    ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer);
+
    /// Retrieves the event type used for thread wakeup callbacks.
    CoreTiming::EventType* ThreadWakeupCallbackEventType() const;

+    /// Retrieves the event type used for timer callbacks.
+    CoreTiming::EventType* TimerCallbackEventType() const;
+
    /// Provides a reference to the thread wakeup callback handle table.
    Kernel::HandleTable& ThreadWakeupCallbackHandleTable();

--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -16,6 +16,7 @@ bool Object::IsWaitable() const {
    case HandleType::ReadableEvent:
    case HandleType::Thread:
    case HandleType::Process:
+    case HandleType::Timer:
    case HandleType::ServerPort:
    case HandleType::ServerSession:
        return true;
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -25,6 +25,7 @@ enum class HandleType : u32 {
    Thread,
    Process,
    AddressArbiter,
+    Timer,
    ResourceLimit,
    ClientPort,
    ServerPort,
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -44,4 +44,8 @@ ResultCode ReadableEvent::Reset() {
    return RESULT_SUCCESS;
 }

+void ReadableEvent::WakeupAllWaitingThreads() {
+    WaitObject::WakeupAllWaitingThreads();
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -39,6 +39,8 @@ public:
    bool ShouldWait(Thread* thread) const override;
    void Acquire(Thread* thread) override;

+    void WakeupAllWaitingThreads() override;
+
    /// Unconditionally clears the readable event's state.
    void Clear();

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -597,7 +597,6 @@ enum class BreakType : u32 {
    PostNROLoad = 4,
    PreNROUnload = 5,
    PostNROUnload = 6,
-    CppException = 7,
 };

 struct BreakReason {
@@ -670,9 +669,6 @@ static void Break(u32 reason, u64 info1, u64 info2) {
                    "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1,
                    info2);
        break;
-    case BreakType::CppException:
-        LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered.");
-        break;
    default:
        LOG_WARNING(
            Debug_Emulated,
--- a/src/core/hle/kernel/timer.cpp
+++ b/src/core/hle/kernel/timer.cpp
@@ -0,0 +1,98 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/object.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/timer.h"
+
+namespace Kernel {
+
+Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {}
+Timer::~Timer() = default;
+
+/// Builds an unsignaled timer with zero delays and registers it in the kernel's
+/// timer callback handle table.
+SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
+    SharedPtr<Timer> new_timer(new Timer(kernel));
+
+    new_timer->name = std::move(name);
+    new_timer->reset_type = reset_type;
+    new_timer->signaled = false;
+    new_timer->initial_delay = 0;
+    new_timer->interval_delay = 0;
+    new_timer->callback_handle = kernel.CreateTimerCallbackHandle(new_timer).Unwrap();
+
+    return new_timer;
+}
+
+/// A thread has to wait for as long as the timer has not been signaled.
+bool Timer::ShouldWait(Thread* thread) const {
+    return !signaled;
+}
+
+/// Consumes the signal for one-shot timers; other reset types keep it set.
+void Timer::Acquire(Thread* thread) {
+    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+
+    if (reset_type != ResetType::OneShot) {
+        return;
+    }
+    signaled = false;
+}
+
+void Timer::Set(s64 initial, s64 interval) {
+    // Drop any event still pending from a previous Set call.
+    Cancel();
+
+    initial_delay = initial;
+    interval_delay = interval;
+
+    if (initial != 0) {
+        CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(),
+                                  callback_handle);
+        return;
+    }
+
+    // No initial delay: invoke the callback right away instead of scheduling an event.
+    Signal(0);
+}
+
+/// Removes this timer's pending callback event from the CoreTiming queue.
+void Timer::Cancel() {
+    CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle);
+}
+
+/// Resets the signaled state without touching the schedule.
+void Timer::Clear() {
+    signaled = false;
+}
+
+void Timer::WakeupAllWaitingThreads() {
+    WaitObject::WakeupAllWaitingThreads();
+}
+
+void Timer::Signal(int cycles_late) {
+    LOG_TRACE(Kernel, "Timer {} fired", GetObjectId());
+
+    signaled = true;
+    WakeupAllWaitingThreads();
+
+    if (interval_delay == 0) {
+        // Not periodic: nothing to reschedule.
+        return;
+    }
+
+    // Periodic timer: queue the next firing, shortened by cycles_late.
+    const auto next_fire = CoreTiming::nsToCycles(interval_delay) - cycles_late;
+    CoreTiming::ScheduleEvent(next_fire, kernel.TimerCallbackEventType(), callback_handle);
+}
+
+} // namespace Kernel
--- a/src/core/hle/kernel/timer.h
+++ b/src/core/hle/kernel/timer.h
@@ -0,0 +1,96 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+#include "common/common_types.h"
+#include "core/hle/kernel/object.h"
+#include "core/hle/kernel/wait_object.h"
+
+namespace Kernel {
+
+class KernelCore;
+
+/// Waitable kernel object that is signaled after an initial delay and, optionally, at an interval.
+class Timer final : public WaitObject {
+public:
+    /**
+     * Creates a timer
+     * @param kernel The kernel instance to create the timer callback handle for.
+     * @param reset_type ResetType describing how to create the timer
+     * @param name Optional name of timer
+     * @return The created Timer
+     */
+    static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type,
+                                   std::string name = "Unknown");
+
+    std::string GetTypeName() const override {
+        return "Timer";
+    }
+    std::string GetName() const override {
+        return name;
+    }
+
+    static const HandleType HANDLE_TYPE = HandleType::Timer;
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    ResetType GetResetType() const {
+        return reset_type;
+    }
+
+    u64 GetInitialDelay() const {
+        return initial_delay;
+    }
+
+    u64 GetIntervalDelay() const {
+        return interval_delay;
+    }
+
+    bool ShouldWait(Thread* thread) const override;
+    void Acquire(Thread* thread) override;
+
+    void WakeupAllWaitingThreads() override;
+
+    /**
+     * Starts the timer, with the specified initial delay and interval.
+     * @param initial Delay in nanoseconds until the timer is first fired
+     * @param interval Delay in nanoseconds until the timer is fired after the first time
+     */
+    void Set(s64 initial, s64 interval);
+
+    /// Unschedules any pending callback event for this timer.
+    void Cancel();
+
+    /// Resets the signaled state of the timer.
+    void Clear();
+
+    /**
+     * Signals the timer, waking up any waiting threads and rescheduling it
+     * for the next interval.
+     * This method should not be called from outside the timer callback handler,
+     * lest multiple callback events get scheduled.
+     */
+    void Signal(int cycles_late);
+
+private:
+    explicit Timer(KernelCore& kernel);
+    ~Timer() override;
+
+    ResetType reset_type; ///< The ResetType of this timer
+
+    u64 initial_delay;  ///< The delay until the timer fires for the first time
+    u64 interval_delay; ///< The delay until the timer fires after the first time
+
+    bool signaled;    ///< Whether the timer has been signaled or not
+    std::string name; ///< Name of timer (optional)
+
+    /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
+    Handle callback_handle;
+};
+
+} // namespace Kernel
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -33,19 +33,19 @@ public:
     * Add a thread to wait on this object
     * @param thread Pointer to thread to add
     */
-    void AddWaitingThread(SharedPtr<Thread> thread);
+    virtual void AddWaitingThread(SharedPtr<Thread> thread);

    /**
     * Removes a thread from waiting on this object (e.g. if it was resumed already)
     * @param thread Pointer to thread to remove
     */
-    void RemoveWaitingThread(Thread* thread);
+    virtual void RemoveWaitingThread(Thread* thread);

    /**
     * Wake up all threads waiting on this object that can be awoken, in priority order,
     * and set the synchronization result and output of the thread.
     */
-    void WakeupAllWaitingThreads();
+    virtual void WakeupAllWaitingThreads();

    /**
     * Wakes up a single thread waiting on this object.
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -322,15 +322,14 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c

 void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_AM, "(STUBBED) called");
-
    // TODO(Subv): Find out how AM determines the display to use, for now just
    // create the layer in the Default display.
-    const auto display_id = nvflinger->OpenDisplay("Default");
-    const auto layer_id = nvflinger->CreateLayer(*display_id);
+    u64 display_id = nvflinger->OpenDisplay("Default");
+    u64 layer_id = nvflinger->CreateLayer(display_id);

    IPC::ResponseBuilder rb{ctx, 4};
    rb.Push(RESULT_SUCCESS);
-    rb.Push(*layer_id);
+    rb.Push(layer_id);
 }

 void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -249,8 +249,7 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
        {300, nullptr, "OpenOverlayAppletProxy"},
        {350, nullptr, "OpenSystemApplicationProxy"},
        {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"},
-        {410, nullptr, "GetSystemAppletControllerForDebug"},
-        {1000, nullptr, "GetDebugFunctions"},
+        {401, nullptr, "GetSystemAppletControllerForDebug"},
    };
    // clang-format on

--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -2,16 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <array>
-#include <cstring>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_funcs.h"
 #include "common/common_paths.h"
-#include "common/file_util.h"
 #include "common/hex_util.h"
-#include "common/logging/log.h"
+#include "common/logging/backend.h"
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/file_sys/content_archive.h"
@@ -19,6 +12,7 @@
 #include "core/file_sys/nca_metadata.h"
 #include "core/file_sys/registered_cache.h"
 #include "core/file_sys/romfs.h"
+#include "core/file_sys/romfs_factory.h"
 #include "core/file_sys/vfs_types.h"
 #include "core/frontend/applets/web_browser.h"
 #include "core/hle/kernel/process.h"
@@ -152,7 +146,7 @@ void WebBrowser::Execute() {
        return;
    }

-    auto& frontend{Core::System::GetInstance().GetWebBrowser()};
+    const auto& frontend{Core::System::GetInstance().GetWebBrowser()};

    frontend.OpenPage(filename, [this] { UnpackRomFS(); }, [this] { Finalize(); });
 }
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -12,7 +12,6 @@ namespace Service::Audio {
 class IAudioIn final : public ServiceFramework<IAudioIn> {
 public:
    IAudioIn() : ServiceFramework("IAudioIn") {
-        // clang-format off
        static const FunctionInfo functions[] = {
            {0, nullptr, "GetAudioInState"},
            {1, nullptr, "StartAudioIn"},
@@ -29,24 +28,16 @@ public:
            {12, nullptr, "SetAudioInDeviceGain"},
            {13, nullptr, "GetAudioInDeviceGain"},
        };
-        // clang-format on
-
        RegisterHandlers(functions);
    }
    ~IAudioIn() = default;
 };

 AudInU::AudInU() : ServiceFramework("audin:u") {
-    // clang-format off
    static const FunctionInfo functions[] = {
-        {0, nullptr, "ListAudioIns"},
-        {1, nullptr, "OpenAudioIn"},
-        {2, nullptr, "Unknown"},
-        {3, nullptr, "OpenAudioInAuto"},
-        {4, nullptr, "ListAudioInsAuto"},
+        {0, nullptr, "ListAudioIns"},    {1, nullptr, "OpenAudioIn"},      {2, nullptr, "Unknown"},
+        {3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"},
    };
-    // clang-format on
-
    RegisterHandlers(functions);
 }

--- a/src/core/hle/service/audio/audrec_u.cpp
+++ b/src/core/hle/service/audio/audrec_u.cpp
@@ -12,7 +12,6 @@ namespace Service::Audio {
 class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> {
 public:
    IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") {
-        // clang-format off
        static const FunctionInfo functions[] = {
            {0, nullptr, "GetFinalOutputRecorderState"},
            {1, nullptr, "StartFinalOutputRecorder"},
@@ -21,13 +20,10 @@ public:
            {4, nullptr, "RegisterBufferEvent"},
            {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"},
            {6, nullptr, "ContainsFinalOutputRecorderBuffer"},
-            {7, nullptr, "GetFinalOutputRecorderBufferEndTime"},
+            {7, nullptr, "Unknown"},
            {8, nullptr, "AppendFinalOutputRecorderBufferAuto"},
            {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"},
-            {10, nullptr, "FlushFinalOutputRecorderBuffers"},
        };
-        // clang-format on
-
        RegisterHandlers(functions);
    }
    ~IFinalOutputRecorder() = default;
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -229,16 +229,14 @@ private:
 }; // namespace Audio

 AudRenU::AudRenU() : ServiceFramework("audren:u") {
-    // clang-format off
    static const FunctionInfo functions[] = {
        {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
        {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
-        {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
+        {2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
        {3, nullptr, "OpenAudioRendererAuto"},
-        {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
+        {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
+         "GetAudioDeviceServiceWithRevisionInfo"},
    };
-    // clang-format on
-
    RegisterHandlers(functions);
 }

@@ -315,7 +313,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
 }

-void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
+void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "called");

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -20,7 +20,7 @@ public:
 private:
    void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
    void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
-    void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
+    void GetAudioDevice(Kernel::HLERequestContext& ctx);
    void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);

    enum class AudioFeatures : u32 {
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -5,6 +5,7 @@
 #include <chrono>
 #include <cstring>
 #include <memory>
+#include <optional>
 #include <vector>

 #include <opus.h>
@@ -29,66 +30,48 @@ public:
                                u32 channel_count)
        : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
          sample_rate(sample_rate), channel_count(channel_count) {
-        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
+            {0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
            {1, nullptr, "SetContext"},
-            {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
+            {2, nullptr, "DecodeInterleavedForMultiStream"},
            {3, nullptr, "SetContextForMultiStream"},
-            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
-            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
-            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {7, nullptr, "DecodeInterleavedForMultiStream"},
+            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
+             "DecodeInterleavedWithPerformance"},
+            {5, nullptr, "Unknown5"},
+            {6, nullptr, "Unknown6"},
+            {7, nullptr, "Unknown7"},
        };
-        // clang-format on
-
        RegisterHandlers(functions);
    }

 private:
-    /// Describes extra behavior that may be asked of the decoding context.
-    enum class ExtraBehavior {
-        /// No extra behavior.
-        None,
-
-        /// Resets the decoder context back to a freshly initialized state.
-        ResetContext,
-    };
-
-    void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
-
-        DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
-    }
-
-    void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
-
-        u64 performance = 0;
-        DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
-    }
-
    void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Audio, "called");

-        IPC::RequestParser rp{ctx};
-        const auto extra_behavior =
-            rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
-
-        u64 performance = 0;
-        DecodeInterleavedHelper(ctx, &performance, extra_behavior);
-    }
-
-    void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
-                                 ExtraBehavior extra_behavior) {
        u32 consumed = 0;
        u32 sample_count = 0;
        std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
-
-        if (extra_behavior == ExtraBehavior::ResetContext) {
-            ResetDecoderContext();
+        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) {
+            LOG_ERROR(Audio, "Failed to decode opus data");
+            IPC::ResponseBuilder rb{ctx, 2};
+            // TODO(ogniK): Use correct error code
+            rb.Push(ResultCode(-1));
+            return;
        }
+        IPC::ResponseBuilder rb{ctx, 4};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(consumed);
+        rb.Push<u32>(sample_count);
+        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
+    }

+    void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        u32 consumed = 0;
+        u32 sample_count = 0;
+        u64 performance = 0;
+        std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
                                       performance)) {
            LOG_ERROR(Audio, "Failed to decode opus data");
@@ -97,28 +80,25 @@ private:
            rb.Push(ResultCode(-1));
            return;
        }
-
-        const u32 param_size = performance != nullptr ? 6 : 4;
-        IPC::ResponseBuilder rb{ctx, param_size};
+        IPC::ResponseBuilder rb{ctx, 6};
        rb.Push(RESULT_SUCCESS);
        rb.Push<u32>(consumed);
        rb.Push<u32>(sample_count);
-        if (performance) {
-            rb.Push<u64>(*performance);
-        }
+        rb.Push<u64>(performance);
        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
    }

-    bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
-                                   std::vector<opus_int16>& output, u64* out_performance_time) {
+    bool Decoder_DecodeInterleaved(
+        u32& consumed, u32& sample_count, const std::vector<u8>& input,
+        std::vector<opus_int16>& output,
+        std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
        const auto start_time = std::chrono::high_resolution_clock::now();
-        const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
+        std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
        if (sizeof(OpusHeader) > input.size()) {
            LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
                      sizeof(OpusHeader), input.size());
            return false;
        }
-
        OpusHeader hdr{};
        std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
        if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
@@ -126,9 +106,8 @@ private:
                      sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
            return false;
        }
-
-        const auto frame = input.data() + sizeof(OpusHeader);
-        const auto decoded_sample_count = opus_packet_get_nb_samples(
+        auto frame = input.data() + sizeof(OpusHeader);
+        auto decoded_sample_count = opus_packet_get_nb_samples(
            frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
            static_cast<opus_int32>(sample_rate));
        if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
@@ -138,9 +117,8 @@ private:
                decoded_sample_count * channel_count * sizeof(u16), raw_output_sz);
            return false;
        }
-
        const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
-        const auto out_sample_count =
+        auto out_sample_count =
            opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
        if (out_sample_count < 0) {
            LOG_ERROR(Audio,
@@ -149,24 +127,16 @@ private:
                      out_sample_count, frame_size, static_cast<u32>(hdr.sz));
            return false;
        }
-
        const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
        sample_count = out_sample_count;
        consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
-        if (out_performance_time != nullptr) {
-            *out_performance_time =
+        if (performance_time.has_value()) {
+            performance_time->get() =
                std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
        }
-
        return true;
    }

-    void ResetDecoderContext() {
-        ASSERT(decoder != nullptr);
-
-        opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
-    }
-
    struct OpusHeader {
        u32_be sz; // Needs to be BE for some odd reason
        INSERT_PADDING_WORDS(1);
@@ -187,7 +157,6 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};
    const auto sample_rate = rp.Pop<u32>();
    const auto channel_count = rp.Pop<u32>();
-
    LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count);

    ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
@@ -205,10 +174,9 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {

 void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};
-    const auto sample_rate = rp.Pop<u32>();
-    const auto channel_count = rp.Pop<u32>();
-    const auto buffer_sz = rp.Pop<u32>();
-
+    auto sample_rate = rp.Pop<u32>();
+    auto channel_count = rp.Pop<u32>();
+    auto buffer_sz = rp.Pop<u32>();
    LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate,
              channel_count, buffer_sz);

@@ -217,9 +185,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
               "Invalid sample rate");
    ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");

-    const std::size_t worker_sz = WorkerBufferSize(channel_count);
+    std::size_t worker_sz = WorkerBufferSize(channel_count);
    ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
-
    std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
        static_cast<OpusDecoder*>(operator new(worker_sz))};
    if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -19,16 +19,16 @@ public:
    explicit Bt() : ServiceFramework{"bt"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "LeClientReadCharacteristic"},
-            {1, nullptr, "LeClientReadDescriptor"},
-            {2, nullptr, "LeClientWriteCharacteristic"},
-            {3, nullptr, "LeClientWriteDescriptor"},
-            {4, nullptr, "LeClientRegisterNotification"},
-            {5, nullptr, "LeClientDeregisterNotification"},
-            {6, nullptr, "SetLeResponse"},
-            {7, nullptr, "LeSendIndication"},
-            {8, nullptr, "GetLeEventInfo"},
-            {9, &Bt::RegisterBleEvent, "RegisterBleEvent"},
+            {0, nullptr, "Unknown0"},
+            {1, nullptr, "Unknown1"},
+            {2, nullptr, "Unknown2"},
+            {3, nullptr, "Unknown3"},
+            {4, nullptr, "Unknown4"},
+            {5, nullptr, "Unknown5"},
+            {6, nullptr, "Unknown6"},
+            {7, nullptr, "Unknown7"},
+            {8, nullptr, "Unknown8"},
+            {9, &Bt::RegisterEvent, "RegisterEvent"},
        };
        // clang-format on
        RegisterHandlers(functions);
@@ -39,7 +39,7 @@ public:
    }

 private:
-    void RegisterBleEvent(Kernel::HLERequestContext& ctx) {
+    void RegisterEvent(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_BTM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -55,11 +55,11 @@ public:
    explicit BtDrv() : ServiceFramework{"btdrv"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "InitializeBluetoothDriver"},
-            {1, nullptr, "InitializeBluetooth"},
-            {2, nullptr, "EnableBluetooth"},
-            {3, nullptr, "DisableBluetooth"},
-            {4, nullptr, "CleanupBluetooth"},
+            {0, nullptr, "Unknown"},
+            {1, nullptr, "Init"},
+            {2, nullptr, "Enable"},
+            {3, nullptr, "Disable"},
+            {4, nullptr, "CleanupAndShutdown"},
            {5, nullptr, "GetAdapterProperties"},
            {6, nullptr, "GetAdapterProperty"},
            {7, nullptr, "SetAdapterProperty"},
@@ -70,91 +70,36 @@ public:
            {12, nullptr, "CancelBond"},
            {13, nullptr, "PinReply"},
            {14, nullptr, "SspReply"},
-            {15, nullptr, "GetEventInfo"},
-            {16, nullptr, "InitializeHid"},
-            {17, nullptr, "HidConnect"},
-            {18, nullptr, "HidDisconnect"},
-            {19, nullptr, "HidSendData"},
-            {20, nullptr, "HidSendData2"},
-            {21, nullptr, "HidSetReport"},
-            {22, nullptr, "HidGetReport"},
-            {23, nullptr, "HidWakeController"},
-            {24, nullptr, "HidAddPairedDevice"},
-            {25, nullptr, "HidGetPairedDevice"},
-            {26, nullptr, "CleanupHid"},
-            {27, nullptr, "HidGetEventInfo"},
-            {28, nullptr, "ExtSetTsi"},
-            {29, nullptr, "ExtSetBurstMode"},
-            {30, nullptr, "ExtSetZeroRetran"},
-            {31, nullptr, "ExtSetMcMode"},
-            {32, nullptr, "ExtStartLlrMode"},
-            {33, nullptr, "ExtExitLlrMode"},
-            {34, nullptr, "ExtSetRadio"},
-            {35, nullptr, "ExtSetVisibility"},
-            {36, nullptr, "ExtSetTbfcScan"},
-            {37, nullptr, "RegisterHidReportEvent"},
-            {38, nullptr, "HidGetReportEventInfo"},
-            {39, nullptr, "GetLatestPlr"},
-            {40, nullptr, "ExtGetPendingConnections"},
-            {41, nullptr, "GetChannelMap"},
-            {42, nullptr, "EnableBluetoothBoostSetting"},
-            {43, nullptr, "IsBluetoothBoostSettingEnabled"},
-            {44, nullptr, "EnableBluetoothAfhSetting"},
-            {45, nullptr, "IsBluetoothAfhSettingEnabled"},
-            {46, nullptr, "InitializeBluetoothLe"},
-            {47, nullptr, "EnableBluetoothLe"},
-            {48, nullptr, "DisableBluetoothLe"},
-            {49, nullptr, "CleanupBluetoothLe"},
-            {50, nullptr, "SetLeVisibility"},
-            {51, nullptr, "SetLeConnectionParameter"},
-            {52, nullptr, "SetLeDefaultConnectionParameter"},
-            {53, nullptr, "SetLeAdvertiseData"},
-            {54, nullptr, "SetLeAdvertiseParameter"},
-            {55, nullptr, "StartLeScan"},
-            {56, nullptr, "StopLeScan"},
-            {57, nullptr, "AddLeScanFilterCondition"},
-            {58, nullptr, "DeleteLeScanFilterCondition"},
-            {59, nullptr, "DeleteLeScanFilter"},
-            {60, nullptr, "ClearLeScanFilters"},
-            {61, nullptr, "EnableLeScanFilter"},
-            {62, nullptr, "RegisterLeClient"},
-            {63, nullptr, "UnregisterLeClient"},
-            {64, nullptr, "UnregisterLeClientAll"},
-            {65, nullptr, "LeClientConnect"},
-            {66, nullptr, "LeClientCancelConnection"},
-            {67, nullptr, "LeClientDisconnect"},
-            {68, nullptr, "LeClientGetAttributes"},
-            {69, nullptr, "LeClientDiscoverService"},
-            {70, nullptr, "LeClientConfigureMtu"},
-            {71, nullptr, "RegisterLeServer"},
-            {72, nullptr, "UnregisterLeServer"},
-            {73, nullptr, "LeServerConnect"},
-            {74, nullptr, "LeServerDisconnect"},
-            {75, nullptr, "CreateLeService"},
-            {76, nullptr, "StartLeService"},
-            {77, nullptr, "AddLeCharacteristic"},
-            {78, nullptr, "AddLeDescriptor"},
-            {79, nullptr, "GetLeCoreEventInfo"},
-            {80, nullptr, "LeGetFirstCharacteristic"},
-            {81, nullptr, "LeGetNextCharacteristic"},
-            {82, nullptr, "LeGetFirstDescriptor"},
-            {83, nullptr, "LeGetNextDescriptor"},
-            {84, nullptr, "RegisterLeCoreDataPath"},
-            {85, nullptr, "UnregisterLeCoreDataPath"},
-            {86, nullptr, "RegisterLeHidDataPath"},
-            {87, nullptr, "UnregisterLeHidDataPath"},
-            {88, nullptr, "RegisterLeDataPath"},
-            {89, nullptr, "UnregisterLeDataPath"},
-            {90, nullptr, "LeClientReadCharacteristic"},
-            {91, nullptr, "LeClientReadDescriptor"},
-            {92, nullptr, "LeClientWriteCharacteristic"},
-            {93, nullptr, "LeClientWriteDescriptor"},
-            {94, nullptr, "LeClientRegisterNotification"},
-            {95, nullptr, "LeClientDeregisterNotification"},
-            {96, nullptr, "GetLeHidEventInfo"},
-            {97, nullptr, "RegisterBleHidEvent"},
-            {98, nullptr, "SetLeScanParameter"},
-            {256, nullptr, "GetIsManufacturingMode"}
+            {15, nullptr, "Unknown2"},
+            {16, nullptr, "InitInterfaces"},
+            {17, nullptr, "HidHostInterface_Connect"},
+            {18, nullptr, "HidHostInterface_Disconnect"},
+            {19, nullptr, "HidHostInterface_SendData"},
+            {20, nullptr, "HidHostInterface_SendData2"},
+            {21, nullptr, "HidHostInterface_SetReport"},
+            {22, nullptr, "HidHostInterface_GetReport"},
+            {23, nullptr, "HidHostInterface_WakeController"},
+            {24, nullptr, "HidHostInterface_AddPairedDevice"},
+            {25, nullptr, "HidHostInterface_GetPairedDevice"},
+            {26, nullptr, "HidHostInterface_CleanupAndShutdown"},
+            {27, nullptr, "Unknown3"},
+            {28, nullptr, "ExtInterface_SetTSI"},
+            {29, nullptr, "ExtInterface_SetBurstMode"},
+            {30, nullptr, "ExtInterface_SetZeroRetran"},
+            {31, nullptr, "ExtInterface_SetMcMode"},
+            {32, nullptr, "ExtInterface_StartLlrMode"},
+            {33, nullptr, "ExtInterface_ExitLlrMode"},
+            {34, nullptr, "ExtInterface_SetRadio"},
+            {35, nullptr, "ExtInterface_SetVisibility"},
+            {36, nullptr, "Unknown4"},
+            {37, nullptr, "Unknown5"},
+            {38, nullptr, "HidHostInterface_GetLatestPlr"},
+            {39, nullptr, "ExtInterface_GetPendingConnections"},
+            {40, nullptr, "HidHostInterface_GetChannelMap"},
+            {41, nullptr, "SetIsBluetoothBoostEnabled"},
+            {42, nullptr, "GetIsBluetoothBoostEnabled"},
+            {43, nullptr, "SetIsBluetoothAfhEnabled"},
+            {44, nullptr, "GetIsBluetoothAfhEnabled"},
        };
        // clang-format on

--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -20,38 +20,38 @@ public:
    explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, &IBtmUserCore::AcquireBleScanEvent, "AcquireBleScanEvent"},
-            {1, nullptr, "GetBleScanFilterParameter"},
-            {2, nullptr, "GetBleScanFilterParameter2"},
-            {3, nullptr, "StartBleScanForGeneral"},
-            {4, nullptr, "StopBleScanForGeneral"},
-            {5, nullptr, "GetBleScanResultsForGeneral"},
-            {6, nullptr, "StartBleScanForPaired"},
-            {7, nullptr, "StopBleScanForPaired"},
-            {8, nullptr, "StartBleScanForSmartDevice"},
-            {9, nullptr, "StopBleScanForSmartDevice"},
-            {10, nullptr, "GetBleScanResultsForSmartDevice"},
-            {17, &IBtmUserCore::AcquireBleConnectionEvent, "AcquireBleConnectionEvent"},
-            {18, nullptr, "BleConnect"},
-            {19, nullptr, "BleDisconnect"},
-            {20, nullptr, "BleGetConnectionState"},
-            {21, nullptr, "AcquireBlePairingEvent"},
-            {22, nullptr, "BlePairDevice"},
-            {23, nullptr, "BleUnPairDevice"},
-            {24, nullptr, "BleUnPairDevice2"},
-            {25, nullptr, "BleGetPairedDevices"},
-            {26, &IBtmUserCore::AcquireBleServiceDiscoveryEvent, "AcquireBleServiceDiscoveryEvent"},
-            {27, nullptr, "GetGattServices"},
-            {28, nullptr, "GetGattService"},
-            {29, nullptr, "GetGattIncludedServices"},
-            {30, nullptr, "GetBelongingGattService"},
-            {31, nullptr, "GetGattCharacteristics"},
-            {32, nullptr, "GetGattDescriptors"},
-            {33, &IBtmUserCore::AcquireBleMtuConfigEvent, "AcquireBleMtuConfigEvent"},
-            {34, nullptr, "ConfigureBleMtu"},
-            {35, nullptr, "GetBleMtu"},
-            {36, nullptr, "RegisterBleGattDataPath"},
-            {37, nullptr, "UnregisterBleGattDataPath"},
+            {0, &IBtmUserCore::GetScanEvent, "GetScanEvent"},
+            {1, nullptr, "Unknown1"},
+            {2, nullptr, "Unknown2"},
+            {3, nullptr, "Unknown3"},
+            {4, nullptr, "Unknown4"},
+            {5, nullptr, "Unknown5"},
+            {6, nullptr, "Unknown6"},
+            {7, nullptr, "Unknown7"},
+            {8, nullptr, "Unknown8"},
+            {9, nullptr, "Unknown9"},
+            {10, nullptr, "Unknown10"},
+            {17, &IBtmUserCore::GetConnectionEvent, "GetConnectionEvent"},
+            {18, nullptr, "Unknown18"},
+            {19, nullptr, "Unknown19"},
+            {20, nullptr, "Unknown20"},
+            {21, nullptr, "Unknown21"},
+            {22, nullptr, "Unknown22"},
+            {23, nullptr, "Unknown23"},
+            {24, nullptr, "Unknown24"},
+            {25, nullptr, "Unknown25"},
+            {26, &IBtmUserCore::GetDiscoveryEvent, "AcquireBleServiceDiscoveryEventImpl"},
+            {27, nullptr, "Unknown27"},
+            {28, nullptr, "Unknown28"},
+            {29, nullptr, "Unknown29"},
+            {30, nullptr, "Unknown30"},
+            {31, nullptr, "Unknown31"},
+            {32, nullptr, "Unknown32"},
+            {33, &IBtmUserCore::GetConfigEvent, "GetConfigEvent"},
+            {34, nullptr, "Unknown34"},
+            {35, nullptr, "Unknown35"},
+            {36, nullptr, "Unknown36"},
+            {37, nullptr, "Unknown37"},
        };
        // clang-format on
        RegisterHandlers(functions);
@@ -68,7 +68,7 @@ public:
    }

 private:
-    void AcquireBleScanEvent(Kernel::HLERequestContext& ctx) {
+    void GetScanEvent(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_BTM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -76,7 +76,7 @@ private:
        rb.PushCopyObjects(scan_event.readable);
    }

-    void AcquireBleConnectionEvent(Kernel::HLERequestContext& ctx) {
+    void GetConnectionEvent(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_BTM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -84,7 +84,7 @@ private:
        rb.PushCopyObjects(connection_event.readable);
    }

-    void AcquireBleServiceDiscoveryEvent(Kernel::HLERequestContext& ctx) {
+    void GetDiscoveryEvent(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_BTM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -92,7 +92,7 @@ private:
        rb.PushCopyObjects(service_discovery.readable);
    }

-    void AcquireBleMtuConfigEvent(Kernel::HLERequestContext& ctx) {
+    void GetConfigEvent(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_BTM, "(STUBBED) called");

        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -111,14 +111,14 @@ public:
    explicit BTM_USR() : ServiceFramework{"btm:u"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, &BTM_USR::GetCore, "GetCore"},
+            {0, &BTM_USR::GetCoreImpl, "GetCoreImpl"},
        };
        // clang-format on
        RegisterHandlers(functions);
    }

 private:
-    void GetCore(Kernel::HLERequestContext& ctx) {
+    void GetCoreImpl(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_BTM, "called");

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -134,64 +134,26 @@ public:
        static const FunctionInfo functions[] = {
            {0, nullptr, "Unknown1"},
            {1, nullptr, "Unknown2"},
-            {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"},
+            {2, nullptr, "RegisterSystemEventForConnectedDeviceConditionImpl"},
            {3, nullptr, "Unknown3"},
            {4, nullptr, "Unknown4"},
            {5, nullptr, "Unknown5"},
            {6, nullptr, "Unknown6"},
            {7, nullptr, "Unknown7"},
-            {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"},
+            {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfoImpl"},
            {9, nullptr, "Unknown8"},
            {10, nullptr, "Unknown9"},
            {11, nullptr, "Unknown10"},
            {12, nullptr, "Unknown11"},
            {13, nullptr, "Unknown12"},
-            {14, nullptr, "EnableRadio"},
-            {15, nullptr, "DisableRadio"},
+            {14, nullptr, "EnableRadioImpl"},
+            {15, nullptr, "DisableRadioImpl"},
            {16, nullptr, "Unknown13"},
            {17, nullptr, "Unknown14"},
            {18, nullptr, "Unknown15"},
            {19, nullptr, "Unknown16"},
            {20, nullptr, "Unknown17"},
            {21, nullptr, "Unknown18"},
-            {22, nullptr, "Unknown19"},
-            {23, nullptr, "Unknown20"},
-            {24, nullptr, "Unknown21"},
-            {25, nullptr, "Unknown22"},
-            {26, nullptr, "Unknown23"},
-            {27, nullptr, "Unknown24"},
-            {28, nullptr, "Unknown25"},
-            {29, nullptr, "Unknown26"},
-            {30, nullptr, "Unknown27"},
-            {31, nullptr, "Unknown28"},
-            {32, nullptr, "Unknown29"},
-            {33, nullptr, "Unknown30"},
-            {34, nullptr, "Unknown31"},
-            {35, nullptr, "Unknown32"},
-            {36, nullptr, "Unknown33"},
-            {37, nullptr, "Unknown34"},
-            {38, nullptr, "Unknown35"},
-            {39, nullptr, "Unknown36"},
-            {40, nullptr, "Unknown37"},
-            {41, nullptr, "Unknown38"},
-            {42, nullptr, "Unknown39"},
-            {43, nullptr, "Unknown40"},
-            {44, nullptr, "Unknown41"},
-            {45, nullptr, "Unknown42"},
-            {46, nullptr, "Unknown43"},
-            {47, nullptr, "Unknown44"},
-            {48, nullptr, "Unknown45"},
-            {49, nullptr, "Unknown46"},
-            {50, nullptr, "Unknown47"},
-            {51, nullptr, "Unknown48"},
-            {52, nullptr, "Unknown49"},
-            {53, nullptr, "Unknown50"},
-            {54, nullptr, "Unknown51"},
-            {55, nullptr, "Unknown52"},
-            {56, nullptr, "Unknown53"},
-            {57, nullptr, "Unknown54"},
-            {58, nullptr, "Unknown55"},
-            {59, nullptr, "Unknown56"},
        };
        // clang-format on

@@ -204,7 +166,7 @@ public:
    explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "RegisterSystemEventForDiscovery"},
+            {0, nullptr, "RegisterSystemEventForDiscoveryImpl"},
            {1, nullptr, "Unknown1"},
            {2, nullptr, "Unknown2"},
            {3, nullptr, "Unknown3"},
@@ -213,10 +175,6 @@ public:
            {6, nullptr, "Unknown6"},
            {7, nullptr, "Unknown7"},
            {8, nullptr, "Unknown8"},
-            {9, nullptr, "Unknown9"},
-            {10, nullptr, "Unknown10"},
-            {11, nullptr, "Unknown11"},
-            {12, nullptr, "Unknown11"},
        };
        // clang-format on

@@ -229,16 +187,16 @@ public:
    explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "StartGamepadPairing"},
-            {1, nullptr, "CancelGamepadPairing"},
-            {2, nullptr, "ClearGamepadPairingDatabase"},
-            {3, nullptr, "GetPairedGamepadCount"},
-            {4, nullptr, "EnableRadio"},
-            {5, nullptr, "DisableRadio"},
-            {6, nullptr, "GetRadioOnOff"},
-            {7, nullptr, "AcquireRadioEvent"},
-            {8, nullptr, "AcquireGamepadPairingEvent"},
-            {9, nullptr, "IsGamepadPairingStarted"},
+            {0, nullptr, "StartGamepadPairingImpl"},
+            {1, nullptr, "CancelGamepadPairingImpl"},
+            {2, nullptr, "ClearGamepadPairingDatabaseImpl"},
+            {3, nullptr, "GetPairedGamepadCountImpl"},
+            {4, nullptr, "EnableRadioImpl"},
+            {5, nullptr, "DisableRadioImpl"},
+            {6, nullptr, "GetRadioOnOffImpl"},
+            {7, nullptr, "AcquireRadioEventImpl"},
+            {8, nullptr, "AcquireGamepadPairingEventImpl"},
+            {9, nullptr, "IsGamepadPairingStartedImpl"},
        };
        // clang-format on

@@ -251,7 +209,7 @@ public:
    explicit BTM_SYS() : ServiceFramework{"btm:sys"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, &BTM_SYS::GetCore, "GetCore"},
+            {0, &BTM_SYS::GetCoreImpl, "GetCoreImpl"},
        };
        // clang-format on

@@ -259,7 +217,7 @@ public:
    }

 private:
-    void GetCore(Kernel::HLERequestContext& ctx) {
+    void GetCoreImpl(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_BTM, "called");

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -627,8 +627,8 @@ private:
 FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
    // clang-format off
    static const FunctionInfo functions[] = {
-        {0, nullptr, "OpenFileSystem"},
-        {1, &FSP_SRV::SetCurrentProcess, "SetCurrentProcess"},
+        {0, nullptr, "MountContent"},
+        {1, &FSP_SRV::Initialize, "Initialize"},
        {2, nullptr, "OpenDataFileSystemByCurrentProcess"},
        {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"},
        {8, nullptr, "OpenFileSystemWithId"},
@@ -637,10 +637,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
        {12, nullptr, "OpenBisStorage"},
        {13, nullptr, "InvalidateBisCache"},
        {17, nullptr, "OpenHostFileSystem"},
-        {18, &FSP_SRV::OpenSdCardFileSystem, "OpenSdCardFileSystem"},
+        {18, &FSP_SRV::MountSdCard, "MountSdCard"},
        {19, nullptr, "FormatSdCardFileSystem"},
        {21, nullptr, "DeleteSaveDataFileSystem"},
-        {22, &FSP_SRV::CreateSaveDataFileSystem, "CreateSaveDataFileSystem"},
+        {22, &FSP_SRV::CreateSaveData, "CreateSaveData"},
        {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"},
        {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"},
        {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"},
@@ -652,8 +652,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
        {32, nullptr, "ExtendSaveDataFileSystem"},
        {33, nullptr, "DeleteCacheStorage"},
        {34, nullptr, "GetCacheStorageSize"},
-        {35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
-        {51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
+        {51, &FSP_SRV::MountSaveData, "MountSaveData"},
        {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"},
        {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"},
        {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"},
@@ -665,26 +664,21 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
        {64, nullptr, "OpenSaveDataInternalStorageFileSystem"},
        {65, nullptr, "UpdateSaveDataMacForDebug"},
        {66, nullptr, "WriteSaveDataFileSystemExtraData2"},
-        {67, nullptr, "FindSaveDataWithFilter"},
-        {68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"},
        {80, nullptr, "OpenSaveDataMetaFile"},
        {81, nullptr, "OpenSaveDataTransferManager"},
        {82, nullptr, "OpenSaveDataTransferManagerVersion2"},
        {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"},
-        {84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"},
        {100, nullptr, "OpenImageDirectoryFileSystem"},
        {110, nullptr, "OpenContentStorageFileSystem"},
-        {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
        {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"},
        {201, nullptr, "OpenDataStorageByProgramId"},
        {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
-        {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
+        {203, &FSP_SRV::OpenRomStorage, "OpenRomStorage"},
        {400, nullptr, "OpenDeviceOperator"},
        {500, nullptr, "OpenSdCardDetectionEventNotifier"},
        {501, nullptr, "OpenGameCardDetectionEventNotifier"},
        {510, nullptr, "OpenSystemDataUpdateEventNotifier"},
        {511, nullptr, "NotifySystemDataUpdateEvent"},
-        {520, nullptr, "SimulateGameCardDetectionEvent"},
        {600, nullptr, "SetCurrentPosixTime"},
        {601, nullptr, "QuerySaveDataTotalSize"},
        {602, nullptr, "VerifySaveDataFileSystem"},
@@ -723,8 +717,6 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
        {1008, nullptr, "OpenRegisteredUpdatePartition"},
        {1009, nullptr, "GetAndClearMemoryReportInfo"},
        {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
-        {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
-        {1200, nullptr, "OpenMultiCommitManager"},
    };
    // clang-format on
    RegisterHandlers(functions);
@@ -732,7 +724,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {

 FSP_SRV::~FSP_SRV() = default;

-void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_FS, "(STUBBED) called");

    IPC::ResponseBuilder rb{ctx, 2};
@@ -751,7 +743,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
    rb.Push(ResultCode(-1));
 }

-void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_FS, "called");

    IFileSystem filesystem(OpenSDMC().Unwrap());
@@ -761,7 +753,7 @@ void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
    rb.PushIpcInterface<IFileSystem>(std::move(filesystem));
 }

-void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};

    auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
@@ -775,7 +767,7 @@ void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
    rb.Push(RESULT_SUCCESS);
 }

-void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};

    auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
@@ -801,7 +793,7 @@ void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {

 void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem");
-    OpenSaveDataFileSystem(ctx);
+    MountSaveData(ctx);
 }

 void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) {
@@ -889,7 +881,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
    rb.PushIpcInterface<IStorage>(std::move(storage));
 }

-void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
+void FSP_SRV::OpenRomStorage(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};

    auto storage_id = rp.PopRaw<FileSys::StorageId>();
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -19,17 +19,17 @@ public:
    ~FSP_SRV() override;

 private:
-    void SetCurrentProcess(Kernel::HLERequestContext& ctx);
+    void Initialize(Kernel::HLERequestContext& ctx);
    void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx);
-    void OpenSdCardFileSystem(Kernel::HLERequestContext& ctx);
-    void CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx);
-    void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
+    void MountSdCard(Kernel::HLERequestContext& ctx);
+    void CreateSaveData(Kernel::HLERequestContext& ctx);
+    void MountSaveData(Kernel::HLERequestContext& ctx);
    void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
    void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
    void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
    void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
    void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
-    void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
+    void OpenRomStorage(Kernel::HLERequestContext& ctx);

    FileSys::VirtualFile romfs;
 };
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -40,10 +40,10 @@ public:
            {6, nullptr, "CloseContentStorageForcibly"},
            {7, nullptr, "CloseContentMetaDatabaseForcibly"},
            {8, nullptr, "CleanupContentMetaDatabase"},
-            {9, nullptr, "ActivateContentStorage"},
-            {10, nullptr, "InactivateContentStorage"},
-            {11, nullptr, "ActivateContentMetaDatabase"},
-            {12, nullptr, "InactivateContentMetaDatabase"},
+            {9, nullptr, "OpenContentStorage2"},
+            {10, nullptr, "CloseContentStorage"},
+            {11, nullptr, "OpenContentMetaDatabase2"},
+            {12, nullptr, "CloseContentMetaDatabase"},
        };
        // clang-format on

--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -43,7 +43,7 @@ public:
            {11, nullptr, "CalculateApplicationOccupiedSize"},
            {16, nullptr, "PushApplicationRecord"},
            {17, nullptr, "ListApplicationRecordContentMeta"},
-            {19, nullptr, "LaunchApplicationOld"},
+            {19, nullptr, "LaunchApplication"},
            {21, nullptr, "GetApplicationContentPath"},
            {22, nullptr, "TerminateApplication"},
            {23, nullptr, "ResolveApplicationContentPath"},
@@ -96,10 +96,10 @@ public:
            {86, nullptr, "EnableApplicationCrashReport"},
            {87, nullptr, "IsApplicationCrashReportEnabled"},
            {90, nullptr, "BoostSystemMemoryResourceLimit"},
-            {91, nullptr, "DeprecatedLaunchApplication"},
-            {92, nullptr, "GetRunningApplicationProgramId"},
+            {91, nullptr, "Unknown1"},
+            {92, nullptr, "Unknown2"},
            {93, nullptr, "GetMainApplicationProgramIndex"},
-            {94, nullptr, "LaunchApplication"},
+            {94, nullptr, "LaunchApplication2"},
            {95, nullptr, "GetApplicationLaunchInfo"},
            {96, nullptr, "AcquireApplicationLaunchInfo"},
            {97, nullptr, "GetMainApplicationProgramIndex2"},
@@ -163,7 +163,7 @@ public:
            {907, nullptr, "WithdrawApplicationUpdateRequest"},
            {908, nullptr, "ListApplicationRecordInstalledContentMeta"},
            {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"},
-            {910, nullptr, "HasApplicationRecord"},
+            {910, nullptr, "Unknown3"},
            {911, nullptr, "SetPreInstalledApplication"},
            {912, nullptr, "ClearPreInstalledApplicationFlag"},
            {1000, nullptr, "RequestVerifyApplicationDeprecated"},
@@ -219,10 +219,10 @@ public:
            {2015, nullptr, "CompareSystemDeliveryInfo"},
            {2016, nullptr, "ListNotCommittedContentMeta"},
            {2017, nullptr, "CreateDownloadTask"},
-            {2018, nullptr, "GetApplicationDeliveryInfoHash"},
-            {2050, nullptr, "GetApplicationRightsOnClient"},
-            {2100, nullptr, "GetApplicationTerminateResult"},
-            {2101, nullptr, "GetRawApplicationTerminateResult"},
+            {2018, nullptr, "Unknown4"},
+            {2050, nullptr, "Unknown5"},
+            {2100, nullptr, "Unknown6"},
+            {2101, nullptr, "Unknown7"},
            {2150, nullptr, "CreateRightsEnvironment"},
            {2151, nullptr, "DestroyRightsEnvironment"},
            {2152, nullptr, "ActivateRightsEnvironment"},
@@ -237,10 +237,10 @@ public:
            {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"},
            {2190, nullptr, "GetRightsEnvironmentHandleForApplication"},
            {2199, nullptr, "GetRightsEnvironmentCountForDebug"},
-            {2200, nullptr, "GetGameCardApplicationCopyIdentifier"},
-            {2201, nullptr, "GetInstalledApplicationCopyIdentifier"},
-            {2250, nullptr, "RequestReportActiveELicence"},
-            {2300, nullptr, "ListEventLog"},
+            {2200, nullptr, "Unknown8"},
+            {2201, nullptr, "Unknown9"},
+            {2250, nullptr, "Unknown10"},
+            {2300, nullptr, "Unknown11"},
        };
        // clang-format on

@@ -355,7 +355,6 @@ public:
        static const FunctionInfo functions[] = {
            {21, nullptr, "GetApplicationContentPath"},
            {23, nullptr, "ResolveApplicationContentPath"},
-            {93, nullptr, "GetRunningApplicationProgramId"},
        };
        // clang-format on

@@ -390,11 +389,6 @@ public:
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, nullptr, "RequestLinkDevice"},
-            {1, nullptr, "RequestCleanupAllPreInstalledApplications"},
-            {2, nullptr, "RequestCleanupPreInstalledApplication"},
-            {3, nullptr, "RequestSyncRights"},
-            {4, nullptr, "RequestUnlinkDevice"},
-            {5, nullptr, "RequestRevokeAllELicense"},
        };
        // clang-format on

@@ -409,7 +403,7 @@ public:
        static const FunctionInfo functions[] = {
            {100, nullptr, "ResetToFactorySettings"},
            {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
-            {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
+            {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
        };
        // clang-format on

--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3

    auto& instance = Core::System::GetInstance();
    instance.GetPerfStats().EndGameFrame();
-    instance.Renderer().SwapBuffers(framebuffer);
+    instance.GPU().SwapBuffers(framebuffer);
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
    auto& gpu = system_instance.GPU();
    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
    ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushRegion(*cpu_addr, itr->second.size);
+    gpu.InvalidateRegion(*cpu_addr, itr->second.size);

    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
    return 0;
 }

-static void PushGPUEntries(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
-    dma_pusher.Push(std::move(entries));
-    dma_pusher.DispatchCalls();
-}
-
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
        UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
    Memory::ReadBlock(params.address, entries.data(),
                      params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <optional>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -21,6 +22,7 @@
 #include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/perf_stats.h"
 #include "video_core/renderer_base.h"
+#include "video_core/video_core.h"

 namespace Service::NVFlinger {

@@ -28,6 +30,12 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

 NVFlinger::NVFlinger() {
+    // Add the different displays to the list of displays.
+    displays.emplace_back(0, "Default");
+    displays.emplace_back(1, "External");
+    displays.emplace_back(2, "Edid");
+    displays.emplace_back(3, "Internal");
+
    // Schedule the screen composition events
    composition_event =
        CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -46,120 +54,66 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
    nvdrv = std::move(instance);
 }

-std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
-    LOG_DEBUG(Service, "Opening \"{}\" display", name);
+u64 NVFlinger::OpenDisplay(std::string_view name) {
+    LOG_WARNING(Service, "Opening display {}", name);

    // TODO(Subv): Currently we only support the Default display.
    ASSERT(name == "Default");

-    const auto itr = std::find_if(displays.begin(), displays.end(),
-                                  [&](const Display& display) { return display.name == name; });
-    if (itr == displays.end()) {
-        return {};
-    }
+    auto itr = std::find_if(displays.begin(), displays.end(),
+                            [&](const Display& display) { return display.name == name; });
+
+    ASSERT(itr != displays.end());

    return itr->id;
 }

-std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
-    auto* const display = FindDisplay(display_id);
+u64 NVFlinger::CreateLayer(u64 display_id) {
+    auto& display = GetDisplay(display_id);

-    if (display == nullptr) {
-        return {};
-    }
+    ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment");

-    ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
-
-    const u64 layer_id = next_layer_id++;
-    const u32 buffer_queue_id = next_buffer_queue_id++;
+    u64 layer_id = next_layer_id++;
+    u32 buffer_queue_id = next_buffer_queue_id++;
    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
-    display->layers.emplace_back(layer_id, buffer_queue);
+    display.layers.emplace_back(layer_id, buffer_queue);
    buffer_queues.emplace_back(std::move(buffer_queue));
    return layer_id;
 }

-std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
-    const auto* const layer = FindLayer(display_id, layer_id);
-
-    if (layer == nullptr) {
-        return {};
-    }
-
-    return layer->buffer_queue->GetId();
+u32 NVFlinger::GetBufferQueueId(u64 display_id, u64 layer_id) {
+    const auto& layer = GetLayer(display_id, layer_id);
+    return layer.buffer_queue->GetId();
 }

-Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
-    auto* const display = FindDisplay(display_id);
-
-    if (display == nullptr) {
-        return nullptr;
-    }
-
-    return display->vsync_event.readable;
+Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) {
+    return GetDisplay(display_id).vsync_event.readable;
 }

-std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
-    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [&](const auto& queue) { return queue->GetId() == id; });
+std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const {
+    auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
+                            [&](const auto& queue) { return queue->GetId() == id; });

    ASSERT(itr != buffer_queues.end());
    return *itr;
 }

-Display* NVFlinger::FindDisplay(u64 display_id) {
-    const auto itr = std::find_if(displays.begin(), displays.end(),
-                                  [&](const Display& display) { return display.id == display_id; });
+Display& NVFlinger::GetDisplay(u64 display_id) {
+    auto itr = std::find_if(displays.begin(), displays.end(),
+                            [&](const Display& display) { return display.id == display_id; });

-    if (itr == displays.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
+    ASSERT(itr != displays.end());
+    return *itr;
 }

-const Display* NVFlinger::FindDisplay(u64 display_id) const {
-    const auto itr = std::find_if(displays.begin(), displays.end(),
-                                  [&](const Display& display) { return display.id == display_id; });
+Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) {
+    auto& display = GetDisplay(display_id);

-    if (itr == displays.end()) {
-        return nullptr;
-    }
+    auto itr = std::find_if(display.layers.begin(), display.layers.end(),
+                            [&](const Layer& layer) { return layer.id == layer_id; });

-    return &*itr;
-}
-
-Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
-    auto* const display = FindDisplay(display_id);
-
-    if (display == nullptr) {
-        return nullptr;
-    }
-
-    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
-                                  [&](const Layer& layer) { return layer.id == layer_id; });
-
-    if (itr == display->layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
-}
-
-const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
-    const auto* const display = FindDisplay(display_id);
-
-    if (display == nullptr) {
-        return nullptr;
-    }
-
-    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
-                                  [&](const Layer& layer) { return layer.id == layer_id; });
-
-    if (itr == display->layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
+    ASSERT(itr != display.layers.end());
+    return *itr;
 }

 void NVFlinger::Compose() {
@@ -187,11 +141,11 @@ void NVFlinger::Compose() {

            // There was no queued buffer to draw, render previous frame
            system_instance.GetPerfStats().EndGameFrame();
-            system_instance.Renderer().SwapBuffers({});
+            system_instance.GPU().SwapBuffers({});
            continue;
        }

-        const auto& igbp_buffer = buffer->get().igbp_buffer;
+        auto& igbp_buffer = buffer->get().igbp_buffer;

        // Now send the buffer to the GPU for drawing.
        // TODO(Subv): Support more than just disp0. The display device selection is probably based
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,9 +4,7 @@

 #pragma once

-#include <array>
 #include <memory>
-#include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -58,55 +56,35 @@ public:
    /// Sets the NVDrv module instance to use to send buffers to the GPU.
    void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);

-    /// Opens the specified display and returns the ID.
-    ///
-    /// If an invalid display name is provided, then an empty optional is returned.
-    std::optional<u64> OpenDisplay(std::string_view name);
+    /// Opens the specified display and returns the id.
+    u64 OpenDisplay(std::string_view name);

-    /// Creates a layer on the specified display and returns the layer ID.
-    ///
-    /// If an invalid display ID is specified, then an empty optional is returned.
-    std::optional<u64> CreateLayer(u64 display_id);
+    /// Creates a layer on the specified display and returns the layer id.
+    u64 CreateLayer(u64 display_id);

-    /// Finds the buffer queue ID of the specified layer in the specified display.
-    ///
-    /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
-    std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
+    /// Gets the buffer queue id of the specified layer in the specified display.
+    u32 GetBufferQueueId(u64 display_id, u64 layer_id);

    /// Gets the vsync event for the specified display.
-    ///
-    /// If an invalid display ID is provided, then nullptr is returned.
-    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
+    Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id);

-    /// Obtains a buffer queue identified by the ID.
-    std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
+    /// Obtains a buffer queue identified by the id.
+    std::shared_ptr<BufferQueue> GetBufferQueue(u32 id) const;

    /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
    /// finished.
    void Compose();

 private:
-    /// Finds the display identified by the specified ID.
-    Display* FindDisplay(u64 display_id);
+    /// Returns the display identified by the specified id.
+    Display& GetDisplay(u64 display_id);

-    /// Finds the display identified by the specified ID.
-    const Display* FindDisplay(u64 display_id) const;
-
-    /// Finds the layer identified by the specified ID in the desired display.
-    Layer* FindLayer(u64 display_id, u64 layer_id);
-
-    /// Finds the layer identified by the specified ID in the desired display.
-    const Layer* FindLayer(u64 display_id, u64 layer_id) const;
+    /// Returns the layer identified by the specified id in the desired display.
+    Layer& GetLayer(u64 display_id, u64 layer_id);

    std::shared_ptr<Nvidia::Module> nvdrv;

-    std::array<Display, 5> displays{{
-        {0, "Default"},
-        {1, "External"},
-        {2, "Edid"},
-        {3, "Internal"},
-        {4, "Null"},
-    }};
+    std::vector<Display> displays;
    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;

    /// Id to use for the next layer that is created, this counter is shared among all displays.
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -13,7 +13,7 @@ public:
    explicit BootMode() : ServiceFramework{"pm:bm"} {
        static const FunctionInfo functions[] = {
            {0, &BootMode::GetBootMode, "GetBootMode"},
-            {1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"},
+            {1, nullptr, "SetMaintenanceBoot"},
        };
        RegisterHandlers(functions);
    }
@@ -24,19 +24,8 @@ private:

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
-        rb.PushEnum(boot_mode);
+        rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode
    }
-
-    void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Service_PM, "called");
-
-        boot_mode = SystemBootMode::Maintenance;
-
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_SUCCESS);
-    }
-
-    SystemBootMode boot_mode = SystemBootMode::Normal;
 };

 class DebugMonitor final : public ServiceFramework<DebugMonitor> {
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -9,12 +9,7 @@ class ServiceManager;
 }

 namespace Service::PM {
-
-enum class SystemBootMode {
-    Normal,
-    Maintenance,
-};
-
+enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 };
 /// Registers all PM services with the specified service manager.
 void InstallInterfaces(SM::ServiceManager& service_manager);

--- a/src/core/hle/service/psc/psc.cpp
+++ b/src/core/hle/service/psc/psc.cpp
@@ -17,13 +17,13 @@ public:
    explicit PSC_C() : ServiceFramework{"psc:c"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "Initialize"},
-            {1, nullptr, "DispatchRequest"},
-            {2, nullptr, "GetResult"},
-            {3, nullptr, "GetState"},
-            {4, nullptr, "Cancel"},
-            {5, nullptr, "PrintModuleInformation"},
-            {6, nullptr, "GetModuleInformation"},
+            {0, nullptr, "Unknown1"},
+            {1, nullptr, "Unknown2"},
+            {2, nullptr, "Unknown3"},
+            {3, nullptr, "Unknown4"},
+            {4, nullptr, "Unknown5"},
+            {5, nullptr, "Unknown6"},
+            {6, nullptr, "Unknown7"},
        };
        // clang-format on

@@ -39,8 +39,7 @@ public:
            {0, nullptr, "Initialize"},
            {1, nullptr, "GetRequest"},
            {2, nullptr, "Acknowledge"},
-            {3, nullptr, "Finalize"},
-            {4, nullptr, "AcknowledgeEx"},
+            {3, nullptr, "Unknown1"},
        };
        // clang-format on

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -34,7 +34,6 @@ namespace Service::VI {

 constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
 constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};

 struct DisplayInfo {
    /// The name of this particular display.
@@ -525,7 +524,7 @@ private:
        LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                  static_cast<u32>(transaction), flags);

-        auto buffer_queue = nv_flinger->FindBufferQueue(id);
+        auto buffer_queue = nv_flinger->GetBufferQueue(id);

        if (transaction == TransactionId::Connect) {
            IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -559,7 +558,7 @@ private:
                    [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
-                        auto buffer_queue = nv_flinger->FindBufferQueue(id);
+                        auto buffer_queue = nv_flinger->GetBufferQueue(id);
                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
                        ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");

@@ -629,7 +628,7 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);

-        const auto buffer_queue = nv_flinger->FindBufferQueue(id);
+        const auto buffer_queue = nv_flinger->GetBufferQueue(id);

        // TODO(Subv): Find out what this actually is.
        IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -705,14 +704,13 @@ private:
        rb.Push(RESULT_SUCCESS);
    }

-    // This function currently does nothing but return a success error code in
-    // the vi library itself, so do the same thing, but log out the passed in values.
    void SetLayerVisibility(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};
        const u64 layer_id = rp.Pop<u64>();
        const bool visibility = rp.Pop<bool>();

-        LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility);
+        LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id,
+                    visibility);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -839,16 +837,11 @@ private:
                    "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
                    unknown, display, aruid);

-        const auto layer_id = nv_flinger->CreateLayer(display);
-        if (!layer_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
+        const u64 layer_id = nv_flinger->CreateLayer(display);

        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
-        rb.Push(*layer_id);
+        rb.Push(layer_id);
    }

    void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -956,16 +949,9 @@ private:

        ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");

-        const auto display_id = nv_flinger->OpenDisplay(name);
-        if (!display_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
-
        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
-        rb.Push<u64>(*display_id);
+        rb.Push<u64>(nv_flinger->OpenDisplay(name));
    }

    void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1056,21 +1042,10 @@ private:

        LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);

-        const auto display_id = nv_flinger->OpenDisplay(display_name);
-        if (!display_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
+        const u64 display_id = nv_flinger->OpenDisplay(display_name);
+        const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);

-        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
-        if (!buffer_queue_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
-
-        NativeWindow native_window{*buffer_queue_id};
+        NativeWindow native_window{buffer_queue_id};
        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
        rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1086,24 +1061,13 @@ private:

        // TODO(Subv): What's the difference between a Stray and a Managed layer?

-        const auto layer_id = nv_flinger->CreateLayer(display_id);
-        if (!layer_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
+        const u64 layer_id = nv_flinger->CreateLayer(display_id);
+        const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);

-        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
-        if (!buffer_queue_id) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
-
-        NativeWindow native_window{*buffer_queue_id};
+        NativeWindow native_window{buffer_queue_id};
        IPC::ResponseBuilder rb{ctx, 6};
        rb.Push(RESULT_SUCCESS);
-        rb.Push(*layer_id);
+        rb.Push(layer_id);
        rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
    }

@@ -1123,12 +1087,7 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);

-        const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
-        if (!vsync_event) {
-            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ERR_NOT_FOUND);
-            return;
-        }
+        const auto vsync_event = nv_flinger->GetVsyncEvent(display_id);

        IPC::ResponseBuilder rb{ctx, 2, 1};
        rb.Push(RESULT_SUCCESS);
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -178,8 +178,6 @@ public:

    /**
     * Get the banner (typically banner section) of the application
-     * In the context of NX, this is the animation that displays in the bottom right of the screen
-     * when a game boots. Stored in GIF format.
     * @param buffer Reference to buffer to store data
     * @return ResultStatus result of function
     */
@@ -189,8 +187,6 @@ public:

    /**
     * Get the logo (typically logo section) of the application
-     * In the context of NX, this is the static image that displays in the top left of the screen
-     * when a game boots. Stored in JPEG format.
     * @param buffer Reference to buffer to store data
     * @return ResultStatus result of function
     */
--- a/src/core/loader/nax.cpp
+++ b/src/core/loader/nax.cpp
@@ -79,13 +79,4 @@ u64 AppLoader_NAX::ReadRomFSIVFCOffset() const {
 ResultStatus AppLoader_NAX::ReadProgramId(u64& out_program_id) {
    return nca_loader->ReadProgramId(out_program_id);
 }
-
-ResultStatus AppLoader_NAX::ReadBanner(std::vector<u8>& buffer) {
-    return nca_loader->ReadBanner(buffer);
-}
-
-ResultStatus AppLoader_NAX::ReadLogo(std::vector<u8>& buffer) {
-    return nca_loader->ReadLogo(buffer);
-}
-
 } // namespace Loader
--- a/src/core/loader/nax.h
+++ b/src/core/loader/nax.h
@@ -39,9 +39,6 @@ public:
    u64 ReadRomFSIVFCOffset() const override;
    ResultStatus ReadProgramId(u64& out_program_id) override;

-    ResultStatus ReadBanner(std::vector<u8>& buffer) override;
-    ResultStatus ReadLogo(std::vector<u8>& buffer) override;
-
 private:
    std::unique_ptr<FileSys::NAX> nax;
    std::unique_ptr<AppLoader_NCA> nca_loader;
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -84,23 +84,4 @@ ResultStatus AppLoader_NCA::ReadProgramId(u64& out_program_id) {
    return ResultStatus::Success;
 }

-ResultStatus AppLoader_NCA::ReadBanner(std::vector<u8>& buffer) {
-    if (nca == nullptr || nca->GetStatus() != ResultStatus::Success)
-        return ResultStatus::ErrorNotInitialized;
-    const auto logo = nca->GetLogoPartition();
-    if (logo == nullptr)
-        return ResultStatus::ErrorNoIcon;
-    buffer = logo->GetFile("StartupMovie.gif")->ReadAllBytes();
-    return ResultStatus::Success;
-}
-
-ResultStatus AppLoader_NCA::ReadLogo(std::vector<u8>& buffer) {
-    if (nca == nullptr || nca->GetStatus() != ResultStatus::Success)
-        return ResultStatus::ErrorNotInitialized;
-    const auto logo = nca->GetLogoPartition();
-    if (logo == nullptr)
-        return ResultStatus::ErrorNoIcon;
-    buffer = logo->GetFile("NintendoLogo.png")->ReadAllBytes();
-    return ResultStatus::Success;
-}
 } // namespace Loader
--- a/src/core/loader/nca.h
+++ b/src/core/loader/nca.h
@@ -39,9 +39,6 @@ public:
    u64 ReadRomFSIVFCOffset() const override;
    ResultStatus ReadProgramId(u64& out_program_id) override;

-    ResultStatus ReadBanner(std::vector<u8>& buffer) override;
-    ResultStatus ReadLogo(std::vector<u8>& buffer) override;
-
 private:
    std::unique_ptr<FileSys::NCA> nca;
    std::unique_ptr<AppLoader_DeconstructedRomDirectory> directory_loader;
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -166,13 +166,4 @@ ResultStatus AppLoader_NSP::ReadManualRomFS(FileSys::VirtualFile& file) {
    file = nca->GetRomFS();
    return file == nullptr ? ResultStatus::ErrorNoRomFS : ResultStatus::Success;
 }
-
-ResultStatus AppLoader_NSP::ReadBanner(std::vector<u8>& buffer) {
-    return secondary_loader->ReadBanner(buffer);
-}
-
-ResultStatus AppLoader_NSP::ReadLogo(std::vector<u8>& buffer) {
-    return secondary_loader->ReadLogo(buffer);
-}
-
 } // namespace Loader
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -46,9 +46,6 @@ public:
    ResultStatus ReadControlData(FileSys::NACP& nacp) override;
    ResultStatus ReadManualRomFS(FileSys::VirtualFile& file) override;

-    ResultStatus ReadBanner(std::vector<u8>& buffer) override;
-    ResultStatus ReadLogo(std::vector<u8>& buffer) override;
-
 private:
    std::unique_ptr<FileSys::NSP> nsp;
    std::unique_ptr<AppLoader> secondary_loader;
--- a/src/core/loader/xci.cpp
+++ b/src/core/loader/xci.cpp
@@ -137,12 +137,4 @@ ResultStatus AppLoader_XCI::ReadManualRomFS(FileSys::VirtualFile& file) {
    return file == nullptr ? ResultStatus::ErrorNoRomFS : ResultStatus::Success;
 }

-ResultStatus AppLoader_XCI::ReadBanner(std::vector<u8>& buffer) {
-    return nca_loader->ReadBanner(buffer);
-}
-
-ResultStatus AppLoader_XCI::ReadLogo(std::vector<u8>& buffer) {
-    return nca_loader->ReadLogo(buffer);
-}
-
 } // namespace Loader
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -46,9 +46,6 @@ public:
    ResultStatus ReadControlData(FileSys::NACP& control) override;
    ResultStatus ReadManualRomFS(FileSys::VirtualFile& file) override;

-    ResultStatus ReadBanner(std::vector<u8>& buffer) override;
-    ResultStatus ReadLogo(std::vector<u8>& buffer) override;
-
 private:
    std::unique_ptr<FileSys::XCI> xci;
    std::unique_ptr<AppLoader_NCA> nca_loader;
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -166,9 +166,6 @@ T Read(const VAddr vaddr) {
        return value;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
    case PageType::Unmapped:
@@ -199,9 +196,6 @@ void Write(const VAddr vaddr, const T data) {
        return;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
    case PageType::Unmapped:
@@ -357,16 +351,17 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
        const VAddr overlap_end = std::min(end, region_end);
        const VAddr overlap_size = overlap_end - overlap_start;

-        auto& rasterizer = system_instance.Renderer().Rasterizer();
+        auto& gpu = system_instance.GPU();
        switch (mode) {
        case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
+            gpu.FlushRegion(overlap_start, overlap_size);
            break;
        case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
+            gpu.InvalidateRegion(overlap_start, overlap_size);
            break;
        case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
+            gpu.FlushRegion(overlap_start, overlap_size);
+            gpu.InvalidateRegion(overlap_start, overlap_size);
            break;
        }
    };
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -74,33 +74,4 @@ void Apply() {
    Service::HID::ReloadInputDevices();
 }

-template <typename T>
-void LogSetting(const std::string& name, const T& value) {
-    LOG_INFO(Config, "{}: {}", name, value);
-}
-
-void LogSettings() {
-    LOG_INFO(Config, "yuzu Configuration:");
-    LogSetting("System_UseDockedMode", Settings::values.use_docked_mode);
-    LogSetting("System_EnableNfc", Settings::values.enable_nfc);
-    LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
-    LogSetting("System_CurrentUser", Settings::values.current_user);
-    LogSetting("System_LanguageIndex", Settings::values.language_index);
-    LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
-    LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
-    LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
-    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
-    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
-    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
-    LogSetting("Audio_OutputEngine", Settings::values.sink_id);
-    LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
-    LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
-    LogSetting("DataStorage_UseVirtualSd", Settings::values.use_virtual_sd);
-    LogSetting("DataStorage_NandDir", Settings::values.nand_dir);
-    LogSetting("DataStorage_SdmcDir", Settings::values.sdmc_dir);
-    LogSetting("Debugging_UseGdbstub", Settings::values.use_gdbstub);
-    LogSetting("Debugging_GdbstubPort", Settings::values.gdbstub_port);
-    LogSetting("Debugging_ProgramArgs", Settings::values.program_args);
-}
-
 } // namespace Settings
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -392,6 +392,7 @@ struct Values {
    bool use_frame_limit;
    u16 frame_limit;
    bool use_accurate_gpu_emulation;
+    bool use_asynchronous_gpu_emulation;

    float bg_red;
    float bg_green;
@@ -425,5 +426,4 @@ struct Values {
 } extern values;

 void Apply();
-void LogSettings();
 } // namespace Settings
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -160,6 +160,8 @@ TelemetrySession::TelemetrySession() {
    AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
             Settings::values.use_accurate_gpu_emulation);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+             Settings::values.use_asynchronous_gpu_emulation);
    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
             Settings::values.use_docked_mode);
 }
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,8 @@ add_library(video_core STATIC
    engines/shader_header.h
    gpu.cpp
    gpu.h
+    gpu_thread.cpp
+    gpu_thread.h
    macro_interpreter.cpp
    macro_interpreter.h
    memory_manager.cpp
@@ -59,35 +61,6 @@ add_library(video_core STATIC
    renderer_opengl/renderer_opengl.h
    renderer_opengl/utils.cpp
    renderer_opengl/utils.h
-    shader/decode/arithmetic.cpp
-    shader/decode/arithmetic_immediate.cpp
-    shader/decode/bfe.cpp
-    shader/decode/bfi.cpp
-    shader/decode/shift.cpp
-    shader/decode/arithmetic_integer.cpp
-    shader/decode/arithmetic_integer_immediate.cpp
-    shader/decode/arithmetic_half.cpp
-    shader/decode/arithmetic_half_immediate.cpp
-    shader/decode/ffma.cpp
-    shader/decode/hfma2.cpp
-    shader/decode/conversion.cpp
-    shader/decode/memory.cpp
-    shader/decode/float_set_predicate.cpp
-    shader/decode/integer_set_predicate.cpp
-    shader/decode/half_set_predicate.cpp
-    shader/decode/predicate_set_register.cpp
-    shader/decode/predicate_set_predicate.cpp
-    shader/decode/register_set_predicate.cpp
-    shader/decode/float_set.cpp
-    shader/decode/integer_set.cpp
-    shader/decode/half_set.cpp
-    shader/decode/video.cpp
-    shader/decode/xmad.cpp
-    shader/decode/other.cpp
-    shader/decode.cpp
-    shader/shader_ir.cpp
-    shader/shader_ir.h
-    shader/track.cpp
    surface.cpp
    surface.h
    textures/astc.cpp
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -17,6 +17,13 @@ DmaPusher::~DmaPusher() = default;

 MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));

+void DmaPusher::QueuePendingCalls() {
+    for (auto& entry : dma_writebuffer) {
+        dma_readbuffer.push(std::move(entry));
+    }
+    dma_writebuffer.clear();
+}
+
 void DmaPusher::DispatchCalls() {
    MICROPROFILE_SCOPE(DispatchCalls);

@@ -89,9 +96,9 @@ bool DmaPusher::Step() {
                break;
            }
        }
-    } else if (ib_enable && !dma_pushbuffer.empty()) {
+    } else if (ib_enable && !dma_readbuffer.empty()) {
        // Current pushbuffer empty, but we have more IB entries to read
-        const CommandList& command_list{dma_pushbuffer.front()};
+        const CommandList& command_list{dma_readbuffer.front()};
        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
        dma_get = command_list_header.addr;
        dma_put = dma_get + command_list_header.size * sizeof(u32);
@@ -99,7 +106,7 @@ bool DmaPusher::Step() {

        if (dma_pushbuffer_subindex >= command_list.size()) {
            // We've gone through the current list, remove it from the queue
-            dma_pushbuffer.pop();
+            dma_readbuffer.pop();
            dma_pushbuffer_subindex = 0;
        }
    } else {
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -61,9 +61,10 @@ public:
    ~DmaPusher();

    void Push(CommandList&& entries) {
-        dma_pushbuffer.push(std::move(entries));
+        dma_writebuffer.push_back(std::move(entries));
    }

+    void QueuePendingCalls();
    void DispatchCalls();

 private:
@@ -75,15 +76,16 @@ private:

    GPU& gpu;

-    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
-    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer
+    std::vector<CommandList> dma_writebuffer;
+    std::queue<CommandList> dma_readbuffer;
+    std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer

    struct DmaState {
        u32 method;            ///< Current method
        u32 subchannel;        ///< Current subchannel
        u32 method_count;      ///< Current method count
        u32 length_pending;    ///< Large NI command length pending
-        bool non_incrementing; ///< Current command's NI flag
+        bool non_incrementing; ///< Current command's NI flag
    };

    DmaState dma_state{};
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -46,7 +46,7 @@ void KeplerMemory::ProcessData(u32 data) {
    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
    // We do this before actually writing the new data because the destination address might contain
    // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(dest_address, sizeof(u32));
+    Core::System::GetInstance().GPU().InvalidateRegion(dest_address, sizeof(u32));

    Memory::Write32(dest_address, data);
    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -37,7 +37,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
        regs.viewports[viewport].depth_range_near = 0.0f;
        regs.viewports[viewport].depth_range_far = 1.0f;
    }
-
    // Doom and Bomberman seems to use the uninitialized registers and just enable blend
    // so initialize blend registers with sane values
    regs.blend.equation_rgb = Regs::Blend::Equation::Add;
@@ -67,7 +66,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
    regs.stencil_back_func_func = Regs::ComparisonOp::Always;
    regs.stencil_back_func_mask = 0xFFFFFFFF;
    regs.stencil_back_mask = 0xFFFFFFFF;
-
    // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
    // register carrying a default value. Assume it's OpenGL's default (1).
    regs.point_size = 1.0f;
@@ -80,9 +78,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
        regs.color_mask[color_mask].B.Assign(1);
        regs.color_mask[color_mask].A.Assign(1);
    }
-
-    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
-    regs.rt_separate_frag_data = 1;
 }

 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
@@ -140,25 +135,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {

    if (regs.reg_array[method_call.method] != method_call.argument) {
        regs.reg_array[method_call.method] = method_call.argument;
-        // Color buffers
-        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
-        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
-        }
-
-        // Zeta buffer
-        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
-            dirty_flags.zeta_buffer = true;
-        }
-
        // Shader
        constexpr u32 shader_registers_count =
            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1089,17 +1089,12 @@ public:
    MemoryManager& memory_manager;

    struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
        bool shaders = true;

        bool vertex_attrib_format = true;
        u32 vertex_array = 0xFFFFFFFF;

        void OnMemoryWrite() {
-            color_buffer = 0xFF;
-            zeta_buffer = true;
            shaders = true;
            vertex_array = 0xFFFFFFFF;
        }
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -87,12 +87,12 @@ void MaxwellDMA::HandleCopy() {
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        rasterizer.FlushRegion(source_cpu, src_size);
+        Core::System::GetInstance().GPU().FlushRegion(source_cpu, src_size);

        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu, dst_size);
+        Core::System::GetInstance().GPU().InvalidateRegion(dest_cpu, dst_size);
    };

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -208,8 +208,6 @@ enum class UniformType : u64 {
    SignedShort = 3,
    Single = 4,
    Double = 5,
-    Quad = 6,
-    UnsignedQuad = 7,
 };

 enum class StoreType : u64 {
@@ -217,9 +215,9 @@ enum class StoreType : u64 {
    Signed8 = 1,
    Unsigned16 = 2,
    Signed16 = 3,
-    Bits32 = 4,
-    Bits64 = 5,
-    Bits128 = 6,
+    Bytes32 = 4,
+    Bytes64 = 5,
+    Bytes128 = 6,
 };

 enum class IMinMaxExchange : u64 {
@@ -399,10 +397,6 @@ struct IpaMode {
    bool operator!=(const IpaMode& a) const {
        return !operator==(a);
    }
-    bool operator<(const IpaMode& a) const {
-        return std::tie(interpolation_mode, sampling_mode) <
-               std::tie(a.interpolation_mode, a.sampling_mode);
-    }
 };

 enum class SystemVariable : u64 {
@@ -650,7 +644,6 @@ union Instruction {
            BitField<37, 2, HalfPrecision> precision;
            BitField<32, 1, u64> saturate;

-            BitField<31, 1, u64> negate_b;
            BitField<30, 1, u64> negate_c;
            BitField<35, 2, HalfType> type_c;
        } rr;
@@ -786,12 +779,6 @@ union Instruction {
        BitField<44, 2, u64> unknown;
    } st_l;

-    union {
-        BitField<48, 3, UniformType> type;
-        BitField<46, 2, u64> cache_mode;
-        BitField<20, 24, s64> immediate_offset;
-    } ldg;
-
    union {
        BitField<0, 3, u64> pred0;
        BitField<3, 3, u64> pred3;
@@ -981,10 +968,6 @@ union Instruction {
            }
            return false;
        }
-
-        bool IsComponentEnabled(std::size_t component) const {
-            return ((1ULL << component) & component_mask) != 0;
-        }
    } txq;

    union {
@@ -1252,19 +1235,11 @@ union Instruction {
    union {
        BitField<20, 14, u64> offset;
        BitField<34, 5, u64> index;
-
-        u64 GetOffset() const {
-            return offset * 4;
-        }
    } cbuf34;

    union {
        BitField<20, 16, s64> offset;
        BitField<36, 5, u64> index;
-
-        s64 GetOffset() const {
-            return offset;
-        }
    } cbuf36;

    // Unsure about the size of this one.
@@ -1456,7 +1431,6 @@ public:
        PredicateSetRegister,
        RegisterSetPredicate,
        Conversion,
-        Video,
        Xmad,
        Unknown,
    };
@@ -1588,8 +1562,8 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
-            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
-            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
+            INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
+            INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -106,7 +106,7 @@ struct Header {
        } ps;
    };

-    u64 GetLocalMemorySize() const {
+    u64 GetLocalMemorySize() {
        return (common1.shader_local_memory_low_size |
                (common2.shader_local_memory_high_size << 24));
    }
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,15 +3,15 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "core/core_timing.h"
-#include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"

 namespace Tegra {

@@ -26,7 +26,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
    UNREACHABLE();
 }

-GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(VideoCore::RendererBase& renderer) : renderer{renderer} {
+    auto& rasterizer{renderer.Rasterizer()};
    memory_manager = std::make_unique<Tegra::MemoryManager>();
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
@@ -34,6 +35,10 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
    kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
+
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread = std::make_unique<VideoCore::GPUThread>(renderer, *dma_pusher);
+    }
 }

 GPU::~GPU() = default;
@@ -62,6 +67,41 @@ const DmaPusher& GPU::DmaPusher() const {
    return *dma_pusher;
 }

+void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->PushGPUEntries(std::move(entries));
+    } else {
+        dma_pusher->Push(std::move(entries));
+        dma_pusher->QueuePendingCalls();
+        dma_pusher->DispatchCalls();
+    }
+}
+
+void GPU::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->SwapBuffers(std::move(framebuffer));
+    } else {
+        renderer.SwapBuffers(std::move(framebuffer));
+    }
+}
+
+void GPU::FlushRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->FlushRegion(addr, size);
+    } else {
+        renderer.Rasterizer().FlushRegion(addr, size);
+    }
+}
+
+void GPU::InvalidateRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->InvalidateRegion(addr, size);
+    } else {
+        renderer.Rasterizer().InvalidateRegion(addr, size);
+    }
+}
+
 u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
    ASSERT(format != RenderTargetFormat::NONE);

@@ -126,36 +166,9 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
    }
 }

-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
 enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    Unk2c = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    Unk70 = 0x1C,
-    Unk74 = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};
-
-enum class GpuSemaphoreOperation {
-    AcquireEqual = 0x1,
-    WriteLong = 0x2,
-    AcquireGequal = 0x4,
-    AcquireMask = 0x8,
+    BindObject = 0,
+    CountBufferMethods = 0x40,
 };

 void GPU::CallMethod(const MethodCall& method_call) {
@@ -164,78 +177,20 @@ void GPU::CallMethod(const MethodCall& method_call) {

    ASSERT(method_call.subchannel < bound_engines.size());

-    if (ExecuteMethodOnEngine(method_call)) {
-        CallEngineMethod(method_call);
-    } else {
-        CallPullerMethod(method_call);
+    if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
+        // Bind the current subchannel to the desired engine id.
+        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+                  method_call.argument);
+        bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
+        return;
    }
-}

-bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
-    const auto method = static_cast<BufferMethods>(method_call.method);
-    return method >= BufferMethods::NonPullerMethods;
-}
+    if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+        // TODO(Subv): Research and implement these methods.
+        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+        return;
+    }

-void GPU::CallPullerMethod(const MethodCall& method_call) {
-    regs.reg_array[method_call.method] = method_call.argument;
-    const auto method = static_cast<BufferMethods>(method_call.method);
-
-    switch (method) {
-    case BufferMethods::BindObject: {
-        ProcessBindMethod(method_call);
-        break;
-    }
-    case BufferMethods::Nop:
-    case BufferMethods::SemaphoreAddressHigh:
-    case BufferMethods::SemaphoreAddressLow:
-    case BufferMethods::SemaphoreSequence:
-    case BufferMethods::RefCnt:
-        break;
-    case BufferMethods::SemaphoreTrigger: {
-        ProcessSemaphoreTriggerMethod();
-        break;
-    }
-    case BufferMethods::NotifyIntr: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
-        break;
-    }
-    case BufferMethods::WrcacheFlush: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
-        break;
-    }
-    case BufferMethods::Unk28: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
-        break;
-    }
-    case BufferMethods::Unk2c: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
-        break;
-    }
-    case BufferMethods::SemaphoreAcquire: {
-        ProcessSemaphoreAcquire();
-        break;
-    }
-    case BufferMethods::SemaphoreRelease: {
-        ProcessSemaphoreRelease();
-        break;
-    }
-    case BufferMethods::Yield: {
-        // TODO(Kmather73): Research and implement this method.
-        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-        break;
-    }
-    default:
-        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
-                  static_cast<u32>(method));
-        break;
-    }
-}
-
-void GPU::CallEngineMethod(const MethodCall& method_call) {
    const EngineID engine = bound_engines[method_call.subchannel];

    switch (engine) {
@@ -259,76 +214,4 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
    }
 }

-void GPU::ProcessBindMethod(const MethodCall& method_call) {
-    // Bind the current subchannel to the desired engine id.
-    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-              method_call.argument);
-    bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
-}
-
-void GPU::ProcessSemaphoreTriggerMethod() {
-    const auto semaphoreOperationMask = 0xF;
-    const auto op =
-        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-    if (op == GpuSemaphoreOperation::WriteLong) {
-        auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-        struct Block {
-            u32 sequence;
-            u32 zeros = 0;
-            u64 timestamp;
-        };
-
-        Block block{};
-        block.sequence = regs.semaphore_sequence;
-        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
-        // CoreTiming
-        block.timestamp = CoreTiming::GetTicks();
-        Memory::WriteBlock(*address, &block, sizeof(block));
-    } else {
-        const auto address =
-            memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-        const u32 word = Memory::Read32(*address);
-        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-            (op == GpuSemaphoreOperation::AcquireGequal &&
-             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-            // Nothing to do in this case
-        } else {
-            regs.acquire_source = true;
-            regs.acquire_value = regs.semaphore_sequence;
-            if (op == GpuSemaphoreOperation::AcquireEqual) {
-                regs.acquire_active = true;
-                regs.acquire_mode = false;
-            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                regs.acquire_active = true;
-                regs.acquire_mode = true;
-            } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                // semaphore_sequence, gives a non-0 result
-                LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
-            } else {
-                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-            }
-        }
-    }
-}
-
-void GPU::ProcessSemaphoreRelease() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    Memory::Write32(*address, regs.semaphore_release);
-}
-
-void GPU::ProcessSemaphoreAcquire() {
-    const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
-    const u32 word = Memory::Read32(*address);
-    const auto value = regs.semaphore_acquire;
-    if (word != value) {
-        regs.acquire_active = true;
-        regs.acquire_value = value;
-        // TODO(kemathe73) figure out how to do the acquire_timeout
-        regs.acquire_mode = false;
-        regs.acquire_source = false;
-    }
-}
-
 } // namespace Tegra
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,8 +13,9 @@
 #include "video_core/memory_manager.h"

 namespace VideoCore {
-class RasterizerInterface;
-}
+class GPUThread;
+class RendererBase;
+} // namespace VideoCore

 namespace Tegra {

@@ -117,7 +118,7 @@ enum class EngineID {

 class GPU final {
 public:
-    explicit GPU(VideoCore::RasterizerInterface& rasterizer);
+    explicit GPU(VideoCore::RendererBase& renderer);
    ~GPU();

    struct MethodCall {
@@ -156,49 +157,23 @@ public:
    /// Returns a const reference to the GPU DMA pusher.
    const Tegra::DmaPusher& DmaPusher() const;

-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x100;
+    /// Push GPU command entries to be processed
+    void PushGPUEntries(Tegra::CommandList&& entries);

-        union {
-            struct {
-                INSERT_PADDING_WORDS(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);

-                    GPUVAddr SmaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } smaphore_address;
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);

-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS(0xC);
-
-                // The puser and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                INSERT_PADDING_WORDS(0xE4);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);

 private:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
    std::unique_ptr<Tegra::MemoryManager> memory_manager;
+    std::unique_ptr<VideoCore::GPUThread> gpu_thread;

    /// Mapping of command subchannels to their bound engine ids.
    std::array<EngineID, 8> bound_engines = {};
@@ -214,36 +189,7 @@ private:
    /// Inline memory engine
    std::unique_ptr<Engines::KeplerMemory> kepler_memory;

-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    // Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-    // Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-    // Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+    VideoCore::RendererBase& renderer;
 };

-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(smaphore_address, 0x4);
-ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-ASSERT_REG_POSITION(reference_count, 0x14);
-ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-ASSERT_REG_POSITION(semaphore_release, 0x1B);
-
-ASSERT_REG_POSITION(acquire_mode, 0x100);
-ASSERT_REG_POSITION(acquire_source, 0x101);
-ASSERT_REG_POSITION(acquire_active, 0x102);
-ASSERT_REG_POSITION(acquire_timeout, 0x103);
-ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
 } // namespace Tegra
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,156 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/frontend/scope_acquire_window_context.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace {
+/// Entry point of the GPU worker thread. Sleeps until the CPU side signals pending work through
+/// `state`, then runs the queued cache maintenance, dispatches pending DMA calls and performs a
+/// pending buffer swap, in that order. Returns once `state.is_running` has been cleared.
+void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+               VideoCore::GPUThreadState& state) {
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    // Shutdown is detected inside the locked section below, so is_running is never read without
+    // holding signal_mutex.
+    while (true) {
+        bool is_dma_pending{};
+        bool is_swapbuffers_pending{};
+
+        {
+            // Wait for CPU thread to send GPU commands
+            std::unique_lock<std::mutex> lock{state.signal_mutex};
+            state.signal_condition.wait(lock, [&] {
+                return state.is_dma_pending || state.is_swapbuffers_pending || !state.is_running;
+            });
+
+            if (!state.is_running) {
+                return;
+            }
+
+            // Snapshot the request flags so they can be acted upon after the lock is released
+            is_dma_pending = state.is_dma_pending;
+            is_swapbuffers_pending = state.is_swapbuffers_pending;
+
+            if (is_dma_pending) {
+                dma_pusher.QueuePendingCalls();
+                state.is_dma_pending = false;
+            }
+        }
+
+        {
+            // Cache management
+            std::lock_guard<std::recursive_mutex> lock{state.cache_mutex};
+
+            for (const auto& region : state.flush_regions) {
+                renderer.Rasterizer().FlushRegion(region.addr, region.size);
+            }
+
+            for (const auto& region : state.invalidate_regions) {
+                renderer.Rasterizer().InvalidateRegion(region.addr, region.size);
+            }
+
+            state.flush_regions.clear();
+            state.invalidate_regions.clear();
+        }
+
+        if (is_dma_pending) {
+            // Process pending DMA pushbuffer commands
+            std::lock_guard<std::mutex> lock{state.running_mutex};
+            dma_pusher.DispatchCalls();
+        }
+
+        if (is_swapbuffers_pending) {
+            // Process pending SwapBuffers
+            renderer.SwapBuffers(state.pending_swapbuffers_config);
+
+            {
+                // Clear the flag under signal_mutex. GPUThread::SwapBuffers tests it under that
+                // mutex, so an unlocked store could land between its predicate check and its
+                // wait, and the notification below would be lost.
+                std::lock_guard<std::mutex> lock{state.signal_mutex};
+                state.is_swapbuffers_pending = false;
+            }
+
+            state.signal_condition.notify_one();
+        }
+    }
+}
+} // Anonymous namespace
+
+namespace VideoCore {
+
+/// Starts the worker thread, which services requests for `renderer` and `dma_pusher`.
+GPUThread::GPUThread(RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : dma_pusher{dma_pusher} {
+    thread = std::make_unique<std::thread>(RunThread, std::ref(renderer), std::ref(dma_pusher),
+                                           std::ref(state));
+}
+
+/// Requests the worker thread to stop and blocks until it has exited.
+GPUThread::~GPUThread() {
+    {
+        // Notify GPU thread that a shutdown is pending
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.is_running = false;
+    }
+
+    state.signal_condition.notify_one();
+    thread->join();
+}
+
+/// Hands `entries` to the DMA pusher and wakes the worker thread; empty lists are ignored.
+void GPUThread::PushGPUEntries(Tegra::CommandList&& entries) {
+    if (entries.empty()) {
+        return;
+    }
+
+    {
+        // Notify GPU thread that data is available
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        dma_pusher.Push(std::move(entries));
+        state.is_dma_pending = true;
+    }
+
+    state.signal_condition.notify_one();
+}
+
+/// Asks the worker thread to present `framebuffer` and blocks until it has done so.
+void GPUThread::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+
+    {
+        // Notify GPU thread that we should SwapBuffers
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.pending_swapbuffers_config = framebuffer;
+        state.is_swapbuffers_pending = true;
+    }
+
+    state.signal_condition.notify_one();
+
+    {
+        // Wait for SwapBuffers
+        std::unique_lock<std::mutex> lock{state.signal_mutex};
+        state.signal_condition.wait(lock, [this] { return !state.is_swapbuffers_pending; });
+    }
+}
+
+/// Queues a flush of the region; it is carried out by the worker thread on its next wake-up.
+void GPUThread::FlushRegion(VAddr addr, u64 size) {
+    std::lock_guard<std::recursive_mutex> lock{state.cache_mutex};
+    state.flush_regions.push_back({addr, size});
+}
+
+/// Queues an invalidation of the region; it is carried out by the worker thread on its next
+/// wake-up.
+void GPUThread::InvalidateRegion(VAddr addr, u64 size) {
+    std::lock_guard<std::recursive_mutex> lock{state.cache_mutex};
+    state.invalidate_regions.push_back({addr, size});
+}
+
+} // namespace VideoCore
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+struct FramebufferConfig;
+}
+
+namespace VideoCore {
+
+class RendererBase;
+
+/// State shared between GPUThread and the worker thread it spawns.
+struct GPUThreadState final {
+    // Request and shutdown flags. They are tested by signal_condition waiters under
+    // signal_mutex, so they must only be modified while holding that mutex.
+    bool is_running{true};
+    bool is_dma_pending{};
+    bool is_swapbuffers_pending{};
+    // NOTE(review): std::optional needs the complete FramebufferConfig type, which the forward
+    // declaration above does not provide; includers currently have to pull in its definition
+    // first - confirm and consider including the defining header here.
+    std::optional<Tegra::FramebufferConfig> pending_swapbuffers_config;
+    std::condition_variable signal_condition;
+    std::condition_variable running_condition;
+    std::mutex signal_mutex;
+    /// Held by the worker thread while it dispatches DMA calls.
+    std::mutex running_mutex;
+    /// Guards flush_regions and invalidate_regions.
+    std::recursive_mutex cache_mutex;
+
+    /// Address range queued for cache maintenance.
+    struct MemoryRegion final {
+        const VAddr addr;
+        const u64 size;
+    };
+
+    std::vector<MemoryRegion> flush_regions;
+    std::vector<MemoryRegion> invalidate_regions;
+};
+
+/// Owns the GPU worker thread and forwards work to it.
+class GPUThread final {
+public:
+    explicit GPUThread(RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    ~GPUThread();
+
+    /// Push GPU command entries to be processed
+    void PushGPUEntries(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+private:
+    GPUThreadState state;
+    std::unique_ptr<std::thread> thread;
+    Tegra::DmaPusher& dma_pusher;
+};
+
+} // namespace VideoCore
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,7 +4,6 @@

 #pragma once

-#include <functional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -12,14 +11,6 @@

 namespace VideoCore {

-enum class LoadCallbackStage {
-    Prepare,
-    Decompile,
-    Build,
-    Complete,
-};
-using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
-
 class RasterizerInterface {
 public:
    virtual ~RasterizerInterface() {}
@@ -39,16 +30,17 @@ public:
    /// Notify rasterizer that any caches of the specified region should be invalidated
    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;

-    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
-
    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
        return false;
    }

+    /// Attempt to use a faster method to fill a region
+    virtual bool AccelerateFill(const void* config) {
+        return false;
+    }
+
    /// Attempt to use a faster method to display the framebuffer to screen
    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                                   u32 pixel_stride) {
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -14,7 +14,7 @@
 namespace OpenGL {

 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
-    : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
+    : RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}

 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                      std::size_t alignment, bool cache) {
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,13 +4,8 @@

 #include <glad/glad.h>

-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
 #include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/utils.h"

 namespace OpenGL {
@@ -23,72 +18,7 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
 }

-void CachedGlobalRegion::Reload(u32 size_) {
-    constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
-
-    size = size_;
-    if (size > max_size) {
-        size = max_size;
-        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
-                     max_size);
-    }
-
-    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
-}
-
-GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
-    const auto search{reserve.find(addr)};
-    if (search == reserve.end()) {
-        return {};
-    }
-    return search->second;
-}
-
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
-    GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
-    if (!region) {
-        // No reserved surface available, create a new one and reserve it
-        region = std::make_shared<CachedGlobalRegion>(addr, size);
-        ReserveGlobalRegion(region);
-    }
-    region->Reload(size);
-    return region;
-}
-
-void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
-    reserve[region->GetAddr()] = region;
-}
-
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {}

-GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
-    const GLShader::GlobalMemoryEntry& global_region,
-    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
-
-    auto& gpu{Core::System::GetInstance().GPU()};
-    const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
-    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
-        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
-    ASSERT(cbuf_addr);
-
-    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
-    const auto size = Memory::Read32(*cbuf_addr + 8);
-    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
-    ASSERT(actual_addr);
-
-    // Look up global region in the cache based on address
-    GlobalRegion region = TryGet(*actual_addr);
-
-    if (!region) {
-        // No global region found - create a new one
-        region = GetUncachedGlobalRegion(*actual_addr, size);
-        Register(region);
-    }
-
-    return region;
-}
-
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,13 +5,9 @@
 #pragma once

 #include <memory>
-#include <unordered_map>
-
 #include <glad/glad.h>

-#include "common/assert.h"
 #include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"

@@ -44,9 +40,6 @@ public:
        return buffer.handle;
    }

-    /// Reloads the global region from guest memory
-    void Reload(u32 size_);
-
    // TODO(Rodrigo): When global memory is written (STG), implement flushing
    void Flush() override {
        UNIMPLEMENTED();
@@ -62,17 +55,6 @@ private:
 class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
 public:
    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
-
-    /// Gets the current specified shader stage program
-    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
-                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
-
-private:
-    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
-    void ReserveGlobalRegion(const GlobalRegion& region);
-
-    std::unordered_map<VAddr, GlobalRegion> reserve;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -135,31 +135,27 @@ void RasterizerOpenGL::CheckExtensions() {
    }
 }

-GLuint RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SetupVertexFormat() {
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

-    if (!gpu.dirty_flags.vertex_attrib_format) {
-        return state.draw.vertex_array;
-    }
+    if (!gpu.dirty_flags.vertex_attrib_format)
+        return;
    gpu.dirty_flags.vertex_attrib_format = false;

    MICROPROFILE_SCOPE(OpenGL_VAO);

    auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
-    auto& vao_entry = iter->second;
+    auto& VAO = iter->second;

    if (is_cache_miss) {
-        vao_entry.Create();
-        const GLuint vao = vao_entry.handle;
+        VAO.Create();
+        state.draw.vertex_array = VAO.handle;
+        state.ApplyVertexBufferState();

-        // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob
-        // that fails to properly create the vertex array if it's not bound even after creating it
-        // with glCreateVertexArrays
-        state.draw.vertex_array = vao;
-        state.ApplyVertexArrayState();
-
-        glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
+        // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
+        // around.
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());

        // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
        // Enables the first 16 vertex attributes always, as we don't know which ones are actually
@@ -167,7 +163,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
        // for now to avoid OpenGL errors.
        // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
        // assume every shader uses them all.
-        for (u32 index = 0; index < 16; ++index) {
+        for (unsigned index = 0; index < 16; ++index) {
            const auto& attrib = regs.vertex_attrib_format[index];

            // Ignore invalid attributes.
@@ -182,29 +178,28 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {

            ASSERT(buffer.IsEnabled());

-            glEnableVertexArrayAttrib(vao, index);
+            glEnableVertexAttribArray(index);
            if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
                attrib.type ==
                    Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
-                glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
-                                           MaxwellToGL::VertexType(attrib), attrib.offset);
+                glVertexAttribIFormat(index, attrib.ComponentCount(),
+                                      MaxwellToGL::VertexType(attrib), attrib.offset);
            } else {
-                glVertexArrayAttribFormat(
-                    vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
-                    attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+                glVertexAttribFormat(index, attrib.ComponentCount(),
+                                     MaxwellToGL::VertexType(attrib),
+                                     attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
            }
-            glVertexArrayAttribBinding(vao, index, attrib.buffer);
+            glVertexAttribBinding(index, attrib.buffer);
        }
    }
+    state.draw.vertex_array = VAO.handle;
+    state.ApplyVertexBufferState();

    // Rebinding the VAO invalidates the vertex buffer bindings.
    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
-
-    state.draw.vertex_array = vao_entry.handle;
-    return vao_entry.handle;
 }

-void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
+void RasterizerOpenGL::SetupVertexBuffer() {
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

@@ -222,7 +217,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
        if (!vertex_array.IsEnabled())
            continue;

-        const Tegra::GPUVAddr start = vertex_array.StartAddress();
+        Tegra::GPUVAddr start = vertex_array.StartAddress();
        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

        ASSERT(end > start);
@@ -230,18 +225,21 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
        const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);

        // Bind the vertex array to the buffer at the current offset.
-        glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
-                                  vertex_array.stride);
+        glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
+                           vertex_array.stride);

        if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
            // Enable vertex buffer instancing with the specified divisor.
-            glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
+            glVertexBindingDivisor(index, vertex_array.divisor);
        } else {
            // Disable the vertex buffer instancing.
-            glVertexArrayBindingDivisor(vao, index, 0);
+            glVertexBindingDivisor(index, 0);
        }
    }

+    // Implicitly set by glBindVertexBuffer. Stupid glstate handling...
+    state.draw.vertex_buffer = buffer_cache.GetHandle();
+
    gpu.dirty_flags.vertex_array = 0;
 }

@@ -297,7 +295,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    MICROPROFILE_SCOPE(OpenGL_Shader);
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();

-    BaseBindings base_bindings;
+    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
+    // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
+    u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
+    u32 current_texture_bindpoint = 0;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,42 +322,50 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        const GLintptr offset = buffer_cache.UploadHostMemory(
            &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));

-        // Bind the emulation info buffer
-        glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
+        // Bind the buffer
+        glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
+                          offset, static_cast<GLsizeiptr>(sizeof(ubo)));

        Shader shader{shader_cache.GetStageProgram(program)};
-        const auto [program_handle, next_bindings] =
-            shader->GetProgramHandle(primitive_mode, base_bindings);

        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
-        case Maxwell::ShaderProgram::VertexB:
-            shader_program_manager->UseProgrammableVertexShader(program_handle);
+        case Maxwell::ShaderProgram::VertexB: {
+            shader_program_manager->UseProgrammableVertexShader(
+                shader->GetProgramHandle(primitive_mode));
            break;
-        case Maxwell::ShaderProgram::Geometry:
-            shader_program_manager->UseProgrammableGeometryShader(program_handle);
+        }
+        case Maxwell::ShaderProgram::Geometry: {
+            shader_program_manager->UseProgrammableGeometryShader(
+                shader->GetProgramHandle(primitive_mode));
            break;
-        case Maxwell::ShaderProgram::Fragment:
-            shader_program_manager->UseProgrammableFragmentShader(program_handle);
+        }
+        case Maxwell::ShaderProgram::Fragment: {
+            shader_program_manager->UseProgrammableFragmentShader(
+                shader->GetProgramHandle(primitive_mode));
            break;
+        }
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
                         shader_config.enable.Value(), shader_config.offset);
            UNREACHABLE();
        }

-        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
-        SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
-        SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
-        SetupTextures(stage_enum, shader, program_handle, base_bindings);
+        // Configure the const buffers for this shader stage.
+        current_constbuffer_bindpoint =
+            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
+                              current_constbuffer_bindpoint);
+
+        // Configure the textures for this shader stage.
+        current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
+                                                  primitive_mode, current_texture_bindpoint);

        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
        // clip distances only when it's written by a shader stage.
        for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
-            clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
+            clip_distances[i] |= shader->GetShaderEntries().clip_distances[i];
        }

        // When VertexA is enabled, we have dual vertex shaders
@@ -364,8 +373,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            index++;
        }
-
-        base_bindings = next_bindings;
    }

    SyncClipEnabled(clip_distances);
@@ -477,30 +484,21 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
        cached_pages.add({pages_interval, delta});
 }

-std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
-    OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
-    std::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
+                                             bool using_depth_fb, bool preserve_contents,
+                                             std::optional<std::size_t> single_color_target) {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-
-    const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
-                                                 single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
-        // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
-        // single color targets). This is done because the guest registers may not change but the
-        // host framebuffer may contain different attachments
-        return current_depth_stencil_usage;
-    }
-    current_framebuffer_config_state = fb_config_state;
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

    Surface depth_surface;
    if (using_depth_fb) {
        depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
    }

-    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
+    // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+    // used to enable multiple render targets. However, it is left unset on all games that I have
+    // tested.
+    UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);

    // Bind the framebuffer surfaces
    current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -561,14 +559,12 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
        depth_surface->MarkAsModified(true, res_cache);

        fbkey.zeta = depth_surface->Texture().handle;
-        fbkey.stencil_enable = regs.stencil_enable &&
-                               depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
+        fbkey.stencil_enable = regs.stencil_enable;
    }

    SetupCachedFramebuffer(fbkey, current_state);
-    SyncViewport(current_state);

-    return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
+    SyncViewport(current_state);
 }

 void RasterizerOpenGL::Clear() {
@@ -636,8 +632,8 @@ void RasterizerOpenGL::Clear() {
        return;
    }

-    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
-        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+    ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
+                          regs.clear_buffers.RT.Value());
    if (regs.clear_flags.scissor) {
        SyncScissorTest(clear_state);
    }
@@ -652,11 +648,11 @@ void RasterizerOpenGL::Clear() {
        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
    }

-    if (clear_depth && clear_stencil) {
+    if (use_depth && use_stencil) {
        glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
-    } else if (clear_depth) {
+    } else if (use_depth) {
        glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
-    } else if (clear_stencil) {
+    } else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
    }
 }
@@ -691,6 +687,9 @@ void RasterizerOpenGL::DrawArrays() {
    // Draw the vertex batch
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;

+    state.draw.vertex_buffer = buffer_cache.GetHandle();
+    state.ApplyVertexBufferState();
+
    std::size_t buffer_size = CalculateVertexArraysSize();

    // Add space for index buffer (keeping in mind non-core primitives)
@@ -720,9 +719,8 @@ void RasterizerOpenGL::DrawArrays() {
        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
    }

-    const GLuint vao = SetupVertexFormat();
-    SetupVertexBuffer(vao);
-
+    SetupVertexFormat();
+    SetupVertexBuffer();
    DrawParameters params = SetupDraw();
    SetupShaders(params.primitive_mode);

@@ -765,11 +763,6 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    buffer_cache.InvalidateRegion(addr, size);
 }

-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    FlushRegion(addr, size);
-    InvalidateRegion(addr, size);
-}
-
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
@@ -783,6 +776,11 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs
    return true;
 }

+bool RasterizerOpenGL::AccelerateFill(const void* config) {
+    UNREACHABLE();
+    return true;
+}
+
 bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
@@ -912,14 +910,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
    }
 }

-void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                         const Shader& shader, GLuint program_handle,
-                                         BaseBindings base_bindings) {
+u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
+                                        GLenum primitive_mode, u32 current_bindpoint) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
    const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& entries = shader->GetShaderEntries().const_buffers;
+    const auto& entries = shader->GetShaderEntries().const_buffer_entries;

    constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
    std::array<GLuint, max_binds> bind_buffers;
@@ -954,7 +951,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
            }
        } else {
            // Buffer is accessed directly, upload just what we use
-            size = used_buffer.GetSize();
+            size = used_buffer.GetSize() * sizeof(float);
        }

        // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
@@ -962,73 +959,75 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
        size = Common::AlignUp(size, sizeof(GLvec4));
        ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");

-        const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
+        GLintptr const_buffer_offset = buffer_cache.UploadMemory(
            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));

+        // Now configure the bindpoint of the buffer inside the shader
+        glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
+                              shader->GetProgramResourceIndex(used_buffer),
+                              current_bindpoint + bindpoint);
+
        // Prepare values for multibind
        bind_buffers[bindpoint] = buffer_cache.GetHandle();
        bind_offsets[bindpoint] = const_buffer_offset;
        bind_sizes[bindpoint] = size;
    }

-    // The first binding is reserved for emulation values
-    const GLuint ubo_base_binding = base_bindings.cbuf + 1;
-    glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
+    glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
                       bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
+
+    return current_bindpoint + static_cast<u32>(entries.size());
 }

-void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                          const Shader& shader, GLenum primitive_mode,
-                                          BaseBindings base_bindings) {
-    // TODO(Rodrigo): Use ARB_multi_bind here
-    const auto& entries = shader->GetShaderEntries().global_memory_entries;
-
-    for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = base_bindings.gmem + bindpoint;
-        const auto& region = global_cache.GetGlobalRegion(entry, stage);
-
-        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
-    }
-}
-
-void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
-                                     GLuint program_handle, BaseBindings base_bindings) {
+u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
+                                    GLenum primitive_mode, u32 current_unit) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& entries = shader->GetShaderEntries().samplers;
+    const auto& entries = shader->GetShaderEntries().texture_samplers;

-    ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
+    ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
               "Exceeded the number of active textures.");

    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
        const auto& entry = entries[bindpoint];
-        const u32 current_bindpoint = base_bindings.sampler + bindpoint;
-        auto& unit = state.texture_units[current_bindpoint];
+        const u32 current_bindpoint = current_unit + bindpoint;
+
+        // Bind the uniform to the sampler.
+
+        glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
+                           shader->GetUniformLocation(entry), current_bindpoint);

        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
+
        if (!texture.enabled) {
-            unit.texture = 0;
+            state.texture_units[current_bindpoint].texture = 0;
            continue;
        }

        texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
-
        Surface surface = res_cache.GetTextureSurface(texture, entry);
        if (surface != nullptr) {
-            unit.texture =
+            const GLuint handle =
                entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
-            unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
-            unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
-            unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
-            unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
-            unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
+            const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
+            state.texture_units[current_bindpoint].texture = handle;
+            state.texture_units[current_bindpoint].target = target;
+            state.texture_units[current_bindpoint].swizzle.r =
+                MaxwellToGL::SwizzleSource(texture.tic.x_source);
+            state.texture_units[current_bindpoint].swizzle.g =
+                MaxwellToGL::SwizzleSource(texture.tic.y_source);
+            state.texture_units[current_bindpoint].swizzle.b =
+                MaxwellToGL::SwizzleSource(texture.tic.z_source);
+            state.texture_units[current_bindpoint].swizzle.a =
+                MaxwellToGL::SwizzleSource(texture.tic.w_source);
        } else {
            // Can occur when texture addr is null or its memory is unmapped/invalid
-            unit.texture = 0;
+            state.texture_units[current_bindpoint].texture = 0;
        }
    }
+
+    return current_unit + static_cast<u32>(entries.size());
 }

 void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -53,9 +53,9 @@ public:
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
+    bool AccelerateFill(const void* config) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
@@ -98,48 +98,36 @@ private:
        float max_anisotropic = 1.0f;
    };

-    struct FramebufferConfigState {
-        bool using_color_fb{};
-        bool using_depth_fb{};
-        bool preserve_contents{};
-        std::optional<std::size_t> single_color_target;
-
-        bool operator==(const FramebufferConfigState& rhs) const {
-            return std::tie(using_color_fb, using_depth_fb, preserve_contents,
-                            single_color_target) == std::tie(rhs.using_color_fb, rhs.using_depth_fb,
-                                                             rhs.preserve_contents,
-                                                             rhs.single_color_target);
-        }
-        bool operator!=(const FramebufferConfigState& rhs) const {
-            return !operator==(rhs);
-        }
-    };
-
    /**
     * Configures the color and depth framebuffer states.
     * @param use_color_fb If true, configure color framebuffers.
     * @param using_depth_fb If true, configure the depth/stencil framebuffer.
     * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
     * @param single_color_target Specifies if a single color buffer target should be used.
-     * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
-     * (requires using_depth_fb to be true)
     */
-    std::pair<bool, bool> ConfigureFramebuffers(
-        OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
-        bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
+    void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
+                               bool using_depth_fb = true, bool preserve_contents = true,
+                               std::optional<std::size_t> single_color_target = {});

-    /// Configures the current constbuffers to use for the draw command.
-    void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
-                           GLuint program_handle, BaseBindings base_bindings);
+    /**
+     * Configures the current constbuffers to use for the draw command.
+     * @param stage The shader stage to configure buffers for.
+     * @param shader The shader object that contains the specified stage.
+     * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
+     * @returns The next available bindpoint for use in the next shader stage.
+     */
+    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
+                          GLenum primitive_mode, u32 current_bindpoint);

-    /// Configures the current global memory entries to use for the draw command.
-    void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                            const Shader& shader, GLenum primitive_mode,
-                            BaseBindings base_bindings);
-
-    /// Configures the current textures to use for the draw command.
-    void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
-                       GLuint program_handle, BaseBindings base_bindings);
+    /**
+     * Configures the current textures to use for the draw command.
+     * @param stage The shader stage to configure textures for.
+     * @param shader The shader object that contains the specified stage.
+     * @param current_unit The offset at which to start counting unused texture units.
+     * @returns The next available texture unit for use in the next shader stage.
+     */
+    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
+                      GLenum primitive_mode, u32 current_unit);

    /// Syncs the viewport and depth range to match the guest state
    void SyncViewport(OpenGLState& current_state);
@@ -214,8 +202,6 @@ private:
        vertex_array_cache;

    std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
-    FramebufferConfigState current_framebuffer_config_state;
-    std::pair<bool, bool> current_depth_stencil_usage{};

    std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;

@@ -228,10 +214,8 @@ private:

    std::size_t CalculateIndexBufferSize() const;

-    /// Updates and returns a vertex array object representing current vertex format
-    GLuint SetupVertexFormat();
-
-    void SetupVertexBuffer(GLuint vao);
+    void SetupVertexFormat();
+    void SetupVertexBuffer();

    DrawParameters SetupDraw();

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -128,7 +128,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
    params.unaligned_height = config.tic.Height();
    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
-    params.identity = SurfaceClass::Uploaded;

    switch (params.target) {
    case SurfaceTarget::Texture1D:
@@ -168,7 +167,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    }

    params.is_layered = SurfaceTargetIsLayered(params.target);
-    params.is_array = SurfaceTargetIsArray(params.target);
    params.max_mip_level = config.tic.max_mip_level + 1;
    params.rt = {};

@@ -196,7 +194,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
-    params.identity = SurfaceClass::RenderTarget;
    params.depth = 1;
    params.max_mip_level = 1;
    params.is_layered = false;
@@ -232,7 +229,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.height = zeta_height;
    params.unaligned_height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
-    params.identity = SurfaceClass::DepthBuffer;
    params.depth = 1;
    params.max_mip_level = 1;
    params.is_layered = false;
@@ -261,7 +257,6 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
-    params.identity = SurfaceClass::Copy;
    params.depth = 1;
    params.max_mip_level = 1;
    params.rt = {};
@@ -579,7 +574,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)

    ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);

-    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
+    LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
+                  SurfaceParams::SurfaceTargetName(params.target));

    // Clamp size to mapped GPU memory region
    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -734,6 +730,7 @@ void CachedSurface::FlushGLBuffer() {
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
                                        params.height);
+    ASSERT(params.type != SurfaceType::Fill);
    const u8* const texture_src_data = Memory::GetPointer(params.addr);
    ASSERT(texture_src_data);
    if (params.is_tiled) {
@@ -880,13 +877,10 @@ void CachedSurface::EnsureTextureView() {
    UNIMPLEMENTED_IF(gl_is_compressed);

    const GLenum target{TargetLayer()};
-    const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
-    constexpr GLuint min_layer = 0;
-    constexpr GLuint min_level = 0;

    texture_view.Create();
-    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
-                  params.max_mip_level, min_layer, num_layers);
+    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
+                  params.max_mip_level, 0, 1);

    OpenGLState cur_state = OpenGLState::GetCurState();
    const auto& old_tex = cur_state.texture_units[0];
@@ -903,6 +897,9 @@ void CachedSurface::EnsureTextureView() {

 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
 void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+    if (params.type == SurfaceType::Fill)
+        return;
+
    MICROPROFILE_SCOPE(OpenGL_TextureUL);

    for (u32 i = 0; i < params.max_mip_level; i++)
@@ -922,16 +919,9 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
 }

 Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
-    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
-    const auto& regs{gpu.regs};
-
-    if (!gpu.dirty_flags.zeta_buffer) {
-        return last_depth_buffer;
-    }
-    gpu.dirty_flags.zeta_buffer = false;
-
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
    if (!regs.zeta.Address() || !regs.zeta_enable) {
-        return last_depth_buffer = {};
+        return {};
    }

    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
@@ -939,31 +929,25 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
        regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
        regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};

-    return last_depth_buffer = GetSurface(depth_params, preserve_contents);
+    return GetSurface(depth_params, preserve_contents);
 }

 Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
-    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
-    const auto& regs{gpu.regs};
-
-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
-        return last_color_buffers[index];
-    }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};

    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

    if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return {};
    }

    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return {};
    }

    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};

-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return GetSurface(color_params, preserve_contents);
 }

 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -35,14 +35,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;

 struct SurfaceParams {
-
-    enum class SurfaceClass {
-        Uploaded,
-        RenderTarget,
-        DepthBuffer,
-        Copy,
-    };
-
    static std::string SurfaceTargetName(SurfaceTarget target) {
        switch (target) {
        case SurfaceTarget::Texture1D:
@@ -218,48 +210,6 @@ struct SurfaceParams {
    /// Initializes parameters for caching, should be called after everything has been initialized
    void InitCacheParameters(Tegra::GPUVAddr gpu_addr);

-    std::string TargetName() const {
-        switch (target) {
-        case SurfaceTarget::Texture1D:
-            return "1D";
-        case SurfaceTarget::Texture2D:
-            return "2D";
-        case SurfaceTarget::Texture3D:
-            return "3D";
-        case SurfaceTarget::Texture1DArray:
-            return "1DArray";
-        case SurfaceTarget::Texture2DArray:
-            return "2DArray";
-        case SurfaceTarget::TextureCubemap:
-            return "Cube";
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
-            UNREACHABLE();
-            return fmt::format("TUK({})", static_cast<u32>(target));
-        }
-    }
-
-    std::string ClassName() const {
-        switch (identity) {
-        case SurfaceClass::Uploaded:
-            return "UP";
-        case SurfaceClass::RenderTarget:
-            return "RT";
-        case SurfaceClass::DepthBuffer:
-            return "DB";
-        case SurfaceClass::Copy:
-            return "CP";
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
-            UNREACHABLE();
-            return fmt::format("CUK({})", static_cast<u32>(identity));
-        }
-    }
-
-    std::string IdentityString() const {
-        return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
-    }
-
    bool is_tiled;
    u32 block_width;
    u32 block_height;
@@ -273,10 +223,8 @@ struct SurfaceParams {
    u32 depth;
    u32 unaligned_height;
    SurfaceTarget target;
-    SurfaceClass identity;
    u32 max_mip_level;
    bool is_layered;
-    bool is_array;
    bool srgb_conversion;
    // Parameters used for caching
    VAddr addr;
@@ -307,7 +255,6 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
    static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
        SurfaceReserveKey res;
        res.state = params;
-        res.state.identity = {}; // Ignore the origin of the texture
        res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
        res.state.rt = {};       // Ignore rt config in caching
        return res;
@@ -347,7 +294,7 @@ public:
    }

    const OGLTexture& TextureLayer() {
-        if (params.is_array) {
+        if (params.is_layered) {
            return Texture();
        }
        EnsureTextureView();
@@ -449,9 +396,6 @@ private:
    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
    /// using the new format.
    OGLBuffer copy_pbo;
-
-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
-    Surface last_depth_buffer;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -117,7 +117,7 @@ void OGLBuffer::Create() {
        return;

    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glCreateBuffers(1, &handle);
+    glGenBuffers(1, &handle);
 }

 void OGLBuffer::Release() {
@@ -126,6 +126,7 @@ void OGLBuffer::Release() {

    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
    glDeleteBuffers(1, &handle);
+    OpenGLState::GetCurState().ResetBuffer(handle).Apply();
    handle = 0;
 }

@@ -151,7 +152,7 @@ void OGLVertexArray::Create() {
        return;

    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glCreateVertexArrays(1, &handle);
+    glGenVertexArrays(1, &handle);
 }

 void OGLVertexArray::Release() {
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,15 +10,11 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/utils.h"
-#include "video_core/shader/shader_ir.h"

 namespace OpenGL {

-using VideoCommon::Shader::ProgramCode;
-
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -28,31 +24,42 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
 }

 /// Gets the shader program code from memory for the specified address
-static ProgramCode GetShaderCode(VAddr addr) {
-    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+static GLShader::ProgramCode GetShaderCode(VAddr addr) {
+    GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
    Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
    return program_code;
 }

-/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
-    switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
-    case Maxwell::ShaderProgram::VertexB:
-        return GL_VERTEX_SHADER;
-    case Maxwell::ShaderProgram::Geometry:
-        return GL_GEOMETRY_SHADER;
-    case Maxwell::ShaderProgram::Fragment:
-        return GL_FRAGMENT_SHADER;
-    default:
-        return GL_NONE;
+/// Helper function to set shader uniform block bindings for a single shader stage
+static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
+                                         Maxwell::ShaderStage binding, std::size_t expected_size) {
+    const GLuint ub_index = glGetUniformBlockIndex(shader, name);
+    if (ub_index == GL_INVALID_INDEX) {
+        return;
    }
+
+    GLint ub_size = 0;
+    glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
+    ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
+               "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
+    glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
+}
+
+/// Sets shader uniform block bindings for an entire shader program
+static void SetShaderUniformBlockBindings(GLuint shader) {
+    SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
+                                 sizeof(GLShader::MaxwellUniformData));
+    SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
+                                 sizeof(GLShader::MaxwellUniformData));
+    SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
+                                 sizeof(GLShader::MaxwellUniformData));
 }

 CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
    : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {

    GLShader::ProgramResult program_result;
+    GLenum gl_type{};

    switch (program_type) {
    case Maxwell::ShaderProgram::VertexA:
@@ -63,14 +70,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
    case Maxwell::ShaderProgram::VertexB:
        CalculateProperties();
        program_result = GLShader::GenerateVertexShader(setup);
+        gl_type = GL_VERTEX_SHADER;
        break;
    case Maxwell::ShaderProgram::Geometry:
        CalculateProperties();
        program_result = GLShader::GenerateGeometryShader(setup);
+        gl_type = GL_GEOMETRY_SHADER;
        break;
    case Maxwell::ShaderProgram::Fragment:
        CalculateProperties();
        program_result = GLShader::GenerateFragmentShader(setup);
+        gl_type = GL_FRAGMENT_SHADER;
        break;
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
@@ -78,105 +88,59 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
        return;
    }

-    code = program_result.first;
    entries = program_result.second;
    shader_length = entries.shader_length;
-}

-std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
-                                                                BaseBindings base_bindings) {
-    GLuint handle{};
-    if (program_type == Maxwell::ShaderProgram::Geometry) {
-        handle = GetGeometryShader(primitive_mode, base_bindings);
+    if (program_type != Maxwell::ShaderProgram::Geometry) {
+        OGLShader shader;
+        shader.Create(program_result.first.c_str(), gl_type);
+        program.Create(true, shader.handle);
+        SetShaderUniformBlockBindings(program.handle);
+        LabelGLObject(GL_PROGRAM, program.handle, addr);
    } else {
-        const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
-        auto& program = entry->second;
-        if (is_cache_miss) {
-            std::string source = AllocateBindings(base_bindings);
-            source += code;
-
-            OGLShader shader;
-            shader.Create(source.c_str(), GetShaderType(program_type));
-            program.Create(true, shader.handle);
-            LabelGLObject(GL_PROGRAM, program.handle, addr);
-        }
-
-        handle = program.handle;
-    }
-
-    // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
-    // emulation values
-    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
-    base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
-    base_bindings.sampler += static_cast<u32>(entries.samplers.size());
-
-    return {handle, base_bindings};
-}
-
-std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
-    std::string code = "#version 430 core\n";
-    code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
-
-    for (const auto& cbuf : entries.const_buffers) {
-        code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
-    }
-
-    for (const auto& gmem : entries.global_memory_entries) {
-        code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
-                            gmem.GetCbufOffset(), base_bindings.gmem++);
-    }
-
-    for (const auto& sampler : entries.samplers) {
-        code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
-                            base_bindings.sampler++);
-    }
-
-    return code;
-}
-
-GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
-    const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
-    auto& programs = entry->second;
-
-    switch (primitive_mode) {
-    case GL_POINTS:
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
-    case GL_LINES:
-    case GL_LINE_STRIP:
-        return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
-    case GL_LINES_ADJACENCY:
-    case GL_LINE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
-                                   "ShaderLinesAdjacency");
-    case GL_TRIANGLES:
-    case GL_TRIANGLE_STRIP:
-    case GL_TRIANGLE_FAN:
-        return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
-                                   "ShaderTriangles");
-    case GL_TRIANGLES_ADJACENCY:
-    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
-                                   "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
-    default:
-        UNREACHABLE_MSG("Unknown primitive mode.");
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+        // Store shader's code to lazily build it on draw
+        geometry_programs.code = program_result.first;
    }
 }

-GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
+GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
+    const auto search{resource_cache.find(buffer.GetHash())};
+    if (search == resource_cache.end()) {
+        const GLuint index{
+            glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
+        resource_cache[buffer.GetHash()] = index;
+        return index;
+    }
+
+    return search->second;
+}
+
+GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
+    const auto search{uniform_cache.find(sampler.GetHash())};
+    if (search == uniform_cache.end()) {
+        const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
+        uniform_cache[sampler.GetHash()] = index;
+        return index;
+    }
+
+    return search->second;
+}
+
+GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
                                         const std::string& glsl_topology, u32 max_vertices,
                                         const std::string& debug_name) {
    if (target_program.handle != 0) {
        return target_program.handle;
    }
-    std::string source = AllocateBindings(base_bindings);
+    std::string source = "#version 430 core\n";
    source += "layout (" + glsl_topology + ") in;\n";
    source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
-    source += code;
+    source += geometry_programs.code;

    OGLShader shader;
    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
    target_program.Create(true, shader.handle);
+    SetShaderUniformBlockBindings(target_program.handle);
    LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
    return target_program.handle;
 };
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -7,15 +7,11 @@
 #include <array>
 #include <map>
 #include <memory>
-#include <tuple>
-
-#include <glad/glad.h>

 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"

 namespace OpenGL {
@@ -26,16 +22,6 @@ class RasterizerOpenGL;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-struct BaseBindings {
-    u32 cbuf{};
-    u32 gmem{};
-    u32 sampler{};
-
-    bool operator<(const BaseBindings& rhs) const {
-        return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
-    }
-};
-
 class CachedShader final : public RasterizerCacheObject {
 public:
    CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
@@ -57,45 +43,70 @@ public:
    }

    /// Gets the GL program handle for the shader
-    std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
-                                                      BaseBindings base_bindings);
+    GLuint GetProgramHandle(GLenum primitive_mode) {
+        if (program_type != Maxwell::ShaderProgram::Geometry) {
+            return program.handle;
+        }
+        switch (primitive_mode) {
+        case GL_POINTS:
+            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
+        case GL_LINES:
+        case GL_LINE_STRIP:
+            return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
+        case GL_LINES_ADJACENCY:
+        case GL_LINE_STRIP_ADJACENCY:
+            return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
+                                       "ShaderLinesAdjacency");
+        case GL_TRIANGLES:
+        case GL_TRIANGLE_STRIP:
+        case GL_TRIANGLE_FAN:
+            return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
+                                       "ShaderTriangles");
+        case GL_TRIANGLES_ADJACENCY:
+        case GL_TRIANGLE_STRIP_ADJACENCY:
+            return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
+                                       6, "ShaderTrianglesAdjacency");
+        default:
+            UNREACHABLE_MSG("Unknown primitive mode.");
+            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
+        }
+    }
+
+    /// Gets the GL program resource index for the specified resource, caching as needed
+    GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
+
+    /// Gets the GL uniform location for the specified resource, caching as needed
+    GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);

 private:
+    /// Generates a geometry shader or returns one that already exists.
+    GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
+                               u32 max_vertices, const std::string& debug_name);
+
+    void CalculateProperties();
+
+    VAddr addr;
+    std::size_t shader_length;
+    Maxwell::ShaderProgram program_type;
+    GLShader::ShaderSetup setup;
+    GLShader::ShaderEntries entries;
+
+    // Non-geometry program.
+    OGLProgram program;
+
    // Geometry programs. These are needed because GLSL needs an input topology but it's not
    // declared by the hardware. Workaround this issue by generating a different shader per input
    // topology class.
-    struct GeometryPrograms {
+    struct {
+        std::string code;
        OGLProgram points;
        OGLProgram lines;
        OGLProgram lines_adjacency;
        OGLProgram triangles;
        OGLProgram triangles_adjacency;
-    };
+    } geometry_programs;

-    std::string AllocateBindings(BaseBindings base_bindings);
-
-    GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
-
-    /// Generates a geometry shader or returns one that already exists.
-    GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
-                               const std::string& glsl_topology, u32 max_vertices,
-                               const std::string& debug_name);
-
-    void CalculateProperties();
-
-    VAddr addr{};
-    std::size_t shader_length{};
-    Maxwell::ShaderProgram program_type{};
-    GLShader::ShaderSetup setup;
-    GLShader::ShaderEntries entries;
-
-    std::string code;
-
-    std::map<BaseBindings, OGLProgram> programs;
-    std::map<BaseBindings, GeometryPrograms> geometry_programs;
-
-    std::map<u32, GLuint> cbuf_resource_cache;
-    std::map<u32, GLuint> gmem_resource_cache;
+    std::map<u32, GLuint> resource_cache;
    std::map<u32, GLint> uniform_cache;
 };

--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,106 +5,21 @@
 #pragma once

 #include <array>
+#include <functional>
+#include <optional>
 #include <string>
-#include <utility>
-#include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/shader/shader_ir.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"

-namespace VideoCommon::Shader {
-class ShaderIR;
-}
+namespace OpenGL::GLShader::Decompiler {

-namespace OpenGL::GLShader {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
-    explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
-                              Maxwell::ShaderStage stage, const std::string& name, u32 index)
-        : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-    u32 GetIndex() const {
-        return index;
-    }
-
-private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
-    u32 index{};
-};
-
-class SamplerEntry : public VideoCommon::Shader::Sampler {
-public:
-    explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
-                          const std::string& name)
-        : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
-};
-
-class GlobalMemoryEntry {
-public:
-    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
-                               std::string name)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
-
-    u32 GetCbufIndex() const {
-        return cbuf_index;
-    }
-
-    u32 GetCbufOffset() const {
-        return cbuf_offset;
-    }
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-private:
-    u32 cbuf_index{};
-    u32 cbuf_offset{};
-    Maxwell::ShaderStage stage{};
-    std::string name;
-};
-
-struct ShaderEntries {
-    std::vector<ConstBufferEntry> const_buffers;
-    std::vector<SamplerEntry> samplers;
-    std::vector<GlobalMemoryEntry> global_memory_entries;
-    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
-    std::size_t shader_length{};
-};
-
-using ProgramResult = std::pair<std::string, ShaderEntries>;
+using Tegra::Engines::Maxwell3D;

 std::string GetCommonDeclarations();

-ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
-                        const std::string& suffix);
+std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                              Maxwell3D::Regs::ShaderStage stage,
+                                              const std::string& suffix);

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader::Decompiler
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -7,57 +7,63 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
-#include "video_core/shader/shader_ir.h"

 namespace OpenGL::GLShader {

 using Tegra::Engines::Maxwell3D;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::ShaderIR;

 static constexpr u32 PROGRAM_OFFSET{10};

 ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
+    std::string out = "#version 430 core\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
    out += "// Shader Unique Id: VS" + id + "\n\n";
-    out += GetCommonDeclarations();
+    out += Decompiler::GetCommonDeclarations();

    out += R"(
+
 layout (location = 0) out vec4 position;

-layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
+layout(std140) uniform vs_config {
    vec4 viewport_flip;
    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
 };
-
 )";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
-    ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
+
+    if (setup.IsDualProgram()) {
+        out += "bool exec_vertex_b();\n";
+    }
+
+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
+            .value_or(ProgramResult());

    out += program.first;

    if (setup.IsDualProgram()) {
-        ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
        ProgramResult program_b =
-            Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
-
+            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
+                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
+                .value_or(ProgramResult());
        out += program_b.first;
    }

    out += R"(
+
 void main() {
    position = vec4(0.0, 0.0, 0.0, 0.0);
-    execute_vertex();
+    exec_vertex();
 )";

    if (setup.IsDualProgram()) {
-        out += "    execute_vertex_b();";
+        out += "    exec_vertex_b();";
    }

    out += R"(
+
    // Check if the flip stage is VertexB
    // Config pack's second value is flip_stage
    if (config_pack[1] == 1) {
@@ -71,62 +77,73 @@ void main() {
    if (config_pack[1] == 1) {
        position.w = 1.0;
    }
-})";
+}
+
+)";

    return {out, program.second};
 }

 ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
-    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
+    // Version is intentionally skipped in shader generation; it's added by the lazy compilation.
    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
    out += "// Shader Unique Id: GS" + id + "\n\n";
-    out += GetCommonDeclarations();
+    out += Decompiler::GetCommonDeclarations();
+    out += "bool exec_geometry();\n";

+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
+            .value_or(ProgramResult());
    out += R"(
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
 layout (location = 0) in vec4 gs_position[];
 layout (location = 0) out vec4 position;

-layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
+layout (std140) uniform gs_config {
    vec4 viewport_flip;
    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
 };

-)";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
-    ProgramResult program =
-        Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
-    out += program.first;
-
-    out += R"(
 void main() {
-    execute_geometry();
-};)";
+    exec_geometry();
+}

+)";
+    out += program.first;
    return {out, program.second};
 }

 ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
+    std::string out = "#version 430 core\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
    out += "// Shader Unique Id: FS" + id + "\n\n";
-    out += GetCommonDeclarations();
+    out += Decompiler::GetCommonDeclarations();
+    out += "bool exec_fragment();\n";

+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
+            .value_or(ProgramResult());
    out += R"(
-layout (location = 0) out vec4 FragColor0;
-layout (location = 1) out vec4 FragColor1;
-layout (location = 2) out vec4 FragColor2;
-layout (location = 3) out vec4 FragColor3;
-layout (location = 4) out vec4 FragColor4;
-layout (location = 5) out vec4 FragColor5;
-layout (location = 6) out vec4 FragColor6;
-layout (location = 7) out vec4 FragColor7;
+layout(location = 0) out vec4 FragColor0;
+layout(location = 1) out vec4 FragColor1;
+layout(location = 2) out vec4 FragColor2;
+layout(location = 3) out vec4 FragColor3;
+layout(location = 4) out vec4 FragColor4;
+layout(location = 5) out vec4 FragColor5;
+layout(location = 6) out vec4 FragColor6;
+layout(location = 7) out vec4 FragColor7;

 layout (location = 0) in vec4 position;

-layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
+layout (std140) uniform fs_config {
    vec4 viewport_flip;
    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
    uvec4 alpha_test;
@@ -156,20 +173,12 @@ bool AlphaFunc(in float value) {
    }
 }

-)";
-    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
-    ProgramResult program =
-        Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
-
-    out += program.first;
-
-    out += R"(
 void main() {
-    execute_fragment();
+    exec_fragment();
 }

 )";
+    out += program.first;
    return {out, program.second};
 }
-
-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -10,12 +10,164 @@

 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/shader_ir.h"

 namespace OpenGL::GLShader {

-using VideoCommon::Shader::ProgramCode;
+constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+using ProgramCode = std::vector<u64>;
+
+enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
+
+class ConstBufferEntry {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+    void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
+        is_used = true;
+        this->index = static_cast<unsigned>(index);
+        this->stage = stage;
+        max_offset = std::max(max_offset, static_cast<unsigned>(offset));
+    }
+
+    void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
+        is_used = true;
+        is_indirect = true;
+        this->index = static_cast<unsigned>(index);
+        this->stage = stage;
+    }
+
+    bool IsUsed() const {
+        return is_used;
+    }
+
+    bool IsIndirect() const {
+        return is_indirect;
+    }
+
+    unsigned GetIndex() const {
+        return index;
+    }
+
+    unsigned GetSize() const {
+        return max_offset + 1;
+    }
+
+    std::string GetName() const {
+        return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
+    }
+
+    u32 GetHash() const {
+        return (static_cast<u32>(stage) << 16) | index;
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = {
+        "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c",
+    };
+
+    bool is_used{};
+    bool is_indirect{};
+    unsigned index{};
+    unsigned max_offset{};
+    Maxwell::ShaderStage stage;
+};
+
+class SamplerEntry {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+    SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
+                 Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+        : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array),
+          is_shadow(is_shadow) {}
+
+    std::size_t GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetIndex() const {
+        return sampler_index;
+    }
+
+    Maxwell::ShaderStage GetStage() const {
+        return stage;
+    }
+
+    std::string GetName() const {
+        return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
+               std::to_string(sampler_index);
+    }
+
+    std::string GetTypeString() const {
+        using Tegra::Shader::TextureType;
+        std::string glsl_type;
+
+        switch (type) {
+        case TextureType::Texture1D:
+            glsl_type = "sampler1D";
+            break;
+        case TextureType::Texture2D:
+            glsl_type = "sampler2D";
+            break;
+        case TextureType::Texture3D:
+            glsl_type = "sampler3D";
+            break;
+        case TextureType::TextureCube:
+            glsl_type = "samplerCube";
+            break;
+        default:
+            UNIMPLEMENTED();
+        }
+        if (is_array)
+            glsl_type += "Array";
+        if (is_shadow)
+            glsl_type += "Shadow";
+        return glsl_type;
+    }
+
+    Tegra::Shader::TextureType GetType() const {
+        return type;
+    }
+
+    bool IsArray() const {
+        return is_array;
+    }
+
+    bool IsShadow() const {
+        return is_shadow;
+    }
+
+    u32 GetHash() const {
+        return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
+    }
+
+    static std::string GetArrayName(Maxwell::ShaderStage stage) {
+        return TextureSamplerNames[static_cast<std::size_t>(stage)];
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
+        "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
+    };
+
+    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
+    /// instruction.
+    std::size_t offset;
+    Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used.
+    std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
+    Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
+    bool is_array;  ///< Whether the texture is being sampled as an array texture or not.
+    bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not.
+};
+
+struct ShaderEntries {
+    std::vector<ConstBufferEntry> const_buffer_entries;
+    std::vector<SamplerEntry> texture_samplers;
+    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
+    std::size_t shader_length;
+};
+
+using ProgramResult = std::pair<std::string, ShaderEntries>;

 struct ShaderSetup {
    explicit ShaderSetup(ProgramCode program_code) {
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -83,6 +83,8 @@ OpenGLState::OpenGLState() {
    draw.read_framebuffer = 0;
    draw.draw_framebuffer = 0;
    draw.vertex_array = 0;
+    draw.vertex_buffer = 0;
+    draw.uniform_buffer = 0;
    draw.shader_program = 0;
    draw.program_pipeline = 0;

@@ -503,6 +505,7 @@ void OpenGLState::ApplySamplers() const {
 }

 void OpenGLState::ApplyFramebufferState() const {
+    // Framebuffer
    if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
    }
@@ -511,10 +514,16 @@ void OpenGLState::ApplyFramebufferState() const {
    }
 }

-void OpenGLState::ApplyVertexArrayState() const {
+void OpenGLState::ApplyVertexBufferState() const {
+    // Vertex array
    if (draw.vertex_array != cur_state.draw.vertex_array) {
        glBindVertexArray(draw.vertex_array);
    }
+
+    // Vertex buffer
+    if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
+        glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
+    }
 }

 void OpenGLState::ApplyDepthClamp() const {
@@ -534,7 +543,11 @@ void OpenGLState::ApplyDepthClamp() const {

 void OpenGLState::Apply() const {
    ApplyFramebufferState();
-    ApplyVertexArrayState();
+    ApplyVertexBufferState();
+    // Uniform buffer
+    if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
+        glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);
+    }

    // Shader program
    if (draw.shader_program != cur_state.draw.shader_program) {
@@ -625,6 +638,16 @@ OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
    return *this;
 }

+OpenGLState& OpenGLState::ResetBuffer(GLuint handle) {
+    if (draw.vertex_buffer == handle) {
+        draw.vertex_buffer = 0;
+    }
+    if (draw.uniform_buffer == handle) {
+        draw.uniform_buffer = 0;
+    }
+    return *this;
+}
+
 OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
    if (draw.vertex_array == handle) {
        draw.vertex_array = 0;
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -154,6 +154,8 @@ public:
        GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
        GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
        GLuint vertex_array;     // GL_VERTEX_ARRAY_BINDING
+        GLuint vertex_buffer;    // GL_ARRAY_BUFFER_BINDING
+        GLuint uniform_buffer;   // GL_UNIFORM_BUFFER_BINDING
        GLuint shader_program;   // GL_CURRENT_PROGRAM
        GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
    } draw;
@@ -204,10 +206,10 @@ public:
    }
    /// Apply this state as the current OpenGL state
    void Apply() const;
-    /// Apply only the state affecting the framebuffer
+    /// Apply only the state affecting the framebuffer
    void ApplyFramebufferState() const;
-    /// Apply only the state affecting the vertex array
-    void ApplyVertexArrayState() const;
+    /// Apply only the state affecting the vertex buffer
+    void ApplyVertexBufferState() const;
    /// Set the initial OpenGL state
    static void ApplyDefaultState();
    /// Resets any references to the given resource
@@ -215,6 +217,7 @@ public:
    OpenGLState& ResetSampler(GLuint handle);
    OpenGLState& ResetProgram(GLuint handle);
    OpenGLState& ResetPipeline(GLuint handle);
+    OpenGLState& ResetBuffer(GLuint handle);
    OpenGLState& ResetVertexArray(GLuint handle);
    OpenGLState& ResetFramebuffer(GLuint handle);
    void EmulateViewportWithScissor();
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -15,12 +15,13 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",

 namespace OpenGL {

-OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
-    : buffer_size(size) {
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
+    : gl_target(target), buffer_size(size) {
    gl_buffer.Create();
+    glBindBuffer(gl_target, gl_buffer.handle);

    GLsizeiptr allocate_size = size;
-    if (vertex_data_usage) {
+    if (target == GL_ARRAY_BUFFER) {
        // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
        // read position is near the end and is an out-of-bound access to the vertex buffer. This is
        // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
@@ -34,17 +35,18 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
        coherent = prefer_coherent;
        const GLbitfield flags =
            GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
-        glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
-        mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
-            gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
+        glBufferStorage(gl_target, allocate_size, nullptr, flags);
+        mapped_ptr = static_cast<u8*>(glMapBufferRange(
+            gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
    } else {
-        glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
+        glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
    }
 }

 OGLStreamBuffer::~OGLStreamBuffer() {
    if (persistent) {
-        glUnmapNamedBuffer(gl_buffer.handle);
+        glBindBuffer(gl_target, gl_buffer.handle);
+        glUnmapBuffer(gl_target);
    }
    gl_buffer.Release();
 }
@@ -72,7 +74,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
        invalidate = true;

        if (persistent) {
-            glUnmapNamedBuffer(gl_buffer.handle);
+            glUnmapBuffer(gl_target);
        }
    }

@@ -82,7 +84,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
                           (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
                           (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
        mapped_ptr = static_cast<u8*>(
-            glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
+            glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
        mapped_offset = buffer_pos;
    }

@@ -93,11 +95,11 @@ void OGLStreamBuffer::Unmap(GLsizeiptr size) {
    ASSERT(size <= mapped_size);

    if (!coherent && size > 0) {
-        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
+        glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
    }

    if (!persistent) {
-        glUnmapNamedBuffer(gl_buffer.handle);
+        glUnmapBuffer(gl_target);
    }

    buffer_pos += size;
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -13,7 +13,7 @@ namespace OpenGL {

 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
+    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
    ~OGLStreamBuffer();

    GLuint GetHandle() const;
@@ -33,6 +33,7 @@ public:

 private:
    OGLBuffer gl_buffer;
+    GLenum gl_target;

    bool coherent = false;
    bool persistent = false;
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -233,20 +233,20 @@ void RendererOpenGL::InitOpenGLObjects() {

    // Generate VAO
    vertex_array.Create();
+
    state.draw.vertex_array = vertex_array.handle;
+    state.draw.vertex_buffer = vertex_buffer.handle;
+    state.draw.uniform_buffer = 0;
+    state.Apply();

    // Attach vertex data to VAO
-    glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_position, 2, GL_FLOAT, GL_FALSE,
-                              offsetof(ScreenRectVertex, position));
-    glVertexArrayAttribFormat(vertex_array.handle, attrib_tex_coord, 2, GL_FLOAT, GL_FALSE,
-                              offsetof(ScreenRectVertex, tex_coord));
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_position, 0);
-    glVertexArrayAttribBinding(vertex_array.handle, attrib_tex_coord, 0);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_position);
-    glEnableVertexArrayAttrib(vertex_array.handle, attrib_tex_coord);
-    glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
-                              sizeof(ScreenRectVertex));
+    glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
+    glVertexAttribPointer(attrib_position, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
+                          (GLvoid*)offsetof(ScreenRectVertex, position));
+    glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex),
+                          (GLvoid*)offsetof(ScreenRectVertex, tex_coord));
+    glEnableVertexAttribArray(attrib_position);
+    glEnableVertexAttribArray(attrib_tex_coord);

    // Allocate textures for the screen
    screen_info.texture.resource.Create();
@@ -358,12 +358,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
    state.texture_units[0].texture = screen_info.display_texture;
    state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
    // Workaround brigthness problems in SMO by enabling sRGB in the final output
-    // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
+    // if it has been used in the frame
+    // Needed because of this bug in QT
+    // QTBUG-50987
    state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
    state.Apply();
-    glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
+    glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-    // Restore default state
+    // restore default state
    state.framebuffer_srgb.enabled = false;
    state.texture_units[0].texture = 0;
    state.Apply();
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -1,206 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-#include <set>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_header.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-namespace {
-
-/// Merges exit method of two parallel branches.
-constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
-    if (a == ExitMethod::Undetermined) {
-        return b;
-    }
-    if (b == ExitMethod::Undetermined) {
-        return a;
-    }
-    if (a == b) {
-        return a;
-    }
-    return ExitMethod::Conditional;
-}
-
-/**
- * Returns whether the instruction at the specified offset is a 'sched' instruction.
- * Sched instructions always appear before a sequence of 3 instructions.
- */
-constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
-    constexpr u32 SchedPeriod = 4;
-    u32 absolute_offset = offset - main_offset;
-
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-} // namespace
-
-void ShaderIR::Decode() {
-    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
-
-    std::set<u32> labels;
-    const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
-    if (exit_method != ExitMethod::AlwaysEnd) {
-        UNREACHABLE_MSG("Program does not always end");
-    }
-
-    if (labels.empty()) {
-        basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
-        return;
-    }
-
-    labels.insert(main_offset);
-
-    for (const u32 label : labels) {
-        const auto next_it = labels.lower_bound(label + 1);
-        const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
-
-        basic_blocks.insert({label, DecodeRange(label, next_label)});
-    }
-}
-
-ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
-    const auto [iter, inserted] =
-        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
-    ExitMethod& exit_method = iter->second;
-    if (!inserted)
-        return exit_method;
-
-    for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
-        coverage_begin = std::min(coverage_begin, offset);
-        coverage_end = std::max(coverage_end, offset + 1);
-
-        const Instruction instr = {program_code[offset]};
-        const auto opcode = OpCode::Decode(instr);
-        if (!opcode)
-            continue;
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::EXIT: {
-            // The EXIT instruction can be predicated, which means that the shader can conditionally
-            // end on this instruction. We have to consider the case where the condition is not met
-            // and check the exit method of that other basic block.
-            using Tegra::Shader::Pred;
-            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                return exit_method = ExitMethod::AlwaysEnd;
-            } else {
-                const ExitMethod not_met = Scan(offset + 1, end, labels);
-                return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
-            }
-        }
-        case OpCode::Id::BRA: {
-            const u32 target = offset + instr.bra.GetBranchTarget();
-            labels.insert(target);
-            const ExitMethod no_jmp = Scan(offset + 1, end, labels);
-            const ExitMethod jmp = Scan(target, end, labels);
-            return exit_method = ParallelExit(no_jmp, jmp);
-        }
-        case OpCode::Id::SSY:
-        case OpCode::Id::PBK: {
-            // The SSY and PBK use a similar encoding as the BRA instruction.
-            UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
-                                 "Constant buffer branching is not supported");
-            const u32 target = offset + instr.bra.GetBranchTarget();
-            labels.insert(target);
-            // Continue scanning for an exit method.
-            break;
-        }
-        }
-    }
-    return exit_method = ExitMethod::AlwaysReturn;
-}
-
-BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
-    BasicBlock basic_block;
-    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
-        pc = DecodeInstr(basic_block, pc);
-    }
-    return basic_block;
-}
-
-u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
-    // Ignore sched instructions when generating code.
-    if (IsSchedInstruction(pc, main_offset)) {
-        return pc + 1;
-    }
-
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    // Decoding failure
-    if (!opcode) {
-        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
-        return pc + 1;
-    }
-
-    bb.push_back(
-        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
-
-    using Tegra::Shader::Pred;
-    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
-                         "NeverExecute predicate not implemented");
-
-    static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
-        decoders = {
-            {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
-            {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
-            {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
-            {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
-            {OpCode::Type::Shift, &ShaderIR::DecodeShift},
-            {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
-            {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
-            {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
-            {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
-            {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
-            {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
-            {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
-            {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
-            {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
-            {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
-            {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
-            {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
-            {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
-            {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
-            {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
-            {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
-            {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
-            {OpCode::Type::Video, &ShaderIR::DecodeVideo},
-            {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
-        };
-
-    std::vector<Node> tmp_block;
-    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
-        pc = (this->*decoder->second)(tmp_block, bb, pc);
-    } else {
-        pc = DecodeOther(tmp_block, bb, pc);
-    }
-
-    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
-    // executed.
-    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
-    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-
-    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
-        bb.push_back(
-            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
-    } else {
-        for (auto& node : tmp_block) {
-            bb.push_back(std::move(node));
-        }
-    }
-
-    return pc + 1;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -1,155 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::SubOp;
-
-u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    Node op_a = GetRegister(instr.gpr8);
-
-    Node op_b = [&]() -> Node {
-        if (instr.is_b_imm) {
-            return GetImmediate19(instr);
-        } else if (instr.is_b_gpr) {
-            return GetRegister(instr.gpr20);
-        } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-        }
-    }();
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::MOV_C:
-    case OpCode::Id::MOV_R: {
-        // MOV does not have neither 'abs' nor 'neg' bits.
-        SetRegister(bb, instr.gpr0, op_b);
-        break;
-    }
-    case OpCode::Id::FMUL_C:
-    case OpCode::Id::FMUL_R:
-    case OpCode::Id::FMUL_IMM: {
-        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
-        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
-                             instr.fmul.tab5cb8_2.Value());
-        UNIMPLEMENTED_IF_MSG(
-            instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
-            instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
-
-        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
-
-        // TODO(Rodrigo): Should precise be used when there's a postfactor?
-        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
-
-        if (instr.fmul.postfactor != 0) {
-            auto postfactor = static_cast<s32>(instr.fmul.postfactor);
-
-            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
-            // logic.
-            if (postfactor >= 4) {
-                postfactor = 7 - postfactor;
-            } else {
-                postfactor = 0 - postfactor;
-            }
-
-            if (postfactor > 0) {
-                value = Operation(OperationCode::FMul, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << postfactor)));
-            } else {
-                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << -postfactor)));
-            }
-        }
-
-        value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
-        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::FADD_C:
-    case OpCode::Id::FADD_R:
-    case OpCode::Id::FADD_IMM: {
-        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
-        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
-        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
-        value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
-        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::MUFU: {
-        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
-
-        Node value = [&]() {
-            switch (instr.sub_op) {
-            case SubOp::Cos:
-                return Operation(OperationCode::FCos, PRECISE, op_a);
-            case SubOp::Sin:
-                return Operation(OperationCode::FSin, PRECISE, op_a);
-            case SubOp::Ex2:
-                return Operation(OperationCode::FExp2, PRECISE, op_a);
-            case SubOp::Lg2:
-                return Operation(OperationCode::FLog2, PRECISE, op_a);
-            case SubOp::Rcp:
-                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
-            case SubOp::Rsq:
-                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
-            case SubOp::Sqrt:
-                return Operation(OperationCode::FSqrt, PRECISE, op_a);
-            default:
-                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
-                                  static_cast<unsigned>(instr.sub_op.Value()));
-                return Immediate(0);
-            }
-        }();
-        value = GetSaturatedFloat(value, instr.alu.saturate_d);
-
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::FMNMX_C:
-    case OpCode::Id::FMNMX_R:
-    case OpCode::Id::FMNMX_IMM: {
-        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
-        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-
-        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
-
-        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
-        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
-        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
-        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::RRO_C:
-    case OpCode::Id::RRO_R:
-    case OpCode::Id::RRO_IMM: {
-        // Currently RRO is only implemented as a register move.
-        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
-        SetRegister(bb, instr.gpr0, op_b);
-        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
-    }
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -1,70 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
-        opcode->get().GetId() == OpCode::Id::HADD2_R) {
-        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
-    }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
-
-    const bool negate_a =
-        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
-    const bool negate_b =
-        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
-
-    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
-
-    // instr.alu_half.type_a
-
-    Node op_b = [&]() {
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HADD2_C:
-        case OpCode::Id::HMUL2_C:
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-        case OpCode::Id::HADD2_R:
-        case OpCode::Id::HMUL2_R:
-            return GetRegister(instr.gpr20);
-        default:
-            UNREACHABLE();
-            return Immediate(0);
-        }
-    }();
-    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
-
-    Node value = [&]() {
-        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}};
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HADD2_C:
-        case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, meta, op_a, op_b);
-        case OpCode::Id::HMUL2_C:
-        case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, meta, op_a, op_b);
-        default:
-            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
-            return Immediate(0);
-        }
-    }();
-    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
-
-    SetRegister(bb, instr.gpr0, value);
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -1,51 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
-        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
-    } else {
-        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
-    }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
-                         "Half float immediate saturation not implemented");
-
-    Node op_a = GetRegister(instr.gpr8);
-    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
-
-    const Node op_b = UnpackHalfImmediate(instr, true);
-
-    Node value = [&]() {
-        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
-        switch (opcode->get().GetId()) {
-        case OpCode::Id::HADD2_IMM:
-            return Operation(OperationCode::HAdd, meta, op_a, op_b);
-        case OpCode::Id::HMUL2_IMM:
-            return Operation(OperationCode::HMul, meta, op_a, op_b);
-        default:
-            UNREACHABLE();
-            return Immediate(0);
-        }
-    }();
-    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
-
-    SetRegister(bb, instr.gpr0, value);
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -1,52 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-
-u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::MOV32_IMM: {
-        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
-        break;
-    }
-    case OpCode::Id::FMUL32_IMM: {
-        Node value =
-            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
-        value = GetSaturatedFloat(value, instr.fmul32.saturate);
-
-        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::FADD32I: {
-        const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
-                                                instr.fadd32i.negate_a);
-        const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
-                                                instr.fadd32i.negate_b);
-
-        const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
-        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
-                          opcode->get().GetName());
-    }
-
-    return pc;
-}
-
-} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -1,287 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Shader::IAdd3Height;
-using Tegra::Shader::Instruction;
-using Tegra::Shader::OpCode;
-using Tegra::Shader::Pred;
-using Tegra::Shader::Register;
-
-// Decodes the integer-arithmetic instruction stored at program_code[pc] and emits its result
-// through SetRegister / SetInternalFlagsFromInteger (or the WriteLogicOperation and
-// WriteLop3Instruction helpers) on bb.
-//
-// Operand A is read from gpr8. Operand B is an immediate, gpr20 or a const buffer entry,
-// depending on the is_b_imm / is_b_gpr bits of the instruction. Unsupported encodings are
-// reported through the UNIMPLEMENTED_* macros. Returns pc unchanged; `code` is not read here.
-u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
-    const Instruction instr = {program_code[pc]};
-    const auto opcode = OpCode::Decode(instr);
-
-    Node op_a = GetRegister(instr.gpr8);
-    // Pick the source of operand B from the encoding: immediate, register or const buffer.
-    Node op_b = [&]() {
-        if (instr.is_b_imm) {
-            return Immediate(instr.alu.GetSignedImm20_20());
-        } else if (instr.is_b_gpr) {
-            return GetRegister(instr.gpr20);
-        } else {
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
-        }
-    }();
-
-    switch (opcode->get().GetId()) {
-    case OpCode::Id::IADD_C:
-    case OpCode::Id::IADD_R:
-    case OpCode::Id::IADD_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
-
-        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
-        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
-        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
-
-        // NOTE(review): this reads op_32.generates_cc, while every other flag-setting case in this
-        // switch reads instr.generates_cc - confirm this is the intended bit for IADD.
-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::IADD3_C:
-    case OpCode::Id::IADD3_R:
-    case OpCode::Id::IADD3_IMM: {
-        // Three-operand add; the third operand always comes from gpr39.
-        Node op_c = GetRegister(instr.gpr39);
-
-        // Applies a per-operand height selector: pass the value through unchanged, or take the
-        // BitfieldExtract(value, 0, 16) / BitfieldExtract(value, 16, 16) half word.
-        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
-            switch (height) {
-            case IAdd3Height::None:
-                return value;
-            case IAdd3Height::LowerHalfWord:
-                return BitfieldExtract(value, 0, 16);
-            case IAdd3Height::UpperHalfWord:
-                return BitfieldExtract(value, 16, 16);
-            default:
-                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
-                return Immediate(0);
-            }
-        };
-
-        // Height selectors are only applied for the register form.
-        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
-            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
-            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
-            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
-        }
-
-        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
-        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
-        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
-
-        const Node value = [&]() {
-            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
-            // Non-register forms are a plain (a + b) + c.
-            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
-                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
-            }
-            // Register form: (a + b) may be shifted by 16 in either direction before c is added.
-            const Node shifted = [&]() {
-                switch (instr.iadd3.mode) {
-                case Tegra::Shader::IAdd3Mode::RightShift:
-                    // TODO(tech4me): According to
-                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
-                    // The addition between op_a and op_b should be done in uint33, more
-                    // investigation required
-                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
-                                     Immediate(16));
-                case Tegra::Shader::IAdd3Mode::LeftShift:
-                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
-                                     Immediate(16));
-                default:
-                    return add_ab;
-                }
-            }();
-            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
-        }();
-
-        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::ISCADD_C:
-    case OpCode::Id::ISCADD_R:
-    case OpCode::Id::ISCADD_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in ISCADD is not implemented");
-
-        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
-        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
-
-        // value = (op_a << shift_amount) + op_b
-        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
-        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
-        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
-
-        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::POPC_C:
-    case OpCode::Id::POPC_R:
-    case OpCode::Id::POPC_IMM: {
-        // Bit count of op_b, or of ~op_b when popc.invert is set. op_a is not used here.
-        if (instr.popc.invert) {
-            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
-        }
-        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::SEL_C:
-    case OpCode::Id::SEL_R:
-    case OpCode::Id::SEL_IMM: {
-        // The (optionally negated) predicate is handed to a Select operation with op_a and op_b.
-        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
-        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::LOP_C:
-    case OpCode::Id::LOP_R:
-    case OpCode::Id::LOP_IMM: {
-        // Optionally bitwise-invert either operand, then delegate to WriteLogicOperation.
-        if (instr.alu.lop.invert_a)
-            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
-        if (instr.alu.lop.invert_b)
-            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
-
-        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
-                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
-                            instr.generates_cc);
-        break;
-    }
-    case OpCode::Id::LOP3_C:
-    case OpCode::Id::LOP3_R:
-    case OpCode::Id::LOP3_IMM: {
-        const Node op_c = GetRegister(instr.gpr39);
-        // The LUT immediate is read with GetImmLut28() for the register form and with
-        // GetImmLut48() for the other forms.
-        const Node lut = [&]() {
-            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
-                return Immediate(instr.alu.lop3.GetImmLut28());
-            } else {
-                return Immediate(instr.alu.lop3.GetImmLut48());
-            }
-        }();
-
-        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
-        break;
-    }
-    case OpCode::Id::IMNMX_C:
-    case OpCode::Id::IMNMX_R:
-    case OpCode::Id::IMNMX_IMM: {
-        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-
-        const bool is_signed = instr.imnmx.is_signed;
-
-        // Both the min and the max node are built; the predicate chooses between them via Select.
-        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
-        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
-        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
-        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
-
-        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
-        break;
-    }
-    case OpCode::Id::LEA_R2:
-    case OpCode::Id::LEA_R1:
-    case OpCode::Id::LEA_IMM:
-    case OpCode::Id::LEA_RZ:
-    case OpCode::Id::LEA_HI: {
-        // Each LEA encoding supplies its own three operands. These bindings shadow the op_a and
-        // op_b computed at the top of the function for the rest of this case.
-        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::LEA_R2: {
-                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
-                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
-            }
-
-            case OpCode::Id::LEA_R1: {
-                const bool neg = instr.lea.r1.neg != 0;
-                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
-                        GetRegister(instr.gpr20),
-                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
-            }
-
-            case OpCode::Id::LEA_IMM: {
-                const bool neg = instr.lea.imm.neg != 0;
-                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
-                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
-                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
-            }
-
-            case OpCode::Id::LEA_RZ: {
-                const bool neg = instr.lea.rz.neg != 0;
-                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
-                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
-                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
-            }
-
-            // LEA_HI has no dedicated handling: it shares the default path, which reports the
-            // opcode as unimplemented and returns operands built from the imm encoding fields.
-            case OpCode::Id::LEA_HI:
-            default:
-                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
-
-                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
-                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
-            }
-        }();
-
-        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
-                             "Unhandled LEA Predicate");
-
-        // value = op_a + op_b * (1 << op_c)
-        const Node shifted_c =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
-        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
-        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
-
-        SetRegister(bb, instr.gpr0, value);
-
-        break;
-    }
-    default:
-        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
-    }
-
-    return pc;
-}
-
-// Emits the IR for a three-input logic operation driven by a truth table (LOP3).
-//
-// For each of the 32 bit positions i, bit i of op_a, op_b and op_c is packed into a 3-bit index
-// (op_a -> bit 2, op_b -> bit 1, op_c -> bit 0). Bit `index` of imm_lut then becomes bit i of the
-// result. The combined value is passed to SetInternalFlagsFromInteger together with sets_cc and
-// written to dest through SetRegister.
-void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
-                                    Node imm_lut, bool sets_cc) {
-    constexpr u32 lop_iterations = 32;
-    const Node one = Immediate(1);
-    const Node two = Immediate(2);
-
-    Node value{};
-    for (u32 i = 0; i < lop_iterations; ++i) {
-        const Node shift_amount = Immediate(i);
-
-        // Isolate bit i of each operand and move it to its slot in the LUT index.
-        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
-        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
-
-        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
-        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
-        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
-
-        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
-        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
-        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
-
-        // The three fields occupy disjoint bit positions, so they have to be merged with OR.
-        // ANDing them always yields zero, which would make every result bit equal to bit 0 of
-        // the LUT regardless of the operands.
-        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
-        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);
-
-        // Look up the truth-table entry selected by the packed index.
-        const Node shifted_bit =
-            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
-        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
-
-        // Place the looked-up bit at position i of the result.
-        const Node right =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
-
-        // The first iteration seeds the accumulator; later ones OR their bit into it.
-        if (i > 0) {
-            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
-        } else {
-            value = right;
-        }
-    }
-
-    SetInternalFlagsFromInteger(bb, value, sets_cc);
-    SetRegister(bb, dest, value);
-}
-
-} // namespace VideoCommon::Shader
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
bunnei	c7f9124824	gpu_thread: Handle cache management before DMA commands.	2019-01-12 02:41:22 -05:00
bunnei	19ce7abf07	configure_graphics: Disallow changing use_asynchronous_gpu_emulation while running.	2019-01-12 01:36:47 -05:00
bunnei	0bad8394e6	gpu: Move flush and invalidate to GPU thread.	2019-01-12 01:36:47 -05:00
bunnei	9799dcdb7f	gl_rasterizer: Flush and invalidate when GPU thread is idle.	2019-01-12 01:36:46 -05:00
bunnei	1690ea9902	gpu: Move command processing to another thread.	2019-01-12 01:36:46 -05:00
bunnei	208c599463	gpu: Refactor command and swap buffers interface for asynch.	2019-01-12 01:36:46 -05:00
bunnei	85b2c3b051	gpu: Refactor to take RendererBase instead of RasterizerInterface.	2019-01-12 01:36:45 -05:00
bunnei	7b2041a32e	frontend: Refactor ScopeAcquireWindowContext out of renderer_opengl.	2019-01-12 01:36:45 -05:00
bunnei	5daa646d62	settings: Add new graphics setting for use_asynchronous_gpu_emulation.	2019-01-12 01:36:45 -05:00
bunnei	6e589d9d59	memory: Remove HLE lock on Read/Write.	2019-01-12 01:36:45 -05:00
bunnei	353d066264	core: Set is_powered_on before GPU is initialized.	2019-01-12 01:36:44 -05:00