Add UUID validation check

Co-authored-by: VolcaEM <63682805+VolcaEM@users.noreply.github.com>
Validate UUID and fix returns
2020-05-24 01:13:49 +12:00 · 2020-05-18 11:10:12 +12:00 · 2020-05-18 10:37:25 +12:00 · 2020-05-18 01:01:40 +12:00 · 2020-05-18 00:55:32 +12:00 · 2020-05-18 00:08:41 +12:00
43 changed files with 420 additions and 814 deletions
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -12,17 +12,17 @@ namespace SystemVersionData {
 // This section should reflect the best system version to describe yuzu's HLE api.
 // TODO(DarkLordZach): Update when HLE gets better.

-constexpr u8 VERSION_MAJOR = 10;
-constexpr u8 VERSION_MINOR = 0;
-constexpr u8 VERSION_MICRO = 2;
+constexpr u8 VERSION_MAJOR = 5;
+constexpr u8 VERSION_MINOR = 1;
+constexpr u8 VERSION_MICRO = 0;

-constexpr u8 REVISION_MAJOR = 1;
+constexpr u8 REVISION_MAJOR = 3;
 constexpr u8 REVISION_MINOR = 0;

 constexpr char PLATFORM_STRING[] = "NX";
-constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc";
-constexpr char DISPLAY_VERSION[] = "10.0.2";
-constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0";
+constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
+constexpr char DISPLAY_VERSION[] = "5.1.0";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";

 } // namespace SystemVersionData

--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -38,11 +38,10 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing,
    cur_entry.sampling_number = last_entry.sampling_number + 1;
    cur_entry.sampling_number2 = cur_entry.sampling_number;

-    cur_entry.key.fill(0);
-    cur_entry.modifier = 0;
-
    for (std::size_t i = 0; i < keyboard_keys.size(); ++i) {
-        cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE));
+        for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) {
+            cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k);
+        }
    }

    for (std::size_t i = 0; i < keyboard_mods.size(); ++i) {
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -177,8 +177,7 @@ private:
    void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_NIFM, "called");

-        ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c,
-                   "SfNetworkProfileData is not the correct size");
+        ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size");
        u128 uuid{};
        auto buffer = ctx.ReadBuffer();
        std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -112,7 +112,6 @@ void LogSettings() {
    LogSetting("Renderer_UseAsynchronousGpuEmulation",
               Settings::values.use_asynchronous_gpu_emulation);
    LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
-    LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
    LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
    LogSetting("Audio_OutputEngine", Settings::values.sink_id);
    LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -446,7 +446,6 @@ struct Values {
    GPUAccuracy gpu_accuracy;
    bool use_asynchronous_gpu_emulation;
    bool use_vsync;
-    bool use_assembly_shaders;
    bool force_30fps_mode;
    bool use_fast_gpu_time;

--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -201,7 +201,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
             Settings::values.use_asynchronous_gpu_emulation);
    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
-    AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
    AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
 }

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_library(video_core STATIC
    buffer_cache/buffer_block.h
    buffer_cache/buffer_cache.h
-    buffer_cache/map_interval.cpp
    buffer_cache/map_interval.h
    dirty_flags.cpp
    dirty_flags.h
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,12 +12,11 @@
 #include <utility>
 #include <vector>

-#include <boost/container/small_vector.hpp>
+#include <boost/icl/interval_map.hpp>
 #include <boost/icl/interval_set.hpp>
-#include <boost/intrusive/set.hpp>
+#include <boost/range/iterator_range.hpp>

 #include "common/alignment.h"
-#include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
@@ -30,12 +29,10 @@

 namespace VideoCommon {

+using MapInterval = std::shared_ptr<MapIntervalBase>;
+
 template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
 class BufferCache {
-    using IntervalSet = boost::icl::interval_set<VAddr>;
-    using IntervalType = typename IntervalSet::interval_type;
-    using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
-
 public:
    using BufferInfo = std::pair<BufferType, u64>;

@@ -43,12 +40,14 @@ public:
                            bool is_written = false, bool use_fast_cbuf = false) {
        std::lock_guard lock{mutex};

-        const auto& memory_manager = system.GPU().MemoryManager();
-        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
        if (!cpu_addr_opt) {
            return {GetEmptyBuffer(size), 0};
        }
-        const VAddr cpu_addr = *cpu_addr_opt;
+
+        VAddr cpu_addr = *cpu_addr_opt;

        // Cache management is a big overhead, so only cache entries with a given size.
        // TODO: Figure out which size is the best for given games.
@@ -56,45 +55,38 @@ public:
        if (use_fast_cbuf || size < max_stream_size) {
            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
                auto& memory_manager = system.GPU().MemoryManager();
-                const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
                if (use_fast_cbuf) {
-                    u8* dest;
-                    if (is_granular) {
-                        dest = memory_manager.GetPointer(gpu_addr);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
                    } else {
                        staging_buffer.resize(size);
-                        dest = staging_buffer.data();
-                        memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
                    }
-                    return ConstBufferUpload(dest, size);
-                }
-                if (is_granular) {
-                    u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
-                    return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
-                        std::memcpy(dest, host_ptr, size);
-                    });
                } else {
-                    return StreamBufferUpload(
-                        size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
-                            memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
-                        });
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                }
            }
        }

-        OwnerBuffer block = GetBlock(cpu_addr, size);
-        MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
-        if (!map) {
-            return {GetEmptyBuffer(size), 0};
-        }
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
        if (is_written) {
            map->MarkAsModified(true, GetModifiedTicks());
            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
                MarkForAsyncFlush(map);
            }
-            if (!map->is_written) {
-                map->is_written = true;
-                MarkRegionAsWritten(map->start, map->end - 1);
+            if (!map->IsWritten()) {
+                map->MarkAsWritten(true);
+                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
            }
        }

@@ -105,9 +97,7 @@ public:
    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
                                std::size_t alignment = 4) {
        std::lock_guard lock{mutex};
-        return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
-            std::memcpy(dest, raw_pointer, size);
-        });
+        return StreamBufferUpload(raw_pointer, size, alignment);
    }

    void Map(std::size_t max_size) {
@@ -142,11 +132,12 @@ public:
    void FlushRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

-        VectorMapInterval objects = GetMapsInRange(addr, size);
-        std::sort(objects.begin(), objects.end(),
-                  [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
-        for (MapInterval* object : objects) {
-            if (object->is_modified && object->is_registered) {
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
+            return a->GetModificationTick() < b->GetModificationTick();
+        });
+        for (auto& object : objects) {
+            if (object->IsModified() && object->IsRegistered()) {
                mutex.unlock();
                FlushMap(object);
                mutex.lock();
@@ -157,9 +148,9 @@ public:
    bool MustFlushRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

-        const VectorMapInterval objects = GetMapsInRange(addr, size);
-        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
-            return map->is_modified && map->is_registered;
+        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
+            return map->IsModified() && map->IsRegistered();
        });
    }

@@ -167,8 +158,9 @@ public:
    void InvalidateRegion(VAddr addr, u64 size) {
        std::lock_guard lock{mutex};

-        for (auto& object : GetMapsInRange(addr, size)) {
-            if (object->is_registered) {
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        for (auto& object : objects) {
+            if (object->IsRegistered()) {
                Unregister(object);
            }
        }
@@ -177,10 +169,10 @@ public:
    void OnCPUWrite(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

-        for (MapInterval* object : GetMapsInRange(addr, size)) {
-            if (object->is_memory_marked && object->is_registered) {
+        for (const auto& object : GetMapsInRange(addr, size)) {
+            if (object->IsMemoryMarked() && object->IsRegistered()) {
                UnmarkMemory(object);
-                object->is_sync_pending = true;
+                object->SetSyncPending(true);
                marked_for_unregister.emplace_back(object);
            }
        }
@@ -189,9 +181,9 @@ public:
    void SyncGuestHost() {
        std::lock_guard lock{mutex};

-        for (auto& object : marked_for_unregister) {
-            if (object->is_registered) {
-                object->is_sync_pending = false;
+        for (const auto& object : marked_for_unregister) {
+            if (object->IsRegistered()) {
+                object->SetSyncPending(false);
                Unregister(object);
            }
        }
@@ -200,9 +192,9 @@ public:

    void CommitAsyncFlushes() {
        if (uncommitted_flushes) {
-            auto commit_list = std::make_shared<std::list<MapInterval*>>();
-            for (MapInterval* map : *uncommitted_flushes) {
-                if (map->is_registered && map->is_modified) {
+            auto commit_list = std::make_shared<std::list<MapInterval>>();
+            for (auto& map : *uncommitted_flushes) {
+                if (map->IsRegistered() && map->IsModified()) {
                    // TODO(Blinkhawk): Implement backend asynchronous flushing
                    // AsyncFlushMap(map)
                    commit_list->push_back(map);
@@ -236,8 +228,8 @@ public:
            committed_flushes.pop_front();
            return;
        }
-        for (MapInterval* map : *flush_list) {
-            if (map->is_registered) {
+        for (MapInterval& map : *flush_list) {
+            if (map->IsRegistered()) {
                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
                FlushMap(map);
            }
@@ -273,60 +265,61 @@ protected:
    }

    /// Register an object into the cache
-    MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
-        const VAddr cpu_addr = new_map.start;
+    void Register(const MapInterval& new_map, bool inherit_written = false) {
+        const VAddr cpu_addr = new_map->GetStart();
        if (!cpu_addr) {
            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
-                         new_map.gpu_addr);
-            return nullptr;
-        }
-        const std::size_t size = new_map.end - new_map.start;
-        new_map.is_registered = true;
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
-        new_map.is_memory_marked = true;
-        if (inherit_written) {
-            MarkRegionAsWritten(new_map.start, new_map.end - 1);
-            new_map.is_written = true;
-        }
-        MapInterval* const storage = mapped_addresses_allocator.Allocate();
-        *storage = new_map;
-        mapped_addresses.insert(*storage);
-        return storage;
-    }
-
-    void UnmarkMemory(MapInterval* map) {
-        if (!map->is_memory_marked) {
+                         new_map->GetGpuAddress());
            return;
        }
-        const std::size_t size = map->end - map->start;
-        rasterizer.UpdatePagesCachedCount(map->start, size, -1);
-        map->is_memory_marked = false;
+        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
+        new_map->MarkAsRegistered(true);
+        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
+        mapped_addresses.insert({interval, new_map});
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        new_map->SetMemoryMarked(true);
+        if (inherit_written) {
+            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
+            new_map->MarkAsWritten(true);
+        }
+    }
+
+    void UnmarkMemory(const MapInterval& map) {
+        if (!map->IsMemoryMarked()) {
+            return;
+        }
+        const std::size_t size = map->GetEnd() - map->GetStart();
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
+        map->SetMemoryMarked(false);
    }

    /// Unregisters an object from the cache
-    void Unregister(MapInterval* map) {
+    void Unregister(const MapInterval& map) {
        UnmarkMemory(map);
-        map->is_registered = false;
-        if (map->is_sync_pending) {
-            map->is_sync_pending = false;
+        map->MarkAsRegistered(false);
+        if (map->IsSyncPending()) {
            marked_for_unregister.remove(map);
+            map->SetSyncPending(false);
        }
-        if (map->is_written) {
-            UnmarkRegionAsWritten(map->start, map->end - 1);
+        if (map->IsWritten()) {
+            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
        }
-        const auto it = mapped_addresses.find(*map);
-        ASSERT(it != mapped_addresses.end());
-        mapped_addresses.erase(it);
-        mapped_addresses_allocator.Release(map);
+        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
+        mapped_addresses.erase(delete_interval);
    }

 private:
-    MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
-                            std::size_t size) {
-        const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
+        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
+    }
+
+    MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
        if (overlaps.empty()) {
            auto& memory_manager = system.GPU().MemoryManager();
            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
            if (memory_manager.IsGranularRange(gpu_addr, size)) {
                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
@@ -335,12 +328,13 @@ private:
                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
            }
-            return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
+            Register(new_map);
+            return new_map;
        }

        const VAddr cpu_addr_end = cpu_addr + size;
        if (overlaps.size() == 1) {
-            MapInterval* const current_map = overlaps[0];
+            MapInterval& current_map = overlaps[0];
            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                return current_map;
            }
@@ -350,39 +344,35 @@ private:
        bool write_inheritance = false;
        bool modified_inheritance = false;
        // Calculate new buffer parameters
-        for (MapInterval* overlap : overlaps) {
-            new_start = std::min(overlap->start, new_start);
-            new_end = std::max(overlap->end, new_end);
-            write_inheritance |= overlap->is_written;
-            modified_inheritance |= overlap->is_modified;
+        for (auto& overlap : overlaps) {
+            new_start = std::min(overlap->GetStart(), new_start);
+            new_end = std::max(overlap->GetEnd(), new_end);
+            write_inheritance |= overlap->IsWritten();
+            modified_inheritance |= overlap->IsModified();
        }
        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
        for (auto& overlap : overlaps) {
            Unregister(overlap);
        }
        UpdateBlock(block, new_start, new_end, overlaps);
-
-        const MapInterval new_map{new_start, new_end, new_gpu_addr};
-        MapInterval* const map = Register(new_map, write_inheritance);
-        if (!map) {
-            return nullptr;
-        }
+        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
        if (modified_inheritance) {
-            map->MarkAsModified(true, GetModifiedTicks());
+            new_map->MarkAsModified(true, GetModifiedTicks());
            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
-                MarkForAsyncFlush(map);
+                MarkForAsyncFlush(new_map);
            }
        }
-        return map;
+        Register(new_map, write_inheritance);
+        return new_map;
    }

    void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
-                     const VectorMapInterval& overlaps) {
+                     std::vector<MapInterval>& overlaps) {
        const IntervalType base_interval{start, end};
        IntervalSet interval_set{};
        interval_set.add(base_interval);
        for (auto& overlap : overlaps) {
-            const IntervalType subtract{overlap->start, overlap->end};
+            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
            interval_set.subtract(subtract);
        }
        for (auto& interval : interval_set) {
@@ -396,24 +386,18 @@ private:
        }
    }

-    VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
-        VectorMapInterval result;
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
        if (size == 0) {
-            return result;
+            return {};
        }

-        const VAddr addr_end = addr + size;
-        auto it = mapped_addresses.lower_bound(addr);
-        if (it != mapped_addresses.begin()) {
-            --it;
+        std::vector<MapInterval> objects{};
+        const IntervalType interval{addr, addr + size};
+        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
+            objects.push_back(pair.second);
        }
-        while (it != mapped_addresses.end() && it->start < addr_end) {
-            if (it->Overlaps(addr, addr_end)) {
-                result.push_back(&*it);
-            }
-            ++it;
-        }
-        return result;
+
+        return objects;
    }

    /// Returns a ticks counter used for tracking when cached objects were last modified
@@ -421,20 +405,20 @@ private:
        return ++modified_ticks;
    }

-    void FlushMap(MapInterval* map) {
-        const std::size_t size = map->end - map->start;
-        OwnerBuffer block = blocks[map->start >> block_page_bits];
+    void FlushMap(MapInterval map) {
+        std::size_t size = map->GetEnd() - map->GetStart();
+        OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
        staging_buffer.resize(size);
-        DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
-        system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
        map->MarkAsModified(false, 0);
    }

-    template <typename Callable>
-    BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
+    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
+                                  std::size_t alignment) {
        AlignBuffer(alignment);
        const std::size_t uploaded_offset = buffer_offset;
-        callable(buffer_ptr);
+        std::memcpy(buffer_ptr, raw_pointer, size);

        buffer_ptr += size;
        buffer_offset += size;
@@ -531,7 +515,7 @@ private:
            } else {
                written_pages[page_start] = 1;
            }
-            ++page_start;
+            page_start++;
        }
    }

@@ -547,7 +531,7 @@ private:
                    written_pages.erase(it);
                }
            }
-            ++page_start;
+            page_start++;
        }
    }

@@ -558,14 +542,14 @@ private:
            if (written_pages.count(page_start) > 0) {
                return true;
            }
-            ++page_start;
+            page_start++;
        }
        return false;
    }

-    void MarkForAsyncFlush(MapInterval* map) {
+    void MarkForAsyncFlush(MapInterval& map) {
        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
+            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
        }
        uncommitted_flushes->insert(map);
    }
@@ -582,9 +566,10 @@ private:
    u64 buffer_offset = 0;
    u64 buffer_offset_base = 0;

-    MapIntervalAllocator mapped_addresses_allocator;
-    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
-        mapped_addresses;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
+    using IntervalType = typename IntervalCache::interval_type;
+    IntervalCache mapped_addresses;

    static constexpr u64 write_page_bit = 11;
    std::unordered_map<u64, u32> written_pages;
@@ -598,10 +583,10 @@ private:
    u64 modified_ticks = 0;

    std::vector<u8> staging_buffer;
-    std::list<MapInterval*> marked_for_unregister;
+    std::list<MapInterval> marked_for_unregister;

-    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
-    std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
+    std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;

    std::recursive_mutex mutex;
 };
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
-    FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
-    *new_chunk = std::make_unique<Chunk>();
-    FillFreeList(**new_chunk);
-    new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
-    const std::size_t old_size = free_list.size();
-    free_list.resize(old_size + chunk.data.size());
-    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
-                   [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,89 +4,104 @@

 #pragma once

-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
 #include "common/common_types.h"
 #include "video_core/gpu.h"

 namespace VideoCommon {

-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
-    MapInterval() = default;
-
-    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
-    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
-        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
-    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
-        return start <= other_start && other_end <= end;
-    }
-
-    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
-        return start < other_end && other_start < end;
-    }
-
-    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
-        is_modified = is_modified_;
-        ticks = ticks_;
-    }
-
-    boost::intrusive::set_member_hook<> member_hook_;
-    VAddr start = 0;
-    VAddr end = 0;
-    GPUVAddr gpu_addr = 0;
-    u64 ticks = 0;
-    bool is_written = false;
-    bool is_modified = false;
-    bool is_registered = false;
-    bool is_memory_marked = false;
-    bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
-    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
-        return lhs.start < rhs.start;
-    }
-};
-
-class MapIntervalAllocator {
+class MapIntervalBase {
 public:
-    MapIntervalAllocator();
-    ~MapIntervalAllocator();
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
+        : start{start}, end{end}, gpu_addr{gpu_addr} {}

-    MapInterval* Allocate() {
-        if (free_list.empty()) {
-            AllocateNewChunk();
-        }
-        MapInterval* const interval = free_list.back();
-        free_list.pop_back();
-        return interval;
+    void SetCpuAddress(VAddr new_cpu_addr) {
+        cpu_addr = new_cpu_addr;
    }

-    void Release(MapInterval* interval) {
-        free_list.push_back(interval);
+    VAddr GetCpuAddress() const {
+        return cpu_addr;
+    }
+
+    GPUVAddr GetGpuAddress() const {
+        return gpu_addr;
+    }
+
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return (start <= other_start && other_end <= end);
+    }
+
+    bool operator==(const MapIntervalBase& rhs) const {
+        return std::tie(start, end) == std::tie(rhs.start, rhs.end);
+    }
+
+    bool operator!=(const MapIntervalBase& rhs) const {
+        return !operator==(rhs);
+    }
+
+    void MarkAsRegistered(const bool registered) {
+        is_registered = registered;
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
+    VAddr GetStart() const {
+        return start;
+    }
+
+    VAddr GetEnd() const {
+        return end;
+    }
+
+    void MarkAsModified(const bool is_modified_, const u64 tick) {
+        is_modified = is_modified_;
+        ticks = tick;
+    }
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    u64 GetModificationTick() const {
+        return ticks;
+    }
+
+    void MarkAsWritten(const bool is_written_) {
+        is_written = is_written_;
+    }
+
+    bool IsWritten() const {
+        return is_written;
    }

 private:
-    struct Chunk {
-        std::unique_ptr<Chunk> next;
-        std::array<MapInterval, 0x8000> data;
-    };
-
-    void AllocateNewChunk();
-
-    void FillFreeList(Chunk& chunk);
-
-    std::vector<MapInterval*> free_list;
-    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-
-    Chunk first_chunk;
+    VAddr start;
+    VAddr end;
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr{};
+    bool is_written{};
+    bool is_modified{};
+    bool is_registered{};
+    bool is_memory_marked{};
+    bool is_sync_pending{};
+    u64 ticks{};
 };

 } // namespace VideoCommon
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -56,27 +56,9 @@ public:
        last_modified_ticks = cache.GetModifiedTicks();
    }

-    void SetMemoryMarked(bool is_memory_marked_) {
-        is_memory_marked = is_memory_marked_;
-    }
-
-    bool IsMemoryMarked() const {
-        return is_memory_marked;
-    }
-
-    void SetSyncPending(bool is_sync_pending_) {
-        is_sync_pending = is_sync_pending_;
-    }
-
-    bool IsSyncPending() const {
-        return is_sync_pending;
-    }
-
 private:
    bool is_registered{};      ///< Whether the object is currently registered with the cache
    bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
-    bool is_memory_marked{};   ///< Whether the object is marking rasterizer memory.
-    bool is_sync_pending{};    ///< Whether the object is pending deletion.
    u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
    VAddr cpu_addr{};          ///< Cpu address memory, unique from emulated virtual address space
 };
@@ -112,30 +94,6 @@ public:
        }
    }

-    void OnCPUWrite(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
-            if (object->IsRegistered()) {
-                UnmarkMemory(object);
-                object->SetSyncPending(true);
-                marked_for_unregister.emplace_back(object);
-            }
-        }
-    }
-
-    void SyncGuestHost() {
-        std::lock_guard lock{mutex};
-
-        for (const auto& object : marked_for_unregister) {
-            if (object->IsRegistered()) {
-                object->SetSyncPending(false);
-                Unregister(object);
-            }
-        }
-        marked_for_unregister.clear();
-    }
-
    /// Invalidates everything in the cache
    void InvalidateAll() {
        std::lock_guard lock{mutex};
@@ -162,32 +120,19 @@ protected:
        interval_cache.add({GetInterval(object), ObjectSet{object}});
        map_cache.insert({object->GetCpuAddr(), object});
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
-        object->SetMemoryMarked(true);
    }

    /// Unregisters an object from the cache
    virtual void Unregister(const T& object) {
        std::lock_guard lock{mutex};

-        UnmarkMemory(object);
        object->SetIsRegistered(false);
-        if (object->IsSyncPending()) {
-            marked_for_unregister.remove(object);
-            object->SetSyncPending(false);
-        }
+        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
        const VAddr addr = object->GetCpuAddr();
        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
        map_cache.erase(addr);
    }

-    void UnmarkMemory(const T& object) {
-        if (!object->IsMemoryMarked()) {
-            return;
-        }
-        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        object->SetMemoryMarked(false);
-    }
-
    /// Returns a ticks counter used for tracking when cached objects were last modified
    u64 GetModifiedTicks() {
        std::lock_guard lock{mutex};
@@ -249,5 +194,4 @@ private:
    IntervalCache interval_cache; ///< Cache of objects
    u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
    VideoCore::RasterizerInterface& rasterizer;
-    std::list<T> marked_for_unregister;
 };
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,7 +8,6 @@

 #include "common/assert.h"
 #include "common/microprofile.h"
-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -13,7 +13,6 @@

 #include "common/logging/log.h"
 #include "common/scope_exit.h"
-#include "core/settings.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"

@@ -184,16 +183,10 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_precise_bug = TestPreciseBug();
    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;
-    use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
-                           GLAD_GL_NV_compute_program5;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
    LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
    LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
-
-    if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
-        LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
-    }
 }

 Device::Device(std::nullptr_t) {
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -88,10 +88,6 @@ public:
        return has_fast_buffer_sub_data;
    }

-    bool UseAssemblyShaders() const {
-        return use_assembly_shaders;
-    }
-
 private:
    static bool TestVariableAoffi();
    static bool TestPreciseBug();
@@ -111,7 +107,6 @@ private:
    bool has_precise_bug{};
    bool has_broken_compute{};
    bool has_fast_buffer_sub_data{};
-    bool use_assembly_shaders{};
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,7 +4,6 @@

 #include "common/assert.h"

-#include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"

 namespace OpenGL {
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -94,30 +94,17 @@ void oglEnable(GLenum cap, bool state) {
 } // Anonymous namespace

 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
-                                   const Device& device, ScreenInfo& info,
-                                   ProgramManager& program_manager, StateTracker& state_tracker)
-    : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
-                                                                            state_tracker},
+                                   ScreenInfo& info, GLShader::ProgramManager& program_manager,
+                                   StateTracker& state_tracker)
+    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
    CheckExtensions();
-
-    if (device.UseAssemblyShaders()) {
-        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-        for (const GLuint cbuf : staging_cbufs) {
-            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
-                                 nullptr, 0);
-        }
-    }
 }

-RasterizerOpenGL::~RasterizerOpenGL() {
-    if (device.UseAssemblyShaders()) {
-        glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-    }
-}
+RasterizerOpenGL::~RasterizerOpenGL() {}

 void RasterizerOpenGL::CheckExtensions() {
    if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@@ -243,7 +230,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    MICROPROFILE_SCOPE(OpenGL_Shader);
    auto& gpu = system.GPU().Maxwell3D();
-    std::size_t num_ssbos = 0;
    u32 clip_distances = 0;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -275,14 +261,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {

        Shader shader{shader_cache.GetStageProgram(program)};

-        if (device.UseAssemblyShaders()) {
-            // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
-            // all stages share the same bindings.
-            const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
-            ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
-            num_ssbos += num_stage_ssbos;
-        }
-
        // Stage indices are 0 - 5
        const std::size_t stage = index == 0 ? 0 : index - 1;
        SetupDrawConstBuffers(stage, shader);
@@ -548,7 +526,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    SyncFramebufferSRGB();

    buffer_cache.Acquire();
-    current_cbuf = 0;

    std::size_t buffer_size = CalculateVertexArraysSize();

@@ -558,9 +535,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    }

    // Uniform space for the 5 shader stages
-    buffer_size =
-        Common::AlignUp<std::size_t>(buffer_size, 4) +
-        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
+    buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+                  (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
+                      Maxwell::MaxShaderStage;

    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers *
@@ -581,14 +558,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    }

    // Setup emulation uniform buffer.
-    if (!device.UseAssemblyShaders()) {
-        MaxwellUniformData ubo;
-        ubo.SetFromRegs(gpu);
-        const auto [buffer, offset] =
-            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
-    }
+    GLShader::MaxwellUniformData ubo;
+    ubo.SetFromRegs(gpu);
+    const auto [buffer, offset] =
+        buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
+    glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
+                      static_cast<GLsizeiptr>(sizeof(ubo)));

    // Setup shaders and their used resources.
    texture_cache.GuardSamplers(true);
@@ -660,11 +635,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    }

    buffer_cache.Acquire();
-    current_cbuf = 0;

    auto kernel = shader_cache.GetComputeKernel(code_addr);
    SetupComputeTextures(kernel);
    SetupComputeImages(kernel);
+    program_manager.BindComputeShader(kernel->GetHandle());

    const std::size_t buffer_size =
        Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -677,7 +652,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    buffer_cache.Unmap();

    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    program_manager.BindCompute(kernel->GetHandle());
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
    ++num_queued_commands;
 }
@@ -727,15 +701,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
        return;
    }
    texture_cache.OnCPUWrite(addr, size);
-    shader_cache.OnCPUWrite(addr, size);
+    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }

 void RasterizerOpenGL::SyncGuestHost() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    texture_cache.SyncGuestHost();
    buffer_cache.SyncGuestHost();
-    shader_cache.SyncGuestHost();
 }

 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -838,20 +812,14 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 }

 void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
-    static constexpr std::array PARAMETER_LUT = {
-        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
-
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
    const auto& shader_stage = stages[stage_index];

-    u32 binding =
-        device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
+    u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
    for (const auto& entry : shader->GetEntries().const_buffers) {
        const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
-        SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
+        SetupConstBuffer(binding++, buffer, entry);
    }
 }

@@ -867,21 +835,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
        buffer.address = config.Address();
        buffer.size = config.size;
        buffer.enabled = mask[entry.GetIndex()];
-        SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
+        SetupConstBuffer(binding++, buffer, entry);
    }
 }

-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
-                                        const Tegra::Engines::ConstBufferInfo& buffer,
+void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
                                        const ConstBufferEntry& entry) {
    if (!buffer.enabled) {
        // Set values to zero to unbind buffers
-        if (device.UseAssemblyShaders()) {
-            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
-        } else {
-            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
-                              buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
-        }
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
+                          sizeof(float));
        return;
    }

@@ -890,19 +853,9 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));

    const auto alignment = device.GetUniformBufferAlignment();
-    auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
-                                                    device.HasFastBufferSubData());
-    if (!device.UseAssemblyShaders()) {
-        glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
-        return;
-    }
-    if (offset != 0) {
-        const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
-        glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
-        cbuf = staging_cbuf;
-        offset = 0;
-    }
-    glBindBufferRangeNV(stage, binding, cbuf, offset, size);
+    const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+                                                          device.HasFastBufferSubData());
+    glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
 }

 void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -910,8 +863,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
    auto& memory_manager{gpu.MemoryManager()};
    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};

-    u32 binding =
-        device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
+    u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
    for (const auto& entry : shader->GetEntries().global_memory_entries) {
        const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
        const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,8 +56,8 @@ struct DrawParameters;
 class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
 public:
    explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
-                              const Device& device, ScreenInfo& info,
-                              ProgramManager& program_manager, StateTracker& state_tracker);
+                              ScreenInfo& info, GLShader::ProgramManager& program_manager,
+                              StateTracker& state_tracker);
    ~RasterizerOpenGL() override;

    void Draw(bool is_indexed, bool is_instanced) override;
@@ -106,7 +106,7 @@ private:
    void SetupComputeConstBuffers(const Shader& kernel);

    /// Configures a constant buffer.
-    void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
+    void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
                          const ConstBufferEntry& entry);

    /// Configures the current global memory entries to use for the draw command.
@@ -224,7 +224,7 @@ private:

    void SetupShaders(GLenum primitive_mode);

-    const Device& device;
+    const Device device;

    TextureCacheOpenGL texture_cache;
    ShaderCacheOpenGL shader_cache;
@@ -236,7 +236,7 @@ private:

    Core::System& system;
    ScreenInfo& screen_info;
-    ProgramManager& program_manager;
+    GLShader::ProgramManager& program_manager;
    StateTracker& state_tracker;

    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@@ -248,12 +248,6 @@ private:
    std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
        enabled_transform_feedback_buffers;

-    static constexpr std::size_t NUM_CONSTANT_BUFFERS =
-        Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-        Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
-    std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
-    std::size_t current_cbuf = 0;
-
    /// Number of commands queued to the OpenGL driver. Reseted on flush.
    std::size_t num_queued_commands = 0;

--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -125,15 +125,6 @@ void OGLProgram::Release() {
    handle = 0;
 }

-void OGLAssemblyProgram::Release() {
-    if (handle == 0) {
-        return;
-    }
-    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
-    glDeleteProgramsARB(1, &handle);
-    handle = 0;
-}
-
 void OGLPipeline::Create() {
    if (handle != 0)
        return;
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -167,22 +167,6 @@ public:
    GLuint handle = 0;
 };

-class OGLAssemblyProgram : private NonCopyable {
-public:
-    OGLAssemblyProgram() = default;
-
-    OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
-    ~OGLAssemblyProgram() {
-        Release();
-    }
-
-    /// Deletes the internal OpenGL resource
-    void Release();
-
-    GLuint handle = 0;
-};
-
 class OGLPipeline : private NonCopyable {
 public:
    OGLPipeline() = default;
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -97,24 +97,6 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
    return {};
 }

-constexpr GLenum AssemblyEnum(ShaderType shader_type) {
-    switch (shader_type) {
-    case ShaderType::Vertex:
-        return GL_VERTEX_PROGRAM_NV;
-    case ShaderType::TesselationControl:
-        return GL_TESS_CONTROL_PROGRAM_NV;
-    case ShaderType::TesselationEval:
-        return GL_TESS_EVALUATION_PROGRAM_NV;
-    case ShaderType::Geometry:
-        return GL_GEOMETRY_PROGRAM_NV;
-    case ShaderType::Fragment:
-        return GL_FRAGMENT_PROGRAM_NV;
-    case ShaderType::Compute:
-        return GL_COMPUTE_PROGRAM_NV;
-    }
-    return {};
-}
-
 std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
    return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
 }
@@ -138,43 +120,18 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
    return registry;
 }

-ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
-                             const ShaderIR& ir, const Registry& registry,
-                             bool hint_retrievable = false) {
+std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
+                                        u64 unique_identifier, const ShaderIR& ir,
+                                        const Registry& registry, bool hint_retrievable = false) {
    const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
    LOG_INFO(Render_OpenGL, "{}", shader_id);

-    auto program = std::make_shared<ProgramHandle>();
-
-    if (device.UseAssemblyShaders()) {
-        const std::string arb = "Not implemented";
-
-        GLuint& arb_prog = program->assembly_program.handle;
-
-// Commented out functions signal OpenGL errors but are compatible with apitrace.
-// Use them only to capture and replay on apitrace.
-#if 0
-        glGenProgramsNV(1, &arb_prog);
-        glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
-                        reinterpret_cast<const GLubyte*>(arb.data()));
-#else
-        glGenProgramsARB(1, &arb_prog);
-        glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
-                                static_cast<GLsizei>(arb.size()), arb.data());
-#endif
-        const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
-        if (err && *err) {
-            LOG_CRITICAL(Render_OpenGL, "{}", err);
-            LOG_INFO(Render_OpenGL, "\n{}", arb);
-        }
-    } else {
-        const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
-        OGLShader shader;
-        shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
-
-        program->source_program.Create(true, hint_retrievable, shader.handle);
-    }
+    const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
+    OGLShader shader;
+    shader.Create(glsl.c_str(), GetGLShaderType(shader_type));

+    auto program = std::make_shared<OGLProgram>();
+    program->Create(true, hint_retrievable, shader.handle);
    return program;
 }

@@ -196,22 +153,15 @@ std::unordered_set<GLenum> GetSupportedFormats() {

 CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                           ShaderEntries entries, ProgramSharedPtr program_)
+                           ShaderEntries entries, std::shared_ptr<OGLProgram> program)
    : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
-      size_in_bytes{size_in_bytes}, program{std::move(program_)} {
-    // Assign either the assembly program or source program. We can't have both.
-    handle = program->assembly_program.handle;
-    if (handle == 0) {
-        handle = program->source_program.handle;
-    }
-    ASSERT(handle != 0);
-}
+      size_in_bytes{size_in_bytes}, program{std::move(program)} {}

 CachedShader::~CachedShader() = default;

 GLuint CachedShader::GetHandle() const {
    DEBUG_ASSERT(registry->IsConsistent());
-    return handle;
+    return program->handle;
 }

 Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
@@ -289,11 +239,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
        return;
    }

-    std::vector<ShaderDiskCachePrecompiled> gl_cache;
-    if (!device.UseAssemblyShaders()) {
-        // Only load precompiled cache when we are not using assembly shaders
-        gl_cache = disk_cache.LoadPrecompiled();
-    }
+    const std::vector gl_cache = disk_cache.LoadPrecompiled();
    const auto supported_formats = GetSupportedFormats();

    // Track if precompiled cache was altered during loading to know if we have to
@@ -332,7 +278,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
            auto registry = MakeRegistry(entry);
            const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);

-            ProgramSharedPtr program;
+            std::shared_ptr<OGLProgram> program;
            if (precompiled_entry) {
                // If the shader is precompiled, attempt to load it with
                program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@@ -386,11 +332,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
        return;
    }

-    if (device.UseAssemblyShaders()) {
-        // Don't store precompiled binaries for assembly shaders.
-        return;
-    }
-
    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
    // before precompiling them

@@ -398,7 +339,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
        const u64 id = (*transferable)[i].unique_identifier;
        const auto it = find_precompiled(id);
        if (it == gl_cache.end()) {
-            const GLuint program = runtime_cache.at(id).program->source_program.handle;
+            const GLuint program = runtime_cache.at(id).program->handle;
            disk_cache.SavePrecompiled(id, program);
            precompiled_cache_altered = true;
        }
@@ -409,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
    }
 }

-ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
+std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
    const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
    const std::unordered_set<GLenum>& supported_formats) {
    if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@@ -417,15 +358,15 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
        return {};
    }

-    auto program = std::make_shared<ProgramHandle>();
-    GLuint& handle = program->source_program.handle;
-    handle = glCreateProgram();
-    glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
-    glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
+    auto program = std::make_shared<OGLProgram>();
+    program->handle = glCreateProgram();
+    glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+    glProgramBinary(program->handle, precompiled_entry.binary_format,
+                    precompiled_entry.binary.data(),
                    static_cast<GLsizei>(precompiled_entry.binary.size()));

    GLint link_status;
-    glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
+    glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
    if (link_status == GL_FALSE) {
        LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
        return {};
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -43,14 +43,8 @@ struct UnspecializedShader;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-struct ProgramHandle {
-    OGLProgram source_program;
-    OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
 struct PrecompiledShader {
-    ProgramSharedPtr program;
+    std::shared_ptr<OGLProgram> program;
    std::shared_ptr<VideoCommon::Shader::Registry> registry;
    ShaderEntries entries;
 };
@@ -93,13 +87,12 @@ public:
 private:
    explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
                          std::shared_ptr<VideoCommon::Shader::Registry> registry,
-                          ShaderEntries entries, ProgramSharedPtr program);
+                          ShaderEntries entries, std::shared_ptr<OGLProgram> program);

    std::shared_ptr<VideoCommon::Shader::Registry> registry;
    ShaderEntries entries;
    std::size_t size_in_bytes = 0;
-    ProgramSharedPtr program;
-    GLuint handle = 0;
+    std::shared_ptr<OGLProgram> program;
 };

 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -122,7 +115,7 @@ protected:
    void FlushObjectInner(const Shader& object) override {}

 private:
-    ProgramSharedPtr GeneratePrecompiledProgram(
+    std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
        const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
        const std::unordered_set<GLenum>& supported_formats);

--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1538,9 +1538,7 @@ private:
        Expression target;
        if (const auto gpr = std::get_if<GprNode>(&*dest)) {
            if (gpr->GetIndex() == Register::ZeroIndex) {
-                // Writing to Register::ZeroIndex is a no op but we still have to visit the source
-                // as it might have side effects.
-                code.AddLine("{};", Visit(src).GetCode());
+                // Writing to Register::ZeroIndex is a no op
                return {};
            }
            target = {GetRegister(gpr->GetIndex()), Type::Float};
@@ -2311,18 +2309,6 @@ private:
        return {"gl_SubGroupInvocationARB", Type::Uint};
    }

-    template <const std::string_view& comparison>
-    Expression ThreadMask(Operation) {
-        if (device.HasWarpIntrinsics()) {
-            return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
-        }
-        if (device.HasShaderBallot()) {
-            return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
-        }
-        LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
-        return {"0U", Type::Uint};
-    }
-
    Expression ShuffleIndexed(Operation operation) {
        std::string value = VisitOperand(operation, 0).AsFloat();

@@ -2335,15 +2321,6 @@ private:
        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
    }

-    Expression Barrier(Operation) {
-        if (!ir.IsDecompiled()) {
-            LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
-            return {};
-        }
-        code.AddLine("barrier();");
-        return {};
-    }
-
    Expression MemoryBarrierGL(Operation) {
        code.AddLine("memoryBarrier();");
        return {};
@@ -2360,12 +2337,6 @@ private:
        static constexpr std::string_view NotEqual = "!=";
        static constexpr std::string_view GreaterEqual = ">=";

-        static constexpr std::string_view Eq = "Eq";
-        static constexpr std::string_view Ge = "Ge";
-        static constexpr std::string_view Gt = "Gt";
-        static constexpr std::string_view Le = "Le";
-        static constexpr std::string_view Lt = "Lt";
-
        static constexpr std::string_view Add = "Add";
        static constexpr std::string_view Min = "Min";
        static constexpr std::string_view Max = "Max";
@@ -2583,14 +2554,8 @@ private:
        &GLSLDecompiler::VoteEqual,

        &GLSLDecompiler::ThreadId,
-        &GLSLDecompiler::ThreadMask<Func::Eq>,
-        &GLSLDecompiler::ThreadMask<Func::Ge>,
-        &GLSLDecompiler::ThreadMask<Func::Gt>,
-        &GLSLDecompiler::ThreadMask<Func::Le>,
-        &GLSLDecompiler::ThreadMask<Func::Lt>,
        &GLSLDecompiler::ShuffleIndexed,

-        &GLSLDecompiler::Barrier,
        &GLSLDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -6,107 +6,47 @@

 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"

-namespace OpenGL {
+namespace OpenGL::GLShader {

-ProgramManager::ProgramManager(const Device& device) {
-    use_assembly_programs = device.UseAssemblyShaders();
-    if (use_assembly_programs) {
-        glEnable(GL_COMPUTE_PROGRAM_NV);
-    } else {
-        graphics_pipeline.Create();
-        glBindProgramPipeline(graphics_pipeline.handle);
-    }
-}
+ProgramManager::ProgramManager() = default;

 ProgramManager::~ProgramManager() = default;

-void ProgramManager::BindCompute(GLuint program) {
-    if (use_assembly_programs) {
-        glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
-    } else {
-        is_graphics_bound = false;
-        glUseProgram(program);
-    }
+void ProgramManager::Create() {
+    graphics_pipeline.Create();
+    glBindProgramPipeline(graphics_pipeline.handle);
 }

 void ProgramManager::BindGraphicsPipeline() {
-    if (use_assembly_programs) {
-        UpdateAssemblyPrograms();
-    } else {
-        UpdateSourcePrograms();
-    }
-}
-
-void ProgramManager::BindHostPipeline(GLuint pipeline) {
-    if (use_assembly_programs) {
-        if (geometry_enabled) {
-            geometry_enabled = false;
-            old_state.geometry = 0;
-            glDisable(GL_GEOMETRY_PROGRAM_NV);
-        }
-    }
-    glBindProgramPipeline(pipeline);
-}
-
-void ProgramManager::RestoreGuestPipeline() {
-    if (use_assembly_programs) {
-        glBindProgramPipeline(0);
-    } else {
-        glBindProgramPipeline(graphics_pipeline.handle);
-    }
-}
-
-void ProgramManager::UpdateAssemblyPrograms() {
-    const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
-        if (current == old) {
-            return;
-        }
-        if (current == 0) {
-            if (enabled) {
-                enabled = false;
-                glDisable(stage);
-            }
-            return;
-        }
-        if (!enabled) {
-            enabled = true;
-            glEnable(stage);
-        }
-        glBindProgramARB(stage, current);
-    };
-
-    update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
-    update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
-                 old_state.geometry);
-    update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
-                 old_state.fragment);
-
-    old_state = current_state;
-}
-
-void ProgramManager::UpdateSourcePrograms() {
    if (!is_graphics_bound) {
        is_graphics_bound = true;
        glUseProgram(0);
    }

+    // Avoid updating the pipeline when values have not changed
+    if (old_state == current_state) {
+        return;
+    }
+
+    // Workaround for AMD bug
+    static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+                                            GL_FRAGMENT_SHADER_BIT};
    const GLuint handle = graphics_pipeline.handle;
-    const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
-        if (current == old) {
-            return;
-        }
-        glUseProgramStages(handle, stage, current);
-    };
-    update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
-    update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
-    update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
+    glUseProgramStages(handle, all_used_stages, 0);
+    glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+    glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+    glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);

    old_state = current_state;
 }

+void ProgramManager::BindComputeShader(GLuint program) {
+    is_graphics_bound = false;
+    glUseProgram(program);
+}
+
 void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
    const auto& regs = maxwell.regs;

@@ -114,4 +54,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
    y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
 }

-} // namespace OpenGL
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,9 +11,7 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"

-namespace OpenGL {
-
-class Device;
+namespace OpenGL::GLShader {

 /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@@ -30,58 +28,50 @@ static_assert(sizeof(MaxwellUniformData) < 16384,

 class ProgramManager {
 public:
-    explicit ProgramManager(const Device& device);
+    explicit ProgramManager();
    ~ProgramManager();

-    /// Binds a compute program
-    void BindCompute(GLuint program);
+    void Create();

-    /// Updates bound programs.
+    /// Updates the graphics pipeline and binds it.
    void BindGraphicsPipeline();

-    /// Binds an OpenGL pipeline object unsynchronized with the guest state.
-    void BindHostPipeline(GLuint pipeline);
-
-    /// Rewinds BindHostPipeline state changes.
-    void RestoreGuestPipeline();
+    /// Binds a compute shader.
+    void BindComputeShader(GLuint program);

    void UseVertexShader(GLuint program) {
-        current_state.vertex = program;
+        current_state.vertex_shader = program;
    }

    void UseGeometryShader(GLuint program) {
-        current_state.geometry = program;
+        current_state.geometry_shader = program;
    }

    void UseFragmentShader(GLuint program) {
-        current_state.fragment = program;
+        current_state.fragment_shader = program;
    }

 private:
    struct PipelineState {
-        GLuint vertex = 0;
-        GLuint geometry = 0;
-        GLuint fragment = 0;
+        bool operator==(const PipelineState& rhs) const noexcept {
+            return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
+                   geometry_shader == rhs.geometry_shader;
+        }
+
+        bool operator!=(const PipelineState& rhs) const noexcept {
+            return !operator==(rhs);
+        }
+
+        GLuint vertex_shader = 0;
+        GLuint fragment_shader = 0;
+        GLuint geometry_shader = 0;
    };

-    /// Update NV_gpu_program5 programs.
-    void UpdateAssemblyPrograms();
-
-    /// Update GLSL programs.
-    void UpdateSourcePrograms();
-
    OGLPipeline graphics_pipeline;
-
+    OGLPipeline compute_pipeline;
    PipelineState current_state;
    PipelineState old_state;
-
-    bool use_assembly_programs = false;
-
    bool is_graphics_bound = true;
-
-    bool vertex_enabled = false;
-    bool geometry_enabled = false;
-    bool fragment_enabled = false;
 };

-} // namespace OpenGL
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,7 +316,7 @@ public:
 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
                               Core::Frontend::GraphicsContext& context)
    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
-      program_manager{device}, has_debug_tool{HasDebugTool()} {}
+      has_debug_tool{HasDebugTool()} {}

 RendererOpenGL::~RendererOpenGL() = default;

@@ -468,9 +468,8 @@ void RendererOpenGL::InitOpenGLObjects() {
    vertex_program.Create(true, false, vertex_shader.handle);
    fragment_program.Create(true, false, fragment_shader.handle);

-    pipeline.Create();
-    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
-    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+    // Create program pipeline
+    program_manager.Create();

    // Generate VBO handle for drawing
    vertex_buffer.Create();
@@ -509,7 +508,7 @@ void RendererOpenGL::CreateRasterizer() {
    if (rasterizer) {
        return;
    }
-    rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
+    rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
                                                    program_manager, state_tracker);
 }

@@ -621,7 +620,10 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
    state_tracker.NotifyClipControl();
    state_tracker.NotifyAlphaTest();

-    program_manager.BindHostPipeline(pipeline.handle);
+    program_manager.UseVertexShader(vertex_program.handle);
+    program_manager.UseGeometryShader(0);
+    program_manager.UseFragmentShader(fragment_program.handle);
+    program_manager.BindGraphicsPipeline();

    glEnable(GL_CULL_FACE);
    if (screen_info.display_srgb) {
@@ -663,8 +665,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {

    glClear(GL_COLOR_BUFFER_BIT);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-
-    program_manager.RestoreGuestPipeline();
 }

 bool RendererOpenGL::TryPresent(int timeout_ms) {
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -9,7 +9,6 @@
 #include "common/common_types.h"
 #include "common/math_util.h"
 #include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -96,7 +95,6 @@ private:
    Core::Frontend::EmuWindow& emu_window;
    Core::System& system;
    Core::Frontend::GraphicsContext& context;
-    const Device device;

    StateTracker state_tracker{system};

@@ -104,14 +102,13 @@ private:
    OGLBuffer vertex_buffer;
    OGLProgram vertex_program;
    OGLProgram fragment_program;
-    OGLPipeline pipeline;
    OGLFramebuffer screenshot_framebuffer;

    /// Display information for Switch screen
    ScreenInfo screen_info;

    /// Global dummy shader pipeline
-    ProgramManager program_manager;
+    GLShader::ProgramManager program_manager;

    /// OpenGL framebuffer data
    std::vector<u8> gl_framebuffer_data;
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,7 +7,6 @@
 #include <memory>

 #include "core/core.h"
-#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,7 +7,6 @@
 #include <memory>

 #include "video_core/fence_manager.h"
-#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/wrapper.h"

 namespace Core {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -329,7 +329,8 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {

        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
        const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
-        const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
+        ASSERT(cpu_addr);
+        const auto shader = TryGet(*cpu_addr);
        ASSERT(shader);

        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -532,14 +532,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
        return;
    }
    texture_cache.OnCPUWrite(addr, size);
-    pipeline_cache.OnCPUWrite(addr, size);
+    pipeline_cache.InvalidateRegion(addr, size);
    buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }

 void RasterizerVulkan::SyncGuestHost() {
    texture_cache.SyncGuestHost();
    buffer_cache.SyncGuestHost();
-    pipeline_cache.SyncGuestHost();
 }

 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,16 +515,6 @@ private:
    void DeclareCommon() {
        thread_id =
            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
-        thread_masks[0] =
-            DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
-        thread_masks[1] =
-            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
-        thread_masks[2] =
-            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
-        thread_masks[3] =
-            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
-        thread_masks[4] =
-            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
    }

    void DeclareVertex() {
@@ -1081,7 +1071,8 @@ private:

    void VisitBasicBlock(const NodeBlock& bb) {
        for (const auto& node : bb) {
-            Visit(node);
+            [[maybe_unused]] const Type type = Visit(node).type;
+            ASSERT(type == Type::Void);
        }
    }

@@ -1371,9 +1362,7 @@ private:
        Expression target{};
        if (const auto gpr = std::get_if<GprNode>(&*dest)) {
            if (gpr->GetIndex() == Register::ZeroIndex) {
-                // Writing to Register::ZeroIndex is a no op but we still have to visit its source
-                // because it might have side effects.
-                Visit(src);
+                // Writing to Register::ZeroIndex is a no op
                return {};
            }
            target = {registers.at(gpr->GetIndex()), Type::Float};
@@ -2186,35 +2175,12 @@ private:
        return {OpLoad(t_uint, thread_id), Type::Uint};
    }

-    template <std::size_t index>
-    Expression ThreadMask(Operation) {
-        // TODO(Rodrigo): Handle devices with different warp sizes
-        const Id mask = thread_masks[index];
-        return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
-    }
-
    Expression ShuffleIndexed(Operation operation) {
        const Id value = AsFloat(Visit(operation[0]));
        const Id index = AsUint(Visit(operation[1]));
        return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
    }

-    Expression Barrier(Operation) {
-        if (!ir.IsDecompiled()) {
-            LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
-            return {};
-        }
-
-        const auto scope = spv::Scope::Workgroup;
-        const auto memory = spv::Scope::Workgroup;
-        const auto semantics =
-            spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
-        OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
-                         Constant(t_uint, static_cast<u32>(memory)),
-                         Constant(t_uint, static_cast<u32>(semantics)));
-        return {};
-    }
-
    Expression MemoryBarrierGL(Operation) {
        const auto scope = spv::Scope::Device;
        const auto semantics =
@@ -2673,14 +2639,8 @@ private:
        &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,

        &SPIRVDecompiler::ThreadId,
-        &SPIRVDecompiler::ThreadMask<0>, // Eq
-        &SPIRVDecompiler::ThreadMask<1>, // Ge
-        &SPIRVDecompiler::ThreadMask<2>, // Gt
-        &SPIRVDecompiler::ThreadMask<3>, // Le
-        &SPIRVDecompiler::ThreadMask<4>, // Lt
        &SPIRVDecompiler::ShuffleIndexed,

-        &SPIRVDecompiler::Barrier,
        &SPIRVDecompiler::MemoryBarrierGL,
    };
    static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2803,7 +2763,6 @@ private:
    Id workgroup_id{};
    Id local_invocation_id{};
    Id thread_id{};
-    std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt

    VertexIndices in_indices;
    VertexIndices out_indices;
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -387,6 +387,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    }
    case OpCode::Id::RED: {
        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+        UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
        const auto [real_address, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, true);
        if (!real_address || !base_address) {
@@ -395,7 +396,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        Node value = GetRegister(instr.gpr0);
-        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
+        bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
        break;
    }
    case OpCode::Id::ATOM: {
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,27 +109,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                return Operation(OperationCode::WorkGroupIdY);
            case SystemVariable::CtaIdZ:
                return Operation(OperationCode::WorkGroupIdZ);
-            case SystemVariable::EqMask:
-            case SystemVariable::LtMask:
-            case SystemVariable::LeMask:
-            case SystemVariable::GtMask:
-            case SystemVariable::GeMask:
-                uses_warps = true;
-                switch (instr.sys20) {
-                case SystemVariable::EqMask:
-                    return Operation(OperationCode::ThreadEqMask);
-                case SystemVariable::LtMask:
-                    return Operation(OperationCode::ThreadLtMask);
-                case SystemVariable::LeMask:
-                    return Operation(OperationCode::ThreadLeMask);
-                case SystemVariable::GtMask:
-                    return Operation(OperationCode::ThreadGtMask);
-                case SystemVariable::GeMask:
-                    return Operation(OperationCode::ThreadGeMask);
-                default:
-                    UNREACHABLE();
-                    return Immediate(0u);
-                }
            default:
                UNIMPLEMENTED_MSG("Unhandled system move: {}",
                                  static_cast<u32>(instr.sys20.Value()));
@@ -293,11 +272,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
-    case OpCode::Id::BAR: {
-        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
-        bb.push_back(Operation(OperationCode::Barrier));
-        break;
-    }
    case OpCode::Id::MEMBAR: {
        UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -226,14 +226,8 @@ enum class OperationCode {
    VoteEqual,    /// (bool) -> bool

    ThreadId,       /// () -> uint
-    ThreadEqMask,   /// () -> uint
-    ThreadGeMask,   /// () -> uint
-    ThreadGtMask,   /// () -> uint
-    ThreadLeMask,   /// () -> uint
-    ThreadLtMask,   /// () -> uint
    ShuffleIndexed, /// (uint value, uint index) -> uint

-    Barrier,         /// () -> void
    MemoryBarrierGL, /// () -> void

    Amount,
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -643,8 +643,6 @@ void Config::ReadRendererValues() {
    Settings::values.use_asynchronous_gpu_emulation =
        ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
    Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
-    Settings::values.use_assembly_shaders =
-        ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
    Settings::values.use_fast_gpu_time =
        ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
    Settings::values.force_30fps_mode =
@@ -1092,8 +1090,6 @@ void Config::SaveRendererValues() {
    WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
                 Settings::values.use_asynchronous_gpu_emulation, false);
    WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
-    WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
-                 false);
    WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
    WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);

--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)

    ui->setupUi(this);

-    // TODO: Remove this after assembly shaders are fully integrated
-    ui->use_assembly_shaders->setVisible(false);
-
    SetConfiguration();
 }

@@ -25,8 +22,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
    ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
    ui->use_vsync->setEnabled(runtime_lock);
    ui->use_vsync->setChecked(Settings::values.use_vsync);
-    ui->use_assembly_shaders->setEnabled(runtime_lock);
-    ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
    ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
    ui->force_30fps_mode->setEnabled(runtime_lock);
    ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
@@ -38,7 +33,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
    auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
    Settings::values.gpu_accuracy = gpu_accuracy;
    Settings::values.use_vsync = ui->use_vsync->isChecked();
-    Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
    Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
    Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
    Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -62,16 +62,6 @@
          </property>
         </widget>
        </item>
-        <item>
-         <widget class="QCheckBox" name="use_assembly_shaders">
-          <property name="toolTip">
-           <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
-          </property>
-          <property name="text">
-           <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
-          </property>
-         </widget>
-        </item>
        <item>
         <widget class="QCheckBox" name="force_30fps_mode">
          <property name="text">
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() {

    // The number is the client ID for yuzu, it's used for images and the
    // application name
-    Discord_Initialize("712465656758665259", &handlers, 1, nullptr);
+    Discord_Initialize("471872241299226636", &handlers, 1, nullptr);
 }

 DiscordImpl::~DiscordImpl() {
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -135,6 +135,28 @@ __declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1;
 }
 #endif

+namespace {
+
+QString GetAccountUsername() {
+    const QString nouser = QString::fromStdString("No User");
+    Service::Account::ProfileManager manager;
+    const auto current_user = manager.GetUser(Settings::values.current_user);
+    if (!current_user.has_value() || (current_user == Common::UUID{})) {
+        return nouser;
+    }
+    Service::Account::ProfileBase profile;
+    if (!manager.GetProfileBase(*current_user, profile)) {
+        return nouser;
+    }
+
+    const auto text = Common::StringFromFixedZeroTerminatedBuffer(
+        reinterpret_cast<const char*>(profile.username.data()), profile.username.size());
+
+    return text.empty() ? nouser : QString::fromStdString(text);
+}
+
+} // Anonymous namespace
+
 constexpr int default_mouse_timeout = 2500;

 constexpr u64 DLC_BASE_TITLE_ID_MASK = 0xFFFFFFFFFFFFE000;
@@ -493,6 +515,48 @@ void GMainWindow::InitializeWidgets() {
        statusBar()->addPermanentWidget(label);
    }

+    // Setup Profile button
+    profile_status_button = new QPushButton();
+    profile_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+    profile_status_button->setCheckable(true);
+    profile_status_button->setChecked(true);
+    profile_status_button->setFocusPolicy(Qt::NoFocus);
+    const auto username = GetAccountUsername();
+    profile_status_button->setText(username);
+    connect(profile_status_button, &QPushButton::clicked, [=] {
+        profile_status_button->setChecked(true);
+
+        if (emulation_running) {
+            return;
+        }
+
+        // User save data
+        const auto select_profile = [this] {
+            QtProfileSelectionDialog dialog(this);
+            dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+                                  Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
+            dialog.setWindowModality(Qt::WindowModal);
+
+            if (dialog.exec() == QDialog::Rejected) {
+                return -1;
+            }
+
+            return dialog.GetIndex();
+        };
+
+        const auto index = select_profile();
+        if (index == -1) {
+            return;
+        }
+
+        Settings::values.current_user = index;
+        Settings::Apply();
+
+        const auto username = GetAccountUsername();
+        profile_status_button->setText(username);
+    });
+    statusBar()->insertPermanentWidget(0, profile_status_button);
+
    // Setup Dock button
    dock_status_button = new QPushButton();
    dock_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
@@ -1902,6 +1966,8 @@ void GMainWindow::OnConfigure() {
        ui.centralwidget->setMouseTracking(false);
    }

+    const auto username = GetAccountUsername();
+    profile_status_button->setText(username);
    dock_status_button->setChecked(Settings::values.use_docked_mode);
    async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
 #ifdef HAS_VULKAN
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -233,6 +233,7 @@ private:
    QLabel* emu_speed_label = nullptr;
    QLabel* game_fps_label = nullptr;
    QLabel* emu_frametime_label = nullptr;
+    QPushButton* profile_status_button = nullptr;
    QPushButton* async_status_button = nullptr;
    QPushButton* renderer_status_button = nullptr;
    QPushButton* dock_status_button = nullptr;
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -397,8 +397,6 @@ void Config::ReadValues() {
        sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
    Settings::values.use_vsync =
        static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
-    Settings::values.use_assembly_shaders =
-        sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
    Settings::values.use_fast_gpu_time =
        sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);

--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -134,10 +134,6 @@ max_anisotropy =
 # 0 (default): Off, 1: On
 use_vsync =

-# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
-# 0 (default): Off, 1: On
-use_assembly_shaders =
-
 # Turns on the frame limiter, which will limit frames output to the target game speed
 # 0: Off, 1: On (default)
 use_frame_limit =
Author	SHA1	Message	Date
GodKratos	60b184377e	add UUID validation check Co-authored-by: VolcaEM <63682805+VolcaEM@users.noreply.github.com>	2020-05-24 01:13:49 +12:00
Godkratos	7341257fc4	Validate uuid and fix returns	2020-05-18 11:10:12 +12:00
Godkratos	d011f89f15	Validate username before returning	2020-05-18 10:37:25 +12:00
Godkratos	de1ef273b3	Clang formatting	2020-05-18 01:01:40 +12:00
Godkratos	c2522f3e43	Move new method into anonymous namespace	2020-05-18 00:55:32 +12:00
Godkratos	d8b83aa8f5	Add button to show and update current user profile on status bar	2020-05-18 00:08:41 +12:00