Compare commits

...

79 Commits

Author SHA1 Message Date
ReaperOfSouls
bfb35a8fee Revert "Revert "Yield types"" 2018-11-25 01:03:54 -04:00
ReaperOfSouls
362919eaca Merge pull request #32 from ReaperOfSouls1909/revert-30-yield-types
Revert "Yield types"
2018-11-25 01:03:52 -04:00
ReaperOfSouls
ddc3a76809 Revert "Yield types" 2018-11-25 01:03:44 -04:00
ReaperOfSouls
2531ee5809 Merge pull request #31 from ReaperOfSouls1909/revert-29-map-physical-memory
Revert "Map physical memory"
2018-11-25 01:03:28 -04:00
ReaperOfSouls
ca998e9c0d Revert "Map physical memory" 2018-11-25 01:01:31 -04:00
ReaperOfSouls
dc248dc35d Merge pull request #30 from DarkLordZach/yield-types
Yield types
2018-11-25 00:58:39 -04:00
ReaperOfSouls
f0ec00a9cd Add files via upload 2018-11-25 00:58:29 -04:00
ReaperOfSouls
317e407e47 Delete svc.cpp 2018-11-25 00:55:35 -04:00
ReaperOfSouls
a6aa9b5db4 Merge pull request #29 from ogniK5377/map-physical-memory
Map physical memory
2018-11-24 20:54:47 -04:00
ReaperOfSouls
63f11c1a83 Merge pull request #28 from bunnei/sleepthread
svc: Improve SleepThread for yield types.
2018-11-24 20:46:01 -04:00
ReaperOfSouls
dcedf48591 Merge pull request #27 from yuzu-emu/master
Merge master
2018-11-24 20:45:26 -04:00
ReaperOfSouls
9e2d8336ba Merge pull request #26 from ReaperOfSouls1909/revert-1-dc
Revert "Dc"
2018-11-24 20:45:03 -04:00
ReaperOfSouls
515027236f Revert "Dc" 2018-11-24 20:44:53 -04:00
ReaperOfSouls
81133f35e0 Merge pull request #25 from ReaperOfSouls1909/revert-2-core-mgr
Revert "core: Relocate CPU core management to its own class"
2018-11-24 20:44:06 -04:00
ReaperOfSouls
2fb716e6d4 Revert "core: Relocate CPU core management to its own class" 2018-11-24 20:43:56 -04:00
ReaperOfSouls
1fdd669c64 Merge pull request #24 from ReaperOfSouls1909/revert-7-shader_cache
Revert "shader_cache: Only lock covered instructions."
2018-11-24 20:43:35 -04:00
ReaperOfSouls
4ef96a5ea1 Revert "shader_cache: Only lock covered instructions." 2018-11-24 20:43:26 -04:00
ReaperOfSouls
4f62c27d05 Merge pull request #23 from ReaperOfSouls1909/revert-11-bfi
Revert "gl_shader_decompiler: Implement BFI_IMM_R"
2018-11-24 20:42:50 -04:00
ReaperOfSouls
0c1524936b Revert "gl_shader_decompiler: Implement BFI_IMM_R" 2018-11-24 20:42:41 -04:00
ReaperOfSouls
156a41b8cc Merge pull request #22 from ReaperOfSouls1909/revert-13-r2p
Revert "gl_shader_decompiler: Implement R2P_IMM"
2018-11-24 20:42:12 -04:00
ReaperOfSouls
e1be6bb2a5 Revert "gl_shader_decompiler: Implement R2P_IMM" 2018-11-24 20:42:00 -04:00
ReaperOfSouls
dcd78037ac Merge pull request #21 from ReaperOfSouls1909/revert-14-clip-distances
Revert "gl_shader_decompiler: Implement clip distances"
2018-11-24 20:41:42 -04:00
ReaperOfSouls
63d5e92a92 Revert "gl_shader_decompiler: Implement clip distances" 2018-11-24 20:41:32 -04:00
ReaperOfSouls
30b45751ea Merge pull request #20 from ReaperOfSouls1909/revert-10-pred-comp-11
Revert "Pred comp 11"
2018-11-24 20:40:49 -04:00
ReaperOfSouls
50237cb9ed Revert "Pred comp 11" 2018-11-24 20:40:40 -04:00
ReaperOfSouls
2fde25ed4d Merge pull request #19 from ReaperOfSouls1909/revert-12-getgputime
Revert "nvhost_ctrl_gpu: Implement IoctlGetGpuTime."
2018-11-24 20:39:47 -04:00
ReaperOfSouls
2e4955632e Revert "nvhost_ctrl_gpu: Implement IoctlGetGpuTime." 2018-11-24 20:39:38 -04:00
ReaperOfSouls
ee4d1cc92f Merge pull request #18 from ReaperOfSouls1909/revert-4-fix-txq
Revert "Properly Implemented TXQ Instruction"
2018-11-24 20:39:12 -04:00
ReaperOfSouls
0248933ef3 Revert "Properly Implemented TXQ Instruction" 2018-11-24 20:39:02 -04:00
ReaperOfSouls
46c2c936a1 Merge pull request #17 from ReaperOfSouls1909/revert-5-master
Revert "Add support for clear_flags register"
2018-11-24 20:37:54 -04:00
ReaperOfSouls
276efa7bae Revert "Add support for clear_flags register" 2018-11-24 20:37:44 -04:00
ReaperOfSouls
7a3f0b8154 Merge pull request #16 from Tinob/DepthClamp
Implement depth clamp
2018-11-23 11:48:05 -04:00
ReaperOfSouls
cab0eba6d4 Merge pull request #15 from ReaperOfSouls1909/revert-8-master
Revert "Polyfix"
2018-11-23 11:47:20 -04:00
ReaperOfSouls
34a5a4e4aa Revert "Polyfix" 2018-11-23 11:47:12 -04:00
Rodolfo Bogado
35bb416308 Implement depth clamp 2018-11-23 12:11:21 -03:00
ReaperOfSouls
ec7345a1e8 Merge pull request #8 from marcosvitali/master
Polyfix
2018-11-23 10:58:01 -04:00
ReaperOfSouls
f328d25933 Merge pull request #14 from ReinUsesLisp/clip-distances
gl_shader_decompiler: Implement clip distances
2018-11-23 10:57:09 -04:00
ReaperOfSouls
dd9fc99901 Merge pull request #13 from ReinUsesLisp/r2p
gl_shader_decompiler: Implement R2P_IMM
2018-11-23 10:56:09 -04:00
ReaperOfSouls
aa5e70b410 Merge pull request #12 from bunnei/getgputime
nvhost_ctrl_gpu: Implement IoctlGetGpuTime.
2018-11-23 10:55:48 -04:00
ReaperOfSouls
930f76f31e Merge pull request #11 from ReinUsesLisp/bfi
gl_shader_decompiler: Implement BFI_IMM_R
2018-11-23 10:55:26 -04:00
ReaperOfSouls
cb00001882 Merge pull request #10 from Hexagon12/pred-comp-11
Pred comp 11
2018-11-23 10:54:49 -04:00
ReaperOfSouls
9b51d92d3a Merge pull request #9 from FernandoS27/tex-spacing
Implemented Tile Width Spacing
2018-11-23 10:53:52 -04:00
Marcos Vitali
cd94dc484a Initialize polygon_offset in the constructor. 2018-11-23 10:38:06 -03:00
ReaperOfSouls
d557f4abea Merge pull request #7 from degasus/shader_cache
shader_cache: Only lock covered instructions.
2018-11-23 09:18:35 -04:00
ReaperOfSouls
f0b51bf713 Merge pull request #6 from bunnei/ldg
Ldg
2018-11-23 09:14:13 -04:00
Marcos Vitali
ddeb7809f7 Clang Format fixes. 2018-11-23 10:10:48 -03:00
ReaperOfSouls
07009688c8 Merge pull request #5 from Tinob/master
Add support for clear_flags register
2018-11-23 08:59:59 -04:00
ReaperOfSouls
c30f30da50 Merge pull request #4 from FernandoS27/fix-txq
Properly Implemented TXQ Instruction
2018-11-23 08:58:42 -04:00
ReaperOfSouls
dffc128448 Merge pull request #3 from FernandoS27/ignore-assert-dev
Implement better Ignore Assert
2018-11-23 08:57:51 -04:00
ReaperOfSouls
5cbade867f Merge pull request #2 from lioncash/core-mgr
core: Relocate CPU core management to its own class
2018-11-23 08:57:34 -04:00
ReaperOfSouls
cde484e32a Merge pull request #1 from FernandoS27/dc
Dc
2018-11-23 08:57:15 -04:00
FernandoS27
e012b3e1fe Fix Texture Overlapping 2018-11-23 08:56:41 -04:00
Rodolfo Bogado
de8001cfcd Add support for clear_flags register 2018-11-23 09:39:18 -03:00
Marcos Vitali
33ba10591e GPU States: Implement Polygon Offset. This is used in SMO all the time. 2018-11-23 03:01:33 -03:00
ReinUsesLisp
b3853403b7 gl_shader_decompiler: Implement clip distances 2018-11-23 02:14:43 -03:00
FernandoS27
0c465ca9c9 Fix TEXS Instruction encodings 2018-11-22 22:51:25 -04:00
FernandoS27
0eeda68d19 Fix one encoding in TEX Instruction 2018-11-22 22:08:19 -04:00
FernandoS27
1f895d68d1 Corrected inputs indexing in TEX instruction 2018-11-22 22:08:19 -04:00
Zach Hilman
820d81b9a5 scheduler: Add explanations for YieldWith and WithoutLoadBalancing 2018-11-22 00:33:53 -05:00
ReinUsesLisp
642dfeda2a gl_shader_decompiler: Implement BFI_IMM_R 2018-11-21 16:12:30 -03:00
ReinUsesLisp
d92afc7493 gl_shader_decompiler: Implement R2P_IMM 2018-11-21 04:56:00 -03:00
Hexagon12
76de2d0656 Clang fix 2018-11-20 18:57:44 +02:00
Hexagon12
9fbe79320b oops 2018-11-19 21:36:49 +02:00
Hexagon12
ae6e074f4a Added predicate comparison LessEqualWithNan 2018-11-19 21:27:51 +02:00
FernandoS27
fab4934f03 Implemented Tile Width Spacing 2018-11-19 14:09:42 -04:00
Zach Hilman
409dcf0e0a svc: Implement yield types 0 and -1 2018-11-18 23:44:19 -05:00
David Marcec
e6d205ec25 Updated SetMemoryPermission to new api 2018-11-17 15:22:17 +11:00
David Marcec
9327033580 Merge branch 'master' of https://github.com/yuzu-emu/yuzu into map-physical-memory 2018-11-17 15:17:31 +11:00
David Marcec
e840aa610a Added svc error checking 2018-11-16 00:21:23 +11:00
David Marcec
0d284a52bb Final fixups 2018-11-16 00:08:23 +11:00
David Marcec
d68162a7c9 Initial implementation of svcMapPhysicalMemory 2018-11-15 14:57:28 +11:00
bunnei
eadf2c070a gl_global_cache: Ensure buffer size does not exceed UBO maximum.
- Fixes crash with Xenoblade Chronicles 2.
2018-11-12 19:15:09 -05:00
bunnei
0c3eb8e318 gl_global_cache: Optimize caching to eliminate unnecessary resource management. 2018-11-12 19:15:09 -05:00
bunnei
13edd9ee68 gl_rasterizer: Cache global region uniform locations and refactor. 2018-11-12 19:15:09 -05:00
bunnei
ba1c8d935c gl_global_cache: Use const reference for GetGlobalRegion argument. 2018-11-12 19:15:09 -05:00
Zach Hilman
e84b02a351 gl_rasterizer: Add caching for global memory regions 2018-11-12 19:15:08 -05:00
Zach Hilman
2b18ce1248 Preliminary implementation of LDG
Works by approximating the value of the final address using the last IADD_C operation and then reading 16kb following that address. Currently a hackeuristic.
2018-11-12 18:28:42 -05:00
bunnei
5583fe1377 svc: Improve SleepThread for yield types.
- Fixes Super Mario Party.
2018-10-23 19:06:08 -04:00
FernandoS27
babd3581ee Implement better Ignore Assert 2018-09-17 14:14:48 -04:00
26 changed files with 718 additions and 99 deletions

View File

@@ -28,18 +28,14 @@ __declspec(noinline, noreturn)
}
#define ASSERT(_a_) \
do \
if (!(_a_)) { \
assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed!"); \
}
#define ASSERT_MSG(_a_, ...) \
do \
if (!(_a_)) { \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed! " __VA_ARGS__); \
}
#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)

View File

@@ -49,6 +49,22 @@ struct ThreadQueueList {
return T();
}
template <typename UnaryPredicate>
T get_first_filter(UnaryPredicate filter) const {
const Queue* cur = first;
while (cur != nullptr) {
if (!cur->data.empty()) {
for (const auto& item : cur->data) {
if (filter(item))
return item;
}
}
cur = cur->next_nonempty;
}
return T();
}
T pop_first() {
Queue* cur = first;
while (cur != nullptr) {

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
@@ -169,6 +170,16 @@ void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
ready_queue.remove(priority, thread);
}
void Scheduler::MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
// Thread is not in queue
ASSERT(ready_queue.contains(thread) != -1);
ready_queue.remove(priority, thread);
ready_queue.push_back(priority, thread);
}
void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
@@ -179,4 +190,64 @@ void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
ready_queue.prepare(priority);
}
Thread* Scheduler::GetNextSuggestedThread(u32 core) const {
std::lock_guard<std::mutex> lock(scheduler_mutex);
const u32 mask = 1U << core;
return ready_queue.get_first_filter(
[mask](Thread const* thread) { return (thread->GetAffinityMask() & mask) != 0; });
}
void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread
MoveThreadToBackOfPriorityQueue(thread, thread->GetPriority());
Reschedule();
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
const auto priority = thread->GetPriority();
const auto core = static_cast<u32>(thread->GetProcessorID());
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(priority < THREADPRIO_COUNT);
// Reschedule thread to end of queue.
MoveThreadToBackOfPriorityQueue(thread, priority);
Thread* suggested_thread = nullptr;
// Search through all of the cpu cores (except this one) for a suggested thread.
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
if (cur_core == core)
continue;
const auto res =
Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core);
if (res != nullptr) {
suggested_thread = res;
break;
}
}
// If a suggested thread was found, queue that for this core
if (suggested_thread != nullptr)
suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
}
void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) {
UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
}
} // namespace Kernel

View File

@@ -48,9 +48,81 @@ public:
/// Unschedules a thread that was already scheduled
void UnscheduleThread(Thread* thread, u32 priority);
/// Moves a thread to the back of the current priority queue
void MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority);
/// Sets the priority of a thread in the scheduler
void SetThreadPriority(Thread* thread, u32 priority);
/// Gets the next suggested thread for load balancing
Thread* GetNextSuggestedThread(u32 core) const;
/**
* YieldWithoutLoadBalancing -- analogous to normal yield on a system
* Moves the thread to the end of the ready queue for its priority, and then reschedules the
* system to the new head of the queue.
*
* Example (Single Core -- but can be extrapolated to multi):
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
* Currently Running: ThreadR
*
* ThreadR calls YieldWithoutLoadBalancing
*
* ThreadR is moved to the end of ready_queue[prio=0]:
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: Nothing
*
* System is rescheduled (ThreadA is popped off of queue):
* ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: ThreadA
*
* If the queue is empty at time of call, no yielding occurs. This does not cross between cores
* or priorities at all.
*/
void YieldWithoutLoadBalancing(Thread* thread);
/**
* YieldWithLoadBalancing -- yield but with better selection of the new running thread
* Moves the current thread to the end of the ready queue for its priority, then selects a
* 'suggested thread' (a thread on a different core that could run on this core) from the
* scheduler, changes its core, and reschedules the current core to that thread.
*
* Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
* single core):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* ThreadQ calls YieldWithLoadBalancing
*
* ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
* ready_queue[core=0][prio=0]: ThreadA, ThreadB
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* A list of suggested threads for each core is compiled
* Suggested Threads: {ThreadC on Core 1}
* If this were quad core (as the switch is), there could be between 0 and 3 threads in this
* list. If there are more than one, the thread is selected by highest prio.
*
* ThreadC is core changed to Core 0:
* ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: None on Core 0 || ThreadP on Core 1
*
* System is rescheduled (ThreadC is popped off of queue):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: ThreadC on Core 0 || ThreadP on Core 1
*
* If no suggested threads can be found this will behave just as normal yield. If there are
* multiple candidates for the suggested thread on a core, the highest prio is taken.
*/
void YieldWithLoadBalancing(Thread* thread);
/// Currently unknown -- asserts as unimplemented on call
void YieldAndWaitForLoadBalancing(Thread* thread);
/// Returns a list of all threads managed by the scheduler
const std::vector<SharedPtr<Thread>>& GetThreadList() const {
return thread_list;

View File

@@ -736,6 +736,13 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
const auto* const current_process = Core::CurrentProcess();
// Note: The kernel uses the current process's resource limit instead of
// the one from the thread owner's resource limit.
const ResourceLimit& resource_limit = current_process->GetResourceLimit();
if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
return ERR_INVALID_THREAD_PRIORITY;
}
SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
if (!thread) {
return ERR_INVALID_HANDLE;
@@ -789,7 +796,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
return ERR_INVALID_MEMORY_RANGE;
}
return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare);
return shared_memory->Map(current_process, addr, permissions_type, MemoryPermission::DontCare);
}
static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
@@ -819,7 +826,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
return ERR_INVALID_MEMORY_RANGE;
}
return shared_memory->Unmap(*current_process, addr);
return shared_memory->Unmap(current_process, addr);
}
/// Query process memory
@@ -878,6 +885,10 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
}
auto* const current_process = Core::CurrentProcess();
const ResourceLimit& resource_limit = current_process->GetResourceLimit();
if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
return ERR_INVALID_THREAD_PRIORITY;
}
if (processor_id == THREADPROCESSORID_DEFAULT) {
// Set the target CPU to the one specified in the process' exheader.
@@ -951,16 +962,46 @@ static void SleepThread(s64 nanoseconds) {
// Don't attempt to yield execution if there are no available threads to run,
// this way we avoid a useless reschedule to the idle thread.
if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
if (!Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
return;
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
YieldWithLoadBalancing = 1,
YieldAndWaitForLoadBalancing = 2,
};
if (nanoseconds <= 0) {
auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(GetCurrentThread());
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
break;
default:
UNREACHABLE_MSG(
"Unimplemented sleep yield type '{:016X}'! Falling back to forced reschedule...",
nanoseconds);
}
nanoseconds = 0;
}
// Sleep current thread and check for next thread to schedule
WaitCurrentThread_Sleep();
// Create an event to wake the thread up after the specified nanosecond delay has passed
GetCurrentThread()->WakeAfterDelay(nanoseconds);
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(0).PrepareReschedule();
Core::System::GetInstance().CpuCore(1).PrepareReschedule();
Core::System::GetInstance().CpuCore(2).PrepareReschedule();
Core::System::GetInstance().CpuCore(3).PrepareReschedule();
}
/// Wait process wide key atomic
@@ -1183,39 +1224,9 @@ static ResultCode ResetSignal(Handle handle) {
/// Creates a TransferMemory object
static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) {
LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
permissions);
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
return ERR_INVALID_ADDRESS;
}
if (!Common::Is4KBAligned(size) || size == 0) {
LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
return ERR_INVALID_ADDRESS;
}
if (!IsValidAddressRange(addr, size)) {
LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
addr, size);
return ERR_INVALID_ADDRESS_STATE;
}
const auto perms = static_cast<MemoryPermission>(permissions);
if (perms != MemoryPermission::None && perms != MemoryPermission::Read &&
perms != MemoryPermission::ReadWrite) {
LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
permissions);
return ERR_INVALID_MEMORY_PERMISSIONS;
}
auto& kernel = Core::System::GetInstance().Kernel();
auto& handle_table = Core::CurrentProcess()->GetHandleTable();
const auto shared_mem_handle = SharedMemory::Create(
kernel, handle_table.Get<Process>(CurrentProcess), size, perms, perms, addr);
CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
permissions);
*handle = 0;
return RESULT_SUCCESS;
}

View File

@@ -26,6 +26,7 @@ enum ThreadPriority : u32 {
THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps
THREADPRIO_LOWEST = 63, ///< Lowest thread priority
THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities.
};
enum ThreadProcessorId : s32 {

View File

@@ -28,6 +28,8 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp

View File

@@ -68,13 +68,13 @@ void Fermi2D::HandleSurfaceCopy() {
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
dst_buffer, true, regs.src.BlockHeight(),
regs.src.BlockDepth());
regs.src.BlockDepth(), 0);
} else {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
src_buffer, false, regs.dst.BlockHeight(),
regs.dst.BlockDepth());
regs.dst.BlockDepth(), 0);
}
}
}

View File

@@ -319,6 +319,11 @@ void Maxwell3D::DrawArrays() {
}
}
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs) {
return std::tie(lhs.cbuf_index, lhs.cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -31,6 +32,12 @@ public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Structure representing a global memory region
struct GlobalMemoryDescriptor {
u64 cbuf_index;
u64 cbuf_offset;
};
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
@@ -879,7 +886,15 @@ public:
Cull cull;
INSERT_PADDING_WORDS(0x28);
INSERT_PADDING_WORDS(0x6);
union {
BitField<0, 1, u32> depth_range_0_1;
BitField<3, 1, u32> depth_clamp_near;
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
INSERT_PADDING_WORDS(0x21);
struct {
u32 enable;
@@ -1037,6 +1052,8 @@ public:
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
std::set<GlobalMemoryDescriptor> global_memory_uniforms;
};
State state{};
@@ -1069,6 +1086,9 @@ public:
return macro_memory;
}
std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
private:
void InitializeRegisterDefaults();
@@ -1123,6 +1143,9 @@ private:
void DrawArrays();
};
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs);
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
@@ -1188,6 +1211,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);

View File

@@ -1,4 +1,4 @@
// Copyright 2018 yuzu Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -206,6 +206,8 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -772,6 +774,12 @@ union Instruction {
BitField<44, 2, u64> unknown;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> offset_immediate;
} ld_g;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;

View File

@@ -0,0 +1,97 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/utils.h"
namespace OpenGL {
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
buffer.Create();
LabelGLObject(GL_BUFFER, buffer.handle, addr);
}
/// Helper function to get the maximum size we can use for an OpenGL uniform block
static u32 GetMaxUniformBlockSize() {
GLint max_size{};
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_size);
return static_cast<u32>(max_size);
}
void CachedGlobalRegion::Reload(u32 size_) {
static const u32 max_size{GetMaxUniformBlockSize()};
size = size_;
if (size > max_size) {
size = max_size;
LOG_CRITICAL(HW_GPU, "Global region size {} exceeded max UBO size of {}!", size_, max_size);
}
glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
glBufferData(GL_UNIFORM_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
}
return search->second;
}
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
region = std::make_shared<CachedGlobalRegion>(addr, size);
ReserveGlobalRegion(region);
}
region->Reload(size);
return region;
}
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
reserve[region->GetAddr()] = region;
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& global_region,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
cbufs.const_buffers[global_region.cbuf_index].address + global_region.cbuf_offset)};
ASSERT(cbuf_addr);
const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
const auto size = Memory::Read32(*cbuf_addr + 8);
const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
ASSERT(actual_addr);
// Look up global region in the cache based on address
GlobalRegion region{TryGet(*actual_addr)};
if (!region) {
// No global region found - create a new one
region = GetUncachedGlobalRegion(*actual_addr, size);
Register(region);
}
return region;
}
} // namespace OpenGL

View File

@@ -0,0 +1,89 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <fmt/format.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class RasterizerOpenGL;
class CachedGlobalRegion;
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
/// Helper class for caching global region uniform locations
class CachedGlobalRegionUniform {
public:
explicit CachedGlobalRegionUniform(std::size_t index) : index{index} {}
std::string GetName() const {
return fmt::format("global_memory_region_declblock_{}", index);
}
u32 GetHash() const {
// This needs to be unique from ConstBufferEntry::GetHash and SamplerEntry::GetHash
return (static_cast<u32>(index) << 16) | 0xFFFF;
}
private:
std::size_t index{};
};
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
CachedGlobalRegion(VAddr addr, u32 size);
/// Gets the address of the shader in guest memory, required for cache management
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const {
return size;
}
/// Gets the GL program handle for the buffer
GLuint GetBufferHandle() const {
return buffer.handle;
}
/// Reloads the global region from guest memory
void Reload(u32 size_);
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
VAddr addr;
u32 size;
OGLBuffer buffer;
};
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the current specified shader stage program
GlobalRegion GetGlobalRegion(
const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
void ReserveGlobalRegion(const GlobalRegion& region);
std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -81,7 +81,7 @@ struct DrawParameters {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -113,10 +113,43 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
CheckExtensions();
}
RasterizerOpenGL::~RasterizerOpenGL() {}
void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
LOG_WARNING(
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_viewport_array) {
LOG_WARNING(Render_OpenGL, "Viewport arrays are not supported! This can potentially cause "
"issues in games that use geometry shaders.");
}
if (!GLAD_GL_ARB_color_buffer_float) {
LOG_WARNING(
Render_OpenGL,
"Color clamp control is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
if (!GLAD_GL_AMD_depth_clamp_separate) {
if (!GLAD_GL_ARB_depth_clamp) {
LOG_WARNING(
Render_OpenGL,
"Depth Clamp is not supported! This can cause graphical issues in some games.");
} else {
LOG_WARNING(Render_OpenGL, "Separate Depth Clamp is not supported! This can cause "
"graphical issues in some games.");
}
}
}
void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -267,7 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_buffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,9 +354,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
current_buffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
current_buffer_bindpoint);
// Configure global memory regions for this shader stage.
current_buffer_bindpoint =
SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_buffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -695,6 +733,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
@@ -919,6 +958,29 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
std::size_t global_region_index{};
const auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
for (const auto& global_region : maxwell3d.state.global_memory_uniforms) {
const auto& region{
global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage))};
const GLenum b_index{
shader->GetProgramResourceIndex(CachedGlobalRegionUniform{global_region_index})};
if (b_index != GL_INVALID_INDEX) {
glBindBufferBase(GL_UNIFORM_BUFFER, current_bindpoint, region->GetBufferHandle());
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), b_index,
current_bindpoint);
++current_bindpoint;
}
++global_region_index;
}
return current_bindpoint;
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -979,6 +1041,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
viewport.depth_range_far = regs.viewports[i].depth_range_far;
viewport.depth_range_near = regs.viewports[i].depth_range_near;
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
}
void RasterizerOpenGL::SyncClipEnabled() {

View File

@@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -118,7 +119,7 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
/*
/**
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
@@ -128,7 +129,17 @@ private:
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/*
/**
* Configures the current global memory regions to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
@@ -186,6 +197,10 @@ private:
/// Check asserts for alpha testing.
void CheckAlphaTests();
/// Check for extensions that are not strictly required,
/// but are needed for correct emulation
void CheckExtensions();
bool has_ARB_direct_state_access = false;
bool has_ARB_multi_bind = false;
@@ -193,6 +208,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;

View File

@@ -95,6 +95,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
@@ -160,6 +161,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << config.memory_layout.block_width;
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@@ -195,6 +197,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U);
params.block_depth = 1 << std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
@@ -221,6 +224,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@@ -371,8 +375,8 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
std::size_t gl_buffer_size, VAddr addr) {
void MortonCopy(u32 stride, u32 width_spacing, u32 block_height, u32 height, u32 block_depth,
u32 depth, u8* gl_buffer, std::size_t gl_buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -382,17 +386,19 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d
if (morton_to_gl) {
Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
stride, height, depth, block_height, block_depth);
stride, height, depth, block_height, block_depth,
width_spacing);
} else {
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
gl_buffer, false, block_height, block_depth);
Tegra::Texture::CopySwizzledData(
(stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), gl_buffer, false,
block_height, block_depth, width_spacing);
}
}
using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
VideoCore::Surface::MaxPixelFormat>;
using GLConversionArray =
std::array<void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
VideoCore::Surface::MaxPixelFormat>;
static constexpr GLConversionArray morton_to_gl_fns = {
// clang-format off
@@ -551,16 +557,17 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
const u64 gl_size = params.LayerSizeGL(mip_level);
for (u32 i = 0; i < params.depth; i++) {
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
params.MipWidth(mip_level), params.tile_width_spacing,
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
}
} else {
const u64 offset = params.GetMipmapLevelOffset(mip_level);
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipWidth(mip_level), params.tile_width_spacing, params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
gl_buffer.size(), params.addr + offset);
}

View File

@@ -208,6 +208,7 @@ struct SurfaceParams {
u32 block_width;
u32 block_height;
u32 block_depth;
u32 tile_width_spacing;
PixelFormat pixel_format;
ComponentType component_type;
SurfaceType type;

View File

@@ -98,18 +98,6 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
}
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
const auto search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {

View File

@@ -71,7 +71,18 @@ public:
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
template <typename T>
GLuint GetProgramResourceIndex(const T& buffer) {
const auto& search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK,
buffer.GetName().c_str())};
resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);

View File

@@ -13,6 +13,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -570,6 +571,7 @@ public:
GenerateInputAttrs();
GenerateOutputAttrs();
GenerateConstBuffers();
GenerateGlobalRegions();
GenerateSamplers();
GenerateGeometry();
}
@@ -691,6 +693,21 @@ private:
declarations.AddNewLine();
}
/// Generates declarations for global memory regions.
void GenerateGlobalRegions() {
const auto& regions{
Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms};
for (std::size_t i = 0; i < regions.size(); ++i) {
declarations.AddLine("layout(std140) uniform " +
fmt::format("global_memory_region_declblock_{}", i));
declarations.AddLine('{');
declarations.AddLine(" vec4 global_memory_region_" + std::to_string(i) + "[0x400];");
declarations.AddLine("};");
declarations.AddNewLine();
}
declarations.AddNewLine();
}
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
@@ -1778,6 +1795,11 @@ private:
} else {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Integer);
if (opcode->get().GetId() == OpCode::Id::IADD_C) {
s_last_iadd = last_iadd;
last_iadd = IADDReference{instr.gpr8.Value(), instr.cbuf34.index,
instr.cbuf34.offset};
}
}
}
@@ -3008,6 +3030,72 @@ private:
shader.AddLine('}');
break;
}
case OpCode::Id::LDG: {
// Determine number of GPRs to fill with data
u64 count = 1;
switch (instr.ld_g.type) {
case Tegra::Shader::UniformType::Single:
count = 1;
break;
case Tegra::Shader::UniformType::Double:
count = 2;
break;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
count = 4;
break;
default:
UNREACHABLE_MSG("Unimplemented LDG size!");
}
auto [gpr_index, index, offset] = last_iadd;
// The last IADD might be the upper u32 of address, so instead take the one before
// that.
if (gpr_index == Register::ZeroIndex) {
gpr_index = s_last_iadd.out;
index = s_last_iadd.cbuf_index;
offset = s_last_iadd.cbuf_offset;
}
const auto gpr = regs.GetRegisterAsInteger(gpr_index);
const auto constbuffer =
regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);
Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms.insert(
{index, offset * 4});
const auto memory = fmt::format("global_memory_region_{}",
Core::System::GetInstance()
.GPU()
.Maxwell3D()
.state.global_memory_uniforms.size() -
1);
const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
const auto o_register = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
const auto address = "( " + immediate + " + " + o_register + " )";
const auto base_sub = address + " - " + constbuffer;
// New scope to prevent potential conflicts
shader.AddLine('{');
++shader.scope;
shader.AddLine("uint final_offset = " + base_sub + ";");
for (std::size_t out = 0; out < count; ++out) {
const u64 reg_id = instr.gpr0.Value() + out;
const auto this_memory =
fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
memory, out * 4, out * 4);
regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
}
--shader.scope;
shader.AddLine('}');
break;
}
default: {
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -3777,9 +3865,18 @@ private:
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
struct IADDReference {
Register out;
u64 cbuf_index;
u64 cbuf_offset;
};
IADDReference last_iadd{};
IADDReference s_last_iadd{};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace OpenGL::GLShader::Decompiler
};
std::string GetCommonDeclarations() {
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",

View File

@@ -57,7 +57,8 @@ public:
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | index;
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | index;
}
private:
@@ -138,7 +139,8 @@ public:
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {

View File

@@ -92,6 +92,8 @@ OpenGLState::OpenGLState() {
point.size = 1;
fragment_color_clamp.enabled = false;
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
void OpenGLState::ApplyDefaultState() {
@@ -469,6 +471,32 @@ void OpenGLState::ApplyVertexBufferState() const {
}
}
void OpenGLState::ApplyDepthClamp() const {
if (depth_clamp.far_plane != cur_state.depth_clamp.far_plane ||
depth_clamp.near_plane != cur_state.depth_clamp.near_plane) {
if (GLAD_GL_AMD_depth_clamp_separate) {
if (depth_clamp.far_plane) {
glEnable(GL_DEPTH_CLAMP_FAR_AMD);
} else {
glDisable(GL_DEPTH_CLAMP_FAR_AMD);
}
if (depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP_NEAR_AMD);
} else {
glDisable(GL_DEPTH_CLAMP_NEAR_AMD);
}
} else {
if (depth_clamp.far_plane || depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP);
} else {
glDisable(GL_DEPTH_CLAMP);
}
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
}
}
}
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexBufferState();
@@ -520,7 +548,7 @@ void OpenGLState::Apply() const {
glDisable(GL_SAMPLE_ALPHA_TO_ONE);
}
}
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();

View File

@@ -48,6 +48,11 @@ public:
bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB
} fragment_color_clamp;
struct {
bool far_plane;
bool near_plane;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
} geometry_shaders;
@@ -226,6 +231,7 @@ private:
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
void ApplyDepthClamp() const;
};
} // namespace OpenGL

View File

@@ -127,7 +127,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
template <bool fast>
void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
const u32 width_spacing) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
@@ -137,7 +138,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
const u32 blocks_on_x = div_ceil(width, block_x_elements);
const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
const u32 xy_block_size = gob_size * block_height;
@@ -169,13 +171,15 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth) {
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
} else {
SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
}
}
@@ -228,19 +232,19 @@ u32 BytesPerPixel(TextureFormat format) {
void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth) {
u32 block_depth, u32 width_spacing) {
CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
block_height, block_depth);
block_height, block_depth, width_spacing);
}
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
u32 block_height, u32 block_depth, u32 width_spacing) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
width, height, depth, block_height, block_depth);
width, height, depth, block_height, block_depth, width_spacing);
return unswizzled_data;
}

View File

@@ -22,19 +22,20 @@ inline std::size_t GetGOBSize() {
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight);
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
/**
* Unswizzles a swizzled texture without changing its format.
*/
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight);
u32 block_depth = TICEntry::DefaultBlockHeight,
u32 width_spacing = 0);
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth);
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.

View File

@@ -166,6 +166,8 @@ struct TICEntry {
BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth;
BitField<10, 3, u32> tile_width_spacing;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
BitField<26, 1, u32> use_header_opt_control;