gpu: Move command processing to another thread.

2019-01-08 00:23:40 -05:00
parent 208c599463
commit 1690ea9902
6 changed files with 206 additions and 5 deletions
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,8 @@ add_library(video_core STATIC
    engines/shader_header.h
    gpu.cpp
    gpu.h
+    gpu_thread.cpp
+    gpu_thread.h
    macro_interpreter.cpp
    macro_interpreter.h
    memory_manager.cpp
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,12 +3,14 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
+#include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_compute.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
 #include "video_core/renderer_base.h"

 namespace Tegra {
@@ -33,6 +35,10 @@ GPU::GPU(VideoCore::RendererBase& renderer) : renderer{renderer} {
    maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
    kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
+
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread = std::make_unique<VideoCore::GPUThread>(renderer, *dma_pusher);
+    }
 }

 GPU::~GPU() = default;
@@ -62,14 +68,30 @@ const DmaPusher& GPU::DmaPusher() const {
 }

 void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    dma_pusher->Push(std::move(entries));
-    dma_pusher->QueuePendingCalls();
-    dma_pusher->DispatchCalls();
+    // Route on whether the GPU thread actually exists instead of re-reading the setting.
+    // The thread is only created in the constructor, so a setting that changes afterwards
+    // would otherwise dereference a null pointer or bypass a running GPU thread.
+    if (gpu_thread) {
+        // Asynchronous path: hand the command list to the GPU thread
+        gpu_thread->PushGPUEntries(std::move(entries));
+    } else {
+        // Synchronous path: queue and dispatch on the calling thread
+        dma_pusher->Push(std::move(entries));
+        dma_pusher->QueuePendingCalls();
+        dma_pusher->DispatchCalls();
+    }
 }

 void GPU::SwapBuffers(
    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    renderer.SwapBuffers(std::move(framebuffer));
+    // Route on whether the GPU thread actually exists instead of re-reading the setting.
+    // The thread is only created in the constructor, so a setting that changes afterwards
+    // would otherwise dereference a null pointer or bypass a running GPU thread.
+    if (gpu_thread) {
+        // Asynchronous path: the GPU thread performs the swap
+        gpu_thread->SwapBuffers(std::move(framebuffer));
+    } else {
+        // Synchronous path: swap directly on the calling thread
+        renderer.SwapBuffers(std::move(framebuffer));
+    }
+}
+
+/// Runs `callback` once the GPU thread is not dispatching commands. When no GPU thread was
+/// created (synchronous emulation), the callback is invoked immediately on the caller.
+void GPU::WaitUntilIdle(std::function<void()> callback) {
+    // Route on whether the GPU thread actually exists instead of re-reading the setting.
+    // The thread is only created in the constructor, so a setting that changes afterwards
+    // would otherwise dereference a null pointer or bypass a running GPU thread.
+    if (gpu_thread) {
+        gpu_thread->WaitUntilIdle(std::move(callback));
+    } else {
+        callback();
+    }
 }

 u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <functional>
 #include <memory>
 #include <vector>
 #include "common/common_types.h"
@@ -13,6 +14,7 @@
 #include "video_core/memory_manager.h"

 namespace VideoCore {
+class GPUThread;
 class RendererBase;
 } // namespace VideoCore

@@ -163,9 +165,13 @@ public:
    void SwapBuffers(
        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);

+    /// Blocks the caller until the GPU thread (if any) is idle, then invokes the callback
+    void WaitUntilIdle(std::function<void()> callback);
+
 private:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
    std::unique_ptr<Tegra::MemoryManager> memory_manager;
+    std::unique_ptr<VideoCore::GPUThread> gpu_thread;

    /// Mapping of command subchannels to their bound engine ids.
    std::array<EngineID, 8> bound_engines = {};
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,115 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/frontend/scope_acquire_window_context.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace {
+/// Entry point of the GPU worker thread.
+///
+/// Sleeps on `state.signal_condition` until the CPU side flags pending DMA work, a pending
+/// SwapBuffers request, or shutdown, then services whatever was flagged.
+///
+/// @param renderer   Renderer whose SwapBuffers is invoked from this thread.
+/// @param dma_pusher DMA pusher whose queued command lists are dispatched from this thread.
+/// @param state      State shared with the owning GPUThread; every flag in it is accessed
+///                   only while holding `state.signal_mutex`.
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      VideoCore::GPUThreadState& state) {
+
+    // Held for the whole lifetime of the thread
+    // NOTE(review): presumably makes the render window's context current on this thread - confirm
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    // The loop condition deliberately does not read state.is_running: that flag is written by
+    // ~GPUThread under signal_mutex, so reading it here without the lock would be a data race.
+    // Shutdown is detected under the lock below instead.
+    while (true) {
+        bool is_dma_pending{};
+        bool is_swapbuffers_pending{};
+
+        {
+            // Wait for CPU thread to send GPU commands
+            std::unique_lock<std::mutex> lock{state.signal_mutex};
+            state.signal_condition.wait(lock, [&] {
+                return state.is_dma_pending || state.is_swapbuffers_pending || !state.is_running;
+            });
+
+            if (!state.is_running) {
+                return;
+            }
+
+            // Snapshot the request flags so they can be acted on after the lock is released
+            is_dma_pending = state.is_dma_pending;
+            is_swapbuffers_pending = state.is_swapbuffers_pending;
+
+            if (is_dma_pending) {
+                // Done under signal_mutex, which is also held by GPUThread::PushGPUEntries
+                // while it pushes new command lists
+                dma_pusher.QueuePendingCalls();
+                state.is_dma_pending = false;
+            }
+        }
+
+        if (is_dma_pending) {
+            // Process pending DMA pushbuffer commands; running_mutex keeps
+            // GPUThread::WaitUntilIdle callers out while commands are being dispatched
+            std::lock_guard<std::recursive_mutex> lock{state.running_mutex};
+            dma_pusher.DispatchCalls();
+        }
+
+        if (is_swapbuffers_pending) {
+            // Process pending SwapBuffers
+            renderer.SwapBuffers(state.pending_swapbuffers_config);
+
+            {
+                // The flag is the predicate GPUThread::SwapBuffers waits on, so it must be
+                // cleared under signal_mutex. Clearing it unlocked is a data race, and the
+                // notification could slip in between the waiter's predicate check and its
+                // block, leaving the caller waiting forever.
+                std::lock_guard<std::mutex> lock{state.signal_mutex};
+                state.is_swapbuffers_pending = false;
+            }
+            state.signal_condition.notify_one();
+        }
+    }
+}
+} // Anonymous namespace
+
+namespace VideoCore {
+
+/// Stores the DMA pusher reference and launches the GPU worker thread running RunThread.
+GPUThread::GPUThread(RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : dma_pusher{dma_pusher} {
+    // The worker borrows the renderer, the DMA pusher and this object's state by reference,
+    // so all three have to outlive the thread (it is joined in the destructor).
+    thread = std::make_unique<std::thread>(
+        [this, &renderer, &dma_pusher] { RunThread(renderer, dma_pusher, state); });
+}
+
+/// Requests shutdown of the GPU worker thread and blocks until it has exited.
+GPUThread::~GPUThread() {
+    // Clear the running flag under the signal mutex so the worker's wait predicate sees it
+    std::unique_lock<std::mutex> shutdown_lock{state.signal_mutex};
+    state.is_running = false;
+    shutdown_lock.unlock();
+
+    // Wake the worker (it may be asleep waiting for work) and wait for it to finish
+    state.signal_condition.notify_one();
+    thread->join();
+}
+
+/// Hands a command list to the DMA pusher and wakes the GPU thread to process it.
+/// An empty list is ignored and does not wake the thread.
+void GPUThread::PushGPUEntries(Tegra::CommandList&& entries) {
+    if (!entries.empty()) {
+        // Publish the entries and raise the pending flag under the signal mutex
+        std::unique_lock<std::mutex> signal_lock{state.signal_mutex};
+        dma_pusher.Push(std::move(entries));
+        state.is_dma_pending = true;
+        signal_lock.unlock();
+
+        // Notify GPU thread that data is available
+        state.signal_condition.notify_one();
+    }
+}
+
+/// Asks the GPU thread to perform a SwapBuffers with the given framebuffer configuration and
+/// blocks the caller until the GPU thread has cleared the pending flag again.
+void GPUThread::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+
+    // Record the request under the signal mutex
+    std::unique_lock<std::mutex> signal_lock{state.signal_mutex};
+    state.pending_swapbuffers_config = framebuffer;
+    state.is_swapbuffers_pending = true;
+    signal_lock.unlock();
+
+    // Notify GPU thread that we should SwapBuffers
+    state.signal_condition.notify_one();
+
+    // Wait for SwapBuffers: sleep until the pending flag has been cleared
+    signal_lock.lock();
+    while (state.is_swapbuffers_pending) {
+        state.signal_condition.wait(signal_lock);
+    }
+}
+
+/// Invokes `callback` while holding the running mutex, i.e. never concurrently with the GPU
+/// thread's command dispatch, which takes the same mutex.
+void GPUThread::WaitUntilIdle(std::function<void()> callback) {
+    // The mutex is recursive because this can be called by the GPU thread itself, which may
+    // already be holding it while dispatching commands
+    const std::lock_guard<std::recursive_mutex> idle_guard{state.running_mutex};
+    callback();
+}
+
+} // namespace VideoCore
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,56 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+struct FramebufferConfig;
+}
+
+namespace VideoCore {
+
+class RendererBase;
+
+/// State shared between a GPUThread object (CPU side) and the GPU worker thread it owns.
+struct GPUThreadState final {
+    /// Cleared by ~GPUThread (under signal_mutex) to ask the worker thread to exit
+    bool is_running = true;
+
+    /// Set by GPUThread::PushGPUEntries (under signal_mutex) when command lists were pushed;
+    /// cleared by the worker thread once it has queued them
+    bool is_dma_pending = false;
+
+    /// Set by GPUThread::SwapBuffers (under signal_mutex); cleared by the worker thread after
+    /// it has performed the swap
+    bool is_swapbuffers_pending = false;
+
+    /// Framebuffer configuration copied in by GPUThread::SwapBuffers for the pending swap
+    /// NOTE(review): std::optional needs Tegra::FramebufferConfig to be a complete type, but
+    /// only a forward declaration is visible in this header - presumably every includer pulls
+    /// in video_core/gpu.h first; confirm
+    std::optional<Tegra::FramebufferConfig> pending_swapbuffers_config;
+
+    /// Signalled whenever one of the flags above changes
+    std::condition_variable signal_condition;
+
+    /// NOTE(review): not referenced by gpu_thread.cpp in this change
+    std::condition_variable running_condition;
+
+    /// Protects the flags above and pending_swapbuffers_config
+    std::mutex signal_mutex;
+
+    /// Held by the worker thread while dispatching commands and by GPUThread::WaitUntilIdle
+    /// while running its callback; recursive so the worker thread can re-enter it
+    std::recursive_mutex running_mutex;
+};
+
+/// Owns a worker thread that processes GPU command lists and SwapBuffers requests on behalf
+/// of the CPU thread(s) calling into it.
+class GPUThread final {
+public:
+    /// Starts the worker thread; `renderer` and `dma_pusher` must outlive this object
+    explicit GPUThread(RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+
+    /// Signals the worker thread to stop and joins it
+    ~GPUThread();
+
+    // The worker thread holds a reference to `state`, so the object must stay where it was
+    // constructed. Copy and move were already unavailable implicitly; say so explicitly.
+    GPUThread(const GPUThread&) = delete;
+    GPUThread& operator=(const GPUThread&) = delete;
+    GPUThread(GPUThread&&) = delete;
+    GPUThread& operator=(GPUThread&&) = delete;
+
+    /// Push GPU command entries to be processed
+    void PushGPUEntries(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame); blocks until the worker thread has performed the swap
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Blocks the caller while the worker thread is dispatching commands, then invokes the
+    /// callback
+    void WaitUntilIdle(std::function<void()> callback);
+
+private:
+    // Declaration order matters: `state` must be constructed before the thread that uses it
+    GPUThreadState state;
+    std::unique_ptr<std::thread> thread;
+    Tegra::DmaPusher& dma_pusher;
+};
+
+} // namespace VideoCore