Rasterizer: Refactor inlineToMemory.

GPU: Improve syncing.
Rasterizer: Implement Inline2Memory Acceleration.
2022-02-01 01:47:28 +01:00 · 2022-01-29 23:02:04 +01:00 · 2022-01-29 22:53:27 +01:00 · 2022-01-29 17:42:28 +01:00 · 2022-01-28 20:04:24 -05:00 · 2022-01-28 20:03:50 -05:00
26 changed files with 201 additions and 46 deletions
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -72,7 +72,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
    if (caps.invariant_tsc) {
        rtsc_frequency = EstimateRDTSCFrequency();
    }
-    if (rtsc_frequency == 0) {
+
+    // Fall back to StandardWallClock if the rtsc period is one nanosecond or longer
+    if (rtsc_frequency <= 1000000000) {
        return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
                                                   emulated_clock_frequency);
    } else {
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
        raw_status.gyro.y.value,
        raw_status.gyro.z.value,
    });
+    emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold);
    emulated.UpdateRotation(raw_status.delta_timestamp);
    emulated.UpdateOrientation(raw_status.delta_timestamp);
    force_update_motion = raw_status.force_update;
--- a/src/core/hid/motion_input.cpp
+++ b/src/core/hid/motion_input.cpp
@@ -10,7 +10,7 @@ namespace Core::HID {
 MotionInput::MotionInput() {
    // Initialize PID constants with default values
    SetPID(0.3f, 0.005f, 0.0f);
-    SetGyroThreshold(0.00005f);
+    SetGyroThreshold(0.007f);
 }

 void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) {
@@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) {
        gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f);
    }

-    if (gyro.Length2() < gyro_threshold) {
+    if (gyro.Length() < gyro_threshold) {
        gyro = {};
    } else {
        only_accelerometer = false;
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -258,7 +258,7 @@ private:

 private:
    constexpr void ClearAffinityBit(u64& affinity, s32 core) {
-        affinity &= ~(u64(1) << core);
+        affinity &= ~(UINT64_C(1) << core);
    }

    constexpr s32 GetNextCore(u64& affinity) {
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) {
 }

 void KScheduler::Reload(KThread* thread) {
-    LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr");
+    LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName());

-    if (thread) {
-        ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");
-
-        Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
-        cpu_core.LoadContext(thread->GetContext32());
-        cpu_core.LoadContext(thread->GetContext64());
-        cpu_core.SetTlsAddress(thread->GetTLSAddress());
-        cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
-        cpu_core.ClearExclusiveState();
-    }
+    Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
+    cpu_core.LoadContext(thread->GetContext32());
+    cpu_core.LoadContext(thread->GetContext64());
+    cpu_core.SetTlsAddress(thread->GetTLSAddress());
+    cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+    cpu_core.ClearExclusiveState();
 }

 void KScheduler::SwitchContextStep2() {
    // Load context of new thread
-    Reload(current_thread.load());
+    Reload(GetCurrentThread());

    RescheduleCurrentCore();
 }
@@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() {
    KThread* previous_thread = GetCurrentThread();
    KThread* next_thread = state.highest_priority_thread;

-    state.needs_scheduling = false;
+    state.needs_scheduling.store(false);

    // We never want to schedule a null thread, so use the idle thread if we don't have a next.
    if (next_thread == nullptr) {
        next_thread = idle_thread;
    }

+    if (next_thread->GetCurrentCore() != core_id) {
+        next_thread->SetCurrentCore(core_id);
+    }
+
    // We never want to schedule a dummy thread, as these are only used by host threads for locking.
    if (next_thread->GetThreadType() == ThreadType::Dummy) {
        ASSERT_MSG(false, "Dummy threads should never be scheduled!");
@@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() {
        return;
    }

-    if (next_thread->GetCurrentCore() != core_id) {
-        next_thread->SetCurrentCore(core_id);
-    }
-
-    current_thread.store(next_thread);
-
+    // Update the CPU time tracking variables.
    KProcess* const previous_process = system.Kernel().CurrentProcess();
-
    UpdateLastContextSwitchTime(previous_thread, previous_process);

    // Save context for previous thread
@@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() {

    std::shared_ptr<Common::Fiber>* old_context;
    old_context = &previous_thread->GetHostContext();
+
+    // Set the new thread.
+    current_thread.store(next_thread);
+
    guard.Unlock();

    Common::Fiber::YieldTo(*old_context, *switch_fiber);
@@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() {
        do {
            auto next_thread = current_thread.load();
            if (next_thread != nullptr) {
-                next_thread->context_guard.Lock();
-                if (next_thread->GetRawState() != ThreadState::Runnable) {
+                const auto locked = next_thread->context_guard.TryLock();
+                if (state.needs_scheduling.load()) {
                    next_thread->context_guard.Unlock();
                    break;
                }
@@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() {
                    next_thread->context_guard.Unlock();
                    break;
                }
+                if (!locked) {
+                    continue;
+                }
            }
            auto thread = next_thread ? next_thread : idle_thread;
            Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext());
--- a/src/input_common/drivers/sdl_driver.cpp
+++ b/src/input_common/drivers/sdl_driver.cpp
@@ -109,8 +109,9 @@ public:

    bool HasHDRumble() const {
        if (sdl_controller) {
-            return (SDL_GameControllerGetType(sdl_controller.get()) ==
-                    SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO);
+            const auto type = SDL_GameControllerGetType(sdl_controller.get());
+            return (type == SDL_CONTROLLER_TYPE_NINTENDO_SWITCH_PRO) ||
+                   (type == SDL_CONTROLLER_TYPE_PS5);
        }
        return false;
    }
--- a/src/input_common/input_poller.cpp
+++ b/src/input_common/input_poller.cpp
@@ -504,9 +504,10 @@ private:

 class InputFromMotion final : public Common::Input::InputDevice {
 public:
-    explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_,
+    explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_,
                             InputEngine* input_engine_)
-        : identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) {
+        : identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_),
+          input_engine(input_engine_) {
        UpdateCallback engine_callback{[this]() { OnChange(); }};
        const InputIdentifier input_identifier{
            .identifier = identifier,
@@ -525,8 +526,9 @@ public:
        const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor);
        Common::Input::MotionStatus status{};
        const Common::Input::AnalogProperties properties = {
-            .deadzone = 0.001f,
+            .deadzone = 0.0f,
            .range = 1.0f,
+            .threshold = gyro_threshold,
            .offset = 0.0f,
        };
        status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties};
@@ -551,6 +553,7 @@ public:
 private:
    const PadIdentifier identifier;
    const int motion_sensor;
+    const float gyro_threshold;
    int callback_key;
    InputEngine* input_engine;
 };
@@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice(

    if (params.Has("motion")) {
        const auto motion_sensor = params.Get("motion", 0);
+        const auto gyro_threshold = params.Get("threshold", 0.007f);
        input_engine->PreSetController(identifier);
        input_engine->PreSetMotion(identifier, motion_sensor);
-        return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get());
+        return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold,
+                                                 input_engine.get());
    }

    const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f);
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
    }
 }

+void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
+    if (ctx.runtime_info.xfb_varyings.empty()) {
+        return;
+    }
+    ctx.AddCapability(spv::Capability::TransformFeedback);
+    ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb);
+}
+
 void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
    if (info.uses_sampled_1d) {
        ctx.AddCapability(spv::Capability::Sampled1D);
@@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
    if (info.uses_sample_id) {
        ctx.AddCapability(spv::Capability::SampleRateShading);
    }
-    if (!ctx.runtime_info.xfb_varyings.empty()) {
-        ctx.AddCapability(spv::Capability::TransformFeedback);
-    }
    if (info.uses_derivatives) {
        ctx.AddCapability(spv::Capability::DerivativeControl);
    }
@@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
        SetupSignedNanCapabilities(profile, program, ctx, main);
    }
    SetupCapabilities(profile, program.info, ctx);
+    SetupTransformFeedbackCapabilities(ctx, main);
    PatchPhiNodes(program, ctx);
    return ctx.Assemble();
 }
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -74,7 +74,7 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
        const auto [scope, semantics]{AtomicArgs(ctx)};
        return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
    }
-    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
@@ -267,7 +267,7 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const
        const auto [scope, semantics]{AtomicArgs(ctx)};
        return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value);
    }
-    LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
+    LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
    const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                    binding, offset, sizeof(u32[2]))};
    const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))};
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -688,7 +688,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
    case IR::Opcode::StorageAtomicAnd64:
    case IR::Opcode::StorageAtomicOr64:
    case IR::Opcode::StorageAtomicXor64:
-        info.used_storage_buffer_types |= IR::Type::U64;
+        info.used_storage_buffer_types |= IR::Type::U64 | IR::Type::U32x2;
        info.uses_int64_bit_atomics = true;
        break;
    case IR::Opcode::BindlessImageAtomicIAdd32:
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -131,6 +131,8 @@ public:

    void DownloadMemory(VAddr cpu_addr, u64 size);

+    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+
    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);

    void DisableGraphicsUniformBuffer(size_t stage, u32 index);
@@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
        return;
    }
    MICROPROFILE_SCOPE(GPU_DownloadMemory);
+    const bool is_accuracy_normal =
+        Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;

    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
    u64 total_size_bytes = 0;
@@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
            ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
                buffer.ForEachDownloadRangeAndClear(
                    cpu_addr, size, [&](u64 range_offset, u64 range_size) {
+                        if (is_accuracy_normal) {
+                            return;
+                        }
                        const VAddr buffer_addr = buffer.CpuAddr();
                        const auto add_download = [&](VAddr start, VAddr end) {
                            const u64 new_offset = start - buffer_addr;
@@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
    const IntervalType base_interval{cpu_addr, cpu_addr + size};
    common_ranges.add(base_interval);

-    const bool is_accuracy_high =
-        Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
-    if (!is_async && !is_accuracy_high) {
+    if (!is_async) {
        return;
    }
    uncommitted_ranges.add(base_interval);
@@ -1474,6 +1479,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
            // When this memory region has been joined a bunch of times, we assume it's being used
            // as a stream buffer. Increase the size to skip constantly recreating buffers.
            has_stream_leap = true;
+            begin -= PAGE_SIZE * 256;
+            cpu_addr = begin;
            end += PAGE_SIZE * 256;
        }
    }
@@ -1641,6 +1648,42 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
 }

+template <class P>
+bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
+                                  std::span<u8> inlined_buffer) {
+    const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
+    if (!is_dirty) {
+        return false;
+    }
+    if (!IsRegionGpuModified(dest_address, copy_size)) {
+        return false;
+    }
+
+    const IntervalType subtract_interval{dest_address, dest_address + copy_size};
+    ClearDownload(subtract_interval);
+    common_ranges.subtract(subtract_interval);
+
+    BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
+    auto& buffer = slot_buffers[buffer_id];
+    SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
+
+    if constexpr (USE_MEMORY_MAPS) {
+        std::array copies{BufferCopy{
+            .src_offset = 0,
+            .dst_offset = buffer.Offset(dest_address),
+            .size = copy_size,
+        }};
+        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
+        u8* const src_pointer = upload_staging.mapped_span.data();
+        std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+    } else {
+        buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
+    }
+
+    return true;
+}
+
 template <class P>
 void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
    DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -7,6 +7,7 @@
 #include "common/assert.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
 #include "video_core/textures/decoders.h"

 namespace Tegra::Engines::Upload {
@@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_)

 State::~State() = default;

+void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
 void State::ProcessExec(const bool is_linear_) {
    write_offset = 0;
    copy_size = regs.line_length_in * regs.line_count;
@@ -32,7 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
    }
    const GPUVAddr address{regs.dest.Address()};
    if (is_linear) {
-        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+        rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer);
    } else {
        UNIMPLEMENTED_IF(regs.dest.z != 0);
        UNIMPLEMENTED_IF(regs.dest.depth != 1);
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -12,6 +12,10 @@ namespace Tegra {
 class MemoryManager;
 }

+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines::Upload {

 struct Registers {
@@ -60,6 +64,9 @@ public:
    void ProcessExec(bool is_linear_);
    void ProcessData(u32 data, bool is_last_call);

+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
 private:
    u32 write_offset = 0;
    u32 copy_size = 0;
@@ -68,6 +75,7 @@ private:
    bool is_linear = false;
    Registers& regs;
    MemoryManager& memory_manager;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
 };

 } // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default;

 void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
    rasterizer = rasterizer_;
+    upload_state.BindRasterizer(rasterizer);
 }

 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager)

 KeplerMemory::~KeplerMemory() = default;

+void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    upload_state.BindRasterizer(rasterizer_);
+}
+
 void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid KeplerMemory register, increase the size of the Regs structure");
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -22,6 +22,10 @@ namespace Tegra {
 class MemoryManager;
 }

+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines {

 /**
@@ -38,6 +42,9 @@ public:
    explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
    ~KeplerMemory() override;

+    /// Binds a rasterizer to this engine.
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
    /// Write the value to the register identified by method.
    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;

--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default;

 void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
    rasterizer = rasterizer_;
+    upload_state.BindRasterizer(rasterizer_);
 }

 void Maxwell3D::InitializeRegisterDefaults() {
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1557,7 +1557,8 @@ private:

    static constexpr u32 null_cb_data = 0xFFFFFFFF;
    struct CBDataState {
-        std::array<std::array<u32, 0x4000>, 16> buffer;
+        static constexpr size_t inline_size = 0x4000;
+        std::array<std::array<u32, inline_size>, 16> buffer;
        u32 current{null_cb_data};
        u32 id{null_cb_data};
        u32 start_pos{};
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -59,6 +59,7 @@ struct GPU::Impl {
        maxwell_3d->BindRasterizer(rasterizer);
        fermi_2d->BindRasterizer(rasterizer);
        kepler_compute->BindRasterizer(rasterizer);
+        kepler_memory->BindRasterizer(rasterizer);
        maxwell_dma->BindRasterizer(rasterizer);
    }

@@ -502,8 +503,13 @@ struct GPU::Impl {
        case BufferMethods::SemaphoreAddressHigh:
        case BufferMethods::SemaphoreAddressLow:
        case BufferMethods::SemaphoreSequence:
+            break;
        case BufferMethods::UnkCacheFlush:
+            rasterizer->SyncGuestHost();
+            break;
        case BufferMethods::WrcacheFlush:
+            rasterizer->SignalReference();
+            break;
        case BufferMethods::FenceValue:
            break;
        case BufferMethods::RefCnt:
@@ -513,7 +519,7 @@ struct GPU::Impl {
            ProcessFenceActionMethod();
            break;
        case BufferMethods::WaitForInterrupt:
-            ProcessWaitForInterruptMethod();
+            rasterizer->WaitForIdle();
            break;
        case BufferMethods::SemaphoreTrigger: {
            ProcessSemaphoreTriggerMethod();
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -143,6 +143,8 @@ public:
    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
    void Unmap(GPUVAddr gpu_addr, std::size_t size);

+    void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
+
 private:
    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
    void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
@@ -153,8 +155,6 @@ private:
    void TryLockPage(PageEntry page_entry, std::size_t size);
    void TryUnlockPage(PageEntry page_entry, std::size_t size);

-    void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
-
    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
                       bool is_safe) const;
    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -123,6 +123,9 @@ public:

    [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0;

+    virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+                                          std::span<u8> memory) = 0;
+
    /// Attempt to use a faster method to display the framebuffer to screen
    [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                                 VAddr framebuffer_addr, u32 pixel_stride) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA()
    return accelerate_dma;
 }

+void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+                                                std::span<u8> memory) {
+    auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+    if (!cpu_addr) [[unlikely]] {
+        gpu_memory.WriteBlock(address, memory.data(), copy_size);
+        return;
+    }
+    gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+    {
+        std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+        if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+            buffer_cache.WriteMemory(*cpu_addr, copy_size);
+        }
+    }
+    {
+        std::scoped_lock lock_texture{texture_cache.mutex};
+        texture_cache.WriteMemory(*cpu_addr, copy_size);
+    }
+    shader_cache.InvalidateRegion(*cpu_addr, copy_size);
+    query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
 bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (framebuffer_addr == 0) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -106,6 +106,8 @@ public:
                               const Tegra::Engines::Fermi2D::Surface& dst,
                               const Tegra::Engines::Fermi2D::Config& copy_config) override;
    Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+    void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+                                  std::span<u8> memory) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
    return accelerate_dma;
 }

+void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+                                                std::span<u8> memory) {
+    auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+    if (!cpu_addr) [[unlikely]] {
+        gpu_memory.WriteBlock(address, memory.data(), copy_size);
+        return;
+    }
+    gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+    {
+        std::unique_lock<std::mutex> lock{buffer_cache.mutex};
+        if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
+            buffer_cache.WriteMemory(*cpu_addr, copy_size);
+        }
+    }
+    {
+        std::scoped_lock lock_texture{texture_cache.mutex};
+        texture_cache.WriteMemory(*cpu_addr, copy_size);
+    }
+    pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
+    query_cache.InvalidateRegion(*cpu_addr, copy_size);
+}
+
 bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -99,6 +99,8 @@ public:
                               const Tegra::Engines::Fermi2D::Surface& dst,
                               const Tegra::Engines::Fermi2D::Config& copy_config) override;
    Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
+    void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
+                                  std::span<u8> memory) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
        connect(button, &QPushButton::customContextMenuRequested,
                [=, this](const QPoint& menu_location) {
                    QMenu context_menu;
+                    Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id);
                    context_menu.addAction(tr("Clear"), [&] {
                        emulated_controller->SetMotionParam(motion_id, {});
                        motion_map[motion_id]->setText(tr("[not set]"));
                    });
+                    if (param.Has("motion")) {
+                        context_menu.addAction(tr("Set gyro threshold"), [&] {
+                            const int gyro_threshold =
+                                static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f);
+                            const int new_threshold = QInputDialog::getInt(
+                                this, tr("Set threshold"), tr("Choose a value between 0% and 100%"),
+                                gyro_threshold, 0, 100);
+                            param.Set("threshold", new_threshold / 1000.0f);
+                            emulated_controller->SetMotionParam(motion_id, param);
+                        });
+                    }
                    context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location));
                });
    }
Author	SHA1	Message	Date
Fernando Sahmkow	8a6e6465a7	Rasterizer: Refactor inlineToMemory.	2022-02-01 01:47:28 +01:00
Fernando Sahmkow	d0a5a48948	GPU: Improve syncing.	2022-01-29 23:02:04 +01:00
Fernando Sahmkow	4258d515e6	Rasterizer: Implement Inline2Memory Acceleration.	2022-01-29 22:53:27 +01:00
Fernando Sahmkow	f54280dafd	Inline2Memory: Flush before writing buffer.	2022-01-29 17:42:28 +01:00
Morph	11099dda2e	Merge pull request #7791 from german77/wall_clock wall_clock: Use standard wall clock if rtsc frequency is too low	2022-01-28 20:04:24 -05:00
Morph	64a68ccbb4	Merge pull request #7800 from ameerj/spirv-int64-storage spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics	2022-01-28 20:03:50 -05:00
ameerj	4790ba7839	spirv_atomic: Define U32x2 storage buffers for 64-bit storage atomics Some drivers do not support 64-bit atomics, and fallback to atomically modifying U32x2 vectors. This change ensures that U32x2 storage vectors are defined in the spir-v shader when 64-bit atomics are used. Fixes a hang on some devices, notably Intel GPUs, when booting Pokemon Legends Arceus	2022-01-28 19:00:04 -05:00
Morph	1900abde13	Merge pull request #7784 from german77/ds5 input_common: Add DS5 to HD rumble list	2022-01-28 18:36:28 -05:00
Morph	60b5670577	Merge pull request #7787 from bunnei/scheduler-deadlock-fix hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated.	2022-01-28 18:30:29 -05:00
Morph	b00406c8e4	Merge pull request #7788 from ameerj/stream-buffer-begin buffer_cache: Reduce stream buffer allocations when expanding from the left	2022-01-28 18:30:01 -05:00
Morph	8dea7fa129	Merge pull request #7786 from ameerj/vmnmx-sel video_minimum_maximum: Implement src operand selectors	2022-01-28 18:24:56 -05:00
Morph	2241d8c971	Merge pull request #7799 from ameerj/amd-xfb emit_spirv: Add Xfb execution mode when transform feedback is used	2022-01-28 17:55:17 -05:00
ameerj	beaf7654bb	emit_spirv: Add Xfb execution mode when transform feedback is used Fixes Transform Feedback on Vulkan AMD drivers.	2022-01-28 16:32:48 -05:00
bunnei	0dec42431f	Merge pull request #7770 from german77/motion-threshold input_common: Add option to configure gyro threshold	2022-01-27 15:44:04 -08:00
german77	e4c63d432d	wall_clock: use standard wall clock if rtsc frequency is too low	2022-01-27 17:07:52 -06:00
ameerj	f300a1d54b	buffer_cache: Reduce stream buffer allocations when expanding from the left The existing stream buffer optimization accounts for size increases at the end of the allocated buffer. This adds the same optimization, increasing the size from the beginning of the buffer as well to reduce buffer allocations when expanding the same buffer from the left.	2022-01-27 15:31:43 -05:00
bunnei	3a1a3dd0db	hle: kernel: KScheduler: Fix deadlock with core waiting for a thread lock that has migrated. - Previously, it was possible for a thread migration to occur from core A to core B. - Next, core B waits on a guest lock that must be released by a thread queued for core A. - Meanwhile, core A is still waiting on the core B's current thread lock - resulting in a deadlock. - Fix this by try-locking the thread lock. - Fixes softlocks in FF8 and Pokemon Legends Arceus.	2022-01-27 12:17:14 -08:00
Narr the Reg	fd1cef5616	input_common: Add DS5 to HD rumble list	2022-01-26 21:49:32 -06:00
german77	ebf19616f4	input_common: Add option to configure gyro threshold	2022-01-23 21:54:33 -06:00