shader/arithmetic: Implement FCMP immediate + register variant

Trivially add the encoding for this.
Merge pull request #4851 from ReinUsesLisp/core-threads-race
2020-10-28 17:05:41 -03:00 · 2020-10-28 04:54:35 -04:00 · 2020-10-28 04:54:19 -04:00 · 2020-10-28 04:26:10 -04:00 · 2020-10-28 03:16:10 -04:00 · 2020-10-28 03:14:58 -04:00
8 changed files with 49 additions and 41 deletions
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -91,7 +91,7 @@ void Fiber::Rewind() {
    SwitchToFiber(impl->rewind_handle);
 }

-void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
    ASSERT_MSG(to != nullptr, "Next fiber is null!");
    to->guard.lock();
@@ -199,7 +199,7 @@ void Fiber::Rewind() {
    boost::context::detail::jump_fcontext(impl->rewind_context, this);
 }

-void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
    ASSERT_MSG(to != nullptr, "Next fiber is null!");
    to->guard.lock();
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -46,7 +46,7 @@ public:

    /// Yields control from Fiber 'from' to Fiber 'to'
    /// Fiber 'from' must be the currently running fiber.
-    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
+    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
    [[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber();

    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -86,8 +86,6 @@ struct KernelCore::Impl {
        }
        cores.clear();

-        registered_core_threads.reset();
-
        process_list.clear();
        current_process = nullptr;

@@ -199,9 +197,7 @@ struct KernelCore::Impl {
        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
        ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
        ASSERT(it == end);
-        ASSERT(!registered_core_threads[core_id]);
        InsertHostThread(static_cast<u32>(core_id));
-        registered_core_threads.set(core_id);
    }

    void RegisterHostThread() {
@@ -332,7 +328,6 @@ struct KernelCore::Impl {

    // 0-3 IDs represent core threads, >3 represent others
    std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
-    std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;

    // Number of host threads is a relatively high number to avoid overflowing
    static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -6,18 +6,40 @@
 #include <cstdlib>
 #include <functional>
 #include <memory>
+#include <mutex>
+#include <stdexcept>
 #include <thread>
 #include <unordered_map>
 #include <vector>

 #include <catch2/catch.hpp>
-#include <math.h>
+
 #include "common/common_types.h"
 #include "common/fiber.h"
-#include "common/spin_lock.h"

 namespace Common {

+class ThreadIds {
+public:
+    void Register(u32 id) {
+        const auto thread_id = std::this_thread::get_id();
+        std::scoped_lock lock{mutex};
+        if (ids.contains(thread_id)) {
+            throw std::logic_error{"Registering the same thread twice"};
+        }
+        ids.emplace(thread_id, id);
+    }
+
+    [[nodiscard]] u32 Get() const {
+        std::scoped_lock lock{mutex};
+        return ids.at(std::this_thread::get_id());
+    }
+
+private:
+    mutable std::mutex mutex;
+    std::unordered_map<std::thread::id, u32> ids;
+};
+
 class TestControl1 {
 public:
    TestControl1() = default;
@@ -26,7 +48,7 @@ public:

    void ExecuteThread(u32 id);

-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
    std::vector<u32> items;
@@ -39,8 +61,7 @@ static void WorkControl1(void* control) {
 }

 void TestControl1::DoWork() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    u32 value = items[id];
    for (u32 i = 0; i < id; i++) {
        value++;
@@ -50,8 +71,7 @@ void TestControl1::DoWork() {
 }

 void TestControl1::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
    work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
@@ -98,8 +118,7 @@ public:
            value1 += i;
        }
        Fiber::YieldTo(fiber1, fiber3);
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        assert1 = id == 1;
        value2 += 5000;
        Fiber::YieldTo(fiber1, thread_fibers[id]);
@@ -115,8 +134,7 @@ public:
    }

    void DoWork3() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        assert2 = id == 0;
        value1 += 1000;
        Fiber::YieldTo(fiber3, thread_fibers[id]);
@@ -125,14 +143,12 @@ public:
    void ExecuteThread(u32 id);

    void CallFiber1() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber1);
    }

    void CallFiber2() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber2);
    }

@@ -145,7 +161,7 @@ public:
    u32 value2{};
    std::atomic<bool> trap{true};
    std::atomic<bool> trap2{true};
-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::shared_ptr<Common::Fiber> fiber1;
    std::shared_ptr<Common::Fiber> fiber2;
@@ -168,15 +184,13 @@ static void WorkControl2_3(void* control) {
 }

 void TestControl2::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
 }

 void TestControl2::Exit() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    thread_fibers[id]->Exit();
 }

@@ -228,24 +242,21 @@ public:
    void DoWork1() {
        value1 += 1;
        Fiber::YieldTo(fiber1, fiber2);
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        value3 += 1;
        Fiber::YieldTo(fiber1, thread_fibers[id]);
    }

    void DoWork2() {
        value2 += 1;
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(fiber2, thread_fibers[id]);
    }

    void ExecuteThread(u32 id);

    void CallFiber1() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber1);
    }

@@ -254,7 +265,7 @@ public:
    u32 value1{};
    u32 value2{};
    u32 value3{};
-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::shared_ptr<Common::Fiber> fiber1;
    std::shared_ptr<Common::Fiber> fiber2;
@@ -271,15 +282,13 @@ static void WorkControl3_2(void* control) {
 }

 void TestControl3::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
 }

 void TestControl3::Exit() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    thread_fibers[id]->Exit();
 }

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -305,6 +305,7 @@ else()
        -Werror=pessimizing-move
        -Werror=redundant-move
        -Werror=switch
+        -Werror=type-limits
        -Werror=unused-variable

        $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -893,7 +893,7 @@ void VpxRangeEncoder::Write(bool bit, s32 probability) {
        if (((low_value << (offset - 1)) >> 31) != 0) {
            const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
            base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
-            while (base_stream.GetPosition() >= 0 && PeekByte() == 0xff) {
+            while (PeekByte() == 0xff) {
                base_stream.WriteByte(0);

                base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
        ICMP_IMM,
        FCMP_RR,
        FCMP_RC,
+        FCMP_IMMR,
        MUFU,  // Multi-Function Operator
        RRO_C, // Range Reduction Operator
        RRO_R,
@@ -2205,6 +2206,7 @@ private:
            INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+            INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -137,7 +137,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
        break;
    }
    case OpCode::Id::FCMP_RR:
-    case OpCode::Id::FCMP_RC: {
+    case OpCode::Id::FCMP_RC:
+    case OpCode::Id::FCMP_IMMR: {
        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
        Node op_c = GetRegister(instr.gpr39);
        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
Author	SHA1	Message	Date
ReinUsesLisp	44b552be71	shader/arithmetic: Implement FCMP immediate + register variant Trivially add the encoding for this.	2020-10-28 17:05:41 -03:00
LC	725fcbb368	Merge pull request #4851 from ReinUsesLisp/core-threads-race hle/kernel: Remove unused registered_core_threads to fix data races	2020-10-28 04:54:35 -04:00
LC	a1f176ce52	Merge pull request #4850 from ReinUsesLisp/fiber-ptr-ref common/fiber: Take shared_ptr<Fiber> by copy in YieldTo	2020-10-28 04:54:19 -04:00
LC	1fd22823bc	Merge pull request #4849 from ReinUsesLisp/fix-fiber-test tests: Fix data race in fibers test	2020-10-28 04:26:10 -04:00
LC	978e7897a3	Merge pull request #4848 from ReinUsesLisp/type-limits video_core: Enforce -Werror=type-limits	2020-10-28 03:16:10 -04:00
LC	55ac6f7a2b	Merge pull request #4847 from ReinUsesLisp/warn-move video_core: Enforce -Wredundant-move and -Wpessimizing-move	2020-10-28 03:14:58 -04:00
ReinUsesLisp	4a451e5849	video_core: Enforce -Werror=type-limits Silences one warning and avoids introducing more in the future.	2020-10-28 02:37:47 -03:00
ReinUsesLisp	cdb2480d39	common/fiber: Take shared_ptr<Fiber> by copy in YieldTo YieldTo does not intend to modify the passed shared_ptrs. Pass it by copy to keep a reference count while this function executes.	2020-10-28 02:02:44 -03:00
ReinUsesLisp	3fdb42e0b4	tests: Fix data race in fibers test Previous to this commit, the tests were using operator[] from unordered_map to query elements but this silently inserts empty elements when they don't exist. If all threads were executed without concurrency, this wouldn't be an issue, but the same unordered_map could be written from two threads at the same time. This is a data race and makes some previously inserted elements invisible for a short period of time, causing them to insert and return an empty element. This default constructed element (a zero) was used to index an array of fibers that asserted when one of them was nullptr, shutting the test session off. To address this issue, lock on thread id reads and writes. This could be a shared mutex to allow concurrent reads, but the definition of std::this_thread::get_id is fuzzy when using non-standard techniques like fibers. I opted to use a standard mutex. While we are at it, fix the included headers.	2020-10-28 01:41:24 -03:00
ReinUsesLisp	ce69ff2890	hle/kernel: Remove unused registered_core_threads to fix data races This member was only used on asserts and it triggered data races. Remove it to fix them.	2020-10-27 01:55:39 -03:00