From 7fa34900e749cb81d218d2287a05c3628b018c92 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:36:52 -0400
Subject: [PATCH 001/122] file_sys: Add functions to manage system archive
 importing

Provides a couple of functions that simplify clearing and adding to imported sysdata.
---
 src/core/CMakeLists.txt                       |  2 +
 src/core/file_sys/system_archive/importer.cpp | 46 +++++++++++++++++++
 src/core/file_sys/system_archive/importer.h   | 31 +++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 src/core/file_sys/system_archive/importer.cpp
 create mode 100644 src/core/file_sys/system_archive/importer.h

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 4f6a87b0a7..7a43e9cae9 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -86,6 +86,8 @@ add_library(core STATIC
     file_sys/system_archive/data/font_nintendo_extended.h
     file_sys/system_archive/data/font_standard.cpp
     file_sys/system_archive/data/font_standard.h
+    file_sys/system_archive/importer.cpp
+    file_sys/system_archive/importer.h
     file_sys/system_archive/mii_model.cpp
     file_sys/system_archive/mii_model.h
     file_sys/system_archive/ng_word.cpp
diff --git a/src/core/file_sys/system_archive/importer.cpp b/src/core/file_sys/system_archive/importer.cpp
new file mode 100644
index 0000000000..9f81361e0e
--- /dev/null
+++ b/src/core/file_sys/system_archive/importer.cpp
@@ -0,0 +1,46 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <fmt/format.h>
+#include "common/file_util.h"
+#include "core/file_sys/card_image.h"
+#include "core/file_sys/content_archive.h"
+#include "core/file_sys/system_archive/importer.h"
+#include "core/file_sys/vfs.h"
+#include "core/loader/loader.h"
+
+namespace FileSys::SystemArchive {
+
+VirtualFile GetImportedSystemArchive(const VirtualDir& sysdata, u64 title_id) {
+    // A null sysdata directory cannot hold an archive, so report "not found" without a lookup.
+    return sysdata == nullptr ? nullptr : sysdata->GetFile(fmt::format("{:016X}.arc", title_id));
+}
+
+bool ImportSystemArchive(const VirtualDir& sysdata, u64 title_id, const VirtualFile& data) {
+    // A null sysdata yields a null output file, which is reported as failure below.
+    const auto out = sysdata ? sysdata->CreateFile(fmt::format("{:016X}.arc", title_id)) : nullptr;
+    return out != nullptr && VfsRawCopy(data, out);
+}
+
+bool ImportDirectorySystemUpdate(const VirtualDir& sysdata, const VirtualDir& dir) {
+    if (sysdata == nullptr || dir == nullptr) {
+        return false; // A missing source or destination directory cannot be imported.
+    }
+    Core::Crypto::KeyManager keys;
+    for (const auto& file : dir->GetFiles()) {
+        NCA nca{file, nullptr, 0, keys};
+        const bool is_archive = nca.GetStatus() == Loader::ResultStatus::Success &&
+                                nca.GetType() == NCAContentType::Data && nca.GetRomFS() != nullptr;
+        if (is_archive && !ImportSystemArchive(sysdata, nca.GetTitleId(), nca.GetRomFS())) {
+            return false; // Stop at the first archive that fails to copy.
+        }
+    }
+    return true;
+}
+
+bool ImportXCISystemUpdate(const VirtualDir& sysdata, XCI& xci) {
+    return ImportDirectorySystemUpdate(sysdata, xci.GetUpdatePartition());
+}
+
+} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/importer.h b/src/core/file_sys/system_archive/importer.h
new file mode 100644
index 0000000000..01c81a4436
--- /dev/null
+++ b/src/core/file_sys/system_archive/importer.h
@@ -0,0 +1,31 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/file_sys/vfs_types.h"
+
+namespace FileSys {
+
+class NSP;
+class XCI;
+
+namespace SystemArchive {
+
+/// Returns the file corresponding to the title_id if it exists in sysdata.
+VirtualFile GetImportedSystemArchive(const VirtualDir& sysdata, u64 title_id);
+
+/// Copies the provided file into sysdata, overwriting current data.
+bool ImportSystemArchive(const VirtualDir& sysdata, u64 title_id, const VirtualFile& data);
+
+/// Copies all system archives in the directory to sysdata.
+bool ImportDirectorySystemUpdate(const VirtualDir& sysdata, const VirtualDir& dir);
+
+/// Calls ImportDirectorySystemUpdate on the update partition of the XCI.
+bool ImportXCISystemUpdate(const VirtualDir& sysdata, XCI& xci);
+
+} // namespace SystemArchive
+
+} // namespace FileSys
\ No newline at end of file

From cedcaed581b081b97d16f33ba848c583501e17a1 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:37:11 -0400
Subject: [PATCH 002/122] system_archive: Expose count and base ID constants

---
 src/core/file_sys/system_archive/system_archive.cpp | 3 ---
 src/core/file_sys/system_archive/system_archive.h   | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index e93d100a5d..e65d9410ec 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -12,9 +12,6 @@
 
 namespace FileSys::SystemArchive {
 
-constexpr u64 SYSTEM_ARCHIVE_BASE_TITLE_ID = 0x0100000000000800;
-constexpr std::size_t SYSTEM_ARCHIVE_COUNT = 0x28;
-
 using SystemArchiveSupplier = VirtualDir (*)();
 
 struct SystemArchiveDescriptor {
diff --git a/src/core/file_sys/system_archive/system_archive.h b/src/core/file_sys/system_archive/system_archive.h
index 724a8eb17e..91c976634c 100644
--- a/src/core/file_sys/system_archive/system_archive.h
+++ b/src/core/file_sys/system_archive/system_archive.h
@@ -9,6 +9,9 @@
 
 namespace FileSys::SystemArchive {
 
+constexpr u64 SYSTEM_ARCHIVE_BASE_TITLE_ID = 0x0100000000000800;
+constexpr std::size_t SYSTEM_ARCHIVE_COUNT = 0x28;
+
 VirtualFile SynthesizeSystemArchive(u64 title_id);
 
 } // namespace FileSys::SystemArchive

From db3ddd80da09e0d30e7e175135d7ab33d3babe1b Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:37:38 -0400
Subject: [PATCH 003/122] filesystem: Add accessors for sysdata imported
 directory

Stores imported system archives
---
 src/core/hle/service/filesystem/filesystem.cpp | 8 ++++++++
 src/core/hle/service/filesystem/filesystem.h   | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 11e5c56b7a..1728e311f8 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -717,6 +717,14 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove
         system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC,
                                        sdmc_factory->GetSDMCContents());
     }
+
+    sysdata_imported_dir =
+        vfs.CreateDirectory(FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir) + "imported",
+                            FileSys::Mode::ReadWrite);
+}
+
+FileSys::VirtualDir FileSystemController::GetSysdataImportedDirectory() const {
+    return sysdata_imported_dir;
 }
 
 void InstallInterfaces(Core::System& system) {
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 1b0a6a9496..2572592ffa 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -116,6 +116,8 @@ public:
 
     FileSys::VirtualDir GetBCATDirectory(u64 title_id) const;
 
+    FileSys::VirtualDir GetSysdataImportedDirectory() const;
+
     // Creates the SaveData, SDMC, and BIS Factories. Should be called once and before any function
     // above is called.
     void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true);
@@ -130,6 +132,8 @@ private:
     std::unique_ptr<FileSys::RegisteredCache> gamecard_registered;
     std::unique_ptr<FileSys::PlaceholderCache> gamecard_placeholder;
 
+    FileSys::VirtualDir sysdata_imported_dir;
+
     Core::System& system;
 };
 

From f06c541440569a52bbbd5bf92c3038f1d30d1604 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:38:07 -0400
Subject: [PATCH 004/122] fsp_srv: Load imported system archives when available

Occurs after NAND, but before OSS
---
 src/core/hle/service/filesystem/fsp_srv.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index cbd5466c12..f3e3e9c012 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -21,6 +21,7 @@
 #include "core/file_sys/patch_manager.h"
 #include "core/file_sys/romfs_factory.h"
 #include "core/file_sys/savedata_factory.h"
+#include "core/file_sys/system_archive/importer.h"
 #include "core/file_sys/system_archive/system_archive.h"
 #include "core/file_sys/vfs.h"
 #include "core/hle/ipc_helpers.h"
@@ -914,7 +915,12 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
     auto data = fsc.OpenRomFS(title_id, storage_id, FileSys::ContentRecordType::Data);
 
     if (data.Failed()) {
-        const auto archive = FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
+        // Imported archives live in the sysdata "imported" directory, not the NAND contents.
+        auto archive = FileSys::SystemArchive::GetImportedSystemArchive(
+            fsc.GetSysdataImportedDirectory(), title_id);
+        if (archive == nullptr) {
+            archive = FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
+        }
 
         if (archive != nullptr) {
             IPC::ResponseBuilder rb{ctx, 2, 0, 1};

From a85d0b6712e2be5631d74496ea5977c1a1072a59 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:38:52 -0400
Subject: [PATCH 005/122] qt: Add UI to view sources of system archives

Displays, color coded, the origin for each of the 0x28 archives, allowing for easy debugging.
---
 src/yuzu/CMakeLists.txt            |   3 +
 src/yuzu/status/system_archive.cpp |  76 ++++++++++
 src/yuzu/status/system_archive.h   |  28 ++++
 src/yuzu/status/system_archive.ui  | 224 +++++++++++++++++++++++++++++
 4 files changed, 331 insertions(+)
 create mode 100644 src/yuzu/status/system_archive.cpp
 create mode 100644 src/yuzu/status/system_archive.h
 create mode 100644 src/yuzu/status/system_archive.ui

diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index ff1c1d9856..b8bfcc3352 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -103,6 +103,9 @@ add_executable(yuzu
     main.cpp
     main.h
     main.ui
+    status/system_archive.cpp
+    status/system_archive.h
+    status/system_archive.ui
     uisettings.cpp
     uisettings.h
     util/limitable_input_dialog.cpp
diff --git a/src/yuzu/status/system_archive.cpp b/src/yuzu/status/system_archive.cpp
new file mode 100644
index 0000000000..6d70271c63
--- /dev/null
+++ b/src/yuzu/status/system_archive.cpp
@@ -0,0 +1,76 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <QLabel>
+#include "core/file_sys/nca_metadata.h"
+#include "core/file_sys/romfs_factory.h"
+#include "core/file_sys/system_archive/importer.h"
+#include "core/file_sys/system_archive/system_archive.h"
+#include "core/hle/service/filesystem/filesystem.h"
+#include "ui_system_archive.h"
+#include "yuzu/status/system_archive.h"
+
+namespace {
+
+enum class Source {
+    NAND,
+    XCI,
+    OSS,
+    None,
+};
+
+constexpr std::array<const char*, 4> SOURCE_FORMAT = {
+    "<html><head/><body><p><span style=\"color:#00aa00;\"><b>8{:02X}</b></span></p></body></html>",
+    "<html><head/><body><p><span style=\"color:#0055ff;\"><b>8{:02X}</b></span></p></body></html>",
+    "<html><head/><body><p><span style=\"color:#aa00ff;\"><b>8{:02X}</b></span></p></body></html>",
+    "<html><head/><body><p><span style=\"color:#aa0000;\"><b>8{:02X}</b></span></p></body></html>",
+};
+
+QWidget* CreateItemForSourceAndId(QWidget* parent, Source source, std::size_t id) {
+    const auto text = fmt::format(SOURCE_FORMAT.at(static_cast<std::size_t>(source)), id);
+    auto* out = new QLabel(QString::fromStdString(text), parent);
+    out->setAlignment(Qt::AlignHCenter);
+    return out;
+}
+
+QWidget* CreateItem(QWidget* parent, const Service::FileSystem::FileSystemController& fsc,
+                    std::size_t suffix) {
+    const auto title_id = FileSys::SystemArchive::SYSTEM_ARCHIVE_BASE_TITLE_ID + suffix;
+
+    const auto nand =
+        fsc.OpenRomFS(title_id, FileSys::StorageId::NandSystem, FileSys::ContentRecordType::Data);
+
+    if (nand.Succeeded()) {
+        return CreateItemForSourceAndId(parent, Source::NAND, suffix);
+    }
+
+    auto archive = FileSys::SystemArchive::GetImportedSystemArchive(
+        fsc.GetSysdataImportedDirectory(), title_id);
+
+    if (archive != nullptr) {
+        return CreateItemForSourceAndId(parent, Source::XCI, suffix);
+    }
+
+    archive = FileSys::SystemArchive::SynthesizeSystemArchive(title_id);
+
+    if (archive != nullptr) {
+        return CreateItemForSourceAndId(parent, Source::OSS, suffix);
+    }
+
+    return CreateItemForSourceAndId(parent, Source::None, suffix);
+}
+
+} // Anonymous namespace
+
+SystemArchiveDialog::SystemArchiveDialog(QWidget* parent,
+                                         const Service::FileSystem::FileSystemController& fsc)
+    : QDialog(parent), ui(new Ui::SystemArchiveDialog) {
+    ui->setupUi(this);
+
+    for (std::size_t i = 0; i < FileSys::SystemArchive::SYSTEM_ARCHIVE_COUNT; ++i) {
+        ui->grid->addWidget(CreateItem(this, fsc, i), i / 6, i % 6, 1, 1);
+    }
+}
+
+SystemArchiveDialog::~SystemArchiveDialog() = default;
diff --git a/src/yuzu/status/system_archive.h b/src/yuzu/status/system_archive.h
new file mode 100644
index 0000000000..fef8ce7805
--- /dev/null
+++ b/src/yuzu/status/system_archive.h
@@ -0,0 +1,28 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <QDialog>
+
+namespace Service::FileSystem {
+class FileSystemController;
+} // namespace Service::FileSystem
+
+namespace Ui {
+class SystemArchiveDialog;
+} // namespace Ui
+
+class SystemArchiveDialog : public QDialog {
+    Q_OBJECT
+
+public:
+    explicit SystemArchiveDialog(QWidget* parent,
+                                 const Service::FileSystem::FileSystemController& fsc);
+    ~SystemArchiveDialog() override;
+
+private:
+    std::unique_ptr<Ui::SystemArchiveDialog> ui;
+};
diff --git a/src/yuzu/status/system_archive.ui b/src/yuzu/status/system_archive.ui
new file mode 100644
index 0000000000..12d46a1bd5
--- /dev/null
+++ b/src/yuzu/status/system_archive.ui
@@ -0,0 +1,224 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>SystemArchiveDialog</class>
+ <widget class="QDialog" name="SystemArchiveDialog">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>473</width>
+    <height>422</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>System Archive Status</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout_3">
+   <item>
+    <layout class="QVBoxLayout" name="verticalLayout">
+     <item>
+      <widget class="QLabel" name="label">
+       <property name="font">
+        <font>
+         <weight>75</weight>
+         <bold>true</bold>
+        </font>
+       </property>
+       <property name="text">
+        <string>System Archive Status</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_2">
+       <property name="text">
+        <string>System archives are special data files games and applications can use for common functions, such as filtering for bad words or showing Miis on screen.</string>
+       </property>
+       <property name="wordWrap">
+        <bool>true</bool>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_3">
+       <property name="text">
+        <string>yuzu can load system archives from 3 sources:</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_4">
+       <property name="text">
+        <string>    1. A real NAND dump placed into yuzu's NAND directory</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_5">
+       <property name="text">
+        <string>    2. Archives imported from an XCI/cartridge game</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_6">
+       <property name="text">
+        <string>    3. Open source reimplementations from the yuzu team</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QLabel" name="label_7">
+       <property name="text">
+        <string>The following table shows the source for all of the system archives:</string>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="Line" name="line">
+       <property name="orientation">
+        <enum>Qt::Horizontal</enum>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <layout class="QGridLayout" name="grid"/>
+     </item>
+     <item>
+      <widget class="Line" name="line_2">
+       <property name="orientation">
+        <enum>Qt::Horizontal</enum>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <layout class="QHBoxLayout" name="horizontalLayout">
+       <item>
+        <widget class="QLabel" name="label_8">
+         <property name="font">
+          <font>
+           <weight>75</weight>
+           <bold>true</bold>
+          </font>
+         </property>
+         <property name="text">
+          <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; color:#00aa00;&quot;&gt;NAND Dump&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+         </property>
+         <property name="alignment">
+          <set>Qt::AlignCenter</set>
+         </property>
+        </widget>
+       </item>
+       <item>
+        <widget class="QLabel" name="label_9">
+         <property name="font">
+          <font>
+           <weight>75</weight>
+           <bold>true</bold>
+          </font>
+         </property>
+         <property name="text">
+          <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; color:#0055ff;&quot;&gt;Cartridge Dump&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+         </property>
+         <property name="alignment">
+          <set>Qt::AlignCenter</set>
+         </property>
+        </widget>
+       </item>
+       <item>
+        <widget class="QLabel" name="label_10">
+         <property name="font">
+          <font>
+           <weight>75</weight>
+           <bold>true</bold>
+          </font>
+         </property>
+         <property name="text">
+          <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; color:#aa00ff;&quot;&gt;Open Source&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+         </property>
+         <property name="alignment">
+          <set>Qt::AlignCenter</set>
+         </property>
+        </widget>
+       </item>
+       <item>
+        <widget class="QLabel" name="label_11">
+         <property name="font">
+          <font>
+           <weight>75</weight>
+           <bold>true</bold>
+          </font>
+         </property>
+         <property name="text">
+          <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; color:#aa0000;&quot;&gt;Missing&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+         </property>
+         <property name="alignment">
+          <set>Qt::AlignCenter</set>
+         </property>
+        </widget>
+       </item>
+      </layout>
+     </item>
+     <item>
+      <spacer name="verticalSpacer">
+       <property name="orientation">
+        <enum>Qt::Vertical</enum>
+       </property>
+       <property name="sizeHint" stdset="0">
+        <size>
+         <width>20</width>
+         <height>40</height>
+        </size>
+       </property>
+      </spacer>
+     </item>
+    </layout>
+   </item>
+   <item>
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Ok</set>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>accepted()</signal>
+   <receiver>SystemArchiveDialog</receiver>
+   <slot>accept()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>248</x>
+     <y>254</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>157</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>rejected()</signal>
+   <receiver>SystemArchiveDialog</receiver>
+   <slot>reject()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>316</x>
+     <y>260</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>286</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+ </connections>
+</ui>

From bf5c2371a270b65b434f3af2428207f9613c77c7 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:39:26 -0400
Subject: [PATCH 006/122] qt: Add question when booting XCI to import archives

Adds convenience.
---
 src/yuzu/main.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index d6bb18d24a..89df1a612d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -897,6 +897,23 @@ bool GMainWindow::LoadROM(const QString& filename) {
                "wiki</a>. This message will not be shown again."));
     }
 
+    // Only offer the import when the imported directory exists and is still empty.
+    const auto imported_dir = system.GetFileSystemController().GetSysdataImportedDirectory();
+    if (result == Core::System::ResultStatus::Success && imported_dir != nullptr &&
+        system.GetAppLoader().GetFileType() == Loader::FileType::XCI &&
+        imported_dir->GetFiles().empty()) {
+        if (QMessageBox::question(this, tr("Import System Archives"),
+                                  tr("The game type you are using includes additional system files "
+                                     "that may improve yuzu's compatibility with this and other "
+                                     "games. Would you like to import these files?")) ==
+            QMessageBox::Yes) {
+            const auto game = Core::GetGameFileFromPath(vfs, filename.toStdString());
+            FileSys::XCI xci{game};
+            // NOTE(review): the import result is ignored; consider warning the user on failure.
+            FileSys::SystemArchive::ImportXCISystemUpdate(imported_dir, xci);
+        }
+    }
+
     if (result != Core::System::ResultStatus::Success) {
         switch (result) {
         case Core::System::ResultStatus::ErrorGetLoader:

From a8119a47a15fd78127adf645698c944ed33097a7 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Tue, 15 Oct 2019 14:39:47 -0400
Subject: [PATCH 007/122] qt: Add Tools commands for system archive management

---
 src/yuzu/main.cpp | 81 +++++++++++++++++++++++++++++++++++++++++++++++
 src/yuzu/main.h   |  4 +++
 src/yuzu/main.ui  | 33 +++++++++++++++++++
 3 files changed, 118 insertions(+)

diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 89df1a612d..828102cdda 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -27,6 +27,7 @@
 #include "core/hle/service/am/applets/applets.h"
 #include "core/hle/service/hid/controllers/npad.h"
 #include "core/hle/service/hid/hid.h"
+#include "yuzu/status/system_archive.h"
 
 // These are wrappers to avoid the calls to CreateDirectory and CreateFile because of the Windows
 // defines.
@@ -40,6 +41,10 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
     return dir->CreateFile(path);
 }
 
+static bool VfsDirectoryDeleteFileWrapper(const FileSys::VirtualDir& dir, const std::string& path) {
+    return dir->DeleteFile(path);
+}
+
 #include <fmt/ostream.h>
 #include <glad/glad.h>
 
@@ -83,6 +88,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "core/file_sys/romfs.h"
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/submission_package.h"
+#include "core/file_sys/system_archive/importer.h"
 #include "core/frontend/applets/software_keyboard.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/am/am.h"
@@ -760,6 +766,16 @@ void GMainWindow::ConnectMenuEvents() {
     connect(ui.action_Capture_Screenshot, &QAction::triggered, this,
             &GMainWindow::OnCaptureScreenshot);
 
+    // Tools
+    connect(ui.actionImport_Directory, &QAction::triggered, this,
+            &GMainWindow::OnImportDirectorySystemUpdate);
+    connect(ui.actionImport_Cartridge, &QAction::triggered, this,
+            &GMainWindow::OnImportCartridgeSystemUpdate);
+    connect(ui.actionClear_Imported, &QAction::triggered, this,
+            &GMainWindow::OnClearImportedSysdata);
+    connect(ui.actionView_Status, &QAction::triggered, this,
+            &GMainWindow::OnViewSystemArchiveStatus);
+
     // Help
     connect(ui.action_Open_yuzu_Folder, &QAction::triggered, this, &GMainWindow::OnOpenYuzuFolder);
     connect(ui.action_Rederive, &QAction::triggered, this,
@@ -1358,6 +1374,71 @@ void GMainWindow::OnGameListNavigateToGamedbEntry(u64 program_id,
     QDesktopServices::openUrl(QUrl(QStringLiteral("https://yuzu-emu.org/game/") + directory));
 }
 
+void GMainWindow::OnClearImportedSysdata() {
+    // Deletes every regular file directly inside <sysdata>/imported. Stops and warns at the
+    // first file that cannot be removed; files removed before that point stay removed.
+    const auto path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir) + "imported" DIR_SEP;
+    QDir dir(QString::fromStdString(path));
+    const auto list = dir.entryList(QDir::Files);
+    for (const auto& file : list) {
+        if (!dir.remove(file)) {
+            QMessageBox::warning(this, tr("Clear Failed"),
+                                 tr("The imported sysdata directory was not able to be cleared."));
+            return;
+        }
+    }
+
+    QMessageBox::information(this, tr("Clear Successful"),
+                             tr("The imported sysdata directory was cleared successfully."));
+}
+
+void GMainWindow::OnImportDirectorySystemUpdate() {
+    Core::System& system{Core::System::GetInstance()};
+
+    const auto dir = QFileDialog::getExistingDirectory(this, tr("Select System Update Directory"));
+
+    if (dir.isEmpty()) {
+        return;
+    }
+
+    const auto vdir = vfs->OpenDirectory(dir.toStdString(), FileSys::Mode::Read);
+    if (FileSys::SystemArchive::ImportDirectorySystemUpdate(
+            system.GetFileSystemController().GetSysdataImportedDirectory(), vdir)) {
+        QMessageBox::information(this, tr("Import Successful"),
+                                 tr("The system update import was successful."));
+    } else {
+        QMessageBox::warning(this, tr("Import Failed"), tr("The system update import failed."));
+    }
+}
+
+void GMainWindow::OnImportCartridgeSystemUpdate() {
+    Core::System& system{Core::System::GetInstance()};
+
+    const auto file = QFileDialog::getOpenFileName(this, tr("Select Cartridge File"), QString{},
+                                                   QStringLiteral("Cartridge Images (*.xci)"));
+
+    if (file.isEmpty()) {
+        return;
+    }
+
+    FileSys::XCI xci{vfs->OpenFile(file.toStdString(), FileSys::Mode::Read)};
+    if (FileSys::SystemArchive::ImportXCISystemUpdate(
+            system.GetFileSystemController().GetSysdataImportedDirectory(), xci)) {
+        QMessageBox::information(this, tr("Import Successful"),
+                                 tr("The system update import was successful."));
+    } else {
+        QMessageBox::warning(this, tr("Import Failed"), tr("The system update import failed."));
+    }
+}
+
+void GMainWindow::OnViewSystemArchiveStatus() {
+    Core::System& system{Core::System::GetInstance()};
+
+    system.GetFileSystemController().CreateFactories(*vfs);
+    SystemArchiveDialog dialog(this, system.GetFileSystemController());
+    dialog.exec();
+}
+
 void GMainWindow::OnGameListOpenDirectory(const QString& directory) {
     QString path;
     if (directory == QStringLiteral("SDMC")) {
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index fd4b9ccf54..cce177f7f0 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -190,6 +190,10 @@ private slots:
     void OnGameListCopyTID(u64 program_id);
     void OnGameListNavigateToGamedbEntry(u64 program_id,
                                          const CompatibilityList& compatibility_list);
+    void OnClearImportedSysdata();
+    void OnImportDirectorySystemUpdate();
+    void OnImportCartridgeSystemUpdate();
+    void OnViewSystemArchiveStatus();
     void OnGameListOpenDirectory(const QString& directory);
     void OnGameListAddDirectory();
     void OnGameListShowList(bool show);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index a1ce3c0c35..37cc24e17b 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -104,9 +104,22 @@
     <property name="title">
      <string>Tools</string>
     </property>
+    <widget class="QMenu" name="menuSystem_Archives">
+     <property name="title">
+      <string>System Archives</string>
+     </property>
+     <addaction name="actionImport_Directory"/>
+     <addaction name="actionImport_Cartridge"/>
+     <addaction name="separator"/>
+     <addaction name="actionClear_Imported"/>
+     <addaction name="separator"/>
+     <addaction name="actionView_Status"/>
+    </widget>
     <addaction name="action_Rederive"/>
     <addaction name="separator"/>
     <addaction name="action_Capture_Screenshot"/>
+    <addaction name="separator"/>
+    <addaction name="menuSystem_Archives"/>
    </widget>
    <widget class="QMenu" name="menu_Help">
     <property name="title">
@@ -293,6 +306,26 @@
     <string>Capture Screenshot</string>
    </property>
   </action>
+  <action name="actionImport_Directory">
+   <property name="text">
+    <string>Import Directory</string>
+   </property>
+  </action>
+  <action name="actionImport_Cartridge">
+   <property name="text">
+    <string>Import Cartridge</string>
+   </property>
+  </action>
+  <action name="actionClear_Imported">
+   <property name="text">
+    <string>Clear Imported</string>
+   </property>
+  </action>
+  <action name="actionView_Status">
+   <property name="text">
+    <string>View Status</string>
+   </property>
+  </action>
  </widget>
  <resources/>
  <connections/>

From 51effa618be9373a2bfc7e26a4a2e67ec66a8829 Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Fri, 10 Aug 2018 14:06:09 -0400
Subject: [PATCH 008/122] filesystem: Create directory if it doesn't exist on
 open

It would seem that Ultra Street Fighter II requires this behavior on save creation.
---
 src/core/hle/service/filesystem/filesystem.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index cadc038050..3c45a2fa8a 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -40,7 +40,10 @@ static FileSys::VirtualDir GetDirectoryRelativeWrapped(FileSys::VirtualDir base,
     if (dir_name.empty() || dir_name == "." || dir_name == "/" || dir_name == "\\")
         return base;
 
-    return base->GetDirectoryRelative(dir_name);
+    const auto res = base->GetDirectoryRelative(dir_name);
+    if (res == nullptr)
+        return base->CreateDirectoryRelative(dir_name);
+    return res;
 }
 
 VfsDirectoryServiceWrapper::VfsDirectoryServiceWrapper(FileSys::VirtualDir backing_)

From 618a61071d816332a149bf0489d18ca02bc7b05d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 4 Feb 2020 11:23:12 -0400
Subject: [PATCH 009/122] Common: Implement a basic SpinLock class

---
 src/common/CMakeLists.txt |  2 ++
 src/common/spin_lock.cpp  | 46 +++++++++++++++++++++++++++++++++++++++
 src/common/spin_lock.h    | 20 +++++++++++++++++
 3 files changed, 68 insertions(+)
 create mode 100644 src/common/spin_lock.cpp
 create mode 100644 src/common/spin_lock.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c1..74a883f3f6 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -141,6 +141,8 @@ add_library(common STATIC
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
+    spin_lock.cpp
+    spin_lock.h
     string_util.cpp
     string_util.h
     swap.h
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 0000000000..8077b78d28
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/spin_lock.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#if _M_AMD64
+#define __x86_64__ 1
+#endif
+#if _M_ARM64
+#define __aarch64__ 1
+#endif
+#else
+#if __x86_64__
+#include <xmmintrin.h>
+#endif
+#endif
+
+namespace {
+
+void thread_pause() {
+#if __x86_64__
+    _mm_pause();
+#elif __aarch64__ && _MSC_VER
+    __yield();
+#elif __aarch64__
+    asm("yield");
+#endif
+}
+
+} // namespace
+
+namespace Common {
+
+void SpinLock::lock() {
+    while (lck.test_and_set(std::memory_order_acquire))
+        thread_pause();
+}
+
+void SpinLock::unlock() {
+    lck.clear(std::memory_order_release);
+}
+
+} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 0000000000..cbc67b6c85
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,20 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+
+namespace Common {
+
+class SpinLock {
+public:
+    void lock();
+    void unlock();
+
+private:
+    std::atomic_flag lck = ATOMIC_FLAG_INIT;
+};
+
+} // namespace Common

From fc788a13f19a628185af073d07822055f224853a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 4 Feb 2020 15:06:23 -0400
Subject: [PATCH 010/122] Common: Implement a basic Fiber class.

---
 src/common/CMakeLists.txt |   2 +
 src/common/fiber.cpp      | 147 ++++++++++++++++++++++++++++++++++++++
 src/common/fiber.h        |  55 ++++++++++++++
 3 files changed, 204 insertions(+)
 create mode 100644 src/common/fiber.cpp
 create mode 100644 src/common/fiber.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 74a883f3f6..00a5f6303f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -108,6 +108,8 @@ add_library(common STATIC
     common_types.h
     dynamic_library.cpp
     dynamic_library.h
+    fiber.cpp
+    fiber.h
     file_util.cpp
     file_util.h
     hash.h
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 0000000000..eb59f1aa91
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,147 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/fiber.h"
+
+namespace Common {
+
+#ifdef _MSC_VER
+#include <windows.h>
+
+struct Fiber::FiberImpl {
+    LPVOID handle = nullptr;
+};
+
+void Fiber::_start([[maybe_unused]] void* parameter) {
+    guard.lock();
+    if (previous_fiber) {
+        previous_fiber->guard.unlock();
+        previous_fiber = nullptr;
+    }
+    entry_point(start_parameter);
+}
+
+static void __stdcall FiberStartFunc(LPVOID lpFiberParameter)
+{
+   auto fiber = static_cast<Fiber *>(lpFiberParameter);
+   fiber->_start(nullptr);
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+    impl->handle = CreateFiber(0, &FiberStartFunc, this);
+}
+
+Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+}
+
+Fiber::~Fiber() {
+    // Make sure the Fiber is not being used
+    guard.lock();
+    guard.unlock();
+    DeleteFiber(impl->handle);
+}
+
+void Fiber::Exit() {
+    if (!is_thread_fiber) {
+        return;
+    }
+    ConvertFiberToThread();
+    guard.unlock();
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    to->guard.lock();
+    to->previous_fiber = from;
+    SwitchToFiber(to->impl->handle);
+    auto previous_fiber = from->previous_fiber;
+    if (previous_fiber) {
+        previous_fiber->guard.unlock();
+        previous_fiber.reset();
+    }
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
+    fiber->impl->handle = ConvertThreadToFiber(NULL);
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#else
+
+#include <boost/context/detail/fcontext.hpp>
+
+constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
+
+struct Fiber::FiberImpl {
+    boost::context::detail::fcontext_t context;
+    std::array<u8, default_stack_size> stack;
+};
+
+void Fiber::_start(void* parameter) {
+    guard.lock();
+    boost::context::detail::transfer_t* transfer = static_cast<boost::context::detail::transfer_t*>(parameter);
+    if (previous_fiber) {
+        previous_fiber->impl->context = transfer->fctx;
+        previous_fiber->guard.unlock();
+        previous_fiber = nullptr;
+    }
+    entry_point(start_parameter);
+}
+
+static void FiberStartFunc(boost::context::detail::transfer_t transfer)
+{
+   auto fiber = static_cast<Fiber *>(transfer.data);
+   fiber->_start(&transfer);
+}
+
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+    auto start_func = std::bind(&Fiber::start, this);
+    impl->context =
+        boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func);
+}
+
+Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+    impl = std::make_unique<FiberImpl>();
+}
+
+Fiber::~Fiber() {
+    // Make sure the Fiber is not being used
+    guard.lock();
+    guard.unlock();
+}
+
+void Fiber::Exit() {
+    if (!is_thread_fiber) {
+        return;
+    }
+    guard.unlock();
+}
+
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    to->guard.lock();
+    to->previous_fiber = from;
+    auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
+    auto previous_fiber = from->previous_fiber;
+    if (previous_fiber) {
+        previous_fiber->impl->context = transfer.fctx;
+        previous_fiber->guard.unlock();
+        previous_fiber.reset();
+    }
+}
+
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->is_thread_fiber = true;
+    return fiber;
+}
+
+#endif
+} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 0000000000..ab44905cf9
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,55 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "common/common_types.h"
+#include "common/spin_lock.h"
+
+namespace Common {
+
+class Fiber {
+public:
+    Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
+    ~Fiber();
+
+    Fiber(const Fiber&) = delete;
+    Fiber& operator=(const Fiber&) = delete;
+
+    Fiber(Fiber&&) = default;
+    Fiber& operator=(Fiber&&) = default;
+
+    /// Yields control from Fiber 'from' to Fiber 'to'
+    /// Fiber 'from' must be the currently running fiber.
+    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
+    static std::shared_ptr<Fiber> ThreadToFiber();
+
+    /// Only call from main thread's fiber
+    void Exit();
+
+    /// Used internally but required to be public, Shall not be used
+    void _start(void* parameter);
+
+    /// Changes the start parameter of the fiber. Has no effect if the fiber already started
+    void SetStartParameter(void* new_parameter) {
+        start_parameter = new_parameter;
+    }
+
+private:
+    Fiber();
+
+    struct FiberImpl;
+
+    SpinLock guard;
+    std::function<void(void*)> entry_point;
+    void* start_parameter;
+    std::shared_ptr<Fiber> previous_fiber;
+    std::unique_ptr<FiberImpl> impl;
+    bool is_thread_fiber{};
+};
+
+} // namespace Common

From f33e42c770f245628187afea293f11566ddc241a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 14:13:16 -0400
Subject: [PATCH 011/122] Tests: Add tests for fibers and refactor/fix Fiber
 class

---
 src/common/fiber.cpp        |  32 +++---
 src/common/fiber.h          |  19 +++-
 src/tests/CMakeLists.txt    |   1 +
 src/tests/common/fibers.cpp | 214 ++++++++++++++++++++++++++++++++++++
 4 files changed, 247 insertions(+), 19 deletions(-)
 create mode 100644 src/tests/common/fibers.cpp

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index eb59f1aa91..a2c0401c4d 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -3,18 +3,21 @@
 // Refer to the license.txt file included.
 
 #include "common/fiber.h"
+#ifdef _MSC_VER
+#include <windows.h>
+#else
+#include <boost/context/detail/fcontext.hpp>
+#endif
 
 namespace Common {
 
 #ifdef _MSC_VER
-#include <windows.h>
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
 };
 
-void Fiber::_start([[maybe_unused]] void* parameter) {
-    guard.lock();
+void Fiber::start() {
     if (previous_fiber) {
         previous_fiber->guard.unlock();
         previous_fiber = nullptr;
@@ -22,10 +25,10 @@ void Fiber::_start([[maybe_unused]] void* parameter) {
     entry_point(start_parameter);
 }
 
-static void __stdcall FiberStartFunc(LPVOID lpFiberParameter)
+void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
 {
-   auto fiber = static_cast<Fiber *>(lpFiberParameter);
-   fiber->_start(nullptr);
+   auto fiber = static_cast<Fiber *>(fiber_parameter);
+   fiber->start();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -74,30 +77,26 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 
 #else
 
-#include <boost/context/detail/fcontext.hpp>
-
 constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
 
-struct Fiber::FiberImpl {
-    boost::context::detail::fcontext_t context;
+struct alignas(64) Fiber::FiberImpl {
     std::array<u8, default_stack_size> stack;
+    boost::context::detail::fcontext_t context;
 };
 
-void Fiber::_start(void* parameter) {
-    guard.lock();
-    boost::context::detail::transfer_t* transfer = static_cast<boost::context::detail::transfer_t*>(parameter);
+void Fiber::start(boost::context::detail::transfer_t& transfer) {
     if (previous_fiber) {
-        previous_fiber->impl->context = transfer->fctx;
+        previous_fiber->impl->context = transfer.fctx;
         previous_fiber->guard.unlock();
         previous_fiber = nullptr;
     }
     entry_point(start_parameter);
 }
 
-static void FiberStartFunc(boost::context::detail::transfer_t transfer)
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
 {
    auto fiber = static_cast<Fiber *>(transfer.data);
-   fiber->_start(&transfer);
+   fiber->start(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -139,6 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
     std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
+    fiber->guard.lock();
     fiber->is_thread_fiber = true;
     return fiber;
 }
diff --git a/src/common/fiber.h b/src/common/fiber.h
index ab44905cf9..812d6644ac 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -10,6 +10,12 @@
 #include "common/common_types.h"
 #include "common/spin_lock.h"
 
+#ifndef _MSC_VER
+namespace boost::context::detail {
+    struct transfer_t;
+}
+#endif
+
 namespace Common {
 
 class Fiber {
@@ -31,9 +37,6 @@ public:
     /// Only call from main thread's fiber
     void Exit();
 
-    /// Used internally but required to be public, Shall not be used
-    void _start(void* parameter);
-
     /// Changes the start parameter of the fiber. Has no effect if the fiber already started
     void SetStartParameter(void* new_parameter) {
         start_parameter = new_parameter;
@@ -42,6 +45,16 @@ public:
 private:
     Fiber();
 
+#ifdef _MSC_VER
+    void start();
+    static void FiberStartFunc(void* fiber_parameter);
+#else
+    void start(boost::context::detail::transfer_t& transfer);
+    static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+#endif
+
+
+
     struct FiberImpl;
 
     SpinLock guard;
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index c7038b2179..47ef30aa91 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_executable(tests
     common/bit_field.cpp
     common/bit_utils.cpp
+    common/fibers.cpp
     common/multi_level_queue.cpp
     common/param_package.cpp
     common/ring_buffer.cpp
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
new file mode 100644
index 0000000000..ff840afa64
--- /dev/null
+++ b/src/tests/common/fibers.cpp
@@ -0,0 +1,214 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/fiber.h"
+#include "common/spin_lock.h"
+
+namespace Common {
+
+class TestControl1 {
+public:
+    TestControl1() = default;
+
+    void DoWork();
+
+    void ExecuteThread(u32 id);
+
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
+    std::vector<u32> items;
+    std::vector<u32> results;
+};
+
+static void WorkControl1(void* control) {
+    TestControl1* test_control = static_cast<TestControl1*>(control);
+    test_control->DoWork();
+}
+
+void TestControl1::DoWork() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    u32 value = items[id];
+    for (u32 i = 0; i < id; i++) {
+        value++;
+    }
+    results[id] = value;
+    Fiber::YieldTo(work_fibers[id], thread_fibers[id]);
+}
+
+void TestControl1::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+    work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
+    items[id] = rand() % 256;
+    Fiber::YieldTo(thread_fibers[id], work_fibers[id]);
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart1(u32 id, TestControl1& test_control) {
+    test_control.ExecuteThread(id);
+}
+
+
+TEST_CASE("Fibers::Setup", "[common]") {
+    constexpr u32 num_threads = 7;
+    TestControl1 test_control{};
+    test_control.thread_fibers.resize(num_threads, nullptr);
+    test_control.work_fibers.resize(num_threads, nullptr);
+    test_control.items.resize(num_threads, 0);
+    test_control.results.resize(num_threads, 0);
+    std::vector<std::thread> threads;
+    for (u32 i = 0; i < num_threads; i++) {
+        threads.emplace_back(ThreadStart1, i, std::ref(test_control));
+    }
+    for (u32 i = 0; i < num_threads; i++) {
+        threads[i].join();
+    }
+    for (u32 i = 0; i < num_threads; i++) {
+        REQUIRE(test_control.items[i] + i == test_control.results[i]);
+    }
+}
+
+class TestControl2 {
+public:
+    TestControl2() = default;
+
+    void DoWork1() {
+        trap2 = false;
+        while (trap.load());
+        for (u32 i = 0; i < 12000; i++) {
+            value1 += i;
+        }
+        Fiber::YieldTo(fiber1, fiber3);
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        assert1 = id == 1;
+        value2 += 5000;
+        Fiber::YieldTo(fiber1, thread_fibers[id]);
+    }
+
+    void DoWork2() {
+        while (trap2.load());
+        value2 = 2000;
+        trap = false;
+        Fiber::YieldTo(fiber2, fiber1);
+        assert3 = false;
+    }
+
+    void DoWork3() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        assert2 = id == 0;
+        value1 += 1000;
+        Fiber::YieldTo(fiber3, thread_fibers[id]);
+    }
+
+    void ExecuteThread(u32 id);
+
+    void CallFiber1() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber1);
+    }
+
+    void CallFiber2() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber2);
+    }
+
+    void Exit();
+
+    bool assert1{};
+    bool assert2{};
+    bool assert3{true};
+    u32 value1{};
+    u32 value2{};
+    std::atomic<bool> trap{true};
+    std::atomic<bool> trap2{true};
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::shared_ptr<Common::Fiber> fiber1;
+    std::shared_ptr<Common::Fiber> fiber2;
+    std::shared_ptr<Common::Fiber> fiber3;
+};
+
+static void WorkControl2_1(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork1();
+}
+
+static void WorkControl2_2(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork2();
+}
+
+static void WorkControl2_3(void* control) {
+    TestControl2* test_control = static_cast<TestControl2*>(control);
+    test_control->DoWork3();
+}
+
+void TestControl2::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+}
+
+void TestControl2::Exit() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart2_1(u32 id, TestControl2& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber1();
+    test_control.Exit();
+}
+
+static void ThreadStart2_2(u32 id, TestControl2& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber2();
+    test_control.Exit();
+}
+
+TEST_CASE("Fibers::InterExchange", "[common]") {
+    TestControl2 test_control{};
+    test_control.thread_fibers.resize(2, nullptr);
+    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
+    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
+    test_control.fiber3 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
+    std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
+    std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
+    thread1.join();
+    thread2.join();
+    REQUIRE(test_control.assert1);
+    REQUIRE(test_control.assert2);
+    REQUIRE(test_control.assert3);
+    REQUIRE(test_control.value2 == 7000);
+    u32 cal_value = 0;
+    for (u32 i = 0; i < 12000; i++) {
+        cal_value += i;
+    }
+    cal_value += 1000;
+    REQUIRE(test_control.value1 == cal_value);
+}
+
+
+} // namespace Common

From f6e5692dae8eadb7131a66e0e2b1e9a2b3e3bc79 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 15:48:20 -0400
Subject: [PATCH 012/122] Common: Polish Fiber class, add comments, asserts and
 more tests.

---
 src/common/fiber.cpp        | 55 ++++++++++++---------
 src/common/fiber.h          | 14 +++++-
 src/common/spin_lock.cpp    |  7 +++
 src/common/spin_lock.h      |  1 +
 src/tests/common/fibers.cpp | 95 ++++++++++++++++++++++++++++++++++++-
 5 files changed, 147 insertions(+), 25 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a2c0401c4d..a88a30cede 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/fiber.h"
 #ifdef _MSC_VER
 #include <windows.h>
@@ -18,11 +19,11 @@ struct Fiber::FiberImpl {
 };
 
 void Fiber::start() {
-    if (previous_fiber) {
-        previous_fiber->guard.unlock();
-        previous_fiber = nullptr;
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
     entry_point(start_parameter);
+    UNREACHABLE();
 }
 
 void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
@@ -43,12 +44,16 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
 
 Fiber::~Fiber() {
     // Make sure the Fiber is not being used
-    guard.lock();
-    guard.unlock();
+    bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
     DeleteFiber(impl->handle);
 }
 
 void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
@@ -57,14 +62,15 @@ void Fiber::Exit() {
 }
 
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     SwitchToFiber(to->impl->handle);
     auto previous_fiber = from->previous_fiber;
-    if (previous_fiber) {
-        previous_fiber->guard.unlock();
-        previous_fiber.reset();
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
@@ -85,12 +91,12 @@ struct alignas(64) Fiber::FiberImpl {
 };
 
 void Fiber::start(boost::context::detail::transfer_t& transfer) {
-    if (previous_fiber) {
-        previous_fiber->impl->context = transfer.fctx;
-        previous_fiber->guard.unlock();
-        previous_fiber = nullptr;
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->impl->context = transfer.fctx;
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
     entry_point(start_parameter);
+    UNREACHABLE();
 }
 
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
@@ -113,11 +119,15 @@ Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
 
 Fiber::~Fiber() {
     // Make sure the Fiber is not being used
-    guard.lock();
-    guard.unlock();
+    bool locked = guard.try_lock();
+    ASSERT_MSG(locked, "Destroying a fiber that's still running");
+    if (locked) {
+        guard.unlock();
+    }
 }
 
 void Fiber::Exit() {
+    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
@@ -125,15 +135,16 @@ void Fiber::Exit() {
 }
 
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+    ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
     auto previous_fiber = from->previous_fiber;
-    if (previous_fiber) {
-        previous_fiber->impl->context = transfer.fctx;
-        previous_fiber->guard.unlock();
-        previous_fiber.reset();
-    }
+    ASSERT(previous_fiber != nullptr);
+    previous_fiber->impl->context = transfer.fctx;
+    previous_fiber->guard.unlock();
+    previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 812d6644ac..89a01fdd8e 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -18,6 +18,18 @@ namespace boost::context::detail {
 
 namespace Common {
 
+/**
+ * Fiber class
+ * A fiber is a userspace thread with its own context. Fibers can be used to
+ * implement coroutines, emulated threading systems and certain asynchronous
+ * patterns.
+ *
+ * This class implements fibers at a low level, thus allowing greater freedom
+ * to implement such patterns. This fiber class is 'threadsafe': only one fiber
+ * can be running at a time, and a thread trying to yield to a running fiber
+ * will wait until that fiber yields. WARNING: exchanging two running fibers
+ * between threads will cause a deadlock.
+ */
 class Fiber {
 public:
     Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
@@ -53,8 +65,6 @@ private:
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
 #endif
 
-
-
     struct FiberImpl;
 
     SpinLock guard;
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
index 8077b78d28..82a1d39fff 100644
--- a/src/common/spin_lock.cpp
+++ b/src/common/spin_lock.cpp
@@ -43,4 +43,11 @@ void SpinLock::unlock() {
     lck.clear(std::memory_order_release);
 }
 
+bool SpinLock::try_lock() {
+    if (lck.test_and_set(std::memory_order_acquire)) {
+        return false;
+    }
+    return true;
+}
+
 } // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
index cbc67b6c85..70282a961f 100644
--- a/src/common/spin_lock.h
+++ b/src/common/spin_lock.h
@@ -12,6 +12,7 @@ class SpinLock {
 public:
     void lock();
     void unlock();
+    bool try_lock();
 
 private:
     std::atomic_flag lck = ATOMIC_FLAG_INIT;
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index ff840afa64..358393a192 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -64,7 +64,9 @@ static void ThreadStart1(u32 id, TestControl1& test_control) {
     test_control.ExecuteThread(id);
 }
 
-
+/** This test checks for fiber setup configuration and validates that fibers are
+ *  doing all the work required.
+ */
 TEST_CASE("Fibers::Setup", "[common]") {
     constexpr u32 num_threads = 7;
     TestControl1 test_control{};
@@ -188,6 +190,10 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
     test_control.Exit();
 }
 
+/** This test checks for fiber thread exchange configuration and validates that a fiber
+ *  has been successfully transferred from one thread to another and that the TLS
+ *  region of the thread is kept while changing fibers.
+ */
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
@@ -210,5 +216,92 @@ TEST_CASE("Fibers::InterExchange", "[common]") {
     REQUIRE(test_control.value1 == cal_value);
 }
 
+class TestControl3 {
+public:
+    TestControl3() = default;
+
+    void DoWork1() {
+        value1 += 1;
+        Fiber::YieldTo(fiber1, fiber2);
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        value3 += 1;
+        Fiber::YieldTo(fiber1, thread_fibers[id]);
+    }
+
+    void DoWork2() {
+        value2 += 1;
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(fiber2, thread_fibers[id]);
+    }
+
+    void ExecuteThread(u32 id);
+
+    void CallFiber1() {
+        std::thread::id this_id = std::this_thread::get_id();
+        u32 id = ids[this_id];
+        Fiber::YieldTo(thread_fibers[id], fiber1);
+    }
+
+    void Exit();
+
+    u32 value1{};
+    u32 value2{};
+    u32 value3{};
+    std::unordered_map<std::thread::id, u32> ids;
+    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+    std::shared_ptr<Common::Fiber> fiber1;
+    std::shared_ptr<Common::Fiber> fiber2;
+};
+
+static void WorkControl3_1(void* control) {
+    TestControl3* test_control = static_cast<TestControl3*>(control);
+    test_control->DoWork1();
+}
+
+static void WorkControl3_2(void* control) {
+    TestControl3* test_control = static_cast<TestControl3*>(control);
+    test_control->DoWork2();
+}
+
+void TestControl3::ExecuteThread(u32 id) {
+    std::thread::id this_id = std::this_thread::get_id();
+    ids[this_id] = id;
+    auto thread_fiber = Fiber::ThreadToFiber();
+    thread_fibers[id] = thread_fiber;
+}
+
+void TestControl3::Exit() {
+    std::thread::id this_id = std::this_thread::get_id();
+    u32 id = ids[this_id];
+    thread_fibers[id]->Exit();
+}
+
+static void ThreadStart3(u32 id, TestControl3& test_control) {
+    test_control.ExecuteThread(id);
+    test_control.CallFiber1();
+    test_control.Exit();
+}
+
+/** This test checks for two threads racing to start the same fiber.
+ *  It checks that execution occurred in an ordered manner and that at no time
+ *  were there two contexts running at the same time.
+ */
+TEST_CASE("Fibers::StartRace", "[common]") {
+    TestControl3 test_control{};
+    test_control.thread_fibers.resize(2, nullptr);
+    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
+    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
+    std::thread thread1(ThreadStart3, 0, std::ref(test_control));
+    std::thread thread2(ThreadStart3, 1, std::ref(test_control));
+    thread1.join();
+    thread2.join();
+    REQUIRE(test_control.value1 == 1);
+    REQUIRE(test_control.value2 == 1);
+    REQUIRE(test_control.value3 == 1);
+}
+
+
 
 } // namespace Common

From 929a88479b390bf837cc7c1d7e6b7df4a6644820 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 5 Feb 2020 19:12:27 -0400
Subject: [PATCH 013/122] Core: Implement a Host Timer.

---
 src/core/CMakeLists.txt       |   2 +
 src/core/core_timing_util.cpp |   5 ++
 src/core/core_timing_util.h   |   1 +
 src/core/host_timing.cpp      | 161 ++++++++++++++++++++++++++++++++++
 src/core/host_timing.h        | 126 ++++++++++++++++++++++++++
 5 files changed, 295 insertions(+)
 create mode 100644 src/core/host_timing.cpp
 create mode 100644 src/core/host_timing.h

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 47418006b2..c0d0683769 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -547,6 +547,8 @@ add_library(core STATIC
     hle/service/vi/vi_u.h
     hle/service/wlan/wlan.cpp
     hle/service/wlan/wlan.h
+    host_timing.cpp
+    host_timing.h
     loader/deconstructed_rom_directory.cpp
     loader/deconstructed_rom_directory.h
     loader/elf.cpp
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index de50d3b14f..f42666b4db 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -49,6 +49,11 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
     return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }
 
+u64 nsToClockCycles(std::chrono::nanoseconds ns) { // ns * CNTFREQ / 10^9 via a 128-bit product
+    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temporal, 1000000000).first;
+}
+
 u64 CpuCyclesToClockCycles(u64 ticks) {
     const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
     return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index addc72b192..65fb7368b6 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,6 +13,7 @@ namespace Core::Timing {
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);
+u64 nsToClockCycles(std::chrono::nanoseconds ns);
 
 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
     return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
new file mode 100644
index 0000000000..c02f571c67
--- /dev/null
+++ b/src/core/host_timing.cpp
@@ -0,0 +1,161 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/host_timing.h"
+
+#include <algorithm>
+#include <mutex>
+#include <string>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/thread.h"
+#include "core/core_timing_util.h"
+
+namespace Core::HostTiming {
+
+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
+    return std::make_shared<EventType>(std::move(callback), std::move(name));
+}
+
+struct CoreTiming::Event {
+    u64 time;
+    u64 fifo_order;
+    u64 userdata;
+    std::weak_ptr<EventType> type;
+
+    // Sort by time, unless the times are the same, in which case sort by
+    // the order added to the queue
+    friend bool operator>(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
+    }
+
+    friend bool operator<(const Event& left, const Event& right) {
+        return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
+    }
+};
+
+CoreTiming::CoreTiming() = default;
+CoreTiming::~CoreTiming() = default;
+
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+    instance.Advance();
+}
+
+void CoreTiming::Initialize() {
+    event_fifo_id = 0;
+    const auto empty_timed_callback = [](u64, s64) {};
+    ev_lost = CreateEvent("_lost_event", empty_timed_callback);
+    start_time = std::chrono::system_clock::now();
+    timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+}
+
+void CoreTiming::Shutdown() {
+    std::unique_lock<std::mutex> guard(inner_mutex);
+    shutting_down = true;
+    if (!is_set) {
+        is_set = true;
+        condvar.notify_one();
+    }
+    inner_mutex.unlock();
+    timer_thread->join();
+    ClearPendingEvents();
+}
+
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
+    std::lock_guard guard{inner_mutex};
+    const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
+
+    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+
+    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    if (!is_set) {
+        is_set = true;
+        condvar.notify_one();
+    }
+}
+
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
+    std::lock_guard guard{inner_mutex};
+
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+        return e.type.lock().get() == event_type.get() && e.userdata == userdata;
+    });
+
+    // Removing random items breaks the invariant so we have to re-establish it.
+    if (itr != event_queue.end()) {
+        event_queue.erase(itr, event_queue.end());
+        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    }
+}
+
+u64 CoreTiming::GetCPUTicks() const {
+    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
+    return Core::Timing::nsToCycles(time_now);
+}
+
+u64 CoreTiming::GetClockTicks() const {
+    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
+    return Core::Timing::nsToClockCycles(time_now);
+}
+
+void CoreTiming::ClearPendingEvents() {
+    event_queue.clear();
+}
+
+void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
+    std::lock_guard guard{inner_mutex};
+
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+        return e.type.lock().get() == event_type.get();
+    });
+
+    // Removing random items breaks the invariant so we have to re-establish it.
+    if (itr != event_queue.end()) {
+        event_queue.erase(itr, event_queue.end());
+        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    }
+}
+
+void CoreTiming::Advance() {
+    while (true) {
+        std::unique_lock<std::mutex> guard(inner_mutex);
+
+        global_timer = GetGlobalTimeNs().count();
+
+        while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+            Event evt = std::move(event_queue.front());
+            std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+            event_queue.pop_back();
+            inner_mutex.unlock();
+
+            if (auto event_type{evt.type.lock()}) {
+                event_type->callback(evt.userdata, global_timer - evt.time);
+            }
+
+            inner_mutex.lock();
+        }
+        auto next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
+        condvar.wait_for(guard, next_time, [this] { return is_set; });
+        is_set = false;
+        if (shutting_down) {
+            break;
+        }
+    }
+}
+
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+    sys_time_point current = std::chrono::system_clock::now();
+    auto elapsed = current - start_time;
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
+}
+
+std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
+    sys_time_point current = std::chrono::system_clock::now();
+    auto elapsed = current - start_time;
+    return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
+}
+
+} // namespace Core::Timing
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
new file mode 100644
index 0000000000..a3a32e0875
--- /dev/null
+++ b/src/core/host_timing.h
@@ -0,0 +1,126 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
+
+namespace Core::HostTiming {
+
+/// A callback that may be scheduled for a particular core timing event.
+using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
+using sys_time_point = std::chrono::time_point<std::chrono::system_clock>;
+
+/// Contains the characteristics of a particular event.
+struct EventType {
+    EventType(TimedCallback&& callback, std::string&& name)
+        : callback{std::move(callback)}, name{std::move(name)} {}
+
+    /// The event's callback function.
+    TimedCallback callback;
+    /// The name of the event (an owned string, fixed at construction).
+    const std::string name;
+};
+
+/**
+ * This is a system to schedule events into the host's future. Time is measured in
+ * nanoseconds elapsed since Initialize() was called.
+ *
+ * To schedule an event, you first have to create its type with CreateEvent(). This is where
+ * you pass in the callback. You then schedule events using the EventType you get back.
+ *
+ * The s64 the callbacks get (cycles_late in TimedCallback) is how many nanoseconds late it was.
+ * So to schedule a new event on a regular basis:
+ * inside callback:
+ *   ScheduleEvent(period_in_ns - ns_late, event_type, userdata)
+ */
+class CoreTiming {
+public:
+    CoreTiming();
+    ~CoreTiming();
+
+    CoreTiming(const CoreTiming&) = delete;
+    CoreTiming(CoreTiming&&) = delete;
+
+    CoreTiming& operator=(const CoreTiming&) = delete;
+    CoreTiming& operator=(CoreTiming&&) = delete;
+
+    /// Resets the event FIFO counter, records the start time and spawns the timer thread,
+    /// which runs Advance() until Shutdown() is called.
+    void Initialize();
+
+    /// Tears down all timing related functionality.
+    void Shutdown();
+
+    /// Schedules an event in core timing
+    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                       u64 userdata = 0);
+
+    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
+
+    /// Removes every pending event of the given type, regardless of its userdata.
+    void RemoveEvent(const std::shared_ptr<EventType>& event_type);
+
+    /// Returns current time in emulated CPU cycles
+    u64 GetCPUTicks() const;
+
+    /// Returns current time in emulated clock cycles
+    u64 GetClockTicks() const;
+
+    /// Returns current time in microseconds.
+    std::chrono::microseconds GetGlobalTimeUs() const;
+
+    /// Returns current time in nanoseconds.
+    std::chrono::nanoseconds GetGlobalTimeNs() const;
+
+private:
+    struct Event;
+
+    /// Clear all pending events. This should ONLY be done on exit.
+    void ClearPendingEvents();
+
+    static void ThreadEntry(CoreTiming& instance);
+    void Advance();
+
+    sys_time_point start_time;
+
+    u64 global_timer = 0;
+
+    std::chrono::nanoseconds start_point;
+
+    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
+    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
+    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
+    // accommodated by the standard adaptor class.
+    std::vector<Event> event_queue;
+    u64 event_fifo_id = 0;
+
+    std::shared_ptr<EventType> ev_lost;
+    bool is_set = false;
+    std::condition_variable condvar;
+    std::mutex inner_mutex;
+    std::unique_ptr<std::thread> timer_thread;
+    std::atomic<bool> shutting_down{};
+};
+
+/// Creates a core timing event with the given name and callback.
+///
+/// @param name     The name of the core timing event to create.
+/// @param callback The callback to execute for the event.
+///
+/// @returns An EventType instance representing the created event.
+///
+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
+
+} // namespace Core::HostTiming

From 45d8f48e3ce63702b806edabb2f918ab32172efa Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 8 Feb 2020 12:48:57 -0400
Subject: [PATCH 014/122] Tests: Add base tests to host timing

---
 src/common/thread.h            |   4 +-
 src/core/host_timing.cpp       |  97 +++++++++++++--------
 src/core/host_timing.h         |  30 ++++++-
 src/tests/CMakeLists.txt       |   1 +
 src/tests/core/host_timing.cpp | 150 +++++++++++++++++++++++++++++++++
 5 files changed, 241 insertions(+), 41 deletions(-)
 create mode 100644 src/tests/core/host_timing.cpp

diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc0716855..127cc7e233 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
 #include <cstddef>
 #include <mutex>
 #include <thread>
+#include "common/common_types.h"
 
 namespace Common {
 
@@ -28,8 +29,7 @@ public:
         is_set = false;
     }
 
-    template <class Duration>
-    bool WaitFor(const std::chrono::duration<Duration>& time) {
+    bool WaitFor(const std::chrono::nanoseconds& time) {
         std::unique_lock lk{mutex};
         if (!condvar.wait_for(lk, time, [this] { return is_set; }))
             return false;
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index c02f571c67..d9514b2c57 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -10,7 +10,6 @@
 #include <tuple>
 
 #include "common/assert.h"
-#include "common/thread.h"
 #include "core/core_timing_util.h"
 
 namespace Core::HostTiming {
@@ -47,39 +46,55 @@ void CoreTiming::Initialize() {
     event_fifo_id = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    start_time = std::chrono::system_clock::now();
+    start_time = std::chrono::steady_clock::now();
     timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
 }
 
 void CoreTiming::Shutdown() {
-    std::unique_lock<std::mutex> guard(inner_mutex);
+    paused = true;
     shutting_down = true;
-    if (!is_set) {
-        is_set = true;
-        condvar.notify_one();
-    }
-    inner_mutex.unlock();
+    event.Set();
     timer_thread->join();
     ClearPendingEvents();
+    timer_thread.reset();
+    has_started = false;
+}
+
+void CoreTiming::Pause(bool is_paused) {
+    paused = is_paused;
+}
+
+void CoreTiming::SyncPause(bool is_paused) {
+    if (is_paused == paused && paused_set == paused) {
+        return;
+    }
+    Pause(is_paused);
+    event.Set();
+    while (paused_set != is_paused);
+}
+
+bool CoreTiming::IsRunning() {
+    return !paused_set;
+}
+
+bool CoreTiming::HasPendingEvents() {
+    return !(wait_set && event_queue.empty());
 }
 
 void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                                u64 userdata) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
     const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
 
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
 
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    if (!is_set) {
-        is_set = true;
-        condvar.notify_one();
-    }
+    basic_lock.unlock();
+    event.Set();
 }
 
 void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-
+    basic_lock.lock();
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.userdata == userdata;
     });
@@ -89,6 +104,7 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
 u64 CoreTiming::GetCPUTicks() const {
@@ -106,7 +122,7 @@ void CoreTiming::ClearPendingEvents() {
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -117,43 +133,54 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
 void CoreTiming::Advance() {
-    while (true) {
-        std::unique_lock<std::mutex> guard(inner_mutex);
+    has_started = true;
+    while (!shutting_down) {
+        while (!paused) {
+            paused_set = false;
+            basic_lock.lock();
+            global_timer = GetGlobalTimeNs().count();
 
-        global_timer = GetGlobalTimeNs().count();
+            while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+                Event evt = std::move(event_queue.front());
+                std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+                event_queue.pop_back();
+                basic_lock.unlock();
 
-        while (!event_queue.empty() && event_queue.front().time <= global_timer) {
-            Event evt = std::move(event_queue.front());
-            std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-            event_queue.pop_back();
-            inner_mutex.unlock();
+                if (auto event_type{evt.type.lock()}) {
+                    event_type->callback(evt.userdata, global_timer - evt.time);
+                }
 
-            if (auto event_type{evt.type.lock()}) {
-                event_type->callback(evt.userdata, global_timer - evt.time);
+                basic_lock.lock();
             }
 
-            inner_mutex.lock();
-        }
-        auto next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
-        condvar.wait_for(guard, next_time, [this] { return is_set; });
-        is_set = false;
-        if (shutting_down) {
-            break;
+            if (!event_queue.empty()) {
+                std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
+                basic_lock.unlock();
+                event.WaitFor(next_time);
+            } else {
+                basic_lock.unlock();
+                wait_set = true;
+                event.Wait();
+            }
+
+            wait_set = false;
         }
+        paused_set = true;
     }
 }
 
 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    sys_time_point current = std::chrono::system_clock::now();
+    sys_time_point current = std::chrono::steady_clock::now();
     auto elapsed = current - start_time;
     return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    sys_time_point current = std::chrono::system_clock::now();
+    sys_time_point current = std::chrono::steady_clock::now();
     auto elapsed = current - start_time;
     return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
 }
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index a3a32e0875..1d053a7faa 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -14,13 +14,15 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/spin_lock.h"
+#include "common/thread.h"
 #include "common/threadsafe_queue.h"
 
 namespace Core::HostTiming {
 
 /// A callback that may be scheduled for a particular core timing event.
 using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
-using sys_time_point = std::chrono::time_point<std::chrono::system_clock>;
+using sys_time_point = std::chrono::time_point<std::chrono::steady_clock>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
@@ -63,6 +65,23 @@ public:
     /// Tears down all timing related functionality.
     void Shutdown();
 
+    /// Pauses/Unpauses the execution of the timer thread.
+    void Pause(bool is_paused);
+
+    /// Pauses/Unpauses the execution of the timer thread and waits until paused.
+    void SyncPause(bool is_paused);
+
+    /// Checks if core timing is running.
+    bool IsRunning();
+
+    /// Checks if the timer thread has started.
+    bool HasStarted() {
+        return has_started;
+    }
+
+    /// Checks if there are any pending time events.
+    bool HasPendingEvents();
+
     /// Schedules an event in core timing
     void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                        u64 userdata = 0);
@@ -107,11 +126,14 @@ private:
     u64 event_fifo_id = 0;
 
     std::shared_ptr<EventType> ev_lost;
-    bool is_set = false;
-    std::condition_variable condvar;
-    std::mutex inner_mutex;
+    Common::Event event{};
+    Common::SpinLock basic_lock{};
     std::unique_ptr<std::thread> timer_thread;
+    std::atomic<bool> paused{};
+    std::atomic<bool> paused_set{};
+    std::atomic<bool> wait_set{};
     std::atomic<bool> shutting_down{};
+    std::atomic<bool> has_started{};
 };
 
 /// Creates a core timing event with the given name and callback.
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 47ef30aa91..3f750b51c9 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
+    core/host_timing.cpp
     tests.cpp
 )
 
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
new file mode 100644
index 0000000000..ca9c8e50aa
--- /dev/null
+++ b/src/tests/core/host_timing.cpp
@@ -0,0 +1,150 @@
+// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <catch2/catch.hpp>
+
+#include <array>
+#include <bitset>
+#include <cstdlib>
+#include <memory>
+#include <string>
+
+#include "common/file_util.h"
+#include "core/core.h"
+#include "core/host_timing.h"
+
+// Numbers are chosen randomly to make sure the correct one is given.
+static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+static constexpr std::array<u64, 5> calls_order{{2,0,1,4,3}};
+static std::array<s64, 5> delays{};
+
+static std::bitset<CB_IDS.size()> callbacks_ran_flags;
+static u64 expected_callback = 0;
+static s64 lateness = 0;
+
+template <unsigned int IDX>
+void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
+    static_assert(IDX < CB_IDS.size(), "IDX out of range");
+    callbacks_ran_flags.set(IDX);
+    REQUIRE(CB_IDS[IDX] == userdata);
+    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
+    delays[IDX] = nanoseconds_late;
+    ++expected_callback;
+}
+
+static u64 callbacks_done = 0;
+
+struct ScopeInit final {
+    ScopeInit() {
+        core_timing.Initialize();
+    }
+    ~ScopeInit() {
+        core_timing.Shutdown();
+    }
+
+    Core::HostTiming::CoreTiming core_timing;
+};
+
+TEST_CASE("HostTiming[BasicOrder]", "[core]") {
+    ScopeInit guard;
+    auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
+    events.resize(5);
+    events[0] =
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] =
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] =
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] =
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] =
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+
+    expected_callback = 0;
+
+    core_timing.SyncPause(true);
+
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    /// test pause
+    REQUIRE(callbacks_ran_flags.none());
+
+    core_timing.Pause(false); // No need to sync
+
+    while (core_timing.HasPendingEvents());
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+    }
+}
+
+#pragma optimize("", off)
+u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 placebo = 0;
+    for (std::size_t i = 0; i < 1000; i++) {
+        placebo += core_timing.GetGlobalTimeNs().count();
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    return (end - start);
+}
+#pragma optimize("", on)
+
+TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
+    ScopeInit guard;
+    auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
+    events.resize(5);
+    events[0] =
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] =
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] =
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] =
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] =
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+
+    core_timing.SyncPause(true);
+    core_timing.SyncPause(false);
+
+    expected_callback = 0;
+
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    const double scheduling_time = static_cast<double>(end - start);
+    const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
+
+    while (core_timing.HasPendingEvents());
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+    }
+
+    const double micro = scheduling_time / 1000.0f;
+    const double mili = micro / 1000.0f;
+    printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
+    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f);
+}

From 88283c4e1a879c64541bbeb787a65e3d9c02ffcd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 9 Feb 2020 16:53:22 -0400
Subject: [PATCH 015/122] Common: Implement WallClock Interface and implement a
 native clock for x64

---
 src/common/CMakeLists.txt       |   4 +
 src/common/wall_clock.cpp       |  90 ++++++++++++++++++++++
 src/common/wall_clock.h         |  40 ++++++++++
 src/common/x64/cpu_detect.cpp   |  33 ++++++++
 src/common/x64/cpu_detect.h     |  12 +++
 src/common/x64/native_clock.cpp | 128 ++++++++++++++++++++++++++++++++
 src/common/x64/native_clock.h   |  41 ++++++++++
 src/core/host_timing.cpp        |  21 +++---
 src/core/host_timing.h          |   4 +-
 src/tests/core/host_timing.cpp  |  45 +++++------
 10 files changed, 378 insertions(+), 40 deletions(-)
 create mode 100644 src/common/wall_clock.cpp
 create mode 100644 src/common/wall_clock.h
 create mode 100644 src/common/x64/native_clock.cpp
 create mode 100644 src/common/x64/native_clock.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 00a5f6303f..f502354e54 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -165,6 +165,8 @@ add_library(common STATIC
     vector_math.h
     virtual_buffer.cpp
     virtual_buffer.h
+    wall_clock.cpp
+    wall_clock.h
     web_result.h
     zstd_compression.cpp
     zstd_compression.h
@@ -175,6 +177,8 @@ if(ARCHITECTURE_x86_64)
         PRIVATE
             x64/cpu_detect.cpp
             x64/cpu_detect.h
+            x64/native_clock.cpp
+            x64/native_clock.h
             x64/xbyak_abi.h
             x64/xbyak_util.h
     )
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 0000000000..eabbba9da6
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,90 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/uint128.h"
+#include "common/wall_clock.h"
+
+#ifdef ARCHITECTURE_x86_64
+#include "common/x64/cpu_detect.h"
+#include "common/x64/native_clock.h"
+#endif
+
+namespace Common {
+
+using base_timer = std::chrono::steady_clock;
+using base_time_point = std::chrono::time_point<base_timer>;
+
+class StandardWallClock : public WallClock {
+public:
+    StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
+        : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false),
+          start_time{base_timer::now()} {}
+
+    /// Time elapsed since construction; duration_cast truncates to the requested unit.
+    std::chrono::nanoseconds GetTimeNS() override {
+        return std::chrono::duration_cast<std::chrono::nanoseconds>(Elapsed());
+    }
+
+    std::chrono::microseconds GetTimeUS() override {
+        return std::chrono::duration_cast<std::chrono::microseconds>(Elapsed());
+    }
+
+    std::chrono::milliseconds GetTimeMS() override {
+        return std::chrono::duration_cast<std::chrono::milliseconds>(Elapsed());
+    }
+
+    /// Elapsed time in ticks of the emulated clock / emulated CPU frequency respectively.
+    u64 GetClockCycles() override {
+        return NsToTicks(emulated_clock_frequency);
+    }
+
+    u64 GetCPUCycles() override {
+        return NsToTicks(emulated_cpu_frequency);
+    }
+
+private:
+    base_timer::duration Elapsed() const {
+        return base_timer::now() - start_time;
+    }
+
+    u64 NsToTicks(u64 frequency) {
+        const u128 product = Common::Multiply64Into128(GetTimeNS().count(), frequency);
+        return Common::Divide128On32(product, 1000000000).first;
+    }
+
+    base_time_point start_time;
+};
+
+#ifdef ARCHITECTURE_x86_64
+
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    // Use the TSC-backed native clock only when the TSC is invariant and its rate is known;
+    // otherwise fall back to the std::chrono based clock.
+    const auto& cpu_caps = GetCPUCaps();
+    u64 tsc_frequency = 0;
+    if (cpu_caps.invariant_tsc) {
+        // A base_frequency of zero yields zero here and triggers the estimation below.
+        tsc_frequency = static_cast<u64>(cpu_caps.base_frequency) * 1000000U;
+        if (tsc_frequency == 0) {
+            tsc_frequency = EstimateRDTSCFrequency();
+        }
+    }
+    if (tsc_frequency != 0) {
+        return static_cast<WallClock*>(
+            new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, tsc_frequency));
+    }
+    return static_cast<WallClock*>(
+        new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency));
+}
+
+#else
+
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    // No native clock is built for this architecture: always use the std::chrono based clock.
+    return new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 0000000000..6f763d74ba
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,40 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+class WallClock {
+public:
+    virtual ~WallClock() = default; // derived clocks are new'd and returned as WallClock*
+    virtual std::chrono::nanoseconds GetTimeNS() = 0;
+    virtual std::chrono::microseconds GetTimeUS() = 0;
+    virtual std::chrono::milliseconds GetTimeMS() = 0;
+    virtual u64 GetClockCycles() = 0;
+    virtual u64 GetCPUCycles() = 0;
+
+    /// Tells if the wall clock uses the host CPU's hardware clock.
+    bool IsNative() const {
+        return is_native;
+    }
+
+protected:
+    WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
+        : emulated_cpu_frequency{emulated_cpu_frequency},
+          emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
+    u64 emulated_cpu_frequency;
+    u64 emulated_clock_frequency;
+
+private:
+    bool is_native;
+};
+
+WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+
+} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b42..d767c544cf 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
     std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
     std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
     std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
+    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
+        caps.manufacturer = Manufacturer::Intel;
+    else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
+        caps.manufacturer = Manufacturer::AMD;
+    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
+        caps.manufacturer = Manufacturer::Hygon;
+    else
+        caps.manufacturer = Manufacturer::Unknown;
+
+    u32 family = {};
+    u32 model = {};
 
     __cpuid(cpu_id, 0x80000000);
 
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
     // Detect family and other miscellaneous features
     if (max_std_fn >= 1) {
         __cpuid(cpu_id, 0x00000001);
+        family = (cpu_id[0] >> 8) & 0xf;
+        model = (cpu_id[0] >> 4) & 0xf;
+        if (family == 0xf) {
+            family += (cpu_id[0] >> 20) & 0xff;
+        }
+        if (family >= 6) {
+            model += ((cpu_id[0] >> 16) & 0xf) << 4;
+        }
 
         if ((cpu_id[3] >> 25) & 1)
             caps.sse = true;
@@ -130,6 +149,20 @@ static CPUCaps Detect() {
             caps.fma4 = true;
     }
 
+    if (max_ex_fn >= 0x80000007) {
+        __cpuid(cpu_id, 0x80000007);
+        if (cpu_id[3] & (1 << 8)) {
+            caps.invariant_tsc = true;
+        }
+    }
+
+    if (max_std_fn >= 0x16) {
+        __cpuid(cpu_id, 0x16);
+        caps.base_frequency = cpu_id[0];
+        caps.max_frequency = cpu_id[1];
+        caps.bus_frequency = cpu_id[2];
+    }
+
     return caps;
 }
 
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234a..f0676fa5e5 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
 
 namespace Common {
 
+enum class Manufacturer : u32 {
+    Intel = 0,
+    AMD = 1,
+    Hygon = 2,
+    Unknown = 3,
+};
+
 /// x86/x64 CPU capabilities that may be detected by this module
 struct CPUCaps {
+    Manufacturer manufacturer;
     char cpu_string[0x21];
     char brand_string[0x41];
     bool sse;
@@ -24,6 +32,10 @@ struct CPUCaps {
     bool fma;
     bool fma4;
     bool aes;
+    bool invariant_tsc;
+    u32 base_frequency;
+    u32 max_frequency;
+    u32 bus_frequency;
 };
 
 /**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 0000000000..c799111fdd
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,128 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <thread>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#include "common/x64/native_clock.h"
+
+namespace Common {
+
+#ifdef _MSC_VER
+
+namespace {
+
+struct uint128 {
+    u64 low;
+    u64 high;
+};
+
+u64 umuldiv64(u64 a, u64 b, u64 d) {
+    uint128 r{};
+    r.low = _umul128(a, b, &r.high);
+    u64 remainder;
+    return _udiv128(r.high, r.low, d, &remainder);
+}
+
+} // namespace
+
+#else
+
+namespace {
+
+u64 umuldiv64(u64 a, u64 b, u64 d) {
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+}
+
+} // namespace
+
+#endif
+
+u64 EstimateRDTSCFrequency() {
+    const auto milli_10 = std::chrono::milliseconds{10};
+    // get current time
+    _mm_mfence();
+    const u64 tscStart = __rdtsc();
+    const auto startTime = std::chrono::high_resolution_clock::now();
+    // wait roughly 3 seconds
+    while (true) {
+        auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
+            std::chrono::high_resolution_clock::now() - startTime);
+        if (milli.count() >= 3000)
+            break;
+        std::this_thread::sleep_for(milli_10);
+    }
+    const auto endTime = std::chrono::high_resolution_clock::now();
+    _mm_mfence();
+    const u64 tscEnd = __rdtsc();
+    // calculate difference
+    const u64 timer_diff =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
+    const u64 tsc_diff = tscEnd - tscStart;
+    const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff);
+    return tsc_freq;
+}
+
+namespace X64 {
+NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
+                         u64 rtsc_frequency)
+    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
+                                                                             rtsc_frequency} {
+    _mm_mfence();
+    last_measure = __rdtsc();
+    accumulated_ticks = 0U;
+}
+
+u64 NativeClock::GetRTSC() {
+    rtsc_serialize.lock();
+    _mm_mfence();
+    const u64 current_measure = __rdtsc();
+    u64 diff = current_measure - last_measure;
+    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+    if (current_measure > last_measure) {
+        last_measure = current_measure;
+    }
+    accumulated_ticks += diff;
+    rtsc_serialize.unlock();
+    return accumulated_ticks;
+}
+
+std::chrono::nanoseconds NativeClock::GetTimeNS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)};
+}
+
+std::chrono::microseconds NativeClock::GetTimeUS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)};
+}
+
+std::chrono::milliseconds NativeClock::GetTimeMS() {
+    const u64 rtsc_value = GetRTSC();
+    return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)};
+}
+
+u64 NativeClock::GetClockCycles() {
+    const u64 rtsc_value = GetRTSC();
+    return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+}
+
+u64 NativeClock::GetCPUCycles() {
+    const u64 rtsc_value = GetRTSC();
+    return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+}
+
+} // namespace X64
+
+} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 0000000000..b58cf9f5a4
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,41 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/spin_lock.h"
+#include "common/wall_clock.h"
+
+namespace Common {
+
+namespace X64 {
+class NativeClock : public WallClock {
+public:
+    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
+
+    std::chrono::nanoseconds GetTimeNS() override;
+
+    std::chrono::microseconds GetTimeUS() override;
+
+    std::chrono::milliseconds GetTimeMS() override;
+
+    u64 GetClockCycles() override;
+
+    u64 GetCPUCycles() override;
+
+private:
+    u64 GetRTSC();
+
+    SpinLock rtsc_serialize{};
+    u64 last_measure{};
+    u64 accumulated_ticks{};
+    u64 rtsc_frequency;
+};
+} // namespace X64
+
+u64 EstimateRDTSCFrequency();
+
+} // namespace Common
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index d9514b2c57..ef9977b760 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -35,7 +35,11 @@ struct CoreTiming::Event {
     }
 };
 
-CoreTiming::CoreTiming() = default;
+CoreTiming::CoreTiming() {
+    Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
+    clock = std::unique_ptr<Common::WallClock>(wall);
+}
+
 CoreTiming::~CoreTiming() = default;
 
 void CoreTiming::ThreadEntry(CoreTiming& instance) {
@@ -46,7 +50,6 @@ void CoreTiming::Initialize() {
     event_fifo_id = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    start_time = std::chrono::steady_clock::now();
     timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
 }
 
@@ -108,13 +111,11 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
 }
 
 u64 CoreTiming::GetCPUTicks() const {
-    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
-    return Core::Timing::nsToCycles(time_now);
+    return clock->GetCPUCycles();
 }
 
 u64 CoreTiming::GetClockTicks() const {
-    std::chrono::nanoseconds time_now = GetGlobalTimeNs();
-    return Core::Timing::nsToClockCycles(time_now);
+    return clock->GetClockCycles();
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -174,15 +175,11 @@ void CoreTiming::Advance() {
 }
 
 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    sys_time_point current = std::chrono::steady_clock::now();
-    auto elapsed = current - start_time;
-    return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
+    return clock->GetTimeNS();
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    sys_time_point current = std::chrono::steady_clock::now();
-    auto elapsed = current - start_time;
-    return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
+    return clock->GetTimeUS();
 }
 
 } // namespace Core::Timing
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 1d053a7faa..f04a150eeb 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -17,12 +17,12 @@
 #include "common/spin_lock.h"
 #include "common/thread.h"
 #include "common/threadsafe_queue.h"
+#include "common/wall_clock.h"
 
 namespace Core::HostTiming {
 
 /// A callback that may be scheduled for a particular core timing event.
 using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
-using sys_time_point = std::chrono::time_point<std::chrono::steady_clock>;
 
 /// Contains the characteristics of a particular event.
 struct EventType {
@@ -112,7 +112,7 @@ private:
     static void ThreadEntry(CoreTiming& instance);
     void Advance();
 
-    sys_time_point start_time;
+    std::unique_ptr<Common::WallClock> clock;
 
     u64 global_timer = 0;
 
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index ca9c8e50aa..3d0532d02b 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -17,7 +17,7 @@
 // Numbers are chosen randomly to make sure the correct one is given.
 static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
 static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
-static constexpr std::array<u64, 5> calls_order{{2,0,1,4,3}};
+static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
 static std::array<s64, 5> delays{};
 
 static std::bitset<CB_IDS.size()> callbacks_ran_flags;
@@ -52,16 +52,11 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     auto& core_timing = guard.core_timing;
     std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
     events.resize(5);
-    events[0] =
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] =
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] =
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] =
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] =
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
 
     expected_callback = 0;
 
@@ -70,14 +65,15 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     u64 one_micro = 1000U;
     for (std::size_t i = 0; i < events.size(); i++) {
         u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
     }
     /// test pause
     REQUIRE(callbacks_ran_flags.none());
 
     core_timing.Pause(false); // No need to sync
 
-    while (core_timing.HasPendingEvents());
+    while (core_timing.HasPendingEvents())
+        ;
 
     REQUIRE(callbacks_ran_flags.all());
 
@@ -106,16 +102,11 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     auto& core_timing = guard.core_timing;
     std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
     events.resize(5);
-    events[0] =
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] =
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] =
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] =
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] =
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
+    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
+    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
+    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
+    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
 
     core_timing.SyncPause(true);
     core_timing.SyncPause(false);
@@ -126,13 +117,14 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     u64 one_micro = 1000U;
     for (std::size_t i = 0; i < events.size(); i++) {
         u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i*one_micro + 100U, events[order], CB_IDS[order]);
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
     }
     u64 end = core_timing.GetGlobalTimeNs().count();
     const double scheduling_time = static_cast<double>(end - start);
     const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
 
-    while (core_timing.HasPendingEvents());
+    while (core_timing.HasPendingEvents())
+        ;
 
     REQUIRE(callbacks_ran_flags.all());
 
@@ -146,5 +138,6 @@ TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     const double micro = scheduling_time / 1000.0f;
     const double mili = micro / 1000.0f;
     printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
-    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, timer_time / 1000000.f);
+    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
+           timer_time / 1000000.f);
 }

From e1a2c6ba95b06411101684f446639bc83784525e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 11:20:40 -0400
Subject: [PATCH 016/122] Common: Refactor & Document Wall clock.

---
 src/common/uint128.cpp          | 22 +++++++++++++++
 src/common/uint128.h            |  3 +++
 src/common/wall_clock.cpp       | 13 ++++-----
 src/common/wall_clock.h         | 13 ++++++++-
 src/common/x64/native_clock.cpp | 47 +++++----------------------------
 src/core/host_timing.cpp        |  3 +--
 6 files changed, 50 insertions(+), 51 deletions(-)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730f..7e77588db1 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,34 @@
 #include <intrin.h>
 
 #pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
 #endif
 #include <cstring>
 #include "common/uint128.h"
 
 namespace Common {
 
+#ifdef _MSC_VER
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    u128 r{};
+    r[0] = _umul128(a, b, &r[1]);
+    u64 remainder;
+    return _udiv128(r[1], r[0], d, &remainder);
+}
+
+#else
+
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+}
+
+#endif
+
 u128 Multiply64Into128(u64 a, u64 b) {
     u128 result;
 #ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cba..503cd2d0c6 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
 
 namespace Common {
 
+// This function multiplies 2 u64 values and divides it by a u64 value.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+
 // This function multiplies 2 u64 values and produces a u128 value;
 u128 Multiply64Into128(u64 a, u64 b);
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index eabbba9da6..8f5e17fa4d 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -58,7 +58,7 @@ private:
 
 #ifdef ARCHITECTURE_x86_64
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
     const auto& caps = GetCPUCaps();
     u64 rtsc_frequency = 0;
     if (caps.invariant_tsc) {
@@ -70,19 +70,16 @@ WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_cloc
         }
     }
     if (rtsc_frequency == 0) {
-        return static_cast<WallClock*>(
-            new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency));
+        return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
     } else {
-        return static_cast<WallClock*>(
-            new X64::NativeClock(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency));
+        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency);
     }
 }
 
 #else
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
-    return static_cast<WallClock*>(
-        new StandardWallClock(emulated_cpu_frequency, emulated_clock_frequency));
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+    return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
 }
 
 #endif
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index 6f763d74ba..fc34429bbe 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <chrono>
+#include <memory>
 
 #include "common/common_types.h"
 
@@ -12,10 +13,20 @@ namespace Common {
 
 class WallClock {
 public:
+
+    /// Returns current wall time in nanoseconds
     virtual std::chrono::nanoseconds GetTimeNS() = 0;
+
+    /// Returns current wall time in microseconds
     virtual std::chrono::microseconds GetTimeUS() = 0;
+
+    /// Returns current wall time in milliseconds
     virtual std::chrono::milliseconds GetTimeMS() = 0;
+
+    /// Returns current wall time in emulated clock cycles
     virtual u64 GetClockCycles() = 0;
+
+    /// Returns current wall time in emulated cpu cycles
     virtual u64 GetCPUCycles() = 0;
 
     /// Tells if the wall clock, uses the host CPU's hardware clock
@@ -35,6 +46,6 @@ private:
     bool is_native;
 };
 
-WallClock* CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
 
 } // namespace Common
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index c799111fdd..26d4d0ba68 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -11,44 +11,11 @@
 #include <x86intrin.h>
 #endif
 
+#include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
 namespace Common {
 
-#ifdef _MSC_VER
-
-namespace {
-
-struct uint128 {
-    u64 low;
-    u64 high;
-};
-
-u64 umuldiv64(u64 a, u64 b, u64 d) {
-    uint128 r{};
-    r.low = _umul128(a, b, &r.high);
-    u64 remainder;
-    return _udiv128(r.high, r.low, d, &remainder);
-}
-
-} // namespace
-
-#else
-
-namespace {
-
-u64 umuldiv64(u64 a, u64 b, u64 d) {
-    const u64 diva = a / d;
-    const u64 moda = a % d;
-    const u64 divb = b / d;
-    const u64 modb = b % d;
-    return diva * b + moda * divb + moda * modb / d;
-}
-
-} // namespace
-
-#endif
-
 u64 EstimateRDTSCFrequency() {
     const auto milli_10 = std::chrono::milliseconds{10};
     // get current time
@@ -70,7 +37,7 @@ u64 EstimateRDTSCFrequency() {
     const u64 timer_diff =
         std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
     const u64 tsc_diff = tscEnd - tscStart;
-    const u64 tsc_freq = umuldiv64(tsc_diff, 1000000000ULL, timer_diff);
+    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
     return tsc_freq;
 }
 
@@ -100,27 +67,27 @@ u64 NativeClock::GetRTSC() {
 
 std::chrono::nanoseconds NativeClock::GetTimeNS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::nanoseconds{umuldiv64(rtsc_value, 1000000000, rtsc_frequency)};
+    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
 }
 
 std::chrono::microseconds NativeClock::GetTimeUS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::microseconds{umuldiv64(rtsc_value, 1000000, rtsc_frequency)};
+    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
 }
 
 std::chrono::milliseconds NativeClock::GetTimeMS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::milliseconds{umuldiv64(rtsc_value, 1000, rtsc_frequency)};
+    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
 }
 
 u64 NativeClock::GetClockCycles() {
     const u64 rtsc_value = GetRTSC();
-    return umuldiv64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+    return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
 }
 
 u64 NativeClock::GetCPUCycles() {
     const u64 rtsc_value = GetRTSC();
-    return umuldiv64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
 }
 
 } // namespace X64
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index ef9977b760..4ccf7c6c1c 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -36,8 +36,7 @@ struct CoreTiming::Event {
 };
 
 CoreTiming::CoreTiming() {
-    Common::WallClock* wall = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
-    clock = std::unique_ptr<Common::WallClock>(wall);
+    clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
 }
 
 CoreTiming::~CoreTiming() = default;

From fbd211a6be131066985afdccb848e5ad03da539f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 13:18:23 -0400
Subject: [PATCH 017/122] Common: Correct fcontext fibers.

---
 src/common/fiber.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a88a30cede..e91d86dbea 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,6 +12,7 @@
 
 namespace Common {
 
+
 #ifdef _MSC_VER
 
 struct Fiber::FiberImpl {
@@ -82,7 +83,6 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-
 constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
 
 struct alignas(64) Fiber::FiberImpl {
@@ -108,9 +108,8 @@ void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    auto start_func = std::bind(&Fiber::start, this);
-    impl->context =
-        boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(), &start_func);
+    impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
+                                                          FiberStartFunc);
 }
 
 Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
@@ -139,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
-    auto transfer = boost::context::detail::jump_fcontext(to->impl.context, nullptr);
+    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr);
     auto previous_fiber = from->previous_fiber;
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;

From eb5ddff4b562815b0a5076173171f4c9c31c9658 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 13:33:13 -0400
Subject: [PATCH 018/122] Common/Tests: Clang Format.

---
 src/common/fiber.cpp        | 21 ++++++++++-----------
 src/common/fiber.h          |  2 +-
 src/common/wall_clock.cpp   | 12 ++++++++----
 src/common/wall_clock.h     |  4 ++--
 src/core/host_timing.cpp    |  8 +++++---
 src/core/host_timing.h      |  2 +-
 src/tests/common/fibers.cpp | 23 ++++++++++++++---------
 7 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e91d86dbea..a46be73c10 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,7 +12,6 @@
 
 namespace Common {
 
-
 #ifdef _MSC_VER
 
 struct Fiber::FiberImpl {
@@ -27,14 +26,14 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
-void __stdcall Fiber::FiberStartFunc(void* fiber_parameter)
-{
-   auto fiber = static_cast<Fiber *>(fiber_parameter);
-   fiber->start();
+void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->start();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
+      previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
     impl->handle = CreateFiber(0, &FiberStartFunc, this);
 }
@@ -99,14 +98,14 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
-void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer)
-{
-   auto fiber = static_cast<Fiber *>(transfer.data);
-   fiber->start(transfer);
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->start(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter}, previous_fiber{} {
+    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
+      previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
     impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
                                                           FiberStartFunc);
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 89a01fdd8e..b530bf4d2c 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -12,7 +12,7 @@
 
 #ifndef _MSC_VER
 namespace boost::context::detail {
-    struct transfer_t;
+struct transfer_t;
 }
 #endif
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 8f5e17fa4d..e6161c72ce 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -58,7 +58,8 @@ private:
 
 #ifdef ARCHITECTURE_x86_64
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency) {
     const auto& caps = GetCPUCaps();
     u64 rtsc_frequency = 0;
     if (caps.invariant_tsc) {
@@ -70,15 +71,18 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u
         }
     }
     if (rtsc_frequency == 0) {
-        return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
+        return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
+                                                   emulated_clock_frequency);
     } else {
-        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, rtsc_frequency);
+        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
+                                                  rtsc_frequency);
     }
 }
 
 #else
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency) {
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency) {
     return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
 }
 
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index fc34429bbe..ed284cf502 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -13,7 +13,6 @@ namespace Common {
 
 class WallClock {
 public:
-
     /// Returns current wall time in nanoseconds
     virtual std::chrono::nanoseconds GetTimeNS() = 0;
 
@@ -46,6 +45,7 @@ private:
     bool is_native;
 };
 
-std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, u32 emulated_clock_frequency);
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+                                                   u32 emulated_clock_frequency);
 
 } // namespace Common
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index 4ccf7c6c1c..c734a118e4 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -72,7 +72,8 @@ void CoreTiming::SyncPause(bool is_paused) {
     }
     Pause(is_paused);
     event.Set();
-    while (paused_set != is_paused);
+    while (paused_set != is_paused)
+        ;
 }
 
 bool CoreTiming::IsRunning() {
@@ -158,7 +159,8 @@ void CoreTiming::Advance() {
             }
 
             if (!event_queue.empty()) {
-                std::chrono::nanoseconds next_time = std::chrono::nanoseconds(event_queue.front().time - global_timer);
+                std::chrono::nanoseconds next_time =
+                    std::chrono::nanoseconds(event_queue.front().time - global_timer);
                 basic_lock.unlock();
                 event.WaitFor(next_time);
             } else {
@@ -181,4 +183,4 @@ std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
     return clock->GetTimeUS();
 }
 
-} // namespace Core::Timing
+} // namespace Core::HostTiming
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index f04a150eeb..15a150904a 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -145,4 +145,4 @@ private:
 ///
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
 
-} // namespace Core::Timing
+} // namespace Core::HostTiming
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index 358393a192..d63194dd4c 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -92,7 +92,8 @@ public:
 
     void DoWork1() {
         trap2 = false;
-        while (trap.load());
+        while (trap.load())
+            ;
         for (u32 i = 0; i < 12000; i++) {
             value1 += i;
         }
@@ -105,7 +106,8 @@ public:
     }
 
     void DoWork2() {
-        while (trap2.load());
+        while (trap2.load())
+            ;
         value2 = 2000;
         trap = false;
         Fiber::YieldTo(fiber2, fiber1);
@@ -197,9 +199,12 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
-    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
-    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
-    test_control.fiber3 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
+    test_control.fiber1 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
+    test_control.fiber2 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
+    test_control.fiber3 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
     std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
     std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
     thread1.join();
@@ -291,8 +296,10 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
 TEST_CASE("Fibers::StartRace", "[common]") {
     TestControl3 test_control{};
     test_control.thread_fibers.resize(2, nullptr);
-    test_control.fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
-    test_control.fiber2 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
+    test_control.fiber1 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
+    test_control.fiber2 =
+        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
     std::thread thread1(ThreadStart3, 0, std::ref(test_control));
     std::thread thread2(ThreadStart3, 1, std::ref(test_control));
     thread1.join();
@@ -302,6 +309,4 @@ TEST_CASE("Fibers::StartRace", "[common]") {
     REQUIRE(test_control.value3 == 1);
 }
 
-
-
 } // namespace Common

From ef0b49f4208df11b3ab56fd347acb106eb1d1871 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 14:21:23 -0400
Subject: [PATCH 019/122] Common: Make MinGW build use Windows Fibers instead
 of fcontext_t

---
 src/common/fiber.cpp | 4 ++--
 src/common/fiber.h   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index a46be73c10..050c93acba 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -4,7 +4,7 @@
 
 #include "common/assert.h"
 #include "common/fiber.h"
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
 #include <windows.h>
 #else
 #include <boost/context/detail/fcontext.hpp>
@@ -12,7 +12,7 @@
 
 namespace Common {
 
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
diff --git a/src/common/fiber.h b/src/common/fiber.h
index b530bf4d2c..598fe7daaf 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -10,7 +10,7 @@
 #include "common/common_types.h"
 #include "common/spin_lock.h"
 
-#ifndef _MSC_VER
+#if !defined(_WIN32) && !defined(WIN32)
 namespace boost::context::detail {
 struct transfer_t;
 }
@@ -57,7 +57,7 @@ public:
 private:
     Fiber();
 
-#ifdef _MSC_VER
+#if defined(_WIN32) || defined(WIN32)
     void start();
     static void FiberStartFunc(void* fiber_parameter);
 #else

From a59d20a58694bc11506964d58183ee1317ae6cc1 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 14:45:08 -0400
Subject: [PATCH 020/122] Common/Tests: Address Feedback

---
 src/common/fiber.cpp           |  5 ++---
 src/common/fiber.h             |  8 ++++----
 src/common/spin_lock.cpp       |  3 ++-
 src/core/core_timing_util.cpp  | 14 ++++++++++++--
 src/core/core_timing_util.h    |  2 ++
 src/core/host_timing.cpp       |  4 ++--
 src/core/host_timing.h         |  6 +++---
 src/tests/common/fibers.cpp    | 20 ++++++++++----------
 src/tests/core/host_timing.cpp | 28 ++++++++++++++--------------
 9 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 050c93acba..1220eddf05 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -32,13 +32,12 @@ void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
-      previous_fiber{} {
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
     impl->handle = CreateFiber(0, &FiberStartFunc, this);
 }
 
-Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
 
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 598fe7daaf..7e3b130a46 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -67,10 +67,10 @@ private:
 
     struct FiberImpl;
 
-    SpinLock guard;
-    std::function<void(void*)> entry_point;
-    void* start_parameter;
-    std::shared_ptr<Fiber> previous_fiber;
+    SpinLock guard{};
+    std::function<void(void*)> entry_point{};
+    void* start_parameter{};
+    std::shared_ptr<Fiber> previous_fiber{};
     std::unique_ptr<FiberImpl> impl;
     bool is_thread_fiber{};
 };
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
index 82a1d39fff..c7b46aac6b 100644
--- a/src/common/spin_lock.cpp
+++ b/src/common/spin_lock.cpp
@@ -35,8 +35,9 @@ void thread_pause() {
 namespace Common {
 
 void SpinLock::lock() {
-    while (lck.test_and_set(std::memory_order_acquire))
+    while (lck.test_and_set(std::memory_order_acquire)) {
         thread_pause();
+    }
 }
 
 void SpinLock::unlock() {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index f42666b4db..be34b26fe4 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -49,9 +49,19 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
     return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }
 
+u64 msToClockCycles(std::chrono::milliseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000).first;
+}
+
+u64 usToClockCycles(std::chrono::microseconds ns) {
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000).first;
+}
+
 u64 nsToClockCycles(std::chrono::nanoseconds ns) {
-    const u128 temporal = Common::Multiply64Into128(ns.count(), CNTFREQ);
-    return Common::Divide128On32(temporal, 1000000000).first;
+    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+    return Common::Divide128On32(temp, 1000000000).first;
 }
 
 u64 CpuCyclesToClockCycles(u64 ticks) {
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 65fb7368b6..b3c58447d5 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,6 +13,8 @@ namespace Core::Timing {
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);
+u64 msToClockCycles(std::chrono::milliseconds ns);
+u64 usToClockCycles(std::chrono::microseconds ns);
 u64 nsToClockCycles(std::chrono::nanoseconds ns);
 
 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index c734a118e4..be80d9f8ed 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -76,11 +76,11 @@ void CoreTiming::SyncPause(bool is_paused) {
         ;
 }
 
-bool CoreTiming::IsRunning() {
+bool CoreTiming::IsRunning() const {
     return !paused_set;
 }
 
-bool CoreTiming::HasPendingEvents() {
+bool CoreTiming::HasPendingEvents() const {
     return !(wait_set && event_queue.empty());
 }
 
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 15a150904a..679fcf491a 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -72,15 +72,15 @@ public:
     void SyncPause(bool is_paused);
 
     /// Checks if core timing is running.
-    bool IsRunning();
+    bool IsRunning() const;
 
     /// Checks if the timer thread has started.
-    bool HasStarted() {
+    bool HasStarted() const {
         return has_started;
     }
 
     /// Checks if there are any pending time events.
-    bool HasPendingEvents();
+    bool HasPendingEvents() const;
 
     /// Schedules an event in core timing
     void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index d63194dd4c..0d3d5153d6 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -34,7 +34,7 @@ public:
 };
 
 static void WorkControl1(void* control) {
-    TestControl1* test_control = static_cast<TestControl1*>(control);
+    auto* test_control = static_cast<TestControl1*>(control);
     test_control->DoWork();
 }
 
@@ -70,8 +70,8 @@ static void ThreadStart1(u32 id, TestControl1& test_control) {
 TEST_CASE("Fibers::Setup", "[common]") {
     constexpr u32 num_threads = 7;
     TestControl1 test_control{};
-    test_control.thread_fibers.resize(num_threads, nullptr);
-    test_control.work_fibers.resize(num_threads, nullptr);
+    test_control.thread_fibers.resize(num_threads);
+    test_control.work_fibers.resize(num_threads);
     test_control.items.resize(num_threads, 0);
     test_control.results.resize(num_threads, 0);
     std::vector<std::thread> threads;
@@ -153,17 +153,17 @@ public:
 };
 
 static void WorkControl2_1(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork1();
 }
 
 static void WorkControl2_2(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork2();
 }
 
 static void WorkControl2_3(void* control) {
-    TestControl2* test_control = static_cast<TestControl2*>(control);
+    auto* test_control = static_cast<TestControl2*>(control);
     test_control->DoWork3();
 }
 
@@ -198,7 +198,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
  */
 TEST_CASE("Fibers::InterExchange", "[common]") {
     TestControl2 test_control{};
-    test_control.thread_fibers.resize(2, nullptr);
+    test_control.thread_fibers.resize(2);
     test_control.fiber1 =
         std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
     test_control.fiber2 =
@@ -261,12 +261,12 @@ public:
 };
 
 static void WorkControl3_1(void* control) {
-    TestControl3* test_control = static_cast<TestControl3*>(control);
+    auto* test_control = static_cast<TestControl3*>(control);
     test_control->DoWork1();
 }
 
 static void WorkControl3_2(void* control) {
-    TestControl3* test_control = static_cast<TestControl3*>(control);
+    auto* test_control = static_cast<TestControl3*>(control);
     test_control->DoWork2();
 }
 
@@ -295,7 +295,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
  */
 TEST_CASE("Fibers::StartRace", "[common]") {
     TestControl3 test_control{};
-    test_control.thread_fibers.resize(2, nullptr);
+    test_control.thread_fibers.resize(2);
     test_control.fiber1 =
         std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
     test_control.fiber2 =
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index 3d0532d02b..ed060be55c 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -50,13 +50,13 @@ struct ScopeInit final {
 TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
-    events.resize(5);
-    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
     expected_callback = 0;
 
@@ -100,13 +100,13 @@ u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
 TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events;
-    events.resize(5);
-    events[0] = Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>);
-    events[1] = Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>);
-    events[2] = Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>);
-    events[3] = Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>);
-    events[4] = Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>);
+    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
+        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
     core_timing.SyncPause(true);
     core_timing.SyncPause(false);

From f8655f4feed8214e6e72ad0075c016a51107204e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 10 Feb 2020 15:02:04 -0400
Subject: [PATCH 021/122] Core/HostTiming: Allow events to be advanced
 manually.

---
 src/common/fiber.cpp      |  2 +-
 src/common/wall_clock.cpp |  9 +++---
 src/core/host_timing.cpp  | 61 +++++++++++++++++++++++----------------
 src/core/host_timing.h    |  6 +++-
 4 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 1220eddf05..e9c0946b6a 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -110,7 +110,7 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
                                                           FiberStartFunc);
 }
 
-Fiber::Fiber() : guard{}, entry_point{}, start_parameter{}, previous_fiber{} {
+Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
 
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index e6161c72ce..d4d35f4e7a 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -42,14 +42,15 @@ public:
 
     u64 GetClockCycles() override {
         std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
-        return Common::Divide128On32(temporal, 1000000000).first;
+        const u128 temporary =
+            Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
+        return Common::Divide128On32(temporary, 1000000000).first;
     }
 
     u64 GetCPUCycles() override {
         std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporal = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
-        return Common::Divide128On32(temporal, 1000000000).first;
+        const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
+        return Common::Divide128On32(temporary, 1000000000).first;
     }
 
 private:
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index be80d9f8ed..5d35a96b14 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -42,7 +42,7 @@ CoreTiming::CoreTiming() {
 CoreTiming::~CoreTiming() = default;
 
 void CoreTiming::ThreadEntry(CoreTiming& instance) {
-    instance.Advance();
+    instance.ThreadLoop();
 }
 
 void CoreTiming::Initialize() {
@@ -137,38 +137,49 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
     basic_lock.unlock();
 }
 
-void CoreTiming::Advance() {
+std::optional<u64> CoreTiming::Advance() {
+    advance_lock.lock();
+    basic_lock.lock();
+    global_timer = GetGlobalTimeNs().count();
+
+    while (!event_queue.empty() && event_queue.front().time <= global_timer) {
+        Event evt = std::move(event_queue.front());
+        std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+        event_queue.pop_back();
+        basic_lock.unlock();
+
+        if (auto event_type{evt.type.lock()}) {
+            event_type->callback(evt.userdata, global_timer - evt.time);
+        }
+
+        basic_lock.lock();
+    }
+
+    if (!event_queue.empty()) {
+        const u64 next_time = event_queue.front().time - global_timer;
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return next_time;
+    } else {
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return std::nullopt;
+    }
+}
+
+void CoreTiming::ThreadLoop() {
     has_started = true;
     while (!shutting_down) {
         while (!paused) {
             paused_set = false;
-            basic_lock.lock();
-            global_timer = GetGlobalTimeNs().count();
-
-            while (!event_queue.empty() && event_queue.front().time <= global_timer) {
-                Event evt = std::move(event_queue.front());
-                std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-                event_queue.pop_back();
-                basic_lock.unlock();
-
-                if (auto event_type{evt.type.lock()}) {
-                    event_type->callback(evt.userdata, global_timer - evt.time);
-                }
-
-                basic_lock.lock();
-            }
-
-            if (!event_queue.empty()) {
-                std::chrono::nanoseconds next_time =
-                    std::chrono::nanoseconds(event_queue.front().time - global_timer);
-                basic_lock.unlock();
-                event.WaitFor(next_time);
+            const auto next_time = Advance();
+            if (next_time) {
+                std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                event.WaitFor(next_time_ns);
             } else {
-                basic_lock.unlock();
                 wait_set = true;
                 event.Wait();
             }
-
             wait_set = false;
         }
         paused_set = true;
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 679fcf491a..cd44b308cb 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -103,6 +103,9 @@ public:
     /// Returns current time in nanoseconds.
     std::chrono::nanoseconds GetGlobalTimeNs() const;
 
+    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
+    std::optional<u64> Advance();
+
 private:
     struct Event;
 
@@ -110,7 +113,7 @@ private:
     void ClearPendingEvents();
 
     static void ThreadEntry(CoreTiming& instance);
-    void Advance();
+    void ThreadLoop();
 
     std::unique_ptr<Common::WallClock> clock;
 
@@ -128,6 +131,7 @@ private:
     std::shared_ptr<EventType> ev_lost;
     Common::Event event{};
     Common::SpinLock basic_lock{};
+    Common::SpinLock advance_lock{};
     std::unique_ptr<std::thread> timer_thread;
     std::atomic<bool> paused{};
     std::atomic<bool> paused_set{};

From b6f64952510fee179aa2746e9d438e011efeec24 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 15 Feb 2020 13:56:50 -0400
Subject: [PATCH 022/122] HostTiming: Correct rebase and implement AddTicks.

---
 src/core/host_timing.cpp | 11 ++++++++++-
 src/core/host_timing.h   |  9 +++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
index 5d35a96b14..2f40de1a1c 100644
--- a/src/core/host_timing.cpp
+++ b/src/core/host_timing.cpp
@@ -36,7 +36,8 @@ struct CoreTiming::Event {
 };
 
 CoreTiming::CoreTiming() {
-    clock = Common::CreateBestMatchingClock(Core::Timing::BASE_CLOCK_RATE, Core::Timing::CNTFREQ);
+    clock =
+        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
 }
 
 CoreTiming::~CoreTiming() = default;
@@ -110,6 +111,14 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
     basic_lock.unlock();
 }
 
+void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
+    ticks_count[core_index] += ticks;
+}
+
+void CoreTiming::ResetTicks(std::size_t core_index) {
+    ticks_count[core_index] = 0;
+}
+
 u64 CoreTiming::GetCPUTicks() const {
     return clock->GetCPUCycles();
 }
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index cd44b308cb..5ad8c5f358 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <chrono>
 #include <functional>
 #include <memory>
@@ -18,6 +19,7 @@
 #include "common/thread.h"
 #include "common/threadsafe_queue.h"
 #include "common/wall_clock.h"
+#include "core/hardware_properties.h"
 
 namespace Core::HostTiming {
 
@@ -91,6 +93,11 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
+
+    void AddTicks(std::size_t core_index, u64 ticks);
+
+    void ResetTicks(std::size_t core_index);
+
     /// Returns current time in emulated CPU cycles
     u64 GetCPUTicks() const;
 
@@ -138,6 +145,8 @@ private:
     std::atomic<bool> wait_set{};
     std::atomic<bool> shutting_down{};
     std::atomic<bool> has_started{};
+
+    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
 };
 
 /// Creates a core timing event with the given name and callback.

From 3ce6df190be1e54fba4ff00444da845f6d23d122 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 22 Feb 2020 12:56:33 -0400
Subject: [PATCH 023/122] Host Timing: Correct clang format.

---
 src/core/host_timing.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/host_timing.h b/src/core/host_timing.h
index 5ad8c5f358..be6b68d7cc 100644
--- a/src/core/host_timing.h
+++ b/src/core/host_timing.h
@@ -93,7 +93,6 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
-
     void AddTicks(std::size_t core_index, u64 ticks);
 
     void ResetTicks(std::size_t core_index);

From 04e3077659b313904626e2c0ec09f634c15a4aa5 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 14:39:27 -0400
Subject: [PATCH 024/122] Common/Fiber: Correct f_context based Fibers.

---
 src/common/fiber.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e9c0946b6a..3ef820c626 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -81,10 +81,10 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024 * 4; // 4MB
+constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB
 
-struct alignas(64) Fiber::FiberImpl {
-    std::array<u8, default_stack_size> stack;
+struct Fiber::FiberImpl {
+    alignas(64) std::array<u8, default_stack_size> stack;
     boost::context::detail::fcontext_t context;
 };
 
@@ -106,8 +106,10 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    impl->context = boost::context::detail::make_fcontext(impl->stack.data(), impl->stack.size(),
-                                                          FiberStartFunc);
+    void* stack_start =
+        static_cast<void*>(static_cast<std::uintptr_t>(impl->stack.data()) + default_stack_size);
+    impl->context =
+        boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc);
 }
 
 Fiber::Fiber() {
@@ -136,7 +138,7 @@ void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
-    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, nullptr);
+    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
     auto previous_fiber = from->previous_fiber;
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;

From cb021f6a710f2a58e2fd2f49c6945dd760a81b43 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 17:34:23 -0400
Subject: [PATCH 025/122] Common/Fiber: Additional corrections to f_context.

---
 src/common/fiber.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 3ef820c626..e4ecc73df3 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -81,7 +81,7 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024; // 4MB
+constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
@@ -106,10 +106,10 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    void* stack_start =
-        static_cast<void*>(static_cast<std::uintptr_t>(impl->stack.data()) + default_stack_size);
+    u8* stack_limit = impl->stack.data();
+    u8* stack_base = stack_limit + default_stack_size;
     impl->context =
-        boost::context::detail::make_fcontext(stack_start, impl->stack.size(), FiberStartFunc);
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }
 
 Fiber::Fiber() {

From cc19edfd56461fa6dc53c47e885ea24e43aeeb55 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 16:32:47 -0400
Subject: [PATCH 026/122] Common/Fiber: Implement Rewinding.

---
 src/common/fiber.cpp        | 32 ++++++++++++++++++++++++--
 src/common/fiber.h          |  8 +++++++
 src/tests/common/fibers.cpp | 46 +++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index e4ecc73df3..f61479e132 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -12,10 +12,13 @@
 
 namespace Common {
 
+constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
+
 #if defined(_WIN32) || defined(WIN32)
 
 struct Fiber::FiberImpl {
     LPVOID handle = nullptr;
+    LPVOID rewind_handle = nullptr;
 };
 
 void Fiber::start() {
@@ -26,15 +29,29 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
+void Fiber::onRewind() {
+    ASSERT(impl->handle != nullptr);
+    DeleteFiber(impl->handle);
+    impl->handle = impl->rewind_handle;
+    impl->rewind_handle = nullptr;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
 void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
     fiber->start();
 }
 
+void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) {
+    auto fiber = static_cast<Fiber*>(fiber_parameter);
+    fiber->onRewind();
+}
+
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
-    impl->handle = CreateFiber(0, &FiberStartFunc, this);
+    impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
 }
 
 Fiber::Fiber() {
@@ -60,6 +77,18 @@ void Fiber::Exit() {
     guard.unlock();
 }
 
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
+void Fiber::Rewind() {
+    ASSERT(rewind_point);
+    ASSERT(impl->rewind_handle == nullptr);
+    impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
+    SwitchToFiber(impl->rewind_handle);
+}
+
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
@@ -81,7 +110,6 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 }
 
 #else
-constexpr std::size_t default_stack_size = 1024 * 1024; // 1MB
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 7e3b130a46..a710df257a 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -46,6 +46,10 @@ public:
     static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
     static std::shared_ptr<Fiber> ThreadToFiber();
 
+    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+
+    void Rewind();
+
     /// Only call from main thread's fiber
     void Exit();
 
@@ -58,8 +62,10 @@ private:
     Fiber();
 
 #if defined(_WIN32) || defined(WIN32)
+    void onRewind();
     void start();
     static void FiberStartFunc(void* fiber_parameter);
+    static void RewindStartFunc(void* fiber_parameter);
 #else
     void start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
@@ -69,6 +75,8 @@ private:
 
     SpinLock guard{};
     std::function<void(void*)> entry_point{};
+    std::function<void(void*)> rewind_point{};
+    void* rewind_parameter{};
     void* start_parameter{};
     std::shared_ptr<Fiber> previous_fiber{};
     std::unique_ptr<FiberImpl> impl;
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index 0d3d5153d6..12536b6d88 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -309,4 +309,50 @@ TEST_CASE("Fibers::StartRace", "[common]") {
     REQUIRE(test_control.value3 == 1);
 }
 
+class TestControl4;
+
+static void WorkControl4(void* control);
+
+class TestControl4 { // Fixture for Fibers::Rewind: one worker fiber plus the thread's fiber.
+public:
+    TestControl4() {
+        fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this);
+        goal_reached = false;
+        rewinded = false;
+    }
+
+    void Execute() { // Drives the scenario from the calling thread.
+        thread_fiber = Fiber::ThreadToFiber();
+        Fiber::YieldTo(thread_fiber, fiber1); // hand control to the worker (enters DoWork)
+        thread_fiber->Exit();
+    }
+
+    void DoWork() { // Worker body; WorkControl4 is also registered as the rewind point.
+        fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this);
+        if (rewinded) { // only true once Rewind() has been requested below
+            goal_reached = true;
+            Fiber::YieldTo(fiber1, thread_fiber); // give control back to Execute()
+        }
+        rewinded = true; // first entry: record it, then restart through the rewind point
+        fiber1->Rewind();
+    }
+
+    std::shared_ptr<Common::Fiber> fiber1;       // worker fiber running WorkControl4
+    std::shared_ptr<Common::Fiber> thread_fiber; // fiber obtained from ThreadToFiber()
+    bool goal_reached;                           // set when DoWork runs with rewinded == true
+    bool rewinded;                               // set just before Rewind() is called
+};
+
+static void WorkControl4(void* control) {
+    auto* test_control = static_cast<TestControl4*>(control);
+    test_control->DoWork();
+}
+
+TEST_CASE("Fibers::Rewind", "[common]") {
+    TestControl4 test_control{};
+    test_control.Execute();
+    REQUIRE(test_control.goal_reached);
+    REQUIRE(test_control.rewinded);
+}
+
 } // namespace Common

From d99ce728ee61de47e6c8146697af7ce04139d158 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 11:24:08 -0400
Subject: [PATCH 027/122] Common/Fiber: Document fiber interexchange.

---
 src/common/fiber.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/common/fiber.h b/src/common/fiber.h
index a710df257a..3bbd506b50 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -28,7 +28,10 @@ namespace Common {
  * to implement such patterns. This fiber class is 'threadsafe' only one fiber
  * can be running at a time and threads will be locked while trying to yield to
  * a running fiber until it yields. WARNING exchanging two running fibers between
- * threads will cause a deadlock.
+ * threads will cause a deadlock. To prevent this, give each thread an intermediary fiber:
+ * first switch to the intermediary fiber of the current thread, and then switch from it
+ * to the expected fiber. This way two fibers can be exchanged between two different
+ * threads.
  */
 class Fiber {
 public:

From 63ec12c83ea9e8aecc16f75f8c1afff801e01f00 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 12 Mar 2020 20:10:51 -0400
Subject: [PATCH 028/122] Common/uint128: Correct MSVC Compilation in old
 versions.

---
 src/common/uint128.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 7e77588db1..16bf7c8283 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -19,7 +19,11 @@ u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
     u128 r{};
     r[0] = _umul128(a, b, &r[1]);
     u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], d, &remainder);
+#else
     return _udiv128(r[1], r[0], d, &remainder);
+#endif
 }
 
 #else

From 74b23a849cdfa9b2b5ffe6daa1885ab92512af3c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 1 Apr 2020 09:19:10 -0400
Subject: [PATCH 029/122] Common/Fiber: Implement Rewind on Boost Context.

---
 src/common/fiber.cpp | 39 +++++++++++++++++++++++++++++++++++++--
 src/common/fiber.h   |  2 ++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index f61479e132..6ea314d751 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -113,7 +113,11 @@ std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
 
 struct Fiber::FiberImpl {
     alignas(64) std::array<u8, default_stack_size> stack;
+    u8* stack_limit;
+    alignas(64) std::array<u8, default_stack_size> rewind_stack;
+    u8* rewind_stack_limit;
     boost::context::detail::fcontext_t context;
+    boost::context::detail::fcontext_t rewind_context;
 };
 
 void Fiber::start(boost::context::detail::transfer_t& transfer) {
@@ -125,21 +129,43 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
+void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) {
+    ASSERT(impl->context != nullptr);
+    impl->context = impl->rewind_context;
+    impl->rewind_context = nullptr;
+    u8* tmp = impl->stack_limit;
+    impl->stack_limit = impl->rewind_stack_limit;
+    impl->rewind_stack_limit = tmp;
+    rewind_point(rewind_parameter);
+    UNREACHABLE();
+}
+
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
     fiber->start(transfer);
 }
 
+void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
+    auto fiber = static_cast<Fiber*>(transfer.data);
+    fiber->onRewind(transfer);
+}
+
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
     : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
       previous_fiber{} {
     impl = std::make_unique<FiberImpl>();
-    u8* stack_limit = impl->stack.data();
-    u8* stack_base = stack_limit + default_stack_size;
+    impl->stack_limit = impl->stack.data();
+    impl->rewind_stack_limit = impl->rewind_stack.data();
+    u8* stack_base = impl->stack_limit + default_stack_size;
     impl->context =
         boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }
 
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+    rewind_point = std::move(rewind_func);
+    rewind_parameter = start_parameter;
+}
+
 Fiber::Fiber() {
     impl = std::make_unique<FiberImpl>();
 }
@@ -161,6 +187,15 @@ void Fiber::Exit() {
     guard.unlock();
 }
 
+void Fiber::Rewind() {
+    ASSERT(rewind_point);                    // SetRewindPoint() must have stored a callable
+    ASSERT(impl->rewind_context == nullptr); // no rewind context may already exist
+    u8* stack_base = impl->rewind_stack_limit + default_stack_size; // one past the buffer's end
+    impl->rewind_context = // NOTE(review): size below comes from impl->stack (same size today)
+        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
+    boost::context::detail::jump_fcontext(impl->rewind_context, this);
+}
+
 void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
diff --git a/src/common/fiber.h b/src/common/fiber.h
index 3bbd506b50..cab7bc4b5d 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -70,8 +70,10 @@ private:
     static void FiberStartFunc(void* fiber_parameter);
     static void RewindStartFunc(void* fiber_parameter);
 #else
+    void onRewind(boost::context::detail::transfer_t& transfer);
     void start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+    static void RewindStartFunc(boost::context::detail::transfer_t transfer);
 #endif
 
     struct FiberImpl;

From 1333be6f6dc4d490e35666f3f81772382efefd95 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 13 May 2020 13:49:36 -0400
Subject: [PATCH 030/122] Common/Fiber: Address Feedback and Correct Memory
 leaks.

---
 src/common/fiber.cpp | 58 ++++++++++++++++++++++++--------------------
 src/common/fiber.h   | 17 +++++++------
 2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
index 6ea314d751..f97ad433b4 100644
--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -21,7 +21,7 @@ struct Fiber::FiberImpl {
     LPVOID rewind_handle = nullptr;
 };
 
-void Fiber::start() {
+void Fiber::Start() {
     ASSERT(previous_fiber != nullptr);
     previous_fiber->guard.unlock();
     previous_fiber.reset();
@@ -29,7 +29,7 @@ void Fiber::start() {
     UNREACHABLE();
 }
 
-void Fiber::onRewind() {
+void Fiber::OnRewind() {
     ASSERT(impl->handle != nullptr);
     DeleteFiber(impl->handle);
     impl->handle = impl->rewind_handle;
@@ -38,14 +38,14 @@ void Fiber::onRewind() {
     UNREACHABLE();
 }
 
-void __stdcall Fiber::FiberStartFunc(void* fiber_parameter) {
+void Fiber::FiberStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->start();
+    fiber->Start();
 }
 
-void __stdcall Fiber::RewindStartFunc(void* fiber_parameter) {
+void Fiber::RewindStartFunc(void* fiber_parameter) {
     auto fiber = static_cast<Fiber*>(fiber_parameter);
-    fiber->onRewind();
+    fiber->OnRewind();
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
@@ -59,8 +59,11 @@ Fiber::Fiber() {
 }
 
 Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
     // Make sure the Fiber is not being used
-    bool locked = guard.try_lock();
+    const bool locked = guard.try_lock();
     ASSERT_MSG(locked, "Destroying a fiber that's still running");
     if (locked) {
         guard.unlock();
@@ -75,6 +78,7 @@ void Fiber::Exit() {
     }
     ConvertFiberToThread();
     guard.unlock();
+    released = true;
 }
 
 void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
@@ -89,22 +93,21 @@ void Fiber::Rewind() {
     SwitchToFiber(impl->rewind_handle);
 }
 
-void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     SwitchToFiber(to->impl->handle);
-    auto previous_fiber = from->previous_fiber;
-    ASSERT(previous_fiber != nullptr);
-    previous_fiber->guard.unlock();
-    previous_fiber.reset();
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
     std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
     fiber->guard.lock();
-    fiber->impl->handle = ConvertThreadToFiber(NULL);
+    fiber->impl->handle = ConvertThreadToFiber(nullptr);
     fiber->is_thread_fiber = true;
     return fiber;
 }
@@ -120,7 +123,7 @@ struct Fiber::FiberImpl {
     boost::context::detail::fcontext_t rewind_context;
 };
 
-void Fiber::start(boost::context::detail::transfer_t& transfer) {
+void Fiber::Start(boost::context::detail::transfer_t& transfer) {
     ASSERT(previous_fiber != nullptr);
     previous_fiber->impl->context = transfer.fctx;
     previous_fiber->guard.unlock();
@@ -129,7 +132,7 @@ void Fiber::start(boost::context::detail::transfer_t& transfer) {
     UNREACHABLE();
 }
 
-void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transfer) {
+void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
     ASSERT(impl->context != nullptr);
     impl->context = impl->rewind_context;
     impl->rewind_context = nullptr;
@@ -142,17 +145,16 @@ void Fiber::onRewind(boost::context::detail::transfer_t& [[maybe_unused]] transf
 
 void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
-    fiber->start(transfer);
+    fiber->Start(transfer);
 }
 
 void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
     auto fiber = static_cast<Fiber*>(transfer.data);
-    fiber->onRewind(transfer);
+    fiber->OnRewind(transfer);
 }
 
 Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
-    : guard{}, entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
-      previous_fiber{} {
+    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
     impl = std::make_unique<FiberImpl>();
     impl->stack_limit = impl->stack.data();
     impl->rewind_stack_limit = impl->rewind_stack.data();
@@ -171,8 +173,11 @@ Fiber::Fiber() {
 }
 
 Fiber::~Fiber() {
+    if (released) {
+        return;
+    }
     // Make sure the Fiber is not being used
-    bool locked = guard.try_lock();
+    const bool locked = guard.try_lock();
     ASSERT_MSG(locked, "Destroying a fiber that's still running");
     if (locked) {
         guard.unlock();
@@ -180,11 +185,13 @@ Fiber::~Fiber() {
 }
 
 void Fiber::Exit() {
+
     ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
     if (!is_thread_fiber) {
         return;
     }
     guard.unlock();
+    released = true;
 }
 
 void Fiber::Rewind() {
@@ -196,17 +203,16 @@ void Fiber::Rewind() {
     boost::context::detail::jump_fcontext(impl->rewind_context, this);
 }
 
-void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
     ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
     ASSERT_MSG(to != nullptr, "Next fiber is null!");
     to->guard.lock();
     to->previous_fiber = from;
     auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
-    auto previous_fiber = from->previous_fiber;
-    ASSERT(previous_fiber != nullptr);
-    previous_fiber->impl->context = transfer.fctx;
-    previous_fiber->guard.unlock();
-    previous_fiber.reset();
+    ASSERT(from->previous_fiber != nullptr);
+    from->previous_fiber->impl->context = transfer.fctx;
+    from->previous_fiber->guard.unlock();
+    from->previous_fiber.reset();
 }
 
 std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
diff --git a/src/common/fiber.h b/src/common/fiber.h
index cab7bc4b5d..dafc1100e5 100644
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -46,7 +46,7 @@ public:
 
     /// Yields control from Fiber 'from' to Fiber 'to'
     /// Fiber 'from' must be the currently running fiber.
-    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
+    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
     static std::shared_ptr<Fiber> ThreadToFiber();
 
     void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
@@ -65,13 +65,13 @@ private:
     Fiber();
 
 #if defined(_WIN32) || defined(WIN32)
-    void onRewind();
-    void start();
+    void OnRewind();
+    void Start();
     static void FiberStartFunc(void* fiber_parameter);
     static void RewindStartFunc(void* fiber_parameter);
 #else
-    void onRewind(boost::context::detail::transfer_t& transfer);
-    void start(boost::context::detail::transfer_t& transfer);
+    void OnRewind(boost::context::detail::transfer_t& transfer);
+    void Start(boost::context::detail::transfer_t& transfer);
     static void FiberStartFunc(boost::context::detail::transfer_t transfer);
     static void RewindStartFunc(boost::context::detail::transfer_t transfer);
 #endif
@@ -79,13 +79,14 @@ private:
     struct FiberImpl;
 
     SpinLock guard{};
-    std::function<void(void*)> entry_point{};
-    std::function<void(void*)> rewind_point{};
+    std::function<void(void*)> entry_point;
+    std::function<void(void*)> rewind_point;
     void* rewind_parameter{};
     void* start_parameter{};
-    std::shared_ptr<Fiber> previous_fiber{};
+    std::shared_ptr<Fiber> previous_fiber;
     std::unique_ptr<FiberImpl> impl;
     bool is_thread_fiber{};
+    bool released{};
 };
 
 } // namespace Common

From 8a45acb7e60656d9fe06db290336c38a7fa59328 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 14 May 2020 14:10:49 -0400
Subject: [PATCH 031/122] Tests/HostTiming: Correct GCC Compile error.

---
 src/tests/core/host_timing.cpp | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
index ed060be55c..5562540983 100644
--- a/src/tests/core/host_timing.cpp
+++ b/src/tests/core/host_timing.cpp
@@ -22,7 +22,6 @@ static std::array<s64, 5> delays{};
 
 static std::bitset<CB_IDS.size()> callbacks_ran_flags;
 static u64 expected_callback = 0;
-static s64 lateness = 0;
 
 template <unsigned int IDX>
 void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
@@ -34,8 +33,6 @@ void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
     ++expected_callback;
 }
 
-static u64 callbacks_done = 0;
-
 struct ScopeInit final {
     ScopeInit() {
         core_timing.Initialize();
@@ -47,6 +44,20 @@ struct ScopeInit final {
     Core::HostTiming::CoreTiming core_timing;
 };
 
+#pragma optimize("", off)
+
+static u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
+    // Measures how long 1000 back-to-back clock reads take; `sink` only accumulates them.
+    const u64 begin_ns = core_timing.GetGlobalTimeNs().count();
+    u64 sink = 0;
+    for (std::size_t reads_left = 1000; reads_left != 0; --reads_left) {
+        sink += core_timing.GetGlobalTimeNs().count();
+    }
+    return core_timing.GetGlobalTimeNs().count() - begin_ns;
+}
+
+#pragma optimize("", on)
+
 TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
@@ -85,18 +96,6 @@ TEST_CASE("HostTiming[BasicOrder]", "[core]") {
     }
 }
 
-#pragma optimize("", off)
-u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
-    u64 start = core_timing.GetGlobalTimeNs().count();
-    u64 placebo = 0;
-    for (std::size_t i = 0; i < 1000; i++) {
-        placebo += core_timing.GetGlobalTimeNs().count();
-    }
-    u64 end = core_timing.GetGlobalTimeNs().count();
-    return (end - start);
-}
-#pragma optimize("", on)
-
 TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;

From 1c1a5a3401049adb87f5e4584058c53d95996579 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 24 Feb 2020 22:04:12 -0400
Subject: [PATCH 032/122] General: Recover Prometheus project from harddrive
 failure

This commit implements CPU interrupts, replaces cycle timing with host
timing, reworks the kernel's scheduler, introduces the idle and
suspended states, recreates the bootmanager, and initializes the
multicore system.
---
 src/common/thread.cpp                         |   6 +
 src/core/CMakeLists.txt                       |   4 +-
 src/core/arm/arm_interface.h                  |   5 +-
 src/core/arm/cpu_interrupt_handler.cpp        |  29 ++
 src/core/arm/cpu_interrupt_handler.h          |  39 ++
 src/core/arm/dynarmic/arm_dynarmic_32.cpp     |   6 +-
 src/core/arm/dynarmic/arm_dynarmic_32.h       |   4 +-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp     |  28 +-
 src/core/arm/dynarmic/arm_dynarmic_64.h       |   4 +-
 src/core/arm/unicorn/arm_unicorn.cpp          |  14 +-
 src/core/arm/unicorn/arm_unicorn.h            |   3 +-
 src/core/core.cpp                             |  57 +--
 src/core/core.h                               |  34 +-
 src/core/core_manager.cpp                     |   4 +-
 src/core/core_timing.cpp                      | 212 ++++-----
 src/core/core_timing.h                        | 106 ++---
 src/core/cpu_manager.cpp                      | 196 ++++++--
 src/core/cpu_manager.h                        |  51 ++-
 src/core/hle/kernel/kernel.cpp                |  84 +++-
 src/core/hle/kernel/kernel.h                  |  19 +
 src/core/hle/kernel/physical_core.cpp         |  37 +-
 src/core/hle/kernel/physical_core.h           |  21 +
 src/core/hle/kernel/process.cpp               |  17 +-
 src/core/hle/kernel/scheduler.cpp             | 419 +++++++++++++-----
 src/core/hle/kernel/scheduler.h               |  98 ++--
 src/core/hle/kernel/svc.cpp                   |  21 +-
 src/core/hle/kernel/thread.cpp                | 232 ++++------
 src/core/hle/kernel/thread.h                  |  81 +++-
 src/core/hle/kernel/time_manager.cpp          |   2 +-
 .../hle/service/hid/controllers/debug_pad.cpp |   2 +-
 .../hle/service/hid/controllers/gesture.cpp   |   2 +-
 .../hle/service/hid/controllers/keyboard.cpp  |   2 +-
 .../hle/service/hid/controllers/mouse.cpp     |   2 +-
 src/core/hle/service/hid/controllers/npad.cpp |   2 +-
 .../hle/service/hid/controllers/stubbed.cpp   |   2 +-
 .../service/hid/controllers/touchscreen.cpp   |   4 +-
 src/core/hle/service/hid/controllers/xpad.cpp |   2 +-
 src/core/hle/service/hid/hid.cpp              |  16 +-
 src/core/hle/service/hid/irs.cpp              |   2 +-
 .../service/nvdrv/devices/nvhost_ctrl_gpu.cpp |   3 +-
 src/core/hle/service/nvflinger/nvflinger.cpp  |  13 +-
 .../time/standard_steady_clock_core.cpp       |   5 +-
 .../time/tick_based_steady_clock_core.cpp     |   5 +-
 src/core/hle/service/time/time.cpp            |   5 +-
 .../hle/service/time/time_sharedmemory.cpp    |   3 +-
 src/core/memory.cpp                           |  11 +-
 src/core/memory.h                             |   2 +-
 src/core/memory/cheat_engine.cpp              |   8 +-
 src/core/tools/freezer.cpp                    |   8 +-
 src/tests/CMakeLists.txt                      |   1 -
 src/tests/core/core_timing.cpp                | 186 ++++----
 src/video_core/gpu.cpp                        |   5 +-
 src/yuzu/bootmanager.cpp                      |  32 +-
 src/yuzu/bootmanager.h                        |   7 +
 src/yuzu/debugger/wait_tree.cpp               |   6 +-
 src/yuzu_cmd/yuzu.cpp                         |   2 +-
 src/yuzu_tester/yuzu.cpp                      |   2 +-
 57 files changed, 1349 insertions(+), 824 deletions(-)
 create mode 100644 src/core/arm/cpu_interrupt_handler.cpp
 create mode 100644 src/core/arm/cpu_interrupt_handler.h

diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 0cd2d10bff..c9684aed93 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -70,6 +70,12 @@ void SetCurrentThreadName(const char* name) {
 }
 #endif
 
+#if defined(_WIN32)
+void SetCurrentThreadName(const char* name) {
+    // Do Nothing on MingW
+}
+#endif
+
 #endif
 
 } // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index c0d0683769..e65524b8a9 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -7,6 +7,8 @@ endif()
 add_library(core STATIC
     arm/arm_interface.h
     arm/arm_interface.cpp
+    arm/cpu_interrupt_handler.cpp
+    arm/cpu_interrupt_handler.h
     arm/exclusive_monitor.cpp
     arm/exclusive_monitor.h
     arm/unicorn/arm_unicorn.cpp
@@ -547,8 +549,6 @@ add_library(core STATIC
     hle/service/vi/vi_u.h
     hle/service/wlan/wlan.cpp
     hle/service/wlan/wlan.h
-    host_timing.cpp
-    host_timing.h
     loader/deconstructed_rom_directory.cpp
     loader/deconstructed_rom_directory.h
     loader/elf.cpp
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index cb2e640e2b..87a1c29cc9 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -18,11 +18,13 @@ enum class VMAPermission : u8;
 
 namespace Core {
 class System;
+class CPUInterruptHandler;
 
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_) : system{system_} {}
+    explicit ARM_Interface(System& system_, CPUInterruptHandler& interrupt_handler)
+        : system{system_}, interrupt_handler{interrupt_handler} {}
     virtual ~ARM_Interface() = default;
 
     struct ThreadContext32 {
@@ -175,6 +177,7 @@ public:
 protected:
     /// System context that this ARM interface is running under.
     System& system;
+    CPUInterruptHandler& interrupt_handler;
 };
 
 } // namespace Core
diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp
new file mode 100644
index 0000000000..2f1a1a269b
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.cpp
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/thread.h"
+#include "core/arm/cpu_interrupt_handler.h"
+
+namespace Core {
+
+/// Creates the event that AwaitInterrupt() waits on.
+CPUInterruptHandler::CPUInterruptHandler() : interrupt_event{std::make_unique<Common::Event>()} {}
+
+CPUInterruptHandler::~CPUInterruptHandler() = default;
+
+/// Updates the interrupt flag and, when raising it, signals the interrupt event.
+void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) {
+    // Store the flag first so a thread woken by the event does not read the old value.
+    this->is_interrupted = is_interrupted_;
+    if (is_interrupted_) {
+        interrupt_event->Set();
+    }
+}
+
+/// Waits on the interrupt event that SetInterrupt(true) sets.
+void CPUInterruptHandler::AwaitInterrupt() {
+    interrupt_event->Wait();
+}
+
+} // namespace Core
diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h
new file mode 100644
index 0000000000..91c31a2710
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.h
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+namespace Common {
+class Event;
+}
+
+namespace Core {
+
+class CPUInterruptHandler { // Interrupt flag paired with an event that can be waited on.
+public:
+    CPUInterruptHandler();
+    ~CPUInterruptHandler(); // out of line: Common::Event is only forward-declared here
+
+    CPUInterruptHandler(const CPUInterruptHandler&) = delete;
+    CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete;
+
+    CPUInterruptHandler(CPUInterruptHandler&&) = default;
+    CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default;
+
+    constexpr bool IsInterrupted() const { // current value of the interrupt flag
+        return is_interrupted;
+    }
+
+    void SetInterrupt(bool is_interrupted); // stores the flag; signals the event when true
+
+    void AwaitInterrupt(); // waits on the interrupt event
+
+private:
+    bool is_interrupted{}; // NOTE(review): plain bool; confirm it is never shared across threads
+    std::unique_ptr<Common::Event> interrupt_event; // event signalled by SetInterrupt(true)
+};
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9a..a081680f1d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -115,9 +115,9 @@ void ARM_Dynarmic_32::Step() {
     cb->InterpreterFallback(jit->Regs()[15], 1);
 }
 
-ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
-                                 std::size_t core_index)
-    : ARM_Interface{system},
+ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler,
+                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler},
       cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f1..e1e4882223 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -21,13 +21,15 @@ class Memory;
 
 namespace Core {
 
+class CPUInterruptHandler;
 class DynarmicCallbacks32;
 class DynarmicExclusiveMonitor;
 class System;
 
 class ARM_Dynarmic_32 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler,
+                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_32() override;
 
     void SetPC(u64 pc) override;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 337b97be94..2d0a9b6f09 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "common/page_table.h"
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/core.h"
 #include "core/core_manager.h"
@@ -108,23 +109,16 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
-        // rough approximation of the amount of executed ticks in the system, it may be thrown off
-        // if not all cores are doing a similar amount of work. Instead of doing this, we should
-        // device a way so that timing is consistent across all cores without increasing the ticks 4
-        // times.
-        u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
-        // Always execute at least one tick.
-        amortized_ticks = std::max<u64>(amortized_ticks, 1);
-
-        parent.system.CoreTiming().AddTicks(amortized_ticks);
-        num_interpreted_instructions = 0;
+        /// We are using host timing, NOP
     }
     u64 GetTicksRemaining() override {
-        return std::max(parent.system.CoreTiming().GetDowncount(), s64{0});
+        if (!parent.interrupt_handler.IsInterrupted()) {
+            return 1000ULL;
+        }
+        return 0ULL;
     }
     u64 GetCNTPCT() override {
-        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
+        return parent.system.CoreTiming().GetClockTicks();
     }
 
     ARM_Dynarmic_64& parent;
@@ -183,10 +177,10 @@ void ARM_Dynarmic_64::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
-                                 std::size_t core_index)
-    : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
-      inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
+ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler,
+                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
+      inner_unicorn{system, interrupt_handler, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
 ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 647cecaf0c..9e94b58c2d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -22,12 +22,14 @@ class Memory;
 namespace Core {
 
 class DynarmicCallbacks64;
+class CPUInterruptHandler;
 class DynarmicExclusiveMonitor;
 class System;
 
 class ARM_Dynarmic_64 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+    ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler,
+                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_64() override;
 
     void SetPC(u64 pc) override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index e40e9626a3..0393fe641d 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -6,6 +6,7 @@
 #include <unicorn/arm64.h>
 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
@@ -62,7 +63,8 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture)
+    : ARM_Interface{system, interrupt_handler} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
 
@@ -160,8 +162,12 @@ void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000U, 0U));
     } else {
-        ExecuteInstructions(
-            std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0}));
+        while (true) {
+            if (interrupt_handler.IsInterrupted()) {
+                return;
+            }
+            ExecuteInstructions(10);
+        }
     }
 }
 
@@ -183,8 +189,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
                            UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
     CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
     CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
-
-    system.CoreTiming().AddTicks(num_instructions);
     if (GDBStub::IsServerEnabled()) {
         if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
             uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 725c650851..0a4c087cd8 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -11,6 +11,7 @@
 
 namespace Core {
 
+class CPUInterruptHandler;
 class System;
 
 class ARM_Unicorn final : public ARM_Interface {
@@ -20,7 +21,7 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, Arch architecture);
+    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture);
     ~ARM_Unicorn() override;
 
     void SetPC(u64 pc) override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index f9f8a30009..e8936b09d5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -11,7 +11,6 @@
 #include "common/string_util.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
 #include "core/device_memory.h"
@@ -117,23 +116,30 @@ struct System::Impl {
         : kernel{system}, fs_controller{system}, memory{system},
           cpu_manager{system}, reporter{system}, applet_manager{system} {}
 
-    CoreManager& CurrentCoreManager() {
-        return cpu_manager.GetCurrentCoreManager();
-    }
-
     Kernel::PhysicalCore& CurrentPhysicalCore() {
-        const auto index = cpu_manager.GetActiveCoreIndex();
-        return kernel.PhysicalCore(index);
+        return kernel.CurrentPhysicalCore();
     }
 
     Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) {
         return kernel.PhysicalCore(index);
     }
 
-    ResultStatus RunLoop(bool tight_loop) {
+    ResultStatus Run() {
         status = ResultStatus::Success;
 
-        cpu_manager.RunLoop(tight_loop);
+        kernel.Suspend(false);
+        core_timing.SyncPause(false);
+        cpu_manager.Pause(false);
+
+        return status;
+    }
+
+    ResultStatus Pause() {
+        status = ResultStatus::Success;
+
+        kernel.Suspend(true);
+        core_timing.SyncPause(true);
+        cpu_manager.Pause(true);
 
         return status;
     }
@@ -143,7 +149,7 @@ struct System::Impl {
 
         device_memory = std::make_unique<Core::DeviceMemory>(system);
 
-        core_timing.Initialize();
+        core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
         cpu_manager.Initialize();
 
@@ -387,20 +393,24 @@ struct System::Impl {
 System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;
 
-CoreManager& System::CurrentCoreManager() {
-    return impl->CurrentCoreManager();
+CpuManager& System::GetCpuManager() {
+    return impl->cpu_manager;
 }
 
-const CoreManager& System::CurrentCoreManager() const {
-    return impl->CurrentCoreManager();
+const CpuManager& System::GetCpuManager() const {
+    return impl->cpu_manager;
 }
 
-System::ResultStatus System::RunLoop(bool tight_loop) {
-    return impl->RunLoop(tight_loop);
+System::ResultStatus System::Run() {
+    return impl->Run();
+}
+
+System::ResultStatus System::Pause() {
+    return impl->Pause();
 }
 
 System::ResultStatus System::SingleStep() {
-    return RunLoop(false);
+    return ResultStatus::Success;
 }
 
 void System::InvalidateCpuInstructionCaches() {
@@ -444,7 +454,9 @@ const ARM_Interface& System::CurrentArmInterface() const {
 }
 
 std::size_t System::CurrentCoreIndex() const {
-    return impl->cpu_manager.GetActiveCoreIndex();
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+    return core;
 }
 
 Kernel::Scheduler& System::CurrentScheduler() {
@@ -497,15 +509,6 @@ const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
     return impl->GetPhysicalCore(core_index).ArmInterface();
 }
 
-CoreManager& System::GetCoreManager(std::size_t core_index) {
-    return impl->cpu_manager.GetCoreManager(core_index);
-}
-
-const CoreManager& System::GetCoreManager(std::size_t core_index) const {
-    ASSERT(core_index < NUM_CPU_CORES);
-    return impl->cpu_manager.GetCoreManager(core_index);
-}
-
 ExclusiveMonitor& System::Monitor() {
     return impl->kernel.GetExclusiveMonitor();
 }
diff --git a/src/core/core.h b/src/core/core.h
index acc53d6a1f..7f170fc54e 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -90,7 +90,7 @@ class InterruptManager;
 namespace Core {
 
 class ARM_Interface;
-class CoreManager;
+class CpuManager;
 class DeviceMemory;
 class ExclusiveMonitor;
 class FrameLimiter;
@@ -136,16 +136,18 @@ public:
     };
 
     /**
-     * Run the core CPU loop
-     * This function runs the core for the specified number of CPU instructions before trying to
-     * update hardware. This is much faster than SingleStep (and should be equivalent), as the CPU
-     * is not required to do a full dispatch with each instruction. NOTE: the number of instructions
-     * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
-     * update is requested (e.g. on a thread switch).
-     * @param tight_loop If false, the CPU single-steps.
-     * @return Result status, indicating whether or not the operation succeeded.
+     * Run the OS and Application
+     * This function will start emulation and run the relevant devices
      */
-    ResultStatus RunLoop(bool tight_loop = true);
+    ResultStatus Run();
+
+    /**
+     * Pause the OS and Application
+     * This function will pause emulation and stop the relevant devices
+     */
+    ResultStatus Pause();
+
+
 
     /**
      * Step the CPU one instruction
@@ -215,11 +217,9 @@ public:
     /// Gets a const reference to an ARM interface from the CPU core with the specified index
     const ARM_Interface& ArmInterface(std::size_t core_index) const;
 
-    /// Gets a CPU interface to the CPU core with the specified index
-    CoreManager& GetCoreManager(std::size_t core_index);
+    CpuManager& GetCpuManager();
 
-    /// Gets a CPU interface to the CPU core with the specified index
-    const CoreManager& GetCoreManager(std::size_t core_index) const;
+    const CpuManager& GetCpuManager() const;
 
     /// Gets a reference to the exclusive monitor
     ExclusiveMonitor& Monitor();
@@ -373,12 +373,6 @@ public:
 private:
     System();
 
-    /// Returns the currently running CPU core
-    CoreManager& CurrentCoreManager();
-
-    /// Returns the currently running CPU core
-    const CoreManager& CurrentCoreManager() const;
-
     /**
      * Initialize the emulated system.
      * @param emu_window Reference to the host-system window used for video output and keyboard
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
index b6b797c80b..45f0bb5470 100644
--- a/src/core/core_manager.cpp
+++ b/src/core/core_manager.cpp
@@ -34,7 +34,6 @@ void CoreManager::RunLoop(bool tight_loop) {
     // instead advance to the next event and try to yield to the next thread
     if (Kernel::GetCurrentThread() == nullptr) {
         LOG_TRACE(Core, "Core-{} idling", core_index);
-        core_timing.Idle();
     } else {
         if (tight_loop) {
             physical_core.Run();
@@ -42,7 +41,6 @@ void CoreManager::RunLoop(bool tight_loop) {
             physical_core.Step();
         }
     }
-    core_timing.Advance();
 
     Reschedule();
 }
@@ -59,7 +57,7 @@ void CoreManager::Reschedule() {
     // Lock the global kernel mutex when we manipulate the HLE state
     std::lock_guard lock(HLE::g_hle_lock);
 
-    global_scheduler.SelectThread(core_index);
+    // global_scheduler.SelectThread(core_index);
 
     physical_core.Scheduler().TryDoContextSwitch();
 }
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 46d4178c43..a3ce69790a 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -1,5 +1,5 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #include "core/core_timing.h"
@@ -10,20 +10,16 @@
 #include <tuple>
 
 #include "common/assert.h"
-#include "common/thread.h"
 #include "core/core_timing_util.h"
-#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
-constexpr int MAX_SLICE_LENGTH = 10000;
-
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
     return std::make_shared<EventType>(std::move(callback), std::move(name));
 }
 
 struct CoreTiming::Event {
-    s64 time;
+    u64 time;
     u64 fifo_order;
     u64 userdata;
     std::weak_ptr<EventType> type;
@@ -39,51 +35,74 @@ struct CoreTiming::Event {
     }
 };
 
-CoreTiming::CoreTiming() = default;
+CoreTiming::CoreTiming() {
+    clock =
+        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
+}
+
 CoreTiming::~CoreTiming() = default;
 
-void CoreTiming::Initialize() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    slice_length = MAX_SLICE_LENGTH;
-    global_timer = 0;
-    idled_cycles = 0;
-    current_context = 0;
-
-    // The time between CoreTiming being initialized and the first call to Advance() is considered
-    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
-    // executing the first cycle of each slice to prepare the slice length and downcount for
-    // that slice.
-    is_global_timer_sane = true;
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+    std::string name = "yuzu:HostTiming";
+    Common::SetCurrentThreadName(name.c_str());
+    instance.on_thread_init();
+    instance.ThreadLoop();
+}
 
+void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
+    on_thread_init = std::move(on_thread_init_);
     event_fifo_id = 0;
-
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
+    timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
 }
 
 void CoreTiming::Shutdown() {
+    paused = true;
+    shutting_down = true;
+    event.Set();
+    timer_thread->join();
     ClearPendingEvents();
+    timer_thread.reset();
+    has_started = false;
 }
 
-void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
-                               u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-    const s64 timeout = GetTicks() + cycles_into_future;
+void CoreTiming::Pause(bool is_paused) {
+    paused = is_paused;
+}
 
-    // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane) {
-        ForceExceptionCheck(cycles_into_future);
+void CoreTiming::SyncPause(bool is_paused) {
+    if (is_paused == paused && paused_set == paused) {
+        return;
     }
+    Pause(is_paused);
+    event.Set();
+    while (paused_set != is_paused)
+        ;
+}
+
+bool CoreTiming::IsRunning() const {
+    return !paused_set;
+}
+
+bool CoreTiming::HasPendingEvents() const {
+    return !(wait_set && event_queue.empty());
+}
+
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
+    basic_lock.lock();
+    const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
 
     event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
 
     std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    basic_lock.unlock();
+    event.Set();
 }
 
 void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-
+    basic_lock.lock();
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.userdata == userdata;
     });
@@ -93,23 +112,23 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
-u64 CoreTiming::GetTicks() const {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += accumulated_ticks;
-    }
-    return ticks;
+void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
+    ticks_count[core_index] += ticks;
 }
 
-u64 CoreTiming::GetIdleTicks() const {
-    return static_cast<u64>(idled_cycles);
+void CoreTiming::ResetTicks(std::size_t core_index) {
+    ticks_count[core_index] = 0;
 }
 
-void CoreTiming::AddTicks(u64 ticks) {
-    accumulated_ticks += ticks;
-    downcounts[current_context] -= static_cast<s64>(ticks);
+u64 CoreTiming::GetCPUTicks() const {
+    return clock->GetCPUCycles();
+}
+
+u64 CoreTiming::GetClockTicks() const {
+    return clock->GetClockCycles();
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -117,7 +136,7 @@ void CoreTiming::ClearPendingEvents() {
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -128,99 +147,64 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
-void CoreTiming::ForceExceptionCheck(s64 cycles) {
-    cycles = std::max<s64>(0, cycles);
-    if (downcounts[current_context] <= cycles) {
-        return;
-    }
-
-    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-    // here. Account for cycles already executed by adjusting the g.slice_length
-    downcounts[current_context] = static_cast<int>(cycles);
-}
-
-std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
-    const u64 original_context = current_context;
-    u64 next_context = (original_context + 1) % num_cpu_cores;
-    while (next_context != original_context) {
-        if (time_slice[next_context] >= needed_ticks) {
-            return {next_context};
-        } else if (time_slice[next_context] >= 0) {
-            return std::nullopt;
-        }
-        next_context = (next_context + 1) % num_cpu_cores;
-    }
-    return std::nullopt;
-}
-
-void CoreTiming::Advance() {
-    std::unique_lock<std::mutex> guard(inner_mutex);
-
-    const u64 cycles_executed = accumulated_ticks;
-    time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks);
-    global_timer += cycles_executed;
-
-    is_global_timer_sane = true;
+std::optional<u64> CoreTiming::Advance() {
+    advance_lock.lock();
+    basic_lock.lock();
+    global_timer = GetGlobalTimeNs().count();
 
     while (!event_queue.empty() && event_queue.front().time <= global_timer) {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        inner_mutex.unlock();
+        basic_lock.unlock();
 
         if (auto event_type{evt.type.lock()}) {
             event_type->callback(evt.userdata, global_timer - evt.time);
         }
 
-        inner_mutex.lock();
+        basic_lock.lock();
     }
 
-    is_global_timer_sane = false;
-
-    // Still events left (scheduled in the future)
     if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        const auto next_core = NextAvailableCore(needed_ticks);
-        if (next_core) {
-            downcounts[*next_core] = needed_ticks;
+        const u64 next_time = event_queue.front().time - global_timer;
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return next_time;
+    } else {
+        basic_lock.unlock();
+        advance_lock.unlock();
+        return std::nullopt;
+    }
+}
+
+void CoreTiming::ThreadLoop() {
+    has_started = true;
+    while (!shutting_down) {
+        while (!paused) {
+            paused_set = false;
+            const auto next_time = Advance();
+            if (next_time) {
+                std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                event.WaitFor(next_time_ns);
+            } else {
+                wait_set = true;
+                event.Wait();
+            }
+            wait_set = false;
         }
+        paused_set = true;
     }
-
-    accumulated_ticks = 0;
-
-    downcounts[current_context] = time_slice[current_context];
 }
 
-void CoreTiming::ResetRun() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    current_context = 0;
-    // Still events left (scheduled in the future)
-    if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        downcounts[current_context] = needed_ticks;
-    }
-
-    is_global_timer_sane = false;
-    accumulated_ticks = 0;
-}
-
-void CoreTiming::Idle() {
-    accumulated_ticks += downcounts[current_context];
-    idled_cycles += downcounts[current_context];
-    downcounts[current_context] = 0;
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+    return clock->GetTimeNS();
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
-}
-
-s64 CoreTiming::GetDowncount() const {
-    return downcounts[current_context];
+    return clock->GetTimeUS();
 }
 
 } // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index d50f4eb8a7..707c8ef0c2 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -1,19 +1,25 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #pragma once
 
+#include <atomic>
 #include <chrono>
 #include <functional>
 #include <memory>
 #include <mutex>
 #include <optional>
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/spin_lock.h"
+#include "common/thread.h"
 #include "common/threadsafe_queue.h"
+#include "common/wall_clock.h"
+#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
@@ -56,16 +62,30 @@ public:
 
     /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
     /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
-    void Initialize();
+    void Initialize(std::function<void(void)>&& on_thread_init_);
 
     /// Tears down all timing related functionality.
     void Shutdown();
 
-    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
-    /// event is scheduled earlier than the current values.
-    ///
-    /// Scheduling from a callback will not update the downcount until the Advance() completes.
-    void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
+    /// Pauses/Unpauses the execution of the timer thread.
+    void Pause(bool is_paused);
+
+    /// Pauses/Unpauses the execution of the timer thread and waits until paused.
+    void SyncPause(bool is_paused);
+
+    /// Checks if core timing is running.
+    bool IsRunning() const;
+
+    /// Checks if the timer thread has started.
+    bool HasStarted() const {
+        return has_started;
+    }
+
+    /// Checks if there are any pending time events.
+    bool HasPendingEvents() const;
+
+    /// Schedules an event in core timing
+    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
                        u64 userdata = 0);
 
     void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
@@ -73,41 +93,24 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
-    void ForceExceptionCheck(s64 cycles);
+    void AddTicks(std::size_t core_index, u64 ticks);
 
-    /// This should only be called from the emu thread, if you are calling it any other thread,
-    /// you are doing something evil
-    u64 GetTicks() const;
+    void ResetTicks(std::size_t core_index);
 
-    u64 GetIdleTicks() const;
+    /// Returns current time in emulated CPU cycles
+    u64 GetCPUTicks() const;
 
-    void AddTicks(u64 ticks);
-
-    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
-    /// the previous timing slice and begins the next one, you must Advance from the previous
-    /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
-    /// Advance() is required to initialize the slice length before the first cycle of emulated
-    /// instructions is executed.
-    void Advance();
-
-    /// Pretend that the main CPU has executed enough cycles to reach the next event.
-    void Idle();
+    /// Returns current time in emulated clock cycles
+    u64 GetClockTicks() const;
 
+    /// Returns current time in microseconds.
     std::chrono::microseconds GetGlobalTimeUs() const;
 
-    void ResetRun();
+    /// Returns current time in nanoseconds.
+    std::chrono::nanoseconds GetGlobalTimeNs() const;
 
-    s64 GetDowncount() const;
-
-    void SwitchContext(u64 new_context) {
-        current_context = new_context;
-    }
-
-    bool CanCurrentContextRun() const {
-        return time_slice[current_context] > 0;
-    }
-
-    std::optional<u64> NextAvailableCore(const s64 needed_ticks) const;
+    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
+    std::optional<u64> Advance();
 
 private:
     struct Event;
@@ -115,21 +118,14 @@ private:
     /// Clear all pending events. This should ONLY be done on exit.
     void ClearPendingEvents();
 
-    static constexpr u64 num_cpu_cores = 4;
+    static void ThreadEntry(CoreTiming& instance);
+    void ThreadLoop();
 
-    s64 global_timer = 0;
-    s64 idled_cycles = 0;
-    s64 slice_length = 0;
-    u64 accumulated_ticks = 0;
-    std::array<s64, num_cpu_cores> downcounts{};
-    // Slice of time assigned to each core per run.
-    std::array<s64, num_cpu_cores> time_slice{};
-    u64 current_context = 0;
+    std::unique_ptr<Common::WallClock> clock;
 
-    // Are we in a function that has been called from Advance()
-    // If events are scheduled from a function that gets called from Advance(),
-    // don't change slice_length and downcount.
-    bool is_global_timer_sane = false;
+    u64 global_timer = 0;
+
+    std::chrono::nanoseconds start_point;
 
     // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
     // We don't use std::priority_queue because we need to be able to serialize, unserialize and
@@ -139,8 +135,18 @@ private:
     u64 event_fifo_id = 0;
 
     std::shared_ptr<EventType> ev_lost;
+    Common::Event event{};
+    Common::SpinLock basic_lock{};
+    Common::SpinLock advance_lock{};
+    std::unique_ptr<std::thread> timer_thread;
+    std::atomic<bool> paused{};
+    std::atomic<bool> paused_set{};
+    std::atomic<bool> wait_set{};
+    std::atomic<bool> shutting_down{};
+    std::atomic<bool> has_started{};
+    std::function<void(void)> on_thread_init{};
 
-    std::mutex inner_mutex;
+    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
 };
 
 /// Creates a core timing event with the given name and callback.
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 70ddbdcca7..4948509923 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -2,80 +2,192 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/fiber.h"
+#include "common/thread.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
 
 namespace Core {
 
 CpuManager::CpuManager(System& system) : system{system} {}
 CpuManager::~CpuManager() = default;
 
+void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
+    cpu_manager.RunThread(core);
+}
+
 void CpuManager::Initialize() {
-    for (std::size_t index = 0; index < core_managers.size(); ++index) {
-        core_managers[index] = std::make_unique<CoreManager>(system, index);
+    running_mode = true;
+    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        core_data[core].host_thread =
+            std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
     }
 }
 
 void CpuManager::Shutdown() {
-    for (auto& cpu_core : core_managers) {
-        cpu_core.reset();
+    running_mode = false;
+    Pause(false);
+    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        core_data[core].host_thread->join();
     }
 }
 
-CoreManager& CpuManager::GetCoreManager(std::size_t index) {
-    return *core_managers.at(index);
+void CpuManager::GuestThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    cpu_manager->RunGuestThread();
 }
 
-const CoreManager& CpuManager::GetCoreManager(std::size_t index) const {
-    return *core_managers.at(index);
+void CpuManager::IdleThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    cpu_manager->RunIdleThread();
 }
 
-CoreManager& CpuManager::GetCurrentCoreManager() {
-    // Otherwise, use single-threaded mode active_core variable
-    return *core_managers[active_core];
+void CpuManager::SuspendThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    cpu_manager->RunSuspendThread();
 }
 
-const CoreManager& CpuManager::GetCurrentCoreManager() const {
-    // Otherwise, use single-threaded mode active_core variable
-    return *core_managers[active_core];
+std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
+    return std::function<void(void*)>(GuestThreadFunction);
 }
 
-void CpuManager::RunLoop(bool tight_loop) {
-    if (GDBStub::IsServerEnabled()) {
-        GDBStub::HandlePacket();
+std::function<void(void*)> CpuManager::GetIdleThreadStartFunc() {
+    return std::function<void(void*)>(IdleThreadFunction);
+}
 
-        // If the loop is halted and we want to step, use a tiny (1) number of instructions to
-        // execute. Otherwise, get out of the loop function.
-        if (GDBStub::GetCpuHaltFlag()) {
-            if (GDBStub::GetCpuStepFlag()) {
-                tight_loop = false;
-            } else {
-                return;
+std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() {
+    return std::function<void(void*)>(SuspendThreadFunction);
+}
+
+void* CpuManager::GetStartFuncParamater() {
+    return static_cast<void*>(this);
+}
+
+void CpuManager::RunGuestThread() {
+    auto& kernel = system.Kernel();
+    {
+        auto& sched = kernel.CurrentScheduler();
+        sched.OnThreadStart();
+    }
+    while (true) {
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        LOG_CRITICAL(Core_ARM, "Running Guest Thread");
+        physical_core.Idle();
+        LOG_CRITICAL(Core_ARM, "Leaving Guest Thread");
+        // physical_core.Run();
+        auto& scheduler = physical_core.Scheduler();
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::RunIdleThread() {
+    auto& kernel = system.Kernel();
+    while (true) {
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        LOG_CRITICAL(Core_ARM, "Running Idle Thread");
+        physical_core.Idle();
+        auto& scheduler = physical_core.Scheduler();
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::RunSuspendThread() {
+    LOG_CRITICAL(Core_ARM, "Suspending Thread Entered");
+    auto& kernel = system.Kernel();
+    {
+        auto& sched = kernel.CurrentScheduler();
+        sched.OnThreadStart();
+    }
+    while (true) {
+        auto core = kernel.GetCurrentHostThreadID();
+        auto& scheduler = kernel.CurrentScheduler();
+        Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+        LOG_CRITICAL(Core_ARM, "Suspending Core {}", core);
+        Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context);
+        LOG_CRITICAL(Core_ARM, "Unsuspending Core {}", core);
+        ASSERT(scheduler.ContextSwitchPending());
+        ASSERT(core == kernel.GetCurrentHostThreadID());
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::Pause(bool paused) {
+    if (!paused) {
+        bool all_not_barrier = false;
+        while (!all_not_barrier) {
+            all_not_barrier = true;
+            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+                all_not_barrier &=
+                    !core_data[core].is_running.load() && core_data[core].initialized.load();
             }
         }
-    }
-
-    auto& core_timing = system.CoreTiming();
-    core_timing.ResetRun();
-    bool keep_running{};
-    do {
-        keep_running = false;
-        for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
-            core_timing.SwitchContext(active_core);
-            if (core_timing.CanCurrentContextRun()) {
-                core_managers[active_core]->RunLoop(tight_loop);
-            }
-            keep_running |= core_timing.CanCurrentContextRun();
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            core_data[core].enter_barrier->Set();
         }
-    } while (keep_running);
-
-    if (GDBStub::IsServerEnabled()) {
-        GDBStub::SetCpuStepFlag(false);
+        if (paused_state.load()) {
+            bool all_barrier = false;
+            while (!all_barrier) {
+                all_barrier = true;
+                for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+                    all_barrier &=
+                        core_data[core].is_paused.load() && core_data[core].initialized.load();
+                }
+            }
+            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+                core_data[core].exit_barrier->Set();
+            }
+        }
+    } else {
+        /// Wait until all cores are paused.
+        bool all_barrier = false;
+        while (!all_barrier) {
+            all_barrier = true;
+            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+                all_barrier &=
+                    core_data[core].is_paused.load() && core_data[core].initialized.load();
+            }
+        }
+        /// Don't release the barrier
     }
+    paused_state = paused;
+}
+
+void CpuManager::RunThread(std::size_t core) {
+    /// Initialization
+    system.RegisterCoreThread(core);
+    std::string name = "yuzu:CoreHostThread_" + std::to_string(core);
+    Common::SetCurrentThreadName(name.c_str());
+    auto& data = core_data[core];
+    data.enter_barrier = std::make_unique<Common::Event>();
+    data.exit_barrier = std::make_unique<Common::Event>();
+    data.host_context = Common::Fiber::ThreadToFiber();
+    data.is_running = false;
+    data.initialized = true;
+    /// Running
+    while (running_mode) {
+        data.is_running = false;
+        data.enter_barrier->Wait();
+        auto& scheduler = system.Kernel().CurrentScheduler();
+        Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+        data.is_running = true;
+        Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext());
+        data.is_running = false;
+        data.is_paused = true;
+        data.exit_barrier->Wait();
+        data.is_paused = false;
+    }
+    /// Time to cleanup
+    data.host_context->Exit();
+    data.enter_barrier.reset();
+    data.exit_barrier.reset();
+    data.initialized = false;
 }
 
 } // namespace Core
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 97554d1bb6..8103ae857d 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -5,12 +5,18 @@
 #pragma once
 
 #include <array>
+#include <functional>
 #include <memory>
+#include <thread>
 #include "core/hardware_properties.h"
 
+namespace Common {
+class Event;
+class Fiber;
+} // namespace Common
+
 namespace Core {
 
-class CoreManager;
 class System;
 
 class CpuManager {
@@ -27,21 +33,40 @@ public:
     void Initialize();
     void Shutdown();
 
-    CoreManager& GetCoreManager(std::size_t index);
-    const CoreManager& GetCoreManager(std::size_t index) const;
+    void Pause(bool paused);
 
-    CoreManager& GetCurrentCoreManager();
-    const CoreManager& GetCurrentCoreManager() const;
-
-    std::size_t GetActiveCoreIndex() const {
-        return active_core;
-    }
-
-    void RunLoop(bool tight_loop);
+    std::function<void(void*)> GetGuestThreadStartFunc();
+    std::function<void(void*)> GetIdleThreadStartFunc();
+    std::function<void(void*)> GetSuspendThreadStartFunc();
+    void* GetStartFuncParamater();
 
 private:
-    std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers;
-    std::size_t active_core{}; ///< Active core, only used in single thread mode
+    static void GuestThreadFunction(void* cpu_manager);
+    static void IdleThreadFunction(void* cpu_manager);
+    static void SuspendThreadFunction(void* cpu_manager);
+
+    void RunGuestThread();
+    void RunIdleThread();
+    void RunSuspendThread();
+
+    static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
+
+    void RunThread(std::size_t core);
+
+    struct CoreData { // Per-core host state; core_data holds one entry per CPU core.
+        std::shared_ptr<Common::Fiber> host_context; // Set from Fiber::ThreadToFiber().
+        std::unique_ptr<Common::Event> enter_barrier; // RunThread() waits on it before running.
+        std::unique_ptr<Common::Event> exit_barrier; // RunThread() waits on it after running.
+        std::atomic<bool> is_running; // NOTE(review): <atomic> is not included here - confirm.
+        std::atomic<bool> is_paused; // True while RunThread() waits on exit_barrier.
+        std::atomic<bool> initialized; // True between RunThread() setup and cleanup.
+        std::unique_ptr<std::thread> host_thread; // presumably runs RunThread() - confirm.
+    };
+
+    std::atomic<bool> running_mode{};
+    std::atomic<bool> paused_state{};
+
+    std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
 
     System& system;
 };
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7655382fac..ba051a7d80 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -13,11 +13,13 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/thread.h"
 #include "core/arm/arm_interface.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
 #include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/client_port.h"
@@ -117,7 +119,9 @@ struct KernelCore::Impl {
         InitializeSystemResourceLimit(kernel);
         InitializeMemoryLayout();
         InitializeThreads();
-        InitializePreemption();
+        InitializePreemption(kernel);
+        InitializeSchedulers();
+        InitializeSuspendThreads();
     }
 
     void Shutdown() {
@@ -155,6 +159,12 @@ struct KernelCore::Impl {
         }
     }
 
+    void InitializeSchedulers() { // Calls Initialize() on the scheduler of every core.
+        for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; ++core_id) {
+            cores[core_id].Scheduler().Initialize();
+        }
+    }
+
     // Creates the default system resource limit
     void InitializeSystemResourceLimit(KernelCore& kernel) {
         system_resource_limit = ResourceLimit::Create(kernel);
@@ -178,10 +188,13 @@ struct KernelCore::Impl {
             Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback);
     }
 
-    void InitializePreemption() {
-        preemption_event =
-            Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
-                global_scheduler.PreemptThreads();
+    void InitializePreemption(KernelCore& kernel) {
+        preemption_event = Core::Timing::CreateEvent(
+            "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) {
+                {
+                    SchedulerLock lock(kernel);
+                    global_scheduler.PreemptThreads();
+                }
                 s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
                 system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
             });
@@ -190,6 +203,20 @@ struct KernelCore::Impl {
         system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
     }
 
+    void InitializeSuspendThreads() { // Creates one suspend thread per core in suspend_threads.
+        auto&& cpu_manager = system.GetCpuManager();
+        auto type =
+            static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND);
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+            std::string name = "Suspend Thread Id:" + std::to_string(core);
+            std::function<void(void*)> start_func = cpu_manager.GetSuspendThreadStartFunc();
+            void* start_param = cpu_manager.GetStartFuncParamater();
+            auto created = Thread::Create(system, type, name, 0, 0, 0, static_cast<u32>(core), 0,
+                                          nullptr, std::move(start_func), start_param);
+            suspend_threads[core] = std::move(created).Unwrap();
+        }
+    }
+
     void MakeCurrentProcess(Process* process) {
         current_process = process;
 
@@ -201,7 +228,10 @@ struct KernelCore::Impl {
             core.SetIs64Bit(process->Is64BitProcess());
         }
 
-        system.Memory().SetCurrentPageTable(*process);
+        u32 core_id = GetCurrentHostThreadID();
+        if (core_id < Core::Hardware::NUM_CPU_CORES) {
+            system.Memory().SetCurrentPageTable(*process, core_id);
+        }
     }
 
     void RegisterCoreThread(std::size_t core_id) {
@@ -219,7 +249,9 @@ struct KernelCore::Impl {
         std::unique_lock lock{register_thread_mutex};
         const std::thread::id this_id = std::this_thread::get_id();
         const auto it = host_thread_ids.find(this_id);
-        ASSERT(it == host_thread_ids.end());
+        if (it != host_thread_ids.end()) {
+            return;
+        }
         host_thread_ids[this_id] = registered_thread_ids++;
     }
 
@@ -343,6 +375,8 @@ struct KernelCore::Impl {
     std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
     std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
 
+    std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
+
     // System context
     Core::System& system;
 };
@@ -412,6 +446,26 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
     return impl->cores[id];
 }
 
+Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() {
+    const u32 host_thread_id = impl->GetCurrentHostThreadID();
+    ASSERT(host_thread_id < Core::Hardware::NUM_CPU_CORES); // Must index a CPU core.
+    return impl->cores[host_thread_id];
+}
+
+const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
+    const u32 host_thread_id = impl->GetCurrentHostThreadID();
+    ASSERT(host_thread_id < Core::Hardware::NUM_CPU_CORES); // Must index a CPU core.
+    return impl->cores[host_thread_id];
+}
+
+Kernel::Scheduler& KernelCore::CurrentScheduler() {
+    return CurrentPhysicalCore().Scheduler(); // Scheduler owned by CurrentPhysicalCore().
+}
+
+const Kernel::Scheduler& KernelCore::CurrentScheduler() const {
+    return CurrentPhysicalCore().Scheduler(); // Scheduler owned by CurrentPhysicalCore().
+}
+
 Kernel::Synchronization& KernelCore::Synchronization() {
     return impl->synchronization;
 }
@@ -557,4 +611,20 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {
     return *impl->time_shared_mem;
 }
 
+void KernelCore::Suspend(bool in_suspention) {
+    // Once an exceptional exit has been flagged, suspension is forced whatever the argument.
+    const bool suspend_requested = in_suspention || exception_exited;
+    const ThreadStatus new_status =
+        suspend_requested ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
+    SchedulerLock lock(*this);
+    for (auto& suspend_thread : impl->suspend_threads) {
+        suspend_thread->SetStatus(new_status);
+    }
+}
+
+void KernelCore::ExceptionalExit() {
+    exception_exited = true; // From now on Suspend() suspends regardless of its argument.
+    Suspend(true);
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 83de1f5427..5d32a83294 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -110,6 +110,18 @@ public:
     /// Gets the an instance of the respective physical CPU core.
     const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
 
+    /// Gets the sole instance of the Scheduler at the current running core.
+    Kernel::Scheduler& CurrentScheduler();
+
+    /// Gets the sole instance of the Scheduler at the current running core.
+    const Kernel::Scheduler& CurrentScheduler() const;
+
+    /// Gets an instance of the current physical CPU core.
+    Kernel::PhysicalCore& CurrentPhysicalCore();
+
+    /// Gets an instance of the current physical CPU core.
+    const Kernel::PhysicalCore& CurrentPhysicalCore() const;
+
     /// Gets the an instance of the Synchronization Interface.
     Kernel::Synchronization& Synchronization();
 
@@ -191,6 +203,12 @@ public:
     /// Gets the shared memory object for Time services.
     const Kernel::SharedMemory& GetTimeSharedMem() const;
 
+    /// Suspend/unsuspend the OS.
+    void Suspend(bool in_suspention);
+
+    /// Exceptional exit the OS.
+    void ExceptionalExit();
+
 private:
     friend class Object;
     friend class Process;
@@ -219,6 +237,7 @@ private:
 
     struct Impl;
     std::unique_ptr<Impl> impl;
+    bool exception_exited{};
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index a150110767..69202540be 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -2,12 +2,15 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/spin_lock.h"
 #include "core/arm/arm_interface.h"
 #ifdef ARCHITECTURE_x86_64
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #endif
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
@@ -19,21 +22,23 @@ namespace Kernel {
 
 PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
                            Core::ExclusiveMonitor& exclusive_monitor)
-    : core_index{id} {
+    : interrupt_handler{}, core_index{id} {
 #ifdef ARCHITECTURE_x86_64
-    arm_interface_32 =
-        std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index);
-    arm_interface_64 =
-        std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);
-
+    arm_interface_32 = std::make_unique<Core::ARM_Dynarmic_32>(system, interrupt_handler,
+                                                               exclusive_monitor, core_index);
+    arm_interface_64 = std::make_unique<Core::ARM_Dynarmic_64>(system, interrupt_handler,
+                                                               exclusive_monitor, core_index);
 #else
     using Core::ARM_Unicorn;
-    arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32);
-    arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64);
+    arm_interface_32 =
+        std::make_unique<ARM_Unicorn>(system, interrupt_handler, ARM_Unicorn::Arch::AArch32);
+    arm_interface_64 =
+        std::make_unique<ARM_Unicorn>(system, interrupt_handler, ARM_Unicorn::Arch::AArch64);
     LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
 
     scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
+    guard = std::make_unique<Common::SpinLock>(); // Taken by Interrupt() and ClearInterrupt().
 }
 
 PhysicalCore::~PhysicalCore() = default;
@@ -47,6 +52,10 @@ void PhysicalCore::Step() {
     arm_interface->Step();
 }
 
+void PhysicalCore::Idle() {
+    interrupt_handler.AwaitInterrupt(); // presumably blocks until interrupted - TODO confirm
+}
+
 void PhysicalCore::Stop() {
     arm_interface->PrepareReschedule();
 }
@@ -63,4 +72,16 @@ void PhysicalCore::SetIs64Bit(bool is_64_bit) {
     }
 }
 
+void PhysicalCore::Interrupt() {
+    guard->lock(); // Same lock as ClearInterrupt(), so the two updates cannot interleave.
+    interrupt_handler.SetInterrupt(true);
+    guard->unlock();
+}
+
+void PhysicalCore::ClearInterrupt() {
+    guard->lock(); // Same lock as Interrupt(), so the two updates cannot interleave.
+    interrupt_handler.SetInterrupt(false);
+    guard->unlock();
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 3269166bec..c3da30b721 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -7,6 +7,12 @@
 #include <cstddef>
 #include <memory>
 
+#include "core/arm/cpu_interrupt_handler.h"
+
+namespace Common {
+class SpinLock;
+} // namespace Common
+
 namespace Kernel {
 class Scheduler;
 } // namespace Kernel
@@ -32,11 +38,24 @@ public:
 
     /// Execute current jit state
     void Run();
+    /// Set this core in IdleState.
+    void Idle();
     /// Execute a single instruction in current jit.
     void Step();
     /// Stop JIT execution/exit
     void Stop();
 
+    /// Interrupt this physical core.
+    void Interrupt();
+
+    /// Clear this core's interrupt
+    void ClearInterrupt();
+
+    /// Check if this core is interrupted
+    bool IsInterrupted() const {
+        return interrupt_handler.IsInterrupted();
+    }
+
     // Shutdown this physical core.
     void Shutdown();
 
@@ -71,11 +90,13 @@ public:
     void SetIs64Bit(bool is_64_bit);
 
 private:
+    Core::CPUInterruptHandler interrupt_handler;
     std::size_t core_index;
     std::unique_ptr<Core::ARM_Interface> arm_interface_32;
     std::unique_ptr<Core::ARM_Interface> arm_interface_64;
     std::unique_ptr<Kernel::Scheduler> scheduler;
     Core::ARM_Interface* arm_interface{};
+    std::unique_ptr<Common::SpinLock> guard;
 };
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f5..b9719389ee 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -30,14 +30,15 @@ namespace {
 /**
  * Sets up the primary application thread
  *
+ * @param system The system instance to create the main thread under.
  * @param owner_process The parent process for the main thread
- * @param kernel The kernel instance to create the main thread under.
  * @param priority The priority to give the main thread
  */
-void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) {
+void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
     const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
-    auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
-                                     owner_process.GetIdealCore(), stack_top, owner_process);
+    ThreadType type = THREADTYPE_USER;
+    auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0,
+                                     owner_process.GetIdealCore(), stack_top, &owner_process);
 
     std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap();
 
@@ -48,8 +49,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V
     thread->GetContext32().cpu_registers[1] = thread_handle;
     thread->GetContext64().cpu_registers[1] = thread_handle;
 
+    auto& kernel = system.Kernel();
     // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
-    thread->ResumeFromWait();
+    {
+        SchedulerLock lock{kernel};
+        thread->SetStatus(ThreadStatus::Ready);
+    }
 }
 } // Anonymous namespace
 
@@ -292,7 +297,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
 
     ChangeStatus(ProcessStatus::Running);
 
-    SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top);
+    SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
     resource_limit->Reserve(ResourceType::Threads, 1);
     resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size);
 }
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 1140c72a34..5166020a00 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -11,11 +11,15 @@
 #include <utility>
 
 #include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/fiber.h"
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/cpu_manager.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/time_manager.h"
@@ -27,78 +31,108 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
 GlobalScheduler::~GlobalScheduler() = default;
 
 void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) {
+    global_list_guard.lock(); // Held while thread_list is modified.
     thread_list.push_back(std::move(thread));
+    global_list_guard.unlock();
 }
 
 void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
+    global_list_guard.lock(); // Held while thread_list is modified.
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
+    global_list_guard.unlock();
 }
 
-void GlobalScheduler::UnloadThread(std::size_t core) {
-    Scheduler& sched = kernel.Scheduler(core);
-    sched.UnloadThread();
-}
-
-void GlobalScheduler::SelectThread(std::size_t core) {
+/// Picks the thread each core should run next, moving suggested threads onto idle cores.
+/// @returns Bitmask of cores whose selected thread differs from their current thread.
+u32 GlobalScheduler::SelectThreads() {
     const auto update_thread = [](Thread* thread, Scheduler& sched) {
+        sched.guard.lock();
         if (thread != sched.selected_thread.get()) {
             if (thread == nullptr) {
                 ++sched.idle_selection_count;
             }
             sched.selected_thread = SharedFrom(thread);
         }
-        sched.is_context_switch_pending = sched.selected_thread != sched.current_thread;
+        const bool reschedule_pending = sched.selected_thread != sched.current_thread;
+        sched.is_context_switch_pending = reschedule_pending;
         std::atomic_thread_fence(std::memory_order_seq_cst);
+        sched.guard.unlock();
+        return reschedule_pending;
     };
-    Scheduler& sched = kernel.Scheduler(core);
-    Thread* current_thread = nullptr;
+    if (!is_reselection_pending.load()) {
+        return 0;
+    }
+    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{};
+    u32 idle_cores{};
     // Step 1: Get top thread in schedule queue.
-    current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
-    if (current_thread) {
-        update_thread(current_thread, sched);
-        return;
-    }
-    // Step 2: Try selecting a suggested thread.
-    Thread* winner = nullptr;
-    std::set<s32> sug_cores;
-    for (auto thread : suggested_queue[core]) {
-        s32 this_core = thread->GetProcessorID();
-        Thread* thread_on_core = nullptr;
-        if (this_core >= 0) {
-            thread_on_core = scheduled_queue[this_core].front();
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        Thread* top_thread =
+            scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
+        if (top_thread != nullptr) {
+            // TODO(Blinkhawk): Implement Thread Pinning
+        } else {
+            idle_cores |= (1ul << core);
         }
-        if (this_core < 0 || thread != thread_on_core) {
-            winner = thread;
-            break;
-        }
-        sug_cores.insert(this_core);
+        top_threads[core] = top_thread;
     }
-    // if we got a suggested thread, select it, else do a second pass.
-    if (winner && winner->GetPriority() > 2) {
-        if (winner->IsRunning()) {
-            UnloadThread(static_cast<u32>(winner->GetProcessorID()));
-        }
-        TransferToCore(winner->GetPriority(), static_cast<s32>(core), winner);
-        update_thread(winner, sched);
-        return;
-    }
-    // Step 3: Select a suggested thread from another core
-    for (auto& src_core : sug_cores) {
-        auto it = scheduled_queue[src_core].begin();
-        it++;
-        if (it != scheduled_queue[src_core].end()) {
-            Thread* thread_on_core = scheduled_queue[src_core].front();
-            Thread* to_change = *it;
-            if (thread_on_core->IsRunning() || to_change->IsRunning()) {
-                UnloadThread(static_cast<u32>(src_core));
+
+    while (idle_cores != 0) {
+        u32 core_id = Common::CountTrailingZeroes32(idle_cores);
+
+        if (!suggested_queue[core_id].empty()) {
+            std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{};
+            std::size_t num_candidates = 0;
+            auto iter = suggested_queue[core_id].begin();
+            Thread* suggested = nullptr;
+            // Step 2: Try selecting a suggested thread.
+            while (iter != suggested_queue[core_id].end()) {
+                suggested = *iter;
+                iter++;
+                s32 suggested_core_id = suggested->GetProcessorID();
+                Thread* top_thread =
+                    suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
+                if (top_thread != suggested) {
+                    if (top_thread != nullptr &&
+                        top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
+                        suggested = nullptr; // Too high a priority on its core: cancel.
+                        break;
+                    }
+                    TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
+                    break;
+                }
+                suggested = nullptr; // Top thread of its own core: only a Step 3 candidate.
+                migration_candidates[num_candidates++] = suggested_core_id;
             }
-            TransferToCore(thread_on_core->GetPriority(), static_cast<s32>(core), thread_on_core);
-            current_thread = thread_on_core;
-            break;
+            // Step 3: Select a suggested thread from another core
+            if (suggested == nullptr) {
+                for (std::size_t i = 0; i < num_candidates; i++) {
+                    s32 candidate_core = migration_candidates[i];
+                    auto it = scheduled_queue[candidate_core].begin();
+                    it++;
+                    Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr;
+                    if (next != nullptr) {
+                        suggested = top_threads[candidate_core];
+                        TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id),
+                                       suggested);
+                        top_threads[candidate_core] = next;
+                        break;
+                    }
+                }
+            }
+            top_threads[core_id] = suggested;
+        }
+
+        idle_cores &= ~(1ul << core_id);
+    }
+    u32 cores_needing_context_switch{};
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        Scheduler& sched = kernel.Scheduler(core);
+        if (update_thread(top_threads[core], sched)) {
+            cores_needing_context_switch |= (1ul << core);
         }
     }
-    update_thread(current_thread, sched);
+    return cores_needing_context_switch;
 }
 
 bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
@@ -153,9 +187,6 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
 
     if (winner != nullptr) {
         if (winner != yielding_thread) {
-            if (winner->IsRunning()) {
-                UnloadThread(static_cast<u32>(winner->GetProcessorID()));
-            }
             TransferToCore(winner->GetPriority(), s32(core_id), winner);
         }
     } else {
@@ -195,9 +226,6 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
         }
         if (winner != nullptr) {
             if (winner != yielding_thread) {
-                if (winner->IsRunning()) {
-                    UnloadThread(static_cast<u32>(winner->GetProcessorID()));
-                }
                 TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);
             }
         } else {
@@ -213,7 +241,9 @@ void GlobalScheduler::PreemptThreads() {
         const u32 priority = preemption_priorities[core_id];
 
         if (scheduled_queue[core_id].size(priority) > 0) {
-            scheduled_queue[core_id].front(priority)->IncrementYieldCount();
+            if (scheduled_queue[core_id].size(priority) > 1) {
+                scheduled_queue[core_id].front(priority)->IncrementYieldCount();
+            }
             scheduled_queue[core_id].yield(priority);
             if (scheduled_queue[core_id].size(priority) > 1) {
                 scheduled_queue[core_id].front(priority)->IncrementYieldCount();
@@ -247,9 +277,6 @@ void GlobalScheduler::PreemptThreads() {
         }
 
         if (winner != nullptr) {
-            if (winner->IsRunning()) {
-                UnloadThread(static_cast<u32>(winner->GetProcessorID()));
-            }
             TransferToCore(winner->GetPriority(), s32(core_id), winner);
             current_thread =
                 winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
@@ -280,9 +307,6 @@ void GlobalScheduler::PreemptThreads() {
             }
 
             if (winner != nullptr) {
-                if (winner->IsRunning()) {
-                    UnloadThread(static_cast<u32>(winner->GetProcessorID()));
-                }
                 TransferToCore(winner->GetPriority(), s32(core_id), winner);
                 current_thread = winner;
             }
@@ -292,6 +316,28 @@ void GlobalScheduler::PreemptThreads() {
     }
 }
 
+/// Interrupts the cores in cores_pending_reschedule. When global_thread has a valid guest
+/// handle and a host handle below NUM_CPU_CORES, its own core switches context directly.
+void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
+                                                 Core::EmuThreadHandle global_thread) {
+    const u32 current_core = global_thread.host_handle;
+    const bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
+                                     (current_core < Core::Hardware::NUM_CPU_CORES);
+    while (cores_pending_reschedule != 0) {
+        const u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule);
+        ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+        cores_pending_reschedule &= ~(1ul << core);
+        const bool is_callers_core = must_context_switch && core == current_core;
+        if (!is_callers_core) {
+            kernel.PhysicalCore(core).Interrupt();
+        }
+    }
+    // Attempted whenever the flag is set, even if the caller's core bit was not pending.
+    if (must_context_switch) {
+        kernel.CurrentScheduler().TryDoContextSwitch();
+    }
+}
+
 void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
     suggested_queue[core].add(thread, priority);
 }
@@ -349,6 +395,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
     }
 }
 
+void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
+    // Requeues `thread` after a scheduling-state change; does nothing if the state is unchanged.
+    if (old_flags == thread->scheduling_state) {
+        return;
+    }
+    const u32 old_low_bits = old_flags & static_cast<u32>(ThreadSchedMasks::LowMask);
+    const bool was_runnable =
+        static_cast<ThreadSchedStatus>(old_low_bits) == ThreadSchedStatus::Runnable;
+    const bool became_runnable =
+        !was_runnable && thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable;
+    if (was_runnable || became_runnable) {
+        // was_runnable: the thread is pausing/exiting; otherwise it starts running again.
+        if (thread->processor_id >= 0) {
+            const u32 home_core = static_cast<u32>(thread->processor_id);
+            if (was_runnable) {
+                Unschedule(thread->current_priority, home_core, thread);
+            } else {
+                Schedule(thread->current_priority, home_core, thread);
+            }
+        }
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+            const bool has_affinity = ((thread->affinity_mask >> core) & 1) != 0;
+            if (core == static_cast<u32>(thread->processor_id) || !has_affinity) {
+                continue;
+            }
+            if (was_runnable) {
+                Unsuggest(thread->current_priority, core, thread);
+            } else {
+                Suggest(thread->current_priority, core, thread);
+            }
+        }
+    }
+    SetReselectionPending();
+}
+
+void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
+    // Only runnable threads are requeued: out at old_priority, back in at current_priority.
+    if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
+        return;
+    }
+    if (thread->processor_id >= 0) {
+        Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread);
+    }
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+        const bool has_affinity = ((thread->affinity_mask >> core) & 1) != 0;
+        if (has_affinity && core != static_cast<u32>(thread->processor_id)) {
+            Unsuggest(old_priority, core, thread);
+        }
+    }
+
+    if (thread->processor_id >= 0) {
+        // TODO(Blinkhawk): compare it with current thread running on current core, instead of
+        // checking running
+        if (!thread->IsRunning()) {
+            Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
+        } else {
+            SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id),
+                            thread);
+        }
+    }
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+        const bool has_affinity = ((thread->affinity_mask >> core) & 1) != 0;
+        if (has_affinity && core != static_cast<u32>(thread->processor_id)) {
+            Suggest(thread->current_priority, core, thread);
+        }
+    }
+
+    thread->IncrementYieldCount();
+    SetReselectionPending();
+}
+
+void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
+                                                 s32 old_core) {
+    const bool is_runnable = thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable;
+    if (!is_runnable || thread->current_priority >= THREADPRIO_COUNT) {
+        return;
+    }
+    // Leave every queue selected by the old mask, then join those selected by the new one.
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+        if (((old_affinity_mask >> core) & 1) == 0) {
+            continue;
+        }
+        if (core != static_cast<u32>(old_core)) {
+            Unsuggest(thread->current_priority, core, thread);
+        } else {
+            Unschedule(thread->current_priority, core, thread);
+        }
+    }
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+        if (((thread->affinity_mask >> core) & 1) == 0) {
+            continue;
+        }
+        if (core != static_cast<u32>(thread->processor_id)) {
+            Suggest(thread->current_priority, core, thread);
+        } else {
+            Schedule(thread->current_priority, core, thread);
+        }
+    }
+    thread->IncrementYieldCount();
+    SetReselectionPending();
+}
+
 void GlobalScheduler::Shutdown() {
     for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
         scheduled_queue[core].clear();
@@ -374,13 +522,12 @@ void GlobalScheduler::Unlock() {
         ASSERT(scope_lock > 0);
         return;
     }
-    for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-        SelectThread(i);
-    }
+    u32 cores_pending_reschedule = SelectThreads();
+    Core::EmuThreadHandle leaving_thread = current_owner;
     current_owner = Core::EmuThreadHandle::InvalidHandle();
     scope_lock = 1;
     inner_lock.unlock();
-    // TODO(Blinkhawk): Setup the interrupts and change context on current core.
+    EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
 }
 
 Scheduler::Scheduler(Core::System& system, std::size_t core_id)
@@ -393,56 +540,83 @@ bool Scheduler::HaveReadyThreads() const {
 }
 
 Thread* Scheduler::GetCurrentThread() const {
-    return current_thread.get();
+    if (current_thread) {
+        return current_thread.get();
+    }
+    return idle_thread.get();
 }
 
 Thread* Scheduler::GetSelectedThread() const {
     return selected_thread.get();
 }
 
-void Scheduler::SelectThreads() {
-    system.GlobalScheduler().SelectThread(core_id);
-}
-
 u64 Scheduler::GetLastContextSwitchTicks() const {
     return last_context_switch_time;
 }
 
 void Scheduler::TryDoContextSwitch() {
+    auto& phys_core = system.Kernel().CurrentPhysicalCore();
+    if (phys_core.IsInterrupted()) {
+        phys_core.ClearInterrupt();
+    }
+    guard.lock();
     if (is_context_switch_pending) {
         SwitchContext();
+    } else {
+        guard.unlock();
     }
 }
 
-void Scheduler::UnloadThread() {
-    Thread* const previous_thread = GetCurrentThread();
-    Process* const previous_process = system.Kernel().CurrentProcess();
+void Scheduler::OnThreadStart() {
+    SwitchContextStep2();
+}
 
-    UpdateLastContextSwitchTime(previous_thread, previous_process);
+void Scheduler::SwitchContextStep2() {
+    Thread* previous_thread = current_thread.get();
+    Thread* new_thread = selected_thread.get();
 
-    // Save context for previous thread
-    if (previous_thread) {
-        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
-        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
-        // Save the TPIDR_EL0 system register in case it was modified.
-        previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
+    // Load context of new thread
+    Process* const previous_process =
+        previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr;
 
-        if (previous_thread->GetStatus() == ThreadStatus::Running) {
-            // This is only the case when a reschedule is triggered without the current thread
-            // yielding execution (i.e. an event triggered, system core time-sliced, etc)
-            previous_thread->SetStatus(ThreadStatus::Ready);
+    if (new_thread) {
+        new_thread->context_guard.lock();
+        ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
+                   "Thread must be assigned to this core.");
+        ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
+                   "Thread must be ready to become running.");
+
+        // Make the selected thread this core's current thread and mark it as running
+        current_thread = SharedFrom(new_thread);
+        new_thread->SetStatus(ThreadStatus::Running);
+        new_thread->SetIsRunning(true);
+
+        auto* const thread_owner_process = current_thread->GetOwnerProcess();
+        if (previous_process != thread_owner_process && thread_owner_process != nullptr) {
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
-        previous_thread->SetIsRunning(false);
+        if (!new_thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
+            cpu_core.LoadContext(new_thread->GetContext32());
+            cpu_core.LoadContext(new_thread->GetContext64());
+            cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
+            cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+        }
+    } else {
+        current_thread = nullptr;
+        // Note: We do not reset the current process and current page table when idling because
+        // technically we haven't changed processes, our threads are just paused.
     }
-    current_thread = nullptr;
+    guard.unlock();
 }
 
 void Scheduler::SwitchContext() {
-    Thread* const previous_thread = GetCurrentThread();
-    Thread* const new_thread = GetSelectedThread();
+    Thread* previous_thread = current_thread.get();
+    Thread* new_thread = selected_thread.get();
 
     is_context_switch_pending = false;
     if (new_thread == previous_thread) {
+        guard.unlock();
         return;
     }
 
@@ -452,51 +626,44 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
-        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
-        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
-        // Save the TPIDR_EL0 system register in case it was modified.
-        previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
+        if (!previous_thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
+            cpu_core.SaveContext(previous_thread->GetContext32());
+            cpu_core.SaveContext(previous_thread->GetContext64());
+            // Save the TPIDR_EL0 system register in case it was modified.
+            previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
 
+        }
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
-            // This is only the case when a reschedule is triggered without the current thread
-            // yielding execution (i.e. an event triggered, system core time-sliced, etc)
             previous_thread->SetStatus(ThreadStatus::Ready);
         }
         previous_thread->SetIsRunning(false);
+        previous_thread->context_guard.unlock();
     }
 
-    // Load context of new thread
-    if (new_thread) {
-        ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
-                   "Thread must be assigned to this core.");
-        ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
-                   "Thread must be ready to become running.");
-
-        // Cancel any outstanding wakeup events for this thread
-        new_thread->CancelWakeupTimer();
-        current_thread = SharedFrom(new_thread);
-        new_thread->SetStatus(ThreadStatus::Running);
-        new_thread->SetIsRunning(true);
-
-        auto* const thread_owner_process = current_thread->GetOwnerProcess();
-        if (previous_process != thread_owner_process) {
-            system.Kernel().MakeCurrentProcess(thread_owner_process);
-        }
-
-        system.ArmInterface(core_id).LoadContext(new_thread->GetContext32());
-        system.ArmInterface(core_id).LoadContext(new_thread->GetContext64());
-        system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress());
-        system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+    std::shared_ptr<Common::Fiber> old_context;
+    if (previous_thread != nullptr) {
+        old_context = previous_thread->GetHostContext();
     } else {
-        current_thread = nullptr;
-        // Note: We do not reset the current process and current page table when idling because
-        // technically we haven't changed processes, our threads are just paused.
+        old_context = idle_thread->GetHostContext();
     }
+
+    std::shared_ptr<Common::Fiber> next_context;
+    if (new_thread != nullptr) {
+        next_context = new_thread->GetHostContext();
+    } else {
+        next_context = idle_thread->GetHostContext();
+    }
+
+    Common::Fiber::YieldTo(old_context, next_context);
+    /// When a thread wakes up, it may be running on another core, so use that core's scheduler.
+    auto& next_scheduler = system.Kernel().CurrentScheduler();
+    next_scheduler.SwitchContextStep2();
 }
 
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
+    const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
     const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
 
     if (thread != nullptr) {
@@ -510,6 +677,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     last_context_switch_time = most_recent_switch_ticks;
 }
 
+void Scheduler::Initialize() {
+    std::string name = "Idle Thread Id:" + std::to_string(core_id);
+    std::function<void(void*)> init_func = system.GetCpuManager().GetIdleThreadStartFunc();
+    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
+    ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
+    auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0,
+                                     nullptr, std::move(init_func), init_func_parameter);
+    idle_thread = std::move(thread_res).Unwrap();
+}
+
 void Scheduler::Shutdown() {
     current_thread = nullptr;
     selected_thread = nullptr;
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 07df33f9c3..16655b03fe 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -11,6 +11,7 @@
 
 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
+#include "common/spin_lock.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/thread.h"
 
@@ -41,41 +42,17 @@ public:
         return thread_list;
     }
 
-    /**
-     * Add a thread to the suggested queue of a cpu core. Suggested threads may be
-     * picked if no thread is scheduled to run on the core.
-     */
-    void Suggest(u32 priority, std::size_t core, Thread* thread);
+    /// Notify the scheduler a thread's status has changed.
+    void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags);
+
+    /// Notify the scheduler a thread's priority has changed.
+    void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority);
+
+    /// Notify the scheduler a thread's core and/or affinity mask has changed.
+    void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);
 
     /**
-     * Remove a thread to the suggested queue of a cpu core. Suggested threads may be
-     * picked if no thread is scheduled to run on the core.
-     */
-    void Unsuggest(u32 priority, std::size_t core, Thread* thread);
-
-    /**
-     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
-     * back the queue in its priority level.
-     */
-    void Schedule(u32 priority, std::size_t core, Thread* thread);
-
-    /**
-     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
-     * front the queue in its priority level.
-     */
-    void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
-
-    /// Reschedule an already scheduled thread based on a new priority
-    void Reschedule(u32 priority, std::size_t core, Thread* thread);
-
-    /// Unschedules a thread.
-    void Unschedule(u32 priority, std::size_t core, Thread* thread);
-
-    /// Selects a core and forces it to unload its current thread's context
-    void UnloadThread(std::size_t core);
-
-    /**
-     * Takes care of selecting the new scheduled thread in three steps:
+     * Takes care of selecting the new scheduled threads in three steps:
      *
      * 1. First a thread is selected from the top of the priority queue. If no thread
      *    is obtained then we move to step two, else we are done.
@@ -85,8 +62,10 @@ public:
      *
      * 3. Third is no suggested thread is found, we do a second pass and pick a running
      *    thread in another core and swap it with its current thread.
+     *
+     * returns the cores needing scheduling.
      */
-    void SelectThread(std::size_t core);
+    u32 SelectThreads();
 
     bool HaveReadyThreads(std::size_t core_id) const {
         return !scheduled_queue[core_id].empty();
@@ -149,6 +128,39 @@ private:
     /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling
     /// and reschedules current core if needed.
     void Unlock();
+
+    void EnableInterruptAndSchedule(u32 cores_pending_reschedule, Core::EmuThreadHandle global_thread);
+
+    /**
+     * Add a thread to the suggested queue of a cpu core. Suggested threads may be
+     * picked if no thread is scheduled to run on the core.
+     */
+    void Suggest(u32 priority, std::size_t core, Thread* thread);
+
+    /**
+     * Remove a thread from the suggested queue of a cpu core. Suggested threads may be
+     * picked if no thread is scheduled to run on the core.
+     */
+    void Unsuggest(u32 priority, std::size_t core, Thread* thread);
+
+    /**
+     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
+     * back of the queue in its priority level.
+     */
+    void Schedule(u32 priority, std::size_t core, Thread* thread);
+
+    /**
+     * Add a thread to the scheduling queue of a cpu core. The thread is added at the
+     * front of the queue in its priority level.
+     */
+    void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
+
+    /// Reschedule an already scheduled thread based on a new priority
+    void Reschedule(u32 priority, std::size_t core, Thread* thread);
+
+    /// Unschedules a thread.
+    void Unschedule(u32 priority, std::size_t core, Thread* thread);
+
     /**
      * Transfers a thread into an specific core. If the destination_core is -1
      * it will be unscheduled from its source code and added into its suggested
@@ -174,6 +186,8 @@ private:
     std::atomic<s64> scope_lock{};
     Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
 
+    Common::SpinLock global_list_guard{};
+
     /// Lists all thread ids that aren't deleted/etc.
     std::vector<std::shared_ptr<Thread>> thread_list;
     KernelCore& kernel;
@@ -190,12 +204,6 @@ public:
     /// Reschedules to the next available thread (call after current thread is suspended)
     void TryDoContextSwitch();
 
-    /// Unloads currently running thread
-    void UnloadThread();
-
-    /// Select the threads in top of the scheduling multilist.
-    void SelectThreads();
-
     /// Gets the current running thread
     Thread* GetCurrentThread() const;
 
@@ -209,15 +217,22 @@ public:
         return is_context_switch_pending;
     }
 
+    void Initialize();
+
     /// Shutdowns the scheduler.
     void Shutdown();
 
+    void OnThreadStart();
+
 private:
     friend class GlobalScheduler;
 
     /// Switches the CPU's active thread context to that of the specified thread
     void SwitchContext();
 
+    /// When a thread wakes up, it must run this through its new scheduler
+    void SwitchContextStep2();
+
     /**
      * Called on every context switch to update the internal timestamp
      * This also updates the running time ticks for the given thread and
@@ -233,12 +248,15 @@ private:
 
     std::shared_ptr<Thread> current_thread = nullptr;
     std::shared_ptr<Thread> selected_thread = nullptr;
+    std::shared_ptr<Thread> idle_thread = nullptr;
 
     Core::System& system;
     u64 last_context_switch_time = 0;
     u64 idle_selection_count = 0;
     const std::size_t core_id;
 
+    Common::SpinLock guard{};
+
     bool is_context_switch_pending = false;
 };
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4ae4529f52..d7f0dcabd1 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -863,9 +863,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
             const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
 
-            out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
+            out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks);
         } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
-            out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
+            out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;
         }
 
         *result = out_ticks;
@@ -1428,9 +1428,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
 
     ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1));
 
+    ThreadType type = THREADTYPE_USER;
     CASCADE_RESULT(std::shared_ptr<Thread> thread,
-                   Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top,
-                                  *current_process));
+                   Thread::Create(system, type, "", entry_point, priority, arg, processor_id, stack_top,
+                                  current_process));
 
     const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
     if (new_thread_handle.Failed()) {
@@ -1513,13 +1514,6 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
     } else {
         current_thread->Sleep(nanoseconds);
     }
-
-    if (is_redundant) {
-        // If it's redundant, the core is pretty much idle. Some games keep idling
-        // a core while it's doing nothing, we advance timing to avoid costly continuous
-        // calls.
-        system.CoreTiming().AddTicks(2000);
-    }
     system.PrepareReschedule(current_thread->GetProcessorID());
 }
 
@@ -1725,10 +1719,7 @@ static u64 GetSystemTick(Core::System& system) {
     auto& core_timing = system.CoreTiming();
 
     // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
-    const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())};
-
-    // Advance time to defeat dumb games that busy-wait for the frame to end.
-    core_timing.AddTicks(400);
+    const u64 result{system.CoreTiming().GetClockTicks()};
 
     return result;
 }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index db7f379ac2..8cb3593dbe 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -9,12 +9,14 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/thread_queue_list.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
@@ -23,6 +25,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
 
@@ -44,6 +47,7 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Thread::~Thread() = default;
 
 void Thread::Stop() {
+    SchedulerLock lock(kernel);
     // Cancel any outstanding wakeup events for this thread
     Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
                                                              global_handle);
@@ -71,9 +75,8 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
 
     // This function might be called from any thread so we have to be cautious and use the
     // thread-safe version of ScheduleEvent.
-    const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
     Core::System::GetInstance().CoreTiming().ScheduleEvent(
-        cycles, kernel.ThreadWakeupCallbackEventType(), global_handle);
+        nanoseconds, kernel.ThreadWakeupCallbackEventType(), global_handle);
 }
 
 void Thread::CancelWakeupTimer() {
@@ -125,6 +128,16 @@ void Thread::ResumeFromWait() {
     SetStatus(ThreadStatus::Ready);
 }
 
+void Thread::OnWakeUp() {
+    SchedulerLock lock(kernel);
+    if (activity == ThreadActivity::Paused) {
+        SetStatus(ThreadStatus::Paused);
+        return;
+    }
+
+    SetStatus(ThreadStatus::Ready);
+}
+
 void Thread::CancelWait() {
     if (GetSchedulingStatus() != ThreadSchedStatus::Paused) {
         is_sync_cancelled = true;
@@ -153,12 +166,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
     context.fpcr = 0;
 }
 
-ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name,
-                                                  VAddr entry_point, u32 priority, u64 arg,
-                                                  s32 processor_id, VAddr stack_top,
-                                                  Process& owner_process) {
+std::shared_ptr<Common::Fiber> Thread::GetHostContext() const {
+    return host_context;
+}
+
+ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
+                                                  std::string name, VAddr entry_point, u32 priority,
+                                                  u64 arg, s32 processor_id, VAddr stack_top,
+                                                  Process* owner_process) {
+    std::function<void(void*)> init_func = system.GetCpuManager().GetGuestThreadStartFunc();
+    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
+    return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top,
+                  owner_process, std::move(init_func), init_func_parameter);
+}
+
+ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
+                                                  std::string name, VAddr entry_point, u32 priority,
+                                                  u64 arg, s32 processor_id, VAddr stack_top,
+                                                  Process* owner_process,
+                                                  std::function<void(void*)>&& thread_start_func,
+                                                  void* thread_start_parameter) {
+    auto& kernel = system.Kernel();
     // Check if priority is in ranged. Lowest priority -> highest priority id.
-    if (priority > THREADPRIO_LOWEST) {
+    if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) {
         LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
         return ERR_INVALID_THREAD_PRIORITY;
     }
@@ -168,11 +198,12 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
         return ERR_INVALID_PROCESSOR_ID;
     }
 
-    auto& system = Core::System::GetInstance();
-    if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) {
-        LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
-        // TODO (bunnei): Find the correct error code to use here
-        return RESULT_UNKNOWN;
+    if (owner_process) {
+        if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) {
+            LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
+            // TODO (bunnei): Find the correct error code to use here
+            return RESULT_UNKNOWN;
+        }
     }
 
     std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);
@@ -183,7 +214,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
     thread->stack_top = stack_top;
     thread->tpidr_el0 = 0;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = system.CoreTiming().GetTicks();
+    thread->last_running_ticks = 0;
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask = 1ULL << processor_id;
@@ -193,16 +224,27 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
     thread->wait_handle = 0;
     thread->name = std::move(name);
     thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
-    thread->owner_process = &owner_process;
-    auto& scheduler = kernel.GlobalScheduler();
-    scheduler.AddThread(thread);
-    thread->tls_address = thread->owner_process->CreateTLSRegion();
-
-    thread->owner_process->RegisterThread(thread.get());
-
-    ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
-                         static_cast<u32>(entry_point), static_cast<u32>(arg));
-    ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
+    thread->owner_process = owner_process;
+    thread->type = type_flags;
+    if ((type_flags & THREADTYPE_IDLE) == 0) {
+        auto& scheduler = kernel.GlobalScheduler();
+        scheduler.AddThread(thread);
+    }
+    if (owner_process) {
+        thread->tls_address = thread->owner_process->CreateTLSRegion();
+        thread->owner_process->RegisterThread(thread.get());
+    } else {
+        thread->tls_address = 0;
+    }
+    // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
+    // to initialize the context
+    if ((type_flags & THREADTYPE_HLE) == 0) {
+        ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
+                             static_cast<u32>(entry_point), static_cast<u32>(arg));
+        ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
+    }
+    thread->host_context =
+        std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);
 
     return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
 }
@@ -258,7 +300,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
     }
 
     if (status == ThreadStatus::Running) {
-        last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
+        last_running_ticks = Core::System::GetInstance().CoreTiming().GetCPUTicks();
     }
 
     status = new_status;
@@ -375,38 +417,55 @@ void Thread::SetActivity(ThreadActivity value) {
 }
 
 void Thread::Sleep(s64 nanoseconds) {
-    // Sleep current thread and check for next thread to schedule
-    SetStatus(ThreadStatus::WaitSleep);
+    Handle event_handle{};
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
+        SetStatus(ThreadStatus::WaitSleep);
+    }
 
-    // Create an event to wake the thread up after the specified nanosecond delay has passed
-    WakeAfterDelay(nanoseconds);
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
+    }
 }
 
 bool Thread::YieldSimple() {
-    auto& scheduler = kernel.GlobalScheduler();
-    return scheduler.YieldThread(this);
+    bool result{};
+    {
+        SchedulerLock lock(kernel);
+        result = kernel.GlobalScheduler().YieldThread(this);
+    }
+    return result;
 }
 
 bool Thread::YieldAndBalanceLoad() {
-    auto& scheduler = kernel.GlobalScheduler();
-    return scheduler.YieldThreadAndBalanceLoad(this);
+    bool result{};
+    {
+        SchedulerLock lock(kernel);
+        result = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
+    }
+    return result;
 }
 
 bool Thread::YieldAndWaitForLoadBalancing() {
-    auto& scheduler = kernel.GlobalScheduler();
-    return scheduler.YieldThreadAndWaitForLoadBalancing(this);
+    bool result{};
+    {
+        SchedulerLock lock(kernel);
+        result = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
+    }
+    return result;
 }
 
 void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
     const u32 old_flags = scheduling_state;
     scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
                        static_cast<u32>(new_status);
-    AdjustSchedulingOnStatus(old_flags);
+    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_flags);
 }
 
 void Thread::SetCurrentPriority(u32 new_priority) {
     const u32 old_priority = std::exchange(current_priority, new_priority);
-    AdjustSchedulingOnPriority(old_priority);
+    kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority);
 }
 
 ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
@@ -443,111 +502,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
                     processor_id = ideal_core;
                 }
             }
-            AdjustSchedulingOnAffinity(old_affinity_mask, old_core);
+            kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core);
         }
     }
     return RESULT_SUCCESS;
 }
 
-void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
-    if (old_flags == scheduling_state) {
-        return;
-    }
-
-    auto& scheduler = kernel.GlobalScheduler();
-    if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
-        ThreadSchedStatus::Runnable) {
-        // In this case the thread was running, now it's pausing/exitting
-        if (processor_id >= 0) {
-            scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
-        }
-
-        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
-                scheduler.Unsuggest(current_priority, core, this);
-            }
-        }
-    } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
-        // The thread is now set to running from being stopped
-        if (processor_id >= 0) {
-            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
-        }
-
-        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
-                scheduler.Suggest(current_priority, core, this);
-            }
-        }
-    }
-
-    scheduler.SetReselectionPending();
-}
-
-void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
-    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
-        return;
-    }
-    auto& scheduler = kernel.GlobalScheduler();
-    if (processor_id >= 0) {
-        scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
-            scheduler.Unsuggest(old_priority, core, this);
-        }
-    }
-
-    // Add thread to the new priority queues.
-    Thread* current_thread = GetCurrentThread();
-
-    if (processor_id >= 0) {
-        if (current_thread == this) {
-            scheduler.SchedulePrepend(current_priority, static_cast<u32>(processor_id), this);
-        } else {
-            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
-        }
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
-            scheduler.Suggest(current_priority, core, this);
-        }
-    }
-
-    scheduler.SetReselectionPending();
-}
-
-void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
-    auto& scheduler = kernel.GlobalScheduler();
-    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
-        current_priority >= THREADPRIO_COUNT) {
-        return;
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (((old_affinity_mask >> core) & 1) != 0) {
-            if (core == static_cast<u32>(old_core)) {
-                scheduler.Unschedule(current_priority, core, this);
-            } else {
-                scheduler.Unsuggest(current_priority, core, this);
-            }
-        }
-    }
-
-    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        if (((affinity_mask >> core) & 1) != 0) {
-            if (core == static_cast<u32>(processor_id)) {
-                scheduler.Schedule(current_priority, core, this);
-            } else {
-                scheduler.Suggest(current_priority, core, this);
-            }
-        }
-    }
-
-    scheduler.SetReselectionPending();
-}
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 23fdef8a40..33d340b47f 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -9,23 +9,42 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/spin_lock.h"
 #include "core/arm/arm_interface.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"
 
+namespace Common {
+class Fiber;
+}
+
+namespace Core {
+class System;
+}
+
 namespace Kernel {
 
+class GlobalScheduler;
 class KernelCore;
 class Process;
 class Scheduler;
 
 enum ThreadPriority : u32 {
-    THREADPRIO_HIGHEST = 0,       ///< Highest thread priority
-    THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
-    THREADPRIO_DEFAULT = 44,      ///< Default thread priority for userland apps
-    THREADPRIO_LOWEST = 63,       ///< Lowest thread priority
-    THREADPRIO_COUNT = 64,        ///< Total number of possible thread priorities.
+    THREADPRIO_HIGHEST = 0,             ///< Highest thread priority
+    THREADPRIO_MAX_CORE_MIGRATION = 2,  ///< Highest priority for a core migration
+    THREADPRIO_USERLAND_MAX = 24,       ///< Highest thread priority for userland apps
+    THREADPRIO_DEFAULT = 44,            ///< Default thread priority for userland apps
+    THREADPRIO_LOWEST = 63,             ///< Lowest thread priority
+    THREADPRIO_COUNT = 64,              ///< Total number of possible thread priorities.
+};
+
+enum ThreadType : u32 {
+    THREADTYPE_USER = 0x1,
+    THREADTYPE_KERNEL = 0x2,
+    THREADTYPE_HLE = 0x4,
+    THREADTYPE_IDLE = 0x8,
+    THREADTYPE_SUSPEND = 0x10,
 };
 
 enum ThreadProcessorId : s32 {
@@ -111,22 +130,43 @@ public:
         std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                            std::shared_ptr<SynchronizationObject> object, std::size_t index)>;
 
+   /**
+    * Creates and returns a new thread. The new thread is immediately scheduled
+    * @param system The instance of the whole system
+    * @param name The friendly name desired for the thread
+    * @param entry_point The address at which the thread should start execution
+    * @param priority The thread's priority
+    * @param arg User data to pass to the thread
+    * @param processor_id The ID(s) of the processors on which the thread is desired to be run
+    * @param stack_top The address of the thread's stack top
+    * @param owner_process The parent process for the thread; if null, it's a kernel thread
+    * @return A shared pointer to the newly created thread
+    */
+   static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, std::string name,
+                                                    VAddr entry_point, u32 priority, u64 arg,
+                                                    s32 processor_id, VAddr stack_top,
+                                                    Process* owner_process);
+
     /**
      * Creates and returns a new thread. The new thread is immediately scheduled
-     * @param kernel The kernel instance this thread will be created under.
+     * @param system The instance of the whole system
      * @param name The friendly name desired for the thread
      * @param entry_point The address at which the thread should start execution
      * @param priority The thread's priority
      * @param arg User data to pass to the thread
      * @param processor_id The ID(s) of the processors on which the thread is desired to be run
      * @param stack_top The address of the thread's stack top
-     * @param owner_process The parent process for the thread
+     * @param owner_process The parent process for the thread; if null, it's a kernel thread
+     * @param thread_start_func The function where the host context will start.
+     * @param thread_start_parameter The parameter which will be passed to host context on init
      * @return A shared pointer to the newly created thread
      */
-    static ResultVal<std::shared_ptr<Thread>> Create(KernelCore& kernel, std::string name,
+    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, std::string name,
                                                      VAddr entry_point, u32 priority, u64 arg,
                                                      s32 processor_id, VAddr stack_top,
-                                                     Process& owner_process);
+                                                     Process* owner_process,
+                                                     std::function<void(void*)>&& thread_start_func,
+                                                     void* thread_start_parameter);
 
     std::string GetName() const override {
         return name;
@@ -192,7 +232,9 @@ public:
     }
 
     /// Resumes a thread from waiting
-    void ResumeFromWait();
+    void /* deprecated */ ResumeFromWait();
+
+    void OnWakeUp();
 
     /// Cancels a waiting operation that this thread may or may not be within.
     ///
@@ -206,10 +248,10 @@ public:
      * Schedules an event to wake up the specified thread after the specified delay
      * @param nanoseconds The time this thread will be allowed to sleep for
      */
-    void WakeAfterDelay(s64 nanoseconds);
+    void /* deprecated */ WakeAfterDelay(s64 nanoseconds);
 
     /// Cancel any outstanding wakeup events for this thread
-    void CancelWakeupTimer();
+    void /* deprecated */ CancelWakeupTimer();
 
     /**
      * Sets the result after the thread awakens (from svcWaitSynchronization)
@@ -290,6 +332,12 @@ public:
         return context_64;
     }
 
+    bool IsHLEThread() const {
+        return (type & THREADTYPE_HLE) != 0;
+    }
+
+    std::shared_ptr<Common::Fiber> GetHostContext() const;
+
     ThreadStatus GetStatus() const {
         return status;
     }
@@ -467,16 +515,19 @@ public:
     }
 
 private:
+    friend class GlobalScheduler;
+    friend class Scheduler;
+
     void SetSchedulingStatus(ThreadSchedStatus new_status);
     void SetCurrentPriority(u32 new_priority);
     ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
 
-    void AdjustSchedulingOnStatus(u32 old_flags);
-    void AdjustSchedulingOnPriority(u32 old_priority);
     void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
 
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
+    Common::SpinLock context_guard{};
+    std::shared_ptr<Common::Fiber> host_context{};
 
     u64 thread_id = 0;
 
@@ -485,6 +536,8 @@ private:
     VAddr entry_point = 0;
     VAddr stack_top = 0;
 
+    ThreadType type;
+
     /// Nominal thread priority, as set by the emulated application.
     /// The nominal priority is the thread priority without priority
     /// inheritance taken into account.
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 21b2904689..0b8f0d993b 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -19,7 +19,7 @@ TimeManager::TimeManager(Core::System& system) : system{system} {
             Handle proper_handle = static_cast<Handle>(thread_handle);
             std::shared_ptr<Thread> thread =
                 this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
-            thread->ResumeFromWait();
+            thread->OnWakeUp();
         });
 }
 
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index 1f2131ec84..cb35919e93 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {}
 
 void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                    std::size_t size) {
-    shared_memory.header.timestamp = core_timing.GetTicks();
+    shared_memory.header.timestamp = core_timing.GetCPUTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 6e990dd009..b7b7bfeae5 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {}
 
 void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                   std::size_t size) {
-    shared_memory.header.timestamp = core_timing.GetTicks();
+    shared_memory.header.timestamp = core_timing.GetCPUTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 9a8d354ba7..feae89525a 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {}
 
 void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                    std::size_t size) {
-    shared_memory.header.timestamp = core_timing.GetTicks();
+    shared_memory.header.timestamp = core_timing.GetCPUTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 93d88ea50d..ac40989c58 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {}
 
 void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                 std::size_t size) {
-    shared_memory.header.timestamp = core_timing.GetTicks();
+    shared_memory.header.timestamp = core_timing.GetCPUTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index c55d900e27..2edd3c9932 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
             const auto& last_entry =
                 main_controller->npad[main_controller->common.last_entry_index];
 
-            main_controller->common.timestamp = core_timing.GetTicks();
+            main_controller->common.timestamp = core_timing.GetCPUTicks();
             main_controller->common.last_entry_index =
                 (main_controller->common.last_entry_index + 1) % 17;
 
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 9e527d176d..e7483bfa22 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u
     }
 
     CommonHeader header{};
-    header.timestamp = core_timing.GetTicks();
+    header.timestamp = core_timing.GetCPUTicks();
     header.total_entry_count = 17;
     header.entry_count = 0;
     header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index 1c6e55566a..e326f8f5c4 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {}
 
 void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                       std::size_t size) {
-    shared_memory.header.timestamp = core_timing.GetTicks();
+    shared_memory.header.timestamp = core_timing.GetCPUTicks();
     shared_memory.header.total_entry_count = 17;
 
     if (!IsControllerActivated()) {
@@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin
         touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
         touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
         touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
-        const u64 tick = core_timing.GetTicks();
+        const u64 tick = core_timing.GetCPUTicks();
         touch_entry.delta_time = tick - last_touch;
         last_touch = tick;
         touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index 27511b27b3..2503ef241e 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {}
 void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                                std::size_t size) {
     for (auto& xpad_entry : shared_memory.shared_memory_entries) {
-        xpad_entry.header.timestamp = core_timing.GetTicks();
+        xpad_entry.header.timestamp = core_timing.GetCPUTicks();
         xpad_entry.header.total_entry_count = 17;
 
         if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 72a050de25..b2db6515f5 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -39,11 +39,9 @@ namespace Service::HID {
 
 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66);
-[[maybe_unused]] constexpr s64 accelerometer_update_ticks =
-    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
-[[maybe_unused]] constexpr s64 gyroscope_update_ticks =
-    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
+constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66);
+[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100);
+[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
 
 IAppletResource::IAppletResource(Core::System& system)
@@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system)
 
     // Register update callbacks
     pad_update_event =
-        Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
-            UpdateControllers(userdata, cycles_late);
+        Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) {
+            UpdateControllers(userdata, ns_late);
         });
 
     // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
@@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
     rb.PushCopyObjects(shared_mem);
 }
 
-void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) {
     auto& core_timing = system.CoreTiming();
 
     const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
@@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
         controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
     }
 
-    core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
+    core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event);
 }
 
 class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 36ed6f7da4..e82fd031b7 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
 
     IPC::ResponseBuilder rb{ctx, 5};
     rb.Push(RESULT_SUCCESS);
-    rb.PushRaw<u64>(system.CoreTiming().GetTicks());
+    rb.PushRaw<u64>(system.CoreTiming().GetCPUTicks());
     rb.PushRaw<u32>(0);
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5ca..2c4c8856c0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -195,8 +195,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
 
     IoctlGetGpuTime params{};
     std::memcpy(&params, input.data(), input.size());
-    const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks());
-    params.gpu_time = static_cast<u64_le>(ns.count());
+    params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count());
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 437bc5dee9..aaf28995db 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -27,8 +27,8 @@
 
 namespace Service::NVFlinger {
 
-constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
-constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30);
+constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30);
 
 NVFlinger::NVFlinger(Core::System& system) : system(system) {
     displays.emplace_back(0, "Default", system);
@@ -39,11 +39,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
 
     // Schedule the screen composition events
     composition_event =
-        Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
+        Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) {
             Compose();
-            const auto ticks =
-                Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
-            this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late),
+            const auto ticks = GetNextTicks();
+            this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),
                                                     composition_event);
         });
 
@@ -223,7 +222,7 @@ void NVFlinger::Compose() {
 
 s64 NVFlinger::GetNextTicks() const {
     constexpr s64 max_hertz = 120LL;
-    return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
+    return (1000000000 * (1LL << swap_interval)) / max_hertz;
 }
 
 } // namespace Service::NVFlinger
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index 1575f0b49d..59a272f4a8 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -11,9 +11,8 @@
 namespace Service::Time::Clock {
 
 TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
-    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
-        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Hardware::CNTFREQ)};
+    const TimeSpanType ticks_time_span{
+        TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
     TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
 
     if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index 44d5bc651e..8baaa2a6af 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -11,9 +11,8 @@
 namespace Service::Time::Clock {
 
 SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
-    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
-        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Hardware::CNTFREQ)};
+    const TimeSpanType ticks_time_span{
+        TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
 
     return {ticks_time_span.ToSeconds(), GetClockSourceId()};
 }
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 67f1bbcf36..4cf58a61a2 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -234,9 +234,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
     const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
 
     if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
-        const auto ticks{Clock::TimeSpanType::FromTicks(
-            Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-            Core::Hardware::CNTFREQ)};
+        const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(),
+                                                        Core::Hardware::CNTFREQ)};
         const s64 base_time_point{context.offset + current_time_point.time_point -
                                   ticks.ToSeconds()};
         IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index 999ec1e513..e0ae9f8748 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
                                             const Common::UUID& clock_source_id,
                                             Clock::TimeSpanType current_time_point) {
     const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
-        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Hardware::CNTFREQ)};
+        system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
     const Clock::SteadyClockContext context{
         static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
         clock_source_id};
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 9d87045a05..66634596d1 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -29,15 +29,12 @@ namespace Core::Memory {
 struct Memory::Impl {
     explicit Impl(Core::System& system_) : system{system_} {}
 
-    void SetCurrentPageTable(Kernel::Process& process) {
+    void SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
         current_page_table = &process.PageTable().PageTableImpl();
 
         const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth();
 
-        system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width);
-        system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width);
-        system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width);
-        system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
+        system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width);
     }
 
     void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
@@ -689,8 +686,8 @@ struct Memory::Impl {
 Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
 Memory::~Memory() = default;
 
-void Memory::SetCurrentPageTable(Kernel::Process& process) {
-    impl->SetCurrentPageTable(process);
+void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
+    impl->SetCurrentPageTable(process, core_id);
 }
 
 void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
diff --git a/src/core/memory.h b/src/core/memory.h
index 9292f3b0ae..93f0c1d6c4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -64,7 +64,7 @@ public:
      *
      * @param process The process to use the page table of.
      */
-    void SetCurrentPageTable(Kernel::Process& process);
+    void SetCurrentPageTable(Kernel::Process& process, u32 core_id);
 
     /**
      * Maps an allocated buffer onto a region of the emulated process address space.
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index b139e84657..53d27859b9 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -20,7 +20,7 @@
 
 namespace Core::Memory {
 
-constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12);
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
 
 StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
@@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() {
 void CheatEngine::Initialize() {
     event = Core::Timing::CreateEvent(
         "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id),
-        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+        [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
     core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
 
     metadata.process_id = system.CurrentProcess()->GetProcessID();
@@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) {
 
 MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
 
-void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) {
     if (is_pending_reload.exchange(false)) {
         vm.LoadProgram(cheats);
     }
@@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
 
     vm.Execute(metadata);
 
-    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
+    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event);
 }
 
 } // namespace Core::Memory
diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp
index b2c6c537e8..8b0c50d111 100644
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -14,7 +14,7 @@
 namespace Tools {
 namespace {
 
-constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
+constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60);
 
 u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) {
     switch (width) {
@@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m
     : core_timing{core_timing_}, memory{memory_} {
     event = Core::Timing::CreateEvent(
         "MemoryFreezer::FrameCallback",
-        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+        [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
     core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event);
 }
 
@@ -158,7 +158,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const {
     return entries;
 }
 
-void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
+void Freezer::FrameCallback(u64 userdata, s64 ns_late) {
     if (!IsActive()) {
         LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events.");
         return;
@@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
         MemoryWriteWidth(memory, entry.width, entry.address, entry.value);
     }
 
-    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event);
+    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event);
 }
 
 void Freezer::FillEntryReads() {
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 3f750b51c9..47ef30aa91 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -8,7 +8,6 @@ add_executable(tests
     core/arm/arm_test_common.cpp
     core/arm/arm_test_common.h
     core/core_timing.cpp
-    core/host_timing.cpp
     tests.cpp
 )
 
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index ff2d11cc8e..795f3da099 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -16,31 +16,30 @@
 
 namespace {
 // Numbers are chosen randomly to make sure the correct one is given.
-constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
+static std::array<s64, 5> delays{};
 
 std::bitset<CB_IDS.size()> callbacks_ran_flags;
 u64 expected_callback = 0;
 s64 lateness = 0;
 
 template <unsigned int IDX>
-void CallbackTemplate(u64 userdata, s64 cycles_late) {
+void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
     static_assert(IDX < CB_IDS.size(), "IDX out of range");
     callbacks_ran_flags.set(IDX);
     REQUIRE(CB_IDS[IDX] == userdata);
-    REQUIRE(CB_IDS[IDX] == expected_callback);
-    REQUIRE(lateness == cycles_late);
+    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
+    delays[IDX] = nanoseconds_late;
+    ++expected_callback;
 }
 
 u64 callbacks_done = 0;
 
-void EmptyCallback(u64 userdata, s64 cycles_late) {
-    ++callbacks_done;
-}
-
 struct ScopeInit final {
     ScopeInit() {
-        core_timing.Initialize();
+        core_timing.Initialize([]() {});
     }
     ~ScopeInit() {
         core_timing.Shutdown();
@@ -49,110 +48,97 @@ struct ScopeInit final {
     Core::Timing::CoreTiming core_timing;
 };
 
-void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
-                     int expected_lateness = 0, int cpu_downcount = 0) {
-    callbacks_ran_flags = 0;
-    expected_callback = CB_IDS[idx];
-    lateness = expected_lateness;
-
-    // Pretend we executed X cycles of instructions.
-    core_timing.SwitchContext(context);
-    core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
-    core_timing.Advance();
-    core_timing.SwitchContext((context + 1) % 4);
-
-    REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
-}
-} // Anonymous namespace
-
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::Timing::EventType>> events{
+        Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
-    std::shared_ptr<Core::Timing::EventType> cb_a =
-        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
-    std::shared_ptr<Core::Timing::EventType> cb_b =
-        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
-    std::shared_ptr<Core::Timing::EventType> cb_c =
-        Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>);
-    std::shared_ptr<Core::Timing::EventType> cb_d =
-        Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>);
-    std::shared_ptr<Core::Timing::EventType> cb_e =
-        Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>);
+    expected_callback = 0;
 
-    // Enter slice 0
-    core_timing.ResetRun();
+    core_timing.SyncPause(true);
 
-    // D -> B -> C -> A -> E
-    core_timing.SwitchContext(0);
-    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
-    REQUIRE(1000 == core_timing.GetDowncount());
-    core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
-    REQUIRE(500 == core_timing.GetDowncount());
-    core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
-    REQUIRE(500 == core_timing.GetDowncount());
-    core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
-    REQUIRE(100 == core_timing.GetDowncount());
-    core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
-    REQUIRE(100 == core_timing.GetDowncount());
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    /// test pause
+    REQUIRE(callbacks_ran_flags.none());
 
-    AdvanceAndCheck(core_timing, 3, 0);
-    AdvanceAndCheck(core_timing, 1, 1);
-    AdvanceAndCheck(core_timing, 2, 2);
-    AdvanceAndCheck(core_timing, 0, 3);
-    AdvanceAndCheck(core_timing, 4, 0);
+    core_timing.Pause(false); // No need to sync
+
+    while (core_timing.HasPendingEvents())
+        ;
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+    }
 }
 
-TEST_CASE("CoreTiming[FairSharing]", "[core]") {
+#pragma optimize("", off)
+u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) {
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 placebo = 0;
+    for (std::size_t i = 0; i < 1000; i++) {
+        placebo += core_timing.GetGlobalTimeNs().count();
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    return (end - start);
+}
+#pragma optimize("", on)
 
+TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
+    std::vector<std::shared_ptr<Core::Timing::EventType>> events{
+        Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
+        Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
+        Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
+        Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
+        Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
+    };
 
-    std::shared_ptr<Core::Timing::EventType> empty_callback =
-        Core::Timing::CreateEvent("empty_callback", EmptyCallback);
+    core_timing.SyncPause(true);
+    core_timing.SyncPause(false);
 
-    callbacks_done = 0;
-    u64 MAX_CALLBACKS = 10;
-    for (std::size_t i = 0; i < 10; i++) {
-        core_timing.ScheduleEvent(i * 3333U, empty_callback, 0);
+    expected_callback = 0;
+
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 one_micro = 1000U;
+    for (std::size_t i = 0; i < events.size(); i++) {
+        u64 order = calls_order[i];
+        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    const double scheduling_time = static_cast<double>(end - start);
+    const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
+
+    while (core_timing.HasPendingEvents())
+        ;
+
+    REQUIRE(callbacks_ran_flags.all());
+
+    for (std::size_t i = 0; i < delays.size(); i++) {
+        const double delay = static_cast<double>(delays[i]);
+        const double micro = delay / 1000.0f;
+        const double mili = micro / 1000.0f;
+        printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
     }
 
-    const s64 advances = MAX_SLICE_LENGTH / 10;
-    core_timing.ResetRun();
-    u64 current_time = core_timing.GetTicks();
-    bool keep_running{};
-    do {
-        keep_running = false;
-        for (u32 active_core = 0; active_core < 4; ++active_core) {
-            core_timing.SwitchContext(active_core);
-            if (core_timing.CanCurrentContextRun()) {
-                core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount()));
-                core_timing.Advance();
-            }
-            keep_running |= core_timing.CanCurrentContextRun();
-        }
-    } while (keep_running);
-    u64 current_time_2 = core_timing.GetTicks();
-
-    REQUIRE(MAX_CALLBACKS == callbacks_done);
-    REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4);
-}
-
-TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
-    ScopeInit guard;
-    auto& core_timing = guard.core_timing;
-
-    std::shared_ptr<Core::Timing::EventType> cb_a =
-        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
-    std::shared_ptr<Core::Timing::EventType> cb_b =
-        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
-
-    // Enter slice 0
-    core_timing.ResetRun();
-
-    core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
-    core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
-
-    AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10)
-    AdvanceAndCheck(core_timing, 1, 1, 50, -50);
+    const double micro = scheduling_time / 1000.0f;
+    const double mili = micro / 1000.0f;
+    printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
+    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
+           timer_time / 1000000.f);
 }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65d..482e497118 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
+
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
     constexpr u64 gpu_ticks_num = 384;
     constexpr u64 gpu_ticks_den = 625;
 
-    const u64 cpu_ticks = system.CoreTiming().GetTicks();
-    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
     if (Settings::values.use_fast_gpu_time) {
         nanoseconds /= 256;
     }
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1f5e43043c..e721c73dc8 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -49,6 +49,8 @@ void EmuThread::run() {
 
     emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
 
+    Core::System::GetInstance().RegisterHostThread();
+
     Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
         stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
             emit LoadProgress(stage, value, total);
@@ -62,28 +64,30 @@ void EmuThread::run() {
     bool was_active = false;
     while (!stop_run) {
         if (running) {
-            if (!was_active)
+            if (was_active) {
                 emit DebugModeLeft();
+            }
 
-            Core::System::ResultStatus result = Core::System::GetInstance().RunLoop();
+            running_guard = true;
+            Core::System::ResultStatus result = Core::System::GetInstance().Run();
             if (result != Core::System::ResultStatus::Success) {
+                running_guard = false;
                 this->SetRunning(false);
                 emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
             }
+            running_wait.Wait();
+            result = Core::System::GetInstance().Pause();
+            if (result != Core::System::ResultStatus::Success) {
+                running_guard = false;
+                this->SetRunning(false);
+                emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
+            }
+            running_guard = false;
 
-            was_active = running || exec_step;
-            if (!was_active && !stop_run)
-                emit DebugModeEntered();
-        } else if (exec_step) {
-            if (!was_active)
-                emit DebugModeLeft();
-
-            exec_step = false;
-            Core::System::GetInstance().SingleStep();
+            was_active = true;
             emit DebugModeEntered();
-            yieldCurrentThread();
-
-            was_active = false;
+        } else if (exec_step) {
+            UNIMPLEMENTED();
         } else {
             std::unique_lock lock{running_mutex};
             running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 3626604cad..768568b3e9 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -59,6 +59,11 @@ public:
         this->running = running;
         lock.unlock();
         running_cv.notify_all();
+        if (!running) {
+            running_wait.Set();
+            /// Wait until effectively paused
+            while (running_guard);
+        }
     }
 
     /**
@@ -84,6 +89,8 @@ private:
     std::atomic_bool stop_run{false};
     std::mutex running_mutex;
     std::condition_variable running_cv;
+    Common::Event running_wait{};
+    std::atomic_bool running_guard{false};
 
 signals:
     /**
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index c1ea25fb8d..765908c5a7 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -59,8 +59,10 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
     std::size_t row = 0;
     auto add_threads = [&](const std::vector<std::shared_ptr<Kernel::Thread>>& threads) {
         for (std::size_t i = 0; i < threads.size(); ++i) {
-            item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
-            item_list.back()->row = row;
+            if (!threads[i]->IsHLEThread()) {
+                item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
+                item_list.back()->row = row;
+            }
             ++row;
         }
     };
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 4d2ea7e9e9..1e5377840d 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -237,7 +237,7 @@ int main(int argc, char** argv) {
 
     std::thread render_thread([&emu_window] { emu_window->Present(); });
     while (emu_window->IsOpen()) {
-        system.RunLoop();
+        //system.RunLoop();
     }
     render_thread.join();
 
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 676e70ebd7..1a45506d46 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -256,7 +256,7 @@ int main(int argc, char** argv) {
     system.Renderer().Rasterizer().LoadDiskResources();
 
     while (!finished) {
-        system.RunLoop();
+        //system.RunLoop();
     }
 
     detached_tasks.WaitForAllTasks();

From 8af7539cba2f484b5c17afbc627cdf42c6268db4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 10:30:23 -0400
Subject: [PATCH 033/122] YuzuCMD/Tester: Correct execution

---
 src/yuzu_cmd/yuzu.cpp    | 7 ++++---
 src/yuzu_tester/yuzu.cpp | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 1e5377840d..38ffdfbd33 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -236,9 +236,10 @@ int main(int argc, char** argv) {
     system.Renderer().Rasterizer().LoadDiskResources();
 
     std::thread render_thread([&emu_window] { emu_window->Present(); });
-    while (emu_window->IsOpen()) {
-        //system.RunLoop();
-    }
+    system.Run();
+    while (emu_window->IsOpen())
+        ;
+    system.Pause();
     render_thread.join();
 
     system.Shutdown();
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 1a45506d46..d62686dd27 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -255,9 +255,10 @@ int main(int argc, char** argv) {
     system.GPU().Start();
     system.Renderer().Rasterizer().LoadDiskResources();
 
-    while (!finished) {
-        //system.RunLoop();
-    }
+    system.Run();
+    while (!finished)
+        ;
+    system.Pause();
 
     detached_tasks.WaitForAllTasks();
     return return_value;

From 35f68628f23b2af8a90618b7470e03fd40fe3275 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 10:43:34 -0400
Subject: [PATCH 034/122] CPU_Manager: remove debugging code.

---
 src/core/cpu_manager.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 4948509923..ff2fe8eadc 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -78,10 +78,10 @@ void CpuManager::RunGuestThread() {
     }
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
-        LOG_CRITICAL(Core_ARM, "Running Guest Thread");
-        physical_core.Idle();
-        LOG_CRITICAL(Core_ARM, "Leaving Guest Thread");
-        // physical_core.Run();
+        if (!physical_core.IsInterrupted()) {
+            physical_core.Idle();
+            //physical_core.Run();
+        }
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
     }
@@ -91,7 +91,6 @@ void CpuManager::RunIdleThread() {
     auto& kernel = system.Kernel();
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
-        LOG_CRITICAL(Core_ARM, "Running Idle Thread");
         physical_core.Idle();
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
@@ -99,7 +98,6 @@ void CpuManager::RunIdleThread() {
 }
 
 void CpuManager::RunSuspendThread() {
-    LOG_CRITICAL(Core_ARM, "Suspending Thread Entered");
     auto& kernel = system.Kernel();
     {
         auto& sched = kernel.CurrentScheduler();
@@ -109,9 +107,7 @@ void CpuManager::RunSuspendThread() {
         auto core = kernel.GetCurrentHostThreadID();
         auto& scheduler = kernel.CurrentScheduler();
         Kernel::Thread* current_thread = scheduler.GetCurrentThread();
-        LOG_CRITICAL(Core_ARM, "Suspending Core {}", core);
         Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context);
-        LOG_CRITICAL(Core_ARM, "Unsuspending Core {}", core);
         ASSERT(scheduler.ContextSwitchPending());
         ASSERT(core == kernel.GetCurrentHostThreadID());
         scheduler.TryDoContextSwitch();

From 69d043490e546bdef831533322b21df192c8ce86 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 11:12:46 -0400
Subject: [PATCH 035/122] General: Set up yuzu threads' microprofile, naming and
 registry.

---
 src/core/core_timing.cpp      | 5 +++--
 src/core/cpu_manager.cpp      | 5 ++++-
 src/video_core/gpu_thread.cpp | 6 +++++-
 src/yuzu/bootmanager.cpp      | 4 +++-
 src/yuzu/main.cpp             | 2 ++
 src/yuzu_cmd/yuzu.cpp         | 6 ++++--
 src/yuzu_tester/yuzu.cpp      | 6 ++++--
 7 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index a3ce69790a..cc32a853b9 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -2,14 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core_timing.h"
-
 #include <algorithm>
 #include <mutex>
 #include <string>
 #include <tuple>
 
 #include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/core_timing.h"
 #include "core/core_timing_util.h"
 
 namespace Core::Timing {
@@ -44,6 +44,7 @@ CoreTiming::~CoreTiming() = default;
 
 void CoreTiming::ThreadEntry(CoreTiming& instance) {
     std::string name = "yuzu:HostTiming";
+    MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
     instance.on_thread_init();
     instance.ThreadLoop();
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index ff2fe8eadc..9b93371318 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/fiber.h"
+#include "common/microprofile.h"
 #include "common/thread.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
@@ -36,6 +37,7 @@ void CpuManager::Shutdown() {
     Pause(false);
     for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
         core_data[core].host_thread->join();
+        core_data[core].host_thread.reset();
     }
 }
 
@@ -80,7 +82,7 @@ void CpuManager::RunGuestThread() {
         auto& physical_core = kernel.CurrentPhysicalCore();
         if (!physical_core.IsInterrupted()) {
             physical_core.Idle();
-            //physical_core.Run();
+            // physical_core.Run();
         }
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
@@ -159,6 +161,7 @@ void CpuManager::RunThread(std::size_t core) {
     /// Initialization
     system.RegisterCoreThread(core);
     std::string name = "yuzu:CoreHostThread_" + std::to_string(core);
+    MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
     auto& data = core_data[core];
     data.enter_barrier = std::make_unique<Common::Event>();
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe062..323185bfcd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
 
 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "common/thread.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 #include "core/settings.h"
@@ -18,7 +19,10 @@ namespace VideoCommon::GPUThread {
 static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
                       Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
                       SynchState& state) {
-    MicroProfileOnThreadCreate("GpuThread");
+    std::string name = "yuzu:GPU";
+    MicroProfileOnThreadCreate(name.c_str());
+    Common::SetCurrentThreadName(name.c_str());
+    system.RegisterHostThread();
 
     // Wait for first GPU command before acquiring the window context
     while (state.queue.Empty())
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index e721c73dc8..fcac0db911 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -41,7 +41,9 @@ EmuThread::EmuThread() = default;
 EmuThread::~EmuThread() = default;
 
 void EmuThread::run() {
-    MicroProfileOnThreadCreate("EmuThread");
+    std::string name = "yuzu:EmuControlThread";
+    MicroProfileOnThreadCreate(name.c_str());
+    Common::SetCurrentThreadName(name.c_str());
 
     // Main process has been loaded. Make the context current to this thread and begin GPU and CPU
     // execution.
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 270cccc772..2d0d535c97 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -910,6 +910,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
         nullptr,                                     // E-Commerce
     });
 
+    system.RegisterHostThread();
+
     const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())};
 
     const auto drd_callout =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 38ffdfbd33..e6c6a839de 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -237,8 +238,9 @@ int main(int argc, char** argv) {
 
     std::thread render_thread([&emu_window] { emu_window->Present(); });
     system.Run();
-    while (emu_window->IsOpen())
-        ;
+    while (emu_window->IsOpen()) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
     system.Pause();
     render_thread.join();
 
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index d62686dd27..083667baf1 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <chrono>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -256,8 +257,9 @@ int main(int argc, char** argv) {
     system.Renderer().Rasterizer().LoadDiskResources();
 
     system.Run();
-    while (!finished)
-        ;
+    while (!finished) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
     system.Pause();
 
     detached_tasks.WaitForAllTasks();

From c482d3c84903b0afd88896dc46fdf1103b72ddaf Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 11:51:33 -0400
Subject: [PATCH 036/122] AudioCore: Use nanoseconds instead of cycles for
 buffer time.

---
 src/audio_core/stream.cpp | 10 +++++-----
 src/audio_core/stream.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ca98f8ea7..ad3c27e69b 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -59,11 +59,11 @@ Stream::State Stream::GetState() const {
     return state;
 }
 
-s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
+s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const {
     const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
-    const auto us =
-        std::chrono::microseconds((static_cast<u64>(num_samples) * 1000000) / sample_rate);
-    return Core::Timing::usToCycles(us);
+    const auto ns =
+        std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate);
+    return ns.count();
 }
 
 static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
@@ -105,7 +105,7 @@ void Stream::PlayNextBuffer() {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    core_timing.ScheduleEvent(GetBufferReleaseCycles(*active_buffer), release_event, {});
+    core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {});
 }
 
 void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 1708a4d989..0663ce4354 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -96,7 +96,7 @@ private:
     void ReleaseActiveBuffer();
 
     /// Gets the number of core cycles when the specified buffer will be released
-    s64 GetBufferReleaseCycles(const Buffer& buffer) const;
+    s64 GetBufferReleaseNS(const Buffer& buffer) const;
 
     u32 sample_rate;                  ///< Sample rate of the stream
     Format format;                    ///< Format of the stream

From 52f469b266ddffed6f0f4a125a2b2fe14b04515f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 12:28:55 -0400
Subject: [PATCH 037/122] HostTiming: Pause the hardware clock on pause.

---
 src/common/wall_clock.cpp       | 4 ++++
 src/common/wall_clock.h         | 2 ++
 src/common/x64/native_clock.cpp | 7 +++++++
 src/common/x64/native_clock.h   | 2 ++
 src/core/core.cpp               | 2 +-
 src/core/core_timing.cpp        | 6 ++++++
 src/core/core_timing.h          | 1 +
 7 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index d4d35f4e7a..a46db6bbfb 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -53,6 +53,10 @@ public:
         return Common::Divide128On32(temporary, 1000000000).first;
     }
 
+    void Pause(bool is_paused) override {
+        // Do nothing in this clock type.
+    }
+
 private:
     base_time_point start_time;
 };
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
index ed284cf502..367d721348 100644
--- a/src/common/wall_clock.h
+++ b/src/common/wall_clock.h
@@ -28,6 +28,8 @@ public:
     /// Returns current wall time in emulated cpu cycles
     virtual u64 GetCPUCycles() = 0;
 
+    virtual void Pause(bool is_paused) = 0;
+
     /// Tells if the wall clock, uses the host CPU's hardware clock
     bool IsNative() const {
         return is_native;
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 26d4d0ba68..926f92ff86 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -65,6 +65,13 @@ u64 NativeClock::GetRTSC() {
     return accumulated_ticks;
 }
 
+void NativeClock::Pause(bool is_paused) {
+    if (!is_paused) {
+        _mm_mfence();
+        last_measure = __rdtsc();
+    }
+}
+
 std::chrono::nanoseconds NativeClock::GetTimeNS() {
     const u64 rtsc_value = GetRTSC();
     return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index b58cf9f5a4..3851f8fc22 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -26,6 +26,8 @@ public:
 
     u64 GetCPUCycles() override;
 
+    void Pause(bool is_paused) override;
+
 private:
     u64 GetRTSC();
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index e8936b09d5..1d6179a80e 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -137,8 +137,8 @@ struct System::Impl {
     ResultStatus Pause() {
         status = ResultStatus::Success;
 
-        kernel.Suspend(true);
         core_timing.SyncPause(true);
+        kernel.Suspend(true);
         cpu_manager.Pause(true);
 
         return status;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index cc32a853b9..5a7abcfca9 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -77,6 +77,9 @@ void CoreTiming::SyncPause(bool is_paused) {
         return;
     }
     Pause(is_paused);
+    if (!is_paused) {
+        pause_event.Set();
+    }
     event.Set();
     while (paused_set != is_paused)
         ;
@@ -197,6 +200,9 @@ void CoreTiming::ThreadLoop() {
             wait_set = false;
         }
         paused_set = true;
+        clock->Pause(true);
+        pause_event.Wait();
+        clock->Pause(false);
     }
 }
 
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 707c8ef0c2..c70b605c89 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -136,6 +136,7 @@ private:
 
     std::shared_ptr<EventType> ev_lost;
     Common::Event event{};
+    Common::Event pause_event{};
     Common::SpinLock basic_lock{};
     Common::SpinLock advance_lock{};
     std::unique_ptr<std::thread> timer_thread;

From 12f69fa598a879a048fa46ea2e1ab95adddc0677 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 12:40:33 -0400
Subject: [PATCH 038/122] SVC: Correct CreateThread, StartThread, ExitThread,
 SleepThread.

---
 src/core/hle/kernel/svc.cpp    | 18 +++++-----------
 src/core/hle/kernel/thread.cpp | 38 ++++++++++++++++------------------
 src/core/hle/kernel/thread.h   | 12 +++++++----
 3 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index d7f0dcabd1..dfb032b4b6 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1464,13 +1464,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
 
     ASSERT(thread->GetStatus() == ThreadStatus::Dormant);
 
-    thread->ResumeFromWait();
-
-    if (thread->GetStatus() == ThreadStatus::Ready) {
-        system.PrepareReschedule(thread->GetProcessorID());
-    }
-
-    return RESULT_SUCCESS;
+    return thread->Start();
 }
 
 /// Called when a thread exits
@@ -1478,9 +1472,8 @@ static void ExitThread(Core::System& system) {
     LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
 
     auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
-    current_thread->Stop();
     system.GlobalScheduler().RemoveThread(SharedFrom(current_thread));
-    system.PrepareReschedule();
+    current_thread->Stop();
 }
 
 /// Sleep the current thread
@@ -1500,13 +1493,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
     if (nanoseconds <= 0) {
         switch (static_cast<SleepType>(nanoseconds)) {
         case SleepType::YieldWithoutLoadBalancing:
-            is_redundant = current_thread->YieldSimple();
+            current_thread->YieldSimple();
             break;
         case SleepType::YieldWithLoadBalancing:
-            is_redundant = current_thread->YieldAndBalanceLoad();
+            current_thread->YieldAndBalanceLoad();
             break;
         case SleepType::YieldAndWaitForLoadBalancing:
-            is_redundant = current_thread->YieldAndWaitForLoadBalancing();
+            current_thread->YieldAndWaitForLoadBalancing();
             break;
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
@@ -1514,7 +1507,6 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
     } else {
         current_thread->Sleep(nanoseconds);
     }
-    system.PrepareReschedule(current_thread->GetProcessorID());
 }
 
 /// Wait process wide key atomic
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 8cb3593dbe..d9e6102722 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -56,12 +56,6 @@ void Thread::Stop() {
     SetStatus(ThreadStatus::Dead);
     Signal();
 
-    // Clean up any dangling references in objects that this thread was waiting for
-    for (auto& wait_object : wait_objects) {
-        wait_object->RemoveWaitingThread(SharedFrom(this));
-    }
-    wait_objects.clear();
-
     owner_process->UnregisterThread(this);
 
     // Mark the TLS slot in the thread's page as free.
@@ -138,6 +132,12 @@ void Thread::OnWakeUp() {
     SetStatus(ThreadStatus::Ready);
 }
 
+ResultCode Thread::Start() {
+    SchedulerLock lock(kernel);
+    SetStatus(ThreadStatus::Ready);
+    return RESULT_SUCCESS;
+}
+
 void Thread::CancelWait() {
     if (GetSchedulingStatus() != ThreadSchedStatus::Paused) {
         is_sync_cancelled = true;
@@ -188,7 +188,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
                                                   void* thread_start_parameter) {
     auto& kernel = system.Kernel();
     // Check if priority is in ranged. Lowest priority -> highest priority id.
-    if (priority > THREADPRIO_LOWEST && (type_flags & THREADTYPE_IDLE == 0)) {
+    if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) {
         LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
         return ERR_INVALID_THREAD_PRIORITY;
     }
@@ -416,7 +416,7 @@ void Thread::SetActivity(ThreadActivity value) {
     }
 }
 
-void Thread::Sleep(s64 nanoseconds) {
+ResultCode Thread::Sleep(s64 nanoseconds) {
     Handle event_handle{};
     {
         SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
@@ -427,33 +427,31 @@ void Thread::Sleep(s64 nanoseconds) {
         auto& time_manager = kernel.TimeManager();
         time_manager.UnscheduleTimeEvent(event_handle);
     }
+    return RESULT_SUCCESS;
 }
 
-bool Thread::YieldSimple() {
-    bool result{};
+ResultCode Thread::YieldSimple() {
     {
         SchedulerLock lock(kernel);
-        result = kernel.GlobalScheduler().YieldThread(this);
+        kernel.GlobalScheduler().YieldThread(this);
     }
-    return result;
+    return RESULT_SUCCESS;
 }
 
-bool Thread::YieldAndBalanceLoad() {
-    bool result{};
+ResultCode Thread::YieldAndBalanceLoad() {
     {
         SchedulerLock lock(kernel);
-        result = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
+        kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
     }
-    return result;
+    return RESULT_SUCCESS;
 }
 
-bool Thread::YieldAndWaitForLoadBalancing() {
-    bool result{};
+ResultCode Thread::YieldAndWaitForLoadBalancing() {
     {
         SchedulerLock lock(kernel);
-        result = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
+        kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
     }
-    return result;
+    return RESULT_SUCCESS;
 }
 
 void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 33d340b47f..78a4357b00 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -236,6 +236,8 @@ public:
 
     void OnWakeUp();
 
+    ResultCode Start();
+
     /// Cancels a waiting operation that this thread may or may not be within.
     ///
     /// When the thread is within a waiting state, this will set the thread's
@@ -470,16 +472,16 @@ public:
     void SetActivity(ThreadActivity value);
 
     /// Sleeps this thread for the given amount of nanoseconds.
-    void Sleep(s64 nanoseconds);
+    ResultCode Sleep(s64 nanoseconds);
 
     /// Yields this thread without rebalancing loads.
-    bool YieldSimple();
+    ResultCode YieldSimple();
 
     /// Yields this thread and does a load rebalancing.
-    bool YieldAndBalanceLoad();
+    ResultCode YieldAndBalanceLoad();
 
     /// Yields this thread and if the core is left idle, loads are rebalanced
-    bool YieldAndWaitForLoadBalancing();
+    ResultCode YieldAndWaitForLoadBalancing();
 
     void IncrementYieldCount() {
         yield_count++;
@@ -603,6 +605,8 @@ private:
     bool is_running = false;
     bool is_sync_cancelled = false;
 
+    bool will_be_terminated{};
+
     std::string name;
 };
 

From e9fe507da960f39e88180d0ac903bd8accea924e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 13:22:11 -0400
Subject: [PATCH 039/122] SVC: Correct GetThreadPriority, SetThreadPriority,
 GetThreadCoreMask, SetThreadCoreMask, GetCurrentProcessorNumber

---
 src/core/core.cpp              |  8 ++++++++
 src/core/core.h                |  7 +++++++
 src/core/hle/kernel/svc.cpp    | 17 ++++++++---------
 src/core/hle/kernel/thread.cpp |  6 ++----
 src/core/hle/kernel/thread.h   |  3 +--
 5 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 1d6179a80e..5d4ecdce5a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -467,6 +467,14 @@ const Kernel::Scheduler& System::CurrentScheduler() const {
     return impl->CurrentPhysicalCore().Scheduler();
 }
 
+Kernel::PhysicalCore& System::CurrentPhysicalCore() {
+    return impl->CurrentPhysicalCore();
+}
+
+const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
+    return impl->CurrentPhysicalCore();
+}
+
 Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
     return impl->GetPhysicalCore(core_index).Scheduler();
 }
diff --git a/src/core/core.h b/src/core/core.h
index 7f170fc54e..9a0dd10753 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -27,6 +27,7 @@ class VfsFilesystem;
 namespace Kernel {
 class GlobalScheduler;
 class KernelCore;
+class PhysicalCore;
 class Process;
 class Scheduler;
 } // namespace Kernel
@@ -211,6 +212,12 @@ public:
     /// Gets the scheduler for the CPU core that is currently running
     const Kernel::Scheduler& CurrentScheduler() const;
 
+    /// Gets the physical core for the CPU core that is currently running
+    Kernel::PhysicalCore& CurrentPhysicalCore();
+
+    /// Gets the physical core for the CPU core that is currently running
+    const Kernel::PhysicalCore& CurrentPhysicalCore() const;
+
     /// Gets a reference to an ARM interface for the CPU core with the specified index
     ARM_Interface& ArmInterface(std::size_t core_index);
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index dfb032b4b6..2a218e2942 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -27,6 +27,7 @@
 #include "core/hle/kernel/memory/memory_block.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/resource_limit.h"
@@ -1071,6 +1072,7 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle);
     if (!thread) {
+        *priority = 0;
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
         return ERR_INVALID_HANDLE;
     }
@@ -1105,14 +1107,13 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
 
     thread->SetPriority(priority);
 
-    system.PrepareReschedule(thread->GetProcessorID());
     return RESULT_SUCCESS;
 }
 
 /// Get which CPU core is executing the current thread
 static u32 GetCurrentProcessorNumber(Core::System& system) {
     LOG_TRACE(Kernel_SVC, "called");
-    return system.CurrentScheduler().GetCurrentThread()->GetProcessorID();
+    return static_cast<u32>(system.CurrentPhysicalCore().CoreIndex());
 }
 
 static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
@@ -1430,8 +1431,8 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
 
     ThreadType type = THREADTYPE_USER;
     CASCADE_RESULT(std::shared_ptr<Thread> thread,
-                   Thread::Create(system, type, "", entry_point, priority, arg, processor_id, stack_top,
-                                  current_process));
+                   Thread::Create(system, type, "", entry_point, priority, arg, processor_id,
+                                  stack_top, current_process));
 
     const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
     if (new_thread_handle.Failed()) {
@@ -1804,6 +1805,8 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
     if (!thread) {
         LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
                   thread_handle);
+        *core = 0;
+        *mask = 0;
         return ERR_INVALID_HANDLE;
     }
 
@@ -1866,11 +1869,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
         return ERR_INVALID_HANDLE;
     }
 
-    system.PrepareReschedule(thread->GetProcessorID());
-    thread->ChangeCore(core, affinity_mask);
-    system.PrepareReschedule(thread->GetProcessorID());
-
-    return RESULT_SUCCESS;
+    return thread->SetCoreAndAffinityMask(core, affinity_mask);
 }
 
 static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d9e6102722..e6bb7c6663 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -250,6 +250,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
 }
 
 void Thread::SetPriority(u32 priority) {
+    SchedulerLock lock(kernel);
     ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
                "Invalid priority value.");
     nominal_priority = priority;
@@ -383,10 +384,6 @@ void Thread::UpdatePriority() {
     lock_owner->UpdatePriority();
 }
 
-void Thread::ChangeCore(u32 core, u64 mask) {
-    SetCoreAndAffinityMask(core, mask);
-}
-
 bool Thread::AllSynchronizationObjectsReady() const {
     return std::none_of(wait_objects.begin(), wait_objects.end(),
                         [this](const std::shared_ptr<SynchronizationObject>& object) {
@@ -467,6 +464,7 @@ void Thread::SetCurrentPriority(u32 new_priority) {
 }
 
 ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
+    SchedulerLock lock(kernel);
     const auto HighestSetCore = [](u64 mask, u32 max_cores) {
         for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) {
             if (((mask >> core) & 1) != 0) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 78a4357b00..29fe5483b4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -221,7 +221,7 @@ public:
     void UpdatePriority();
 
     /// Changes the core that the thread is running or scheduled to run on.
-    void ChangeCore(u32 core, u64 mask);
+    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
 
     /**
      * Gets the thread's thread ID
@@ -522,7 +522,6 @@ private:
 
     void SetSchedulingStatus(ThreadSchedStatus new_status);
     void SetCurrentPriority(u32 new_priority);
-    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
 
     void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
 

From 2dded4e40be3685cc03905d49b89b8e6769c14d9 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 13:24:29 -0400
Subject: [PATCH 040/122] SVC: Remove global HLE Lock.

---
 src/core/hle/kernel/svc.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 2a218e2942..a071b0c09a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2410,9 +2410,6 @@ MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 void Call(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
-    // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard lock{HLE::g_hle_lock};
-
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
     if (info) {

From beb8887db80222bdf849933986af02208ea8e2f4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 16:38:33 -0400
Subject: [PATCH 041/122] SVC: Correct SignalEvent, ClearEvent, ResetSignal,
 WaitSynchronization, CancelSynchronization, ArbitrateLock

---
 src/core/hle/kernel/mutex.cpp                |  71 +++++++-----
 src/core/hle/kernel/process.cpp              |   1 +
 src/core/hle/kernel/readable_event.cpp       |   3 +
 src/core/hle/kernel/svc.cpp                  |   1 -
 src/core/hle/kernel/synchronization.cpp      | 116 ++++++++++---------
 src/core/hle/kernel/synchronization_object.h |   5 +-
 src/core/hle/kernel/thread.cpp               |  14 ++-
 src/core/hle/kernel/thread.h                 |  17 ++-
 8 files changed, 136 insertions(+), 92 deletions(-)

diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 7869eb32b6..3520c5e498 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -72,42 +72,55 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
         return ERR_INVALID_ADDRESS;
     }
 
-    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto& kernel = system.Kernel();
     std::shared_ptr<Thread> current_thread =
-        SharedFrom(system.CurrentScheduler().GetCurrentThread());
-    std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
-    std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
+        SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+    {
+        SchedulerLock lock(kernel);
+        // The mutex address must be 4-byte aligned
+        if ((address % sizeof(u32)) != 0) {
+            return ERR_INVALID_ADDRESS;
+        }
 
-    // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
-    // thread.
-    ASSERT(requesting_thread == current_thread);
+        const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+        std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
+        std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
 
-    const u32 addr_value = system.Memory().Read32(address);
+        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
+        // thread.
+        ASSERT(requesting_thread == current_thread);
 
-    // If the mutex isn't being held, just return success.
-    if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
-        return RESULT_SUCCESS;
+        current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+
+        const u32 addr_value = system.Memory().Read32(address);
+
+        // If the mutex isn't being held, just return success.
+        if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
+            return RESULT_SUCCESS;
+        }
+
+        if (holding_thread == nullptr) {
+            return ERR_INVALID_HANDLE;
+        }
+
+        // Wait until the mutex is released
+        current_thread->SetMutexWaitAddress(address);
+        current_thread->SetWaitHandle(requesting_thread_handle);
+
+        current_thread->SetStatus(ThreadStatus::WaitMutex);
+
+        // Update the lock holder thread's priority to prevent priority inversion.
+        holding_thread->AddMutexWaiter(current_thread);
     }
 
-    if (holding_thread == nullptr) {
-        LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}",
-                  holding_thread_handle);
-        return ERR_INVALID_HANDLE;
+    {
+        SchedulerLock lock(kernel);
+        auto* owner = current_thread->GetLockOwner();
+        if (owner != nullptr) {
+            owner->RemoveMutexWaiter(current_thread);
+        }
     }
-
-    // Wait until the mutex is released
-    current_thread->SetMutexWaitAddress(address);
-    current_thread->SetWaitHandle(requesting_thread_handle);
-
-    current_thread->SetStatus(ThreadStatus::WaitMutex);
-    current_thread->InvalidateWakeupCallback();
-
-    // Update the lock holder thread's priority to prevent priority inversion.
-    holding_thread->AddMutexWaiter(current_thread);
-
-    system.PrepareReschedule();
-
-    return RESULT_SUCCESS;
+    return current_thread->GetSignalingResult();
 }
 
 ResultCode Mutex::Release(VAddr address) {
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index b9719389ee..174713a5a6 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -210,6 +210,7 @@ void Process::UnregisterThread(const Thread* thread) {
 }
 
 ResultCode Process::ClearSignalState() {
+    SchedulerLock lock(system.Kernel());
     if (status == ProcessStatus::Exited) {
         LOG_ERROR(Kernel, "called on a terminated process instance.");
         return ERR_INVALID_STATE;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index ef5e19e63b..6e286419e9 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -6,8 +6,10 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/readable_event.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 
 namespace Kernel {
@@ -37,6 +39,7 @@ void ReadableEvent::Clear() {
 }
 
 ResultCode ReadableEvent::Reset() {
+    SchedulerLock lock(kernel);
     if (!is_signaled) {
         LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
                   GetObjectId(), GetTypeName(), GetName());
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index a071b0c09a..0d905c0ca5 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -448,7 +448,6 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
     }
 
     thread->CancelWait();
-    system.PrepareReschedule(thread->GetProcessorID());
     return RESULT_SUCCESS;
 }
 
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index dc37fad1a7..b36e550a04 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -10,78 +10,88 @@
 #include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
 
 namespace Kernel {
 
-/// Default thread wakeup callback for WaitSynchronization
-static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                        std::shared_ptr<SynchronizationObject> object,
-                                        std::size_t index) {
-    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-
-    if (reason == ThreadWakeupReason::Timeout) {
-        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
-        return true;
-    }
-
-    ASSERT(reason == ThreadWakeupReason::Signal);
-    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-    thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
-    return true;
-}
-
 Synchronization::Synchronization(Core::System& system) : system{system} {}
 
 void Synchronization::SignalObject(SynchronizationObject& obj) const {
+    SchedulerLock lock(system.Kernel());
     if (obj.IsSignaled()) {
-        obj.WakeupAllWaitingThreads();
+        for (auto thread : obj.GetWaitingThreads()) {
+            if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
+                thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
+                thread->ResumeFromWait();
+            }
+        }
     }
 }
 
 std::pair<ResultCode, Handle> Synchronization::WaitFor(
     std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
+    auto& kernel = system.Kernel();
     auto* const thread = system.CurrentScheduler().GetCurrentThread();
-    // Find the first object that is acquirable in the provided list of objects
-    const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(),
-                                  [thread](const std::shared_ptr<SynchronizationObject>& object) {
-                                      return object->IsSignaled();
-                                  });
+    Handle event_handle = InvalidHandle;
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
+        const auto itr =
+            std::find_if(sync_objects.begin(), sync_objects.end(),
+                         [thread](const std::shared_ptr<SynchronizationObject>& object) {
+                             return object->IsSignaled();
+                         });
 
-    if (itr != sync_objects.end()) {
-        // We found a ready object, acquire it and set the result value
-        SynchronizationObject* object = itr->get();
-        object->Acquire(thread);
-        const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-        return {RESULT_SUCCESS, index};
+        if (itr != sync_objects.end()) {
+            // We found a ready object, acquire it and set the result value
+            SynchronizationObject* object = itr->get();
+            object->Acquire(thread);
+            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+            lock.CancelSleep();
+            return {RESULT_SUCCESS, index};
+        }
+
+        if (nano_seconds == 0) {
+            lock.CancelSleep();
+            return {RESULT_TIMEOUT, InvalidHandle};
+        }
+
+        /// TODO(Blinkhawk): Check for termination pending
+
+        if (thread->IsSyncCancelled()) {
+            thread->SetSyncCancelled(false);
+            lock.CancelSleep();
+            return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+        }
+
+        for (auto& object : sync_objects) {
+            object->AddWaitingThread(SharedFrom(thread));
+        }
+        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+        thread->SetStatus(ThreadStatus::WaitSynch);
     }
 
-    // No objects were ready to be acquired, prepare to suspend the thread.
-
-    // If a timeout value of 0 was provided, just return the Timeout error code instead of
-    // suspending the thread.
-    if (nano_seconds == 0) {
-        return {RESULT_TIMEOUT, InvalidHandle};
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
     }
 
-    if (thread->IsSyncCancelled()) {
-        thread->SetSyncCancelled(false);
-        return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+    {
+        SchedulerLock lock(kernel);
+        ResultCode signaling_result = thread->GetSignalingResult();
+        SynchronizationObject* signaling_object = thread->GetSignalingObject();
+        if (signaling_result == RESULT_SUCCESS) {
+            const auto itr = std::find_if(
+                sync_objects.begin(), sync_objects.end(),
+                [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
+                    return object.get() == signaling_object;
+                });
+            ASSERT(itr != sync_objects.end());
+            signaling_object->Acquire(thread);
+            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+            return {RESULT_SUCCESS, index};
+        }
+        return {signaling_result, -1};
     }
-
-    for (auto& object : sync_objects) {
-        object->AddWaitingThread(SharedFrom(thread));
-    }
-
-    thread->SetSynchronizationObjects(std::move(sync_objects));
-    thread->SetStatus(ThreadStatus::WaitSynch);
-
-    // Create an event to wake the thread up after the specified nanosecond delay has passed
-    thread->WakeAfterDelay(nano_seconds);
-    thread->SetWakeupCallback(DefaultThreadWakeupCallback);
-
-    system.PrepareReschedule(thread->GetProcessorID());
-
-    return {RESULT_TIMEOUT, InvalidHandle};
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index 741c31faf1..0a0d069e0e 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -12,6 +12,7 @@
 namespace Kernel {
 
 class KernelCore;
+class Synchronization;
 class Thread;
 
 /// Class that represents a Kernel object that a thread can be waiting on
@@ -53,7 +54,7 @@ public:
      * Wake up all threads waiting on this object that can be awoken, in priority order,
      * and set the synchronization result and output of the thread.
      */
-    void WakeupAllWaitingThreads();
+    void /* deprecated */ WakeupAllWaitingThreads();
 
     /**
      * Wakes up a single thread waiting on this object.
@@ -62,7 +63,7 @@ public:
     void WakeupWaitingThread(std::shared_ptr<Thread> thread);
 
     /// Obtains the highest priority thread that is ready to run from this object's waiting list.
-    std::shared_ptr<Thread> GetHighestPriorityReadyThread() const;
+    std::shared_ptr<Thread> /* deprecated */ GetHighestPriorityReadyThread() const;
 
     /// Get a const reference to the waiting threads list for debug use
     const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index e6bb7c6663..5fef3945be 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -139,12 +139,13 @@ ResultCode Thread::Start() {
 }
 
 void Thread::CancelWait() {
+    SchedulerLock lock(kernel);
     if (GetSchedulingStatus() != ThreadSchedStatus::Paused) {
         is_sync_cancelled = true;
         return;
     }
     is_sync_cancelled = false;
-    SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED);
+    SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
     ResumeFromWait();
 }
 
@@ -258,13 +259,16 @@ void Thread::SetPriority(u32 priority) {
 }
 
 void Thread::SetWaitSynchronizationResult(ResultCode result) {
-    context_32.cpu_registers[0] = result.raw;
-    context_64.cpu_registers[0] = result.raw;
+    UNREACHABLE();
 }
 
 void Thread::SetWaitSynchronizationOutput(s32 output) {
-    context_32.cpu_registers[1] = output;
-    context_64.cpu_registers[1] = output;
+    UNREACHABLE();
+}
+
+void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
+    signaling_object = object;
+    signaling_result = result;
 }
 
 s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 29fe5483b4..a8ae1a66f2 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -259,13 +259,23 @@ public:
      * Sets the result after the thread awakens (from svcWaitSynchronization)
      * @param result Value to set to the returned result
      */
-    void SetWaitSynchronizationResult(ResultCode result);
+    void /*deprecated*/ SetWaitSynchronizationResult(ResultCode result);
 
     /**
      * Sets the output parameter value after the thread awakens (from svcWaitSynchronization)
      * @param output Value to set to the output parameter
      */
-    void SetWaitSynchronizationOutput(s32 output);
+    void /*deprecated*/ SetWaitSynchronizationOutput(s32 output);
+
+    void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
+
+    SynchronizationObject* GetSignalingObject() const {
+        return signaling_object;
+    }
+
+    ResultCode GetSignalingResult() const {
+        return signaling_result;
+    }
 
     /**
      * Retrieves the index that this particular object occupies in the list of objects
@@ -565,6 +575,9 @@ private:
     /// passed to WaitSynchronization.
     ThreadSynchronizationObjects wait_objects;
 
+    SynchronizationObject* signaling_object;
+    ResultCode signaling_result{RESULT_SUCCESS};
+
     /// List of threads that are waiting for a mutex that is held by this thread.
     MutexWaitingThreads wait_mutex_threads;
 

From 25455d992648aa3e915f96502d91c3f356a9d6c3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 17:37:12 -0400
Subject: [PATCH 042/122] SVC: Correct ArbitrateUnlock

---
 src/core/hle/kernel/mutex.cpp  | 73 +++++++++++++++++-----------------
 src/core/hle/kernel/mutex.h    |  3 ++
 src/core/hle/kernel/thread.cpp |  2 +-
 3 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 3520c5e498..18325db575 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -84,10 +84,11 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
 
         const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
         std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
-        std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
+        std::shared_ptr<Thread> requesting_thread =
+            handle_table.Get<Thread>(requesting_thread_handle);
 
-        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
-        // thread.
+        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of
+        // another thread.
         ASSERT(requesting_thread == current_thread);
 
         current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
@@ -123,47 +124,47 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
     return current_thread->GetSignalingResult();
 }
 
-ResultCode Mutex::Release(VAddr address) {
-    // The mutex address must be 4-byte aligned
-    if ((address % sizeof(u32)) != 0) {
-        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-        return ERR_INVALID_ADDRESS;
-    }
+std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
+                                                             VAddr address) {
+     // The mutex address must be 4-byte aligned
+     if ((address % sizeof(u32)) != 0) {
+         LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
+         return {ERR_INVALID_ADDRESS, nullptr};
+     }
 
-    std::shared_ptr<Thread> current_thread =
-        SharedFrom(system.CurrentScheduler().GetCurrentThread());
-    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
-
-    // There are no more threads waiting for the mutex, release it completely.
-    if (thread == nullptr) {
+    auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
+    if (new_owner == nullptr) {
         system.Memory().Write32(address, 0);
-        return RESULT_SUCCESS;
+        return {RESULT_SUCCESS, nullptr};
     }
-
     // Transfer the ownership of the mutex from the previous owner to the new one.
-    TransferMutexOwnership(address, current_thread, thread);
-
-    u32 mutex_value = thread->GetWaitHandle();
-
+    TransferMutexOwnership(address, owner, new_owner);
+    u32 mutex_value = new_owner->GetWaitHandle();
     if (num_waiters >= 2) {
         // Notify the guest that there are still some threads waiting for the mutex
         mutex_value |= Mutex::MutexHasWaitersFlag;
     }
-
-    // Grant the mutex to the next waiting thread and resume it.
+    new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+    new_owner->ResumeFromWait();
+    new_owner->SetLockOwner(nullptr);
     system.Memory().Write32(address, mutex_value);
-
-    ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
-    thread->ResumeFromWait();
-
-    thread->SetLockOwner(nullptr);
-    thread->SetCondVarWaitAddress(0);
-    thread->SetMutexWaitAddress(0);
-    thread->SetWaitHandle(0);
-    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-
-    system.PrepareReschedule();
-
-    return RESULT_SUCCESS;
+    return {RESULT_SUCCESS, new_owner};
 }
+
+ResultCode Mutex::Release(VAddr address) {
+    auto& kernel = system.Kernel();
+    SchedulerLock lock(kernel);
+
+    std::shared_ptr<Thread> current_thread =
+        SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+
+    auto [result, new_owner] = Unlock(current_thread, address);
+
+    if (result != RESULT_SUCCESS && new_owner != nullptr) {
+        new_owner->SetSynchronizationResults(nullptr, result);
+    }
+
+    return result;
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index b904de2e82..bce06ecea9 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -28,6 +28,9 @@ public:
     ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
                           Handle requesting_thread_handle);
 
+    /// Unlocks a mutex for owner at address
+    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner, VAddr address);
+
     /// Releases the mutex at the specified address.
     ResultCode Release(VAddr address);
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 5fef3945be..f100ffc70d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -80,7 +80,7 @@ void Thread::CancelWakeupTimer() {
 
 void Thread::ResumeFromWait() {
     ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
-
+    SchedulerLock lock(kernel);
     switch (status) {
     case ThreadStatus::Paused:
     case ThreadStatus::WaitSynch:

From 666e7033e7f42ceb62780ddbc07b7df071c0e954 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Feb 2020 19:43:28 -0400
Subject: [PATCH 043/122] SVC: Correct SendSyncRequest.

---
 src/core/cpu_manager.cpp               |  3 +-
 src/core/hle/kernel/hle_ipc.cpp        | 24 ++++----
 src/core/hle/kernel/scheduler.cpp      |  9 +++
 src/core/hle/kernel/scheduler.h        |  2 +
 src/core/hle/kernel/server_session.cpp | 15 +++--
 src/core/hle/kernel/svc.cpp            | 21 +++++--
 src/core/hle/kernel/thread.cpp         | 14 +++--
 src/core/hle/kernel/thread.h           | 82 +++++++++++++++++---------
 8 files changed, 116 insertions(+), 54 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 9b93371318..6032cb0bf1 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -81,8 +81,7 @@ void CpuManager::RunGuestThread() {
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
         if (!physical_core.IsInterrupted()) {
-            physical_core.Idle();
-            // physical_core.Run();
+            physical_core.Run();
         }
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index ba0eac4c27..5917640caa 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,6 +14,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/kernel.h"
@@ -21,7 +22,9 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 
@@ -46,11 +49,10 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
     const std::string& reason, u64 timeout, WakeupCallback&& callback,
     std::shared_ptr<WritableEvent> writable_event) {
     // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->SetWakeupCallback(
+    thread->SetHLECallback(
         [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                                     std::shared_ptr<SynchronizationObject> object,
                                     std::size_t index) mutable -> bool {
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
             callback(thread, context, reason);
             context.WriteToOutgoingCommandBuffer(*thread);
             return true;
@@ -62,14 +64,16 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
         writable_event = pair.writable;
     }
 
-    const auto readable_event{writable_event->GetReadableEvent()};
-    writable_event->Clear();
-    thread->SetStatus(ThreadStatus::WaitHLEEvent);
-    thread->SetSynchronizationObjects({readable_event});
-    readable_event->AddWaitingThread(thread);
-
-    if (timeout > 0) {
-        thread->WakeAfterDelay(timeout);
+    {
+        Handle event_handle = InvalidHandle;
+        SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
+        const auto readable_event{writable_event->GetReadableEvent()};
+        writable_event->Clear();
+        thread->SetStatus(ThreadStatus::WaitHLEEvent);
+        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+        readable_event->AddWaitingThread(thread);
+        lock.Release();
+        thread->SetHLETimeEvent(event_handle);
     }
 
     is_thread_waiting = true;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5166020a00..0e85ee69ed 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -715,4 +715,13 @@ SchedulerLockAndSleep::~SchedulerLockAndSleep() {
     time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
 }
 
+void SchedulerLockAndSleep::Release() {
+    if (sleep_cancelled) {
+        return;
+    }
+    auto& time_manager = kernel.TimeManager();
+    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
+    sleep_cancelled = true;
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 16655b03fe..f5f64338f6 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -279,6 +279,8 @@ public:
         sleep_cancelled = true;
     }
 
+    void Release();
+
 private:
     Handle& event_handle;
     Thread* time_task;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 25438b86bb..05516a453f 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -19,6 +19,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/memory.h"
 
@@ -168,9 +169,12 @@ ResultCode ServerSession::CompleteSyncRequest() {
     }
 
     // Some service requests require the thread to block
-    if (!context.IsThreadWaiting()) {
-        context.GetThread().ResumeFromWait();
-        context.GetThread().SetWaitSynchronizationResult(result);
+    {
+        SchedulerLock lock(kernel);
+        if (!context.IsThreadWaiting()) {
+            context.GetThread().ResumeFromWait();
+            context.GetThread().SetSynchronizationResults(nullptr, result);
+        }
     }
 
     request_queue.Pop();
@@ -180,8 +184,9 @@ ResultCode ServerSession::CompleteSyncRequest() {
 
 ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
                                             Core::Memory::Memory& memory) {
-    Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {});
-    return QueueSyncRequest(std::move(thread), memory);
+    ResultCode result = QueueSyncRequest(std::move(thread), memory);
+    Core::System::GetInstance().CoreTiming().ScheduleEvent(0, request_event, {});
+    return result;
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 0d905c0ca5..768d72b92b 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -38,6 +38,7 @@
 #include "core/hle/kernel/svc_wrap.h"
 #include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
 #include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/lock.h"
@@ -318,11 +319,23 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
     LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
 
     auto thread = system.CurrentScheduler().GetCurrentThread();
-    thread->InvalidateWakeupCallback();
-    thread->SetStatus(ThreadStatus::WaitIPC);
-    system.PrepareReschedule(thread->GetProcessorID());
+    {
+        SchedulerLock lock(system.Kernel());
+        thread->InvalidateHLECallback();
+        thread->SetStatus(ThreadStatus::WaitIPC);
+        session->SendSyncRequest(SharedFrom(thread), system.Memory());
+    }
+    ResultCode result = thread->GetSignalingResult();
+    if (thread->HasHLECallback()) {
+        Handle event_handle = thread->GetHLETimeEvent();
+        if (event_handle != InvalidHandle) {
+            auto& time_manager = system.Kernel().TimeManager();
+            time_manager.UnscheduleTimeEvent(event_handle);
+        }
+        thread->InvokeHLECallback(ThreadWakeupReason::Timeout, SharedFrom(thread), nullptr, 0);
+    }
 
-    return session->SendSyncRequest(SharedFrom(thread), system.Memory());
+    return result;
 }
 
 static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index f100ffc70d..fb97535a3b 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -96,7 +96,7 @@ void Thread::ResumeFromWait() {
     case ThreadStatus::Ready:
         // The thread's wakeup callback must have already been cleared when the thread was first
         // awoken.
-        ASSERT(wakeup_callback == nullptr);
+        ASSERT(hle_callback == nullptr);
         // If the thread is waiting on multiple wait objects, it might be awoken more than once
         // before actually resuming. We can ignore subsequent wakeups if the thread status has
         // already been set to ThreadStatus::Ready.
@@ -112,7 +112,7 @@ void Thread::ResumeFromWait() {
         return;
     }
 
-    wakeup_callback = nullptr;
+    hle_callback = nullptr;
 
     if (activity == ThreadActivity::Paused) {
         SetStatus(ThreadStatus::Paused);
@@ -398,8 +398,14 @@ bool Thread::AllSynchronizationObjectsReady() const {
 bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                                   std::shared_ptr<SynchronizationObject> object,
                                   std::size_t index) {
-    ASSERT(wakeup_callback);
-    return wakeup_callback(reason, std::move(thread), std::move(object), index);
+    ASSERT(hle_callback);
+    return hle_callback(reason, std::move(thread), std::move(object), index);
+}
+
+bool Thread::InvokeHLECallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                               std::shared_ptr<SynchronizationObject> object, std::size_t index) {
+    ASSERT(hle_callback);
+    return hle_callback(reason, std::move(thread), std::move(object), index);
 }
 
 void Thread::SetActivity(ThreadActivity value) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index a8ae1a66f2..04496f96ed 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -31,12 +31,12 @@ class Process;
 class Scheduler;
 
 enum ThreadPriority : u32 {
-    THREADPRIO_HIGHEST = 0,             ///< Highest thread priority
-    THREADPRIO_MAX_CORE_MIGRATION = 2,  ///< Highest priority for a core migration
-    THREADPRIO_USERLAND_MAX = 24,       ///< Highest thread priority for userland apps
-    THREADPRIO_DEFAULT = 44,            ///< Default thread priority for userland apps
-    THREADPRIO_LOWEST = 63,             ///< Lowest thread priority
-    THREADPRIO_COUNT = 64,              ///< Total number of possible thread priorities.
+    THREADPRIO_HIGHEST = 0,            ///< Highest thread priority
+    THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration
+    THREADPRIO_USERLAND_MAX = 24,      ///< Highest thread priority for userland apps
+    THREADPRIO_DEFAULT = 44,           ///< Default thread priority for userland apps
+    THREADPRIO_LOWEST = 63,            ///< Lowest thread priority
+    THREADPRIO_COUNT = 64,             ///< Total number of possible thread priorities.
 };
 
 enum ThreadType : u32 {
@@ -129,23 +129,24 @@ public:
     using WakeupCallback =
         std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                            std::shared_ptr<SynchronizationObject> object, std::size_t index)>;
+    using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
 
-   /**
-    * Creates and returns a new thread. The new thread is immediately scheduled
-    * @param system The instance of the whole system
-    * @param name The friendly name desired for the thread
-    * @param entry_point The address at which the thread should start execution
-    * @param priority The thread's priority
-    * @param arg User data to pass to the thread
-    * @param processor_id The ID(s) of the processors on which the thread is desired to be run
-    * @param stack_top The address of the thread's stack top
-    * @param owner_process The parent process for the thread, if null, it's a kernel thread
-    * @return A shared pointer to the newly created thread
-    */
-   static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, std::string name,
-                                                    VAddr entry_point, u32 priority, u64 arg,
-                                                    s32 processor_id, VAddr stack_top,
-                                                    Process* owner_process);
+    /**
+     * Creates and returns a new thread. The new thread is immediately scheduled
+     * @param system The instance of the whole system
+     * @param name The friendly name desired for the thread
+     * @param entry_point The address at which the thread should start execution
+     * @param priority The thread's priority
+     * @param arg User data to pass to the thread
+     * @param processor_id The ID(s) of the processors on which the thread is desired to be run
+     * @param stack_top The address of the thread's stack top
+     * @param owner_process The parent process for the thread, if null, it's a kernel thread
+     * @return A shared pointer to the newly created thread
+     */
+    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
+                                                     std::string name, VAddr entry_point,
+                                                     u32 priority, u64 arg, s32 processor_id,
+                                                     VAddr stack_top, Process* owner_process);
 
     /**
      * Creates and returns a new thread. The new thread is immediately scheduled
@@ -161,10 +162,10 @@ public:
      * @param thread_start_parameter The parameter which will passed to host context on init
      * @return A shared pointer to the newly created thread
      */
-    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, std::string name,
-                                                     VAddr entry_point, u32 priority, u64 arg,
-                                                     s32 processor_id, VAddr stack_top,
-                                                     Process* owner_process,
+    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
+                                                     std::string name, VAddr entry_point,
+                                                     u32 priority, u64 arg, s32 processor_id,
+                                                     VAddr stack_top, Process* owner_process,
                                                      std::function<void(void*)>&& thread_start_func,
                                                      void* thread_start_parameter);
 
@@ -447,17 +448,37 @@ public:
     }
 
     bool HasWakeupCallback() const {
-        return wakeup_callback != nullptr;
+        return hle_callback != nullptr;
+    }
+
+    bool HasHLECallback() const {
+        return hle_callback != nullptr;
     }
 
     void SetWakeupCallback(WakeupCallback callback) {
-        wakeup_callback = std::move(callback);
+        hle_callback = std::move(callback);
+    }
+
+    void SetHLECallback(WakeupCallback callback) {
+        hle_callback = std::move(callback);
+    }
+
+    void SetHLETimeEvent(Handle time_event) {
+        hle_time_event = time_event;
+    }
+
+    Handle GetHLETimeEvent() const {
+        return hle_time_event;
     }
 
     void InvalidateWakeupCallback() {
         SetWakeupCallback(nullptr);
     }
 
+    void InvalidateHLECallback() {
+        SetHLECallback(nullptr);
+    }
+
     /**
      * Invokes the thread's wakeup callback.
      *
@@ -466,6 +487,8 @@ public:
      */
     bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                               std::shared_ptr<SynchronizationObject> object, std::size_t index);
+    bool InvokeHLECallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                           std::shared_ptr<SynchronizationObject> object, std::size_t index);
 
     u32 GetIdealCore() const {
         return ideal_core;
@@ -600,7 +623,8 @@ private:
     /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
     /// was waiting via WaitSynchronization then the object will be the last object that became
     /// available. In case of a timeout, the object will be nullptr.
-    WakeupCallback wakeup_callback;
+    WakeupCallback hle_callback;
+    Handle hle_time_event;
 
     Scheduler* scheduler = nullptr;
 

From 1400998bfa0645afa238eed233e439244c59c859 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 10:44:21 -0400
Subject: [PATCH 044/122] CPU_Manager: Reconfigure guest threads for dynarmic
 downsides

---
 src/core/cpu_manager.cpp              | 3 ++-
 src/core/hle/kernel/physical_core.cpp | 3 +++
 src/core/hle/kernel/physical_core.h   | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 6032cb0bf1..241971ff38 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -80,9 +80,10 @@ void CpuManager::RunGuestThread() {
     }
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
-        if (!physical_core.IsInterrupted()) {
+        while (!physical_core.IsInterrupted()) {
             physical_core.Run();
         }
+        physical_core.ClearExclusive();
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
     }
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 69202540be..ff14fcb424 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -45,6 +45,9 @@ PhysicalCore::~PhysicalCore() = default;
 
 void PhysicalCore::Run() {
     arm_interface->Run();
+}
+
+void PhysicalCore::ClearExclusive() {
     arm_interface->ClearExclusiveState();
 }
 
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index c3da30b721..cd2e42fc3c 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -38,6 +38,8 @@ public:
 
     /// Execute current jit state
     void Run();
+    /// Clear Exclusive state.
+    void ClearExclusive();
     /// Set this core in IdleState.
     void Idle();
     /// Execute a single instruction in current jit.

From 9aa352dafae137d9a40a2334d1de6e056dfe0c07 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 18:55:11 -0400
Subject: [PATCH 045/122] SVC: Cleanup old methods.

---
 src/core/hle/kernel/svc.cpp | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 768d72b92b..8634d3feb9 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -622,7 +622,6 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
 
         // Kill the current thread
         current_thread->Stop();
-        system.PrepareReschedule();
     }
 }
 
@@ -1004,6 +1003,7 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
 /// Sets the thread activity
 static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
+    UNIMPLEMENTED();
     if (activity > static_cast<u32>(ThreadActivity::Paused)) {
         return ERR_INVALID_ENUM_VALUE;
     }
@@ -1032,7 +1032,6 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
 
     thread->SetActivity(static_cast<ThreadActivity>(activity));
 
-    system.PrepareReschedule(thread->GetProcessorID());
     return RESULT_SUCCESS;
 }
 
@@ -1385,6 +1384,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
 /// Exits the current process
 static void ExitProcess(Core::System& system) {
     auto* current_process = system.Kernel().CurrentProcess();
+    UNIMPLEMENTED();
 
     LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
     ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1394,8 +1394,6 @@ static void ExitProcess(Core::System& system) {
 
     // Kill the current thread
     system.CurrentScheduler().GetCurrentThread()->Stop();
-
-    system.PrepareReschedule();
 }
 
 /// Creates a new thread
@@ -1458,8 +1456,6 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
     thread->SetName(
         fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
 
-    system.PrepareReschedule(thread->GetProcessorID());
-
     return RESULT_SUCCESS;
 }
 
@@ -1545,6 +1541,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
         return ERR_INVALID_ADDRESS;
     }
 
+    UNIMPLEMENTED();
+
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
 
     auto* const current_process = system.Kernel().CurrentProcess();
@@ -1569,7 +1567,6 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    system.PrepareReschedule(current_thread->GetProcessorID());
     return RESULT_SUCCESS;
 }
 
@@ -1580,6 +1577,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
 
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
 
+    UNIMPLEMENTED();
+
     // Retrieve a list of all threads that are waiting for this condition variable.
     auto* const current_process = system.Kernel().CurrentProcess();
     std::vector<std::shared_ptr<Thread>> waiting_threads =
@@ -1634,7 +1633,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
             thread->SetMutexWaitAddress(0);
             thread->SetWaitHandle(0);
             thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-            system.PrepareReschedule(thread->GetProcessorID());
         } else {
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
@@ -1646,7 +1644,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
             thread->SetStatus(ThreadStatus::WaitMutex);
 
             owner->AddMutexWaiter(thread);
-            system.PrepareReschedule(thread->GetProcessorID());
         }
     }
 }
@@ -1661,6 +1658,7 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
     LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
               type, value, timeout);
 
+    UNIMPLEMENTED();
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Core::Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
@@ -1677,9 +1675,6 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
     auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
     const ResultCode result =
         address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
-    if (result == RESULT_SUCCESS) {
-        system.PrepareReschedule();
-    }
     return result;
 }
 
@@ -1689,6 +1684,8 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
     LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
               address, type, value, num_to_wake);
 
+    UNIMPLEMENTED();
+
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Core::Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
@@ -1945,7 +1942,6 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {
     }
 
     writable_event->Signal();
-    system.PrepareReschedule();
     return RESULT_SUCCESS;
 }
 

From 90f63c0f87c5d797368edd282a1ff73a6903de23 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 26 Feb 2020 22:26:53 -0400
Subject: [PATCH 046/122] SVC: Correct WaitSynchronization, WaitProcessWideKey,
 SignalProcessWideKey.

---
 src/core/hle/kernel/process.cpp               |  1 -
 src/core/hle/kernel/scheduler.cpp             |  2 +-
 src/core/hle/kernel/svc.cpp                   | 75 ++++++++++++-------
 src/core/hle/kernel/synchronization.cpp       |  8 +-
 .../hle/kernel/synchronization_object.cpp     |  4 +
 src/core/hle/kernel/synchronization_object.h  |  2 +
 src/core/hle/kernel/thread.cpp                |  2 +-
 src/core/hle/kernel/time_manager.cpp          | 18 ++++-
 src/core/hle/kernel/time_manager.h            |  5 ++
 9 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 174713a5a6..9123a5ee6d 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -185,7 +185,6 @@ void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {
         }
         ++it;
     }
-    UNREACHABLE();
 }
 
 std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads(
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 0e85ee69ed..758fa81881 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -632,7 +632,7 @@ void Scheduler::SwitchContext() {
             cpu_core.SaveContext(previous_thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
             previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
-
+            cpu_core.ClearExclusiveState();
         }
         if (previous_thread->GetStatus() == ThreadStatus::Running) {
             previous_thread->SetStatus(ThreadStatus::Ready);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 8634d3feb9..a5193063b4 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1541,33 +1541,50 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
         return ERR_INVALID_ADDRESS;
     }
 
-    UNIMPLEMENTED();
-
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
-
+    auto& kernel = system.Kernel();
+    Handle event_handle;
+    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
     auto* const current_process = system.Kernel().CurrentProcess();
-    const auto& handle_table = current_process->GetHandleTable();
-    std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
-    ASSERT(thread);
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
+        const auto& handle_table = current_process->GetHandleTable();
+        std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
+        ASSERT(thread);
 
-    const auto release_result = current_process->GetMutex().Release(mutex_addr);
-    if (release_result.IsError()) {
-        return release_result;
+        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+
+        const auto release_result = current_process->GetMutex().Release(mutex_addr);
+        if (release_result.IsError()) {
+            lock.CancelSleep();
+            return release_result;
+        }
+
+        if (nano_seconds == 0) {
+            lock.CancelSleep();
+            return RESULT_TIMEOUT;
+        }
+
+        current_thread->SetCondVarWaitAddress(condition_variable_addr);
+        current_thread->SetMutexWaitAddress(mutex_addr);
+        current_thread->SetWaitHandle(thread_handle);
+        current_thread->SetStatus(ThreadStatus::WaitCondVar);
+        current_process->InsertConditionVariableThread(SharedFrom(current_thread));
     }
 
-    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
-    current_thread->SetCondVarWaitAddress(condition_variable_addr);
-    current_thread->SetMutexWaitAddress(mutex_addr);
-    current_thread->SetWaitHandle(thread_handle);
-    current_thread->SetStatus(ThreadStatus::WaitCondVar);
-    current_thread->InvalidateWakeupCallback();
-    current_process->InsertConditionVariableThread(SharedFrom(current_thread));
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
+    }
 
-    current_thread->WakeAfterDelay(nano_seconds);
+    {
+        SchedulerLock lock(kernel);
 
+        current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
+    }
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    return RESULT_SUCCESS;
+    return current_thread->GetSignalingResult();
 }
 
 /// Signal process wide key
@@ -1577,10 +1594,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
 
     ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
 
-    UNIMPLEMENTED();
-
     // Retrieve a list of all threads that are waiting for this condition variable.
-    auto* const current_process = system.Kernel().CurrentProcess();
+    auto& kernel = system.Kernel();
+    SchedulerLock lock(kernel);
+    auto* const current_process = kernel.CurrentProcess();
     std::vector<std::shared_ptr<Thread>> waiting_threads =
         current_process->GetConditionVariableThreads(condition_variable_addr);
 
@@ -1589,10 +1606,18 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
     std::size_t last = waiting_threads.size();
     if (target > 0)
         last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
-
+    auto& time_manager = kernel.TimeManager();
     for (std::size_t index = 0; index < last; ++index) {
         auto& thread = waiting_threads[index];
 
+        if (thread->GetStatus() != ThreadStatus::WaitCondVar) {
+            last++;
+            last = std::min(waiting_threads.size(), last);
+            continue;
+        }
+
+        time_manager.CancelTimeEvent(thread.get());
+
         ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
 
         // liberate Cond Var Thread.
@@ -1630,17 +1655,13 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
             }
 
             thread->SetLockOwner(nullptr);
-            thread->SetMutexWaitAddress(0);
-            thread->SetWaitHandle(0);
-            thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
+            thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
         } else {
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
             const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
-            thread->InvalidateWakeupCallback();
             thread->SetStatus(ThreadStatus::WaitMutex);
 
             owner->AddMutexWaiter(thread);
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index b36e550a04..c60c5bb422 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -17,12 +17,15 @@ namespace Kernel {
 Synchronization::Synchronization(Core::System& system) : system{system} {}
 
 void Synchronization::SignalObject(SynchronizationObject& obj) const {
-    SchedulerLock lock(system.Kernel());
+    auto& kernel = system.Kernel();
+    SchedulerLock lock(kernel);
+    auto& time_manager = kernel.TimeManager();
     if (obj.IsSignaled()) {
         for (auto thread : obj.GetWaitingThreads()) {
             if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
                 thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
                 thread->ResumeFromWait();
+                time_manager.CancelTimeEvent(thread.get());
             }
         }
     }
@@ -79,6 +82,9 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
         SchedulerLock lock(kernel);
         ResultCode signaling_result = thread->GetSignalingResult();
         SynchronizationObject* signaling_object = thread->GetSignalingObject();
+        for (auto& obj : sync_objects) {
+            obj->RemoveWaitingThread(SharedFrom(thread));
+        }
         if (signaling_result == RESULT_SUCCESS) {
             const auto itr = std::find_if(
                 sync_objects.begin(), sync_objects.end(),
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
index 43f3eef185..be9e091068 100644
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -102,6 +102,10 @@ void SynchronizationObject::WakeupAllWaitingThreads() {
     }
 }
 
+void SynchronizationObject::ClearWaitingThreads() {
+    waiting_threads.clear();
+}
+
 const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
     return waiting_threads;
 }
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index 0a0d069e0e..a35544ac13 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -68,6 +68,8 @@ public:
     /// Get a const reference to the waiting threads list for debug use
     const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
 
+    void ClearWaitingThreads();
+
 protected:
     bool is_signaled{}; // Tells if this sync object is signalled;
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index fb97535a3b..a645ee3a20 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -49,12 +49,12 @@ Thread::~Thread() = default;
 void Thread::Stop() {
     SchedulerLock lock(kernel);
     // Cancel any outstanding wakeup events for this thread
+    Signal();
     Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
                                                              global_handle);
     kernel.GlobalHandleTable().Close(global_handle);
     global_handle = 0;
     SetStatus(ThreadStatus::Dead);
-    Signal();
 
     owner_process->UnregisterThread(this);
 
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 0b8f0d993b..dab5fc4c6f 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -8,15 +8,21 @@
 #include "core/core_timing_util.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 
 namespace Kernel {
 
-TimeManager::TimeManager(Core::System& system) : system{system} {
+TimeManager::TimeManager(Core::System& system_) : system{system_} {
     time_manager_event_type = Core::Timing::CreateEvent(
         "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
+            SchedulerLock lock(system.Kernel());
             Handle proper_handle = static_cast<Handle>(thread_handle);
+            if (cancelled_events[proper_handle]) {
+                return;
+            }
+            event_fired[proper_handle] = true;
             std::shared_ptr<Thread> thread =
                 this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
             thread->OnWakeUp();
@@ -24,14 +30,16 @@ TimeManager::TimeManager(Core::System& system) : system{system} {
 }
 
 void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) {
     if (nanoseconds > 0) {
         ASSERT(timetask);
         event_handle = timetask->GetGlobalHandle();
         const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
         system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle);
     } else {
         event_handle = InvalidHandle;
     }
+    cancelled_events[event_handle] = false; // clear flag set by UnscheduleTimeEvent
+    event_fired[event_handle] = false;      // clear flag set by the timer callback
 }
 
 void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
@@ -39,6 +47,12 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
         return;
     }
     system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle);
+    cancelled_events[event_handle] = true;
+}
+
+void TimeManager::CancelTimeEvent(Thread* time_task) {
+    // Time events are keyed by the owning thread's global handle.
+    UnscheduleTimeEvent(time_task->GetGlobalHandle());
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
index eaec486d1a..3080ac8383 100644
--- a/src/core/hle/kernel/time_manager.h
+++ b/src/core/hle/kernel/time_manager.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <memory>
+#include <unordered_map>
 
 #include "core/hle/kernel/object.h"
 
@@ -35,9 +36,13 @@ public:
     /// Unschedule an existing time event
     void UnscheduleTimeEvent(Handle event_handle);
 
+    void CancelTimeEvent(Thread* time_task);
+
 private:
     Core::System& system;
     std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
+    std::unordered_map<Handle, bool> cancelled_events;
+    std::unordered_map<Handle, bool> event_fired;
 };
 
 } // namespace Kernel

From 4df9cec67c1bb3c6f1c07f02810edc03881e9424 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 10:28:44 -0400
Subject: [PATCH 047/122] SVC: Add locks to the memory management.

---
 src/core/hle/kernel/svc.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index a5193063b4..279fe5888d 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -135,6 +135,7 @@ enum class ResourceLimitValueType {
 
 ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
                                           u32 resource_type, ResourceLimitValueType value_type) {
+    std::lock_guard lock{HLE::g_hle_lock};
     const auto type = static_cast<ResourceType>(resource_type);
     if (!IsValidResourceType(type)) {
         LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
@@ -162,6 +163,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
 static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
 
     // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -192,6 +194,7 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s
 
 static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
                                      u32 attribute) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC,
               "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
               size, mask, attribute);
@@ -230,6 +233,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
 
 /// Maps a memory range into a different range.
 static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
@@ -245,6 +249,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
 
 /// Unmaps a region that was previously mapped with svcMapMemory
 static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
               src_addr, size);
 
@@ -261,6 +266,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
 /// Connect to an OS service given the port name, returns the handle to the port to out
 static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
                                      VAddr port_name_address) {
+    std::lock_guard lock{HLE::g_hle_lock};
     auto& memory = system.Memory();
 
     if (!memory.IsValidVirtualAddress(port_name_address)) {
@@ -309,6 +315,7 @@ static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
 
 /// Makes a blocking IPC call to an OS service.
 static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
+    std::lock_guard lock{HLE::g_hle_lock};
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {
@@ -639,6 +646,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre
 /// Gets system/memory information for the current process
 static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
                           u64 info_sub_id) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
               info_sub_id, handle);
 
@@ -904,6 +912,7 @@ static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_h
 
 /// Maps memory at a desired address
 static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
 
     if (!Common::Is4KBAligned(addr)) {
@@ -953,6 +962,7 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
 
 /// Unmaps memory previously mapped via MapPhysicalMemory
 static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
 
     if (!Common::Is4KBAligned(addr)) {
@@ -1129,6 +1139,7 @@ static u32 GetCurrentProcessorNumber(Core::System& system) {
 
 static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
                                   u64 size, u32 permissions) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC,
               "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
               shared_memory_handle, addr, size, permissions);
@@ -1202,6 +1213,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
 static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
                                      VAddr page_info_address, Handle process_handle,
                                      VAddr address) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     std::shared_ptr<Process> process = handle_table.Get<Process>(process_handle);
@@ -1782,6 +1794,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
 /// Creates a TransferMemory object
 static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
                                        u32 permissions) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
               permissions);
 
@@ -1993,6 +2006,7 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
 }
 
 static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
+    std::lock_guard lock{HLE::g_hle_lock};
     LOG_DEBUG(Kernel_SVC, "called");
 
     auto& kernel = system.Kernel();
@@ -2439,6 +2453,13 @@ MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 void Call(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
+    auto& physical_core = system.CurrentPhysicalCore();
+    if (physical_core.IsInterrupted()) {
+        auto& sched = physical_core.Scheduler();
+        sched.TryDoContextSwitch();
+    }
+    physical_core.ClearExclusive();
+
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
     if (info) {

From 6ae71376f3456fe378fcb8d38a0c200355cb1b33 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 10:47:02 -0400
Subject: [PATCH 048/122] NVFlinger: Lock race condition between CPU, Host
 Timing, VSync.

---
 src/core/hle/service/nvflinger/nvflinger.cpp | 2 ++
 src/core/hle/service/nvflinger/nvflinger.h   | 7 +++++++
 src/core/hle/service/vi/vi.cpp               | 2 ++
 3 files changed, 11 insertions(+)

diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index aaf28995db..b97f713500 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -36,10 +36,12 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
     displays.emplace_back(2, "Edid", system);
     displays.emplace_back(3, "Internal", system);
     displays.emplace_back(4, "Null", system);
+    guard = std::make_shared<std::mutex>();
 
     // Schedule the screen composition events
     composition_event =
         Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) {
+            const auto lock_guard = Lock(); // hold the mutex for the whole callback
             Compose();
             const auto ticks = GetNextTicks();
             this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 57a21f33b6..02c081494d 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include <optional>
 #include <string>
 #include <string_view>
@@ -79,6 +80,10 @@ public:
 
     s64 GetNextTicks() const;
 
+    std::unique_lock<std::mutex> Lock() {
+        return std::unique_lock{*guard};
+    }
+
 private:
     /// Finds the display identified by the specified ID.
     VI::Display* FindDisplay(u64 display_id);
@@ -108,6 +113,8 @@ private:
     /// Event that handles screen composition.
     std::shared_ptr<Core::Timing::EventType> composition_event;
 
+    std::shared_ptr<std::mutex> guard;
+
     Core::System& system;
 };
 
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 46e14c2a3f..1570920746 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -511,6 +511,7 @@ private:
         LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                   static_cast<u32>(transaction), flags);
 
+        const auto guard = nv_flinger->Lock(); // keep the lock until this scope ends
         auto& buffer_queue = nv_flinger->FindBufferQueue(id);
 
         switch (transaction) {
@@ -550,6 +551,7 @@ private:
                     [=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                         Kernel::ThreadWakeupReason reason) {
                         // Repeat TransactParcel DequeueBuffer when a buffer is available
+                        const auto guard = nv_flinger->Lock(); // held until the callback returns
                         auto& buffer_queue = nv_flinger->FindBufferQueue(id);
                         auto result = buffer_queue.DequeueBuffer(width, height);
                         ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");

From 15a54c844d31a8dca791d240fd9fc2900bc4f35d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 11:25:42 -0400
Subject: [PATCH 049/122] SVC: Correct races on physical core switching.

---
 src/core/cpu_manager.cpp    | 11 ++++++-----
 src/core/hle/kernel/svc.cpp |  9 ++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 241971ff38..904aacd97e 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -79,12 +79,13 @@ void CpuManager::RunGuestThread() {
         sched.OnThreadStart();
     }
     while (true) {
-        auto& physical_core = kernel.CurrentPhysicalCore();
-        while (!physical_core.IsInterrupted()) {
-            physical_core.Run();
+        auto* physical_core = &kernel.CurrentPhysicalCore();
+        while (!physical_core->IsInterrupted()) {
+            physical_core->Run();
+            physical_core = &kernel.CurrentPhysicalCore();
         }
-        physical_core.ClearExclusive();
-        auto& scheduler = physical_core.Scheduler();
+        physical_core->ClearExclusive();
+        auto& scheduler = physical_core->Scheduler();
         scheduler.TryDoContextSwitch();
     }
 }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 279fe5888d..1e6c60d78e 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2454,11 +2454,6 @@ void Call(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
     auto& physical_core = system.CurrentPhysicalCore();
-    if (physical_core.IsInterrupted()) {
-        auto& sched = physical_core.Scheduler();
-        sched.TryDoContextSwitch();
-    }
-    physical_core.ClearExclusive();
 
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
@@ -2471,6 +2466,10 @@ void Call(Core::System& system, u32 immediate) {
     } else {
         LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);
     }
+    auto& physical_core_2 = system.CurrentPhysicalCore();
+    if (physical_core.CoreIndex() != physical_core_2.CoreIndex()) {
+        physical_core.Stop();
+    }
 }
 
 } // namespace Kernel::Svc

From e9174a2c0d56ac2c0317dbe30b5d6015d89e8a5a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 27 Feb 2020 19:12:41 -0400
Subject: [PATCH 050/122] General: Add better safety for JIT use.

---
 src/core/arm/arm_interface.h      | 10 ++++++++++
 src/core/cpu_manager.cpp          | 25 +++++++++++++++++++------
 src/core/cpu_manager.h            |  2 ++
 src/core/hle/kernel/scheduler.cpp |  3 +++
 src/core/hle/kernel/svc.cpp       |  6 +++++-
 5 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 87a1c29cc9..be9f3703a6 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <vector>
+#include <mutex>
 #include "common/common_types.h"
 
 namespace Common {
@@ -164,6 +165,14 @@ public:
         std::string name;
     };
 
+    void Lock() {
+        guard.lock();
+    }
+
+    void Unlock() {
+        guard.unlock();
+    }
+
     std::vector<BacktraceEntry> GetBacktrace() const;
 
     /// fp (= r29) points to the last frame record.
@@ -178,6 +187,7 @@ protected:
     /// System context that this ARM interface is running under.
     System& system;
     CPUInterruptHandler& interrupt_handler;
+    std::mutex guard;
 };
 
 } // namespace Core
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 904aacd97e..9a261968a7 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -46,6 +46,11 @@ void CpuManager::GuestThreadFunction(void* cpu_manager_) {
     cpu_manager->RunGuestThread();
 }
 
+void CpuManager::GuestRewindFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    cpu_manager->RunGuestLoop();
+}
+
 void CpuManager::IdleThreadFunction(void* cpu_manager_) {
     CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
     cpu_manager->RunIdleThread();
@@ -78,14 +83,22 @@ void CpuManager::RunGuestThread() {
         auto& sched = kernel.CurrentScheduler();
         sched.OnThreadStart();
     }
+    RunGuestLoop();
+}
+
+void CpuManager::RunGuestLoop() {
+    auto& kernel = system.Kernel();
+    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    auto host_context = thread->GetHostContext();
+    host_context->SetRewindPoint(std::function<void(void*)>(GuestRewindFunction), this);
+    host_context.reset();
     while (true) {
-        auto* physical_core = &kernel.CurrentPhysicalCore();
-        while (!physical_core->IsInterrupted()) {
-            physical_core->Run();
-            physical_core = &kernel.CurrentPhysicalCore();
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        while (!physical_core.IsInterrupted()) {
+            physical_core.Run();
         }
-        physical_core->ClearExclusive();
-        auto& scheduler = physical_core->Scheduler();
+        physical_core.ClearExclusive();
+        auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
     }
 }
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 8103ae857d..e83ab20f9c 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -42,10 +42,12 @@ public:
 
 private:
     static void GuestThreadFunction(void* cpu_manager);
+    static void GuestRewindFunction(void* cpu_manager);
     static void IdleThreadFunction(void* cpu_manager);
     static void SuspendThreadFunction(void* cpu_manager);
 
     void RunGuestThread();
+    void RunGuestLoop();
     void RunIdleThread();
     void RunSuspendThread();
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 758fa81881..727d2e6cc1 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -581,6 +581,7 @@ void Scheduler::SwitchContextStep2() {
 
     if (new_thread) {
         new_thread->context_guard.lock();
+        cpu_core.Lock();
         ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
                    "Thread must be assigned to this core.");
         ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
@@ -601,6 +602,7 @@ void Scheduler::SwitchContextStep2() {
             cpu_core.LoadContext(new_thread->GetContext64());
             cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
             cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
         }
     } else {
         current_thread = nullptr;
@@ -639,6 +641,7 @@ void Scheduler::SwitchContext() {
         }
         previous_thread->SetIsRunning(false);
         previous_thread->context_guard.unlock();
+        cpu_core.Unlock();
     }
 
     std::shared_ptr<Common::Fiber> old_context;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 1e6c60d78e..b535593c76 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -10,6 +10,7 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "common/string_util.h"
@@ -2468,7 +2469,10 @@ void Call(Core::System& system, u32 immediate) {
     }
     auto& physical_core_2 = system.CurrentPhysicalCore();
     if (physical_core.CoreIndex() != physical_core_2.CoreIndex()) {
-        physical_core.Stop();
+        LOG_CRITICAL(Kernel_SVC, "Rewinding");
+        auto* thread = physical_core_2.Scheduler().GetCurrentThread();
+        auto* host_context = thread->GetHostContext().get();
+        host_context->Rewind();
     }
 }
 

From b5f150e4d332ea04957fee231093d3f335fca37b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 28 Feb 2020 09:42:06 -0400
Subject: [PATCH 051/122] General: Add Asserts

---
 src/core/hardware_properties.h    |  4 ++++
 src/core/hle/kernel/scheduler.cpp | 18 ++++++++++++++++++
 src/core/hle/kernel/scheduler.h   |  1 +
 src/core/hle/kernel/svc.cpp       |  1 +
 4 files changed, 24 insertions(+)

diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index b04e046ed3..456b41e1b1 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -42,6 +42,10 @@ struct EmuThreadHandle {
         constexpr u32 invalid_handle = 0xFFFFFFFF;
         return {invalid_handle, invalid_handle};
     }
+
+    bool IsInvalid() const {
+        return (*this) == InvalidHandle();
+    }
 };
 
 } // namespace Core
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 727d2e6cc1..d67d3c5cdd 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -44,6 +44,7 @@ void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
 }
 
 u32 GlobalScheduler::SelectThreads() {
+    ASSERT(is_locked);
     const auto update_thread = [](Thread* thread, Scheduler& sched) {
         sched.guard.lock();
         if (thread != sched.selected_thread.get()) {
@@ -136,6 +137,7 @@ u32 GlobalScheduler::SelectThreads() {
 }
 
 bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
+    ASSERT(is_locked);
     // Note: caller should use critical section, etc.
     const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
     const u32 priority = yielding_thread->GetPriority();
@@ -149,6 +151,7 @@ bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
 }
 
 bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
+    ASSERT(is_locked);
     // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
     // etc.
     const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
@@ -197,6 +200,7 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
 }
 
 bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
+    ASSERT(is_locked);
     // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
     // etc.
     Thread* winner = nullptr;
@@ -237,6 +241,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
 }
 
 void GlobalScheduler::PreemptThreads() {
+    ASSERT(is_locked);
     for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
         const u32 priority = preemption_priorities[core_id];
 
@@ -339,33 +344,40 @@ void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
 }
 
 void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     suggested_queue[core].add(thread, priority);
 }
 
 void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     suggested_queue[core].remove(thread, priority);
 }
 
 void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
     scheduled_queue[core].add(thread, priority);
 }
 
 void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
     scheduled_queue[core].add(thread, priority, false);
 }
 
 void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     scheduled_queue[core].remove(thread, priority);
     scheduled_queue[core].add(thread, priority);
 }
 
 void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) {
+    ASSERT(is_locked);
     scheduled_queue[core].remove(thread, priority);
 }
 
 void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
+    ASSERT(is_locked);
     const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
     const s32 source_core = thread->GetProcessorID();
     if (source_core == destination_core || !schedulable) {
@@ -399,6 +411,7 @@ void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
     if (old_flags == thread->scheduling_state) {
         return;
     }
+    ASSERT(is_locked);
 
     if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
         ThreadSchedStatus::Runnable) {
@@ -434,6 +447,7 @@ void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priorit
     if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
         return;
     }
+    ASSERT(is_locked);
     if (thread->processor_id >= 0) {
         Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread);
     }
@@ -472,6 +486,7 @@ void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinit
         thread->current_priority >= THREADPRIO_COUNT) {
         return;
     }
+    ASSERT(is_locked);
 
     for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
         if (((old_affinity_mask >> core) & 1) != 0) {
@@ -507,10 +522,12 @@ void GlobalScheduler::Shutdown() {
 
 void GlobalScheduler::Lock() {
     Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
+    ASSERT(!current_thread.IsInvalid());
     if (current_thread == current_owner) {
         ++scope_lock;
     } else {
         inner_lock.lock();
+        is_locked = true;
         current_owner = current_thread;
         ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
         scope_lock = 1;
@@ -526,6 +543,7 @@ void GlobalScheduler::Unlock() {
     Core::EmuThreadHandle leaving_thread = current_owner;
     current_owner = Core::EmuThreadHandle::InvalidHandle();
     scope_lock = 1;
+    is_locked = false;
     inner_lock.unlock();
     EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
 }
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index f5f64338f6..f26a554f50 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -182,6 +182,7 @@ private:
     std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
 
     /// Scheduler lock mechanisms.
+    bool is_locked{};
     std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock
     std::atomic<s64> scope_lock{};
     Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index b535593c76..4c1040a3b9 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1657,6 +1657,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
                 update_val = thread->GetWaitHandle();
             }
         } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
+        monitor.ClearExclusive();
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
             ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);

From 711e59036dc3c9e7761982fceee8e2b119b70e16 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 11:04:37 -0400
Subject: [PATCH 052/122] Process: Protect TLS region and Modules.

---
 src/core/hle/kernel/process.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 9123a5ee6d..0ed1c6cc20 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -22,6 +22,7 @@
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/settings.h"
 
@@ -343,6 +344,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
 }
 
 VAddr Process::CreateTLSRegion() {
+    SchedulerLock lock(system.Kernel());
     if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
         tls_page_iter != tls_pages.cend()) {
         return *tls_page_iter->ReserveSlot();
@@ -373,6 +375,7 @@ VAddr Process::CreateTLSRegion() {
 }
 
 void Process::FreeTLSRegion(VAddr tls_address) {
+    SchedulerLock lock(system.Kernel());
     const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
     auto iter =
         std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
@@ -387,6 +390,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
+    std::lock_guard lock{HLE::g_hle_lock};
     const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
                                       Memory::MemoryPermission permission) {
         page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission);

From 2daf58437db0c7b7172eb30fcc6bba6206473600 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 13:02:50 -0400
Subject: [PATCH 053/122] Core: Correct HLE Event Callbacks and other issues.

---
 src/core/hle/kernel/hle_ipc.cpp         | 19 +++++++------
 src/core/hle/kernel/svc.cpp             |  5 ++--
 src/core/hle/kernel/synchronization.cpp |  1 +
 src/core/hle/kernel/thread.cpp          | 37 ++++++++++++-------------
 src/core/hle/kernel/thread.h            | 16 ++++++-----
 5 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5917640caa..c3d612f341 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -21,8 +21,8 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/readable_event.h"
-#include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/kernel/writable_event.h"
@@ -49,14 +49,6 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
     const std::string& reason, u64 timeout, WakeupCallback&& callback,
     std::shared_ptr<WritableEvent> writable_event) {
     // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->SetHLECallback(
-        [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                    std::shared_ptr<SynchronizationObject> object,
-                                    std::size_t index) mutable -> bool {
-            callback(thread, context, reason);
-            context.WriteToOutgoingCommandBuffer(*thread);
-            return true;
-        });
 
     if (!writable_event) {
         // Create event if not provided
@@ -67,6 +59,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
     {
         Handle event_handle = InvalidHandle;
         SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
+        thread->SetHLECallback(
+            [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
+                ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
+                                                ? ThreadWakeupReason::Timeout
+                                                : ThreadWakeupReason::Signal;
+                callback(thread, context, reason);
+                context.WriteToOutgoingCommandBuffer(*thread);
+                return true;
+            });
         const auto readable_event{writable_event->GetReadableEvent()};
         writable_event->Clear();
         thread->SetStatus(ThreadStatus::WaitHLEEvent);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4c1040a3b9..9f46a17589 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -333,17 +333,16 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
         thread->SetStatus(ThreadStatus::WaitIPC);
         session->SendSyncRequest(SharedFrom(thread), system.Memory());
     }
-    ResultCode result = thread->GetSignalingResult();
     if (thread->HasHLECallback()) {
         Handle event_handle = thread->GetHLETimeEvent();
         if (event_handle != InvalidHandle) {
             auto& time_manager = system.Kernel().TimeManager();
             time_manager.UnscheduleTimeEvent(event_handle);
         }
-        thread->InvokeHLECallback(ThreadWakeupReason::Timeout, SharedFrom(thread), nullptr, 0);
+        thread->InvokeHLECallback(SharedFrom(thread));
     }
 
-    return result;
+    return RESULT_SUCCESS;
 }
 
 static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index c60c5bb422..4ee7ad93ca 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -28,6 +28,7 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const {
                 time_manager.CancelTimeEvent(thread.get());
             }
         }
+        obj.ClearWaitingThreads();
     }
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index a645ee3a20..16babe71a2 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -47,19 +47,21 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Thread::~Thread() = default;
 
 void Thread::Stop() {
-    SchedulerLock lock(kernel);
-    // Cancel any outstanding wakeup events for this thread
-    Signal();
-    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
-                                                             global_handle);
-    kernel.GlobalHandleTable().Close(global_handle);
+    {
+        SchedulerLock lock(kernel);
+        // Cancel any outstanding wakeup events for this thread
+        Signal();
+        Core::System::GetInstance().CoreTiming().UnscheduleEvent(
+            kernel.ThreadWakeupCallbackEventType(), global_handle);
+        kernel.GlobalHandleTable().Close(global_handle);
+        SetStatus(ThreadStatus::Dead);
+
+        owner_process->UnregisterThread(this);
+
+        // Mark the TLS slot in the thread's page as free.
+        owner_process->FreeTLSRegion(tls_address);
+    }
     global_handle = 0;
-    SetStatus(ThreadStatus::Dead);
-
-    owner_process->UnregisterThread(this);
-
-    // Mark the TLS slot in the thread's page as free.
-    owner_process->FreeTLSRegion(tls_address);
 }
 
 void Thread::WakeAfterDelay(s64 nanoseconds) {
@@ -112,8 +114,6 @@ void Thread::ResumeFromWait() {
         return;
     }
 
-    hle_callback = nullptr;
-
     if (activity == ThreadActivity::Paused) {
         SetStatus(ThreadStatus::Paused);
         return;
@@ -398,14 +398,13 @@ bool Thread::AllSynchronizationObjectsReady() const {
 bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                                   std::shared_ptr<SynchronizationObject> object,
                                   std::size_t index) {
-    ASSERT(hle_callback);
-    return hle_callback(reason, std::move(thread), std::move(object), index);
+    ASSERT(wakeup_callback);
+    return wakeup_callback(reason, std::move(thread), std::move(object), index);
 }
 
-bool Thread::InvokeHLECallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                               std::shared_ptr<SynchronizationObject> object, std::size_t index) {
+bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
     ASSERT(hle_callback);
-    return hle_callback(reason, std::move(thread), std::move(object), index);
+    return hle_callback(std::move(thread));
 }
 
 void Thread::SetActivity(ThreadActivity value) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 04496f96ed..c4c9d69ec9 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -448,7 +448,7 @@ public:
     }
 
     bool HasWakeupCallback() const {
-        return hle_callback != nullptr;
+        return wakeup_callback != nullptr;
     }
 
     bool HasHLECallback() const {
@@ -456,10 +456,10 @@ public:
     }
 
     void SetWakeupCallback(WakeupCallback callback) {
-        hle_callback = std::move(callback);
+        wakeup_callback = std::move(callback);
     }
 
-    void SetHLECallback(WakeupCallback callback) {
+    void SetHLECallback(HLECallback callback) {
         hle_callback = std::move(callback);
     }
 
@@ -487,8 +487,7 @@ public:
      */
     bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
                               std::shared_ptr<SynchronizationObject> object, std::size_t index);
-    bool InvokeHLECallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                           std::shared_ptr<SynchronizationObject> object, std::size_t index);
+    bool InvokeHLECallback(std::shared_ptr<Thread> thread);
 
     u32 GetIdealCore() const {
         return ideal_core;
@@ -622,8 +621,11 @@ private:
 
     /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
     /// was waiting via WaitSynchronization then the object will be the last object that became
-    /// available. In case of a timeout, the object will be nullptr.
-    WakeupCallback hle_callback;
+    /// available. In case of a timeout, the object will be nullptr. DEPRECATED
+    WakeupCallback wakeup_callback;
+
+    /// Callback for HLE Events
+    HLECallback hle_callback;
     Handle hle_time_event;
 
     Scheduler* scheduler = nullptr;

From ddc7c342a2bf5b9b76f28f3054b86cc34ed745b6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 13:37:11 -0400
Subject: [PATCH 054/122] Kernel: Correct Signal on Thread Death and Setup Sync
 Objects on Thread for Debugging

---
 src/core/hle/kernel/synchronization.cpp |  3 +++
 src/core/hle/kernel/thread.cpp          | 15 +++++++--------
 src/core/hle/kernel/thread.h            | 14 +++++++-------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index 4ee7ad93ca..ac43a70946 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -70,6 +70,8 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
         for (auto& object : sync_objects) {
             object->AddWaitingThread(SharedFrom(thread));
         }
+
+        thread->SetSynchronizationObjects(&sync_objects);
         thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
         thread->SetStatus(ThreadStatus::WaitSynch);
     }
@@ -83,6 +85,7 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
         SchedulerLock lock(kernel);
         ResultCode signaling_result = thread->GetSignalingResult();
         SynchronizationObject* signaling_object = thread->GetSignalingObject();
+        thread->SetSynchronizationObjects(nullptr);
         for (auto& obj : sync_objects) {
             obj->RemoveWaitingThread(SharedFrom(thread));
         }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 16babe71a2..fb17518605 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -50,11 +50,11 @@ void Thread::Stop() {
     {
         SchedulerLock lock(kernel);
         // Cancel any outstanding wakeup events for this thread
-        Signal();
         Core::System::GetInstance().CoreTiming().UnscheduleEvent(
             kernel.ThreadWakeupCallbackEventType(), global_handle);
-        kernel.GlobalHandleTable().Close(global_handle);
         SetStatus(ThreadStatus::Dead);
+        Signal();
+        kernel.GlobalHandleTable().Close(global_handle);
 
         owner_process->UnregisterThread(this);
 
@@ -81,7 +81,6 @@ void Thread::CancelWakeupTimer() {
 }
 
 void Thread::ResumeFromWait() {
-    ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
     SchedulerLock lock(kernel);
     switch (status) {
     case ThreadStatus::Paused:
@@ -219,7 +218,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
     thread->affinity_mask = 1ULL << processor_id;
-    thread->wait_objects.clear();
+    thread->wait_objects = nullptr;
     thread->mutex_wait_address = 0;
     thread->condvar_wait_address = 0;
     thread->wait_handle = 0;
@@ -272,9 +271,9 @@ void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode
 }
 
 s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
-    ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
-    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
-    return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
+    ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything");
+    const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object);
+    return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);
 }
 
 VAddr Thread::GetCommandBufferAddress() const {
@@ -389,7 +388,7 @@ void Thread::UpdatePriority() {
 }
 
 bool Thread::AllSynchronizationObjectsReady() const {
-    return std::none_of(wait_objects.begin(), wait_objects.end(),
+    return std::none_of(wait_objects->begin(), wait_objects->end(),
                         [this](const std::shared_ptr<SynchronizationObject>& object) {
                             return object->ShouldWait(this);
                         });
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c4c9d69ec9..7b6d1b4ec7 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -21,7 +21,7 @@ class Fiber;
 
 namespace Core {
 class System;
-}
+} // namespace Core
 
 namespace Kernel {
 
@@ -386,18 +386,18 @@ public:
     }
 
     const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
-        return wait_objects;
+        return *wait_objects;
     }
 
-    void SetSynchronizationObjects(ThreadSynchronizationObjects objects) {
-        wait_objects = std::move(objects);
+    void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) {
+        wait_objects = objects;
     }
 
     void ClearSynchronizationObjects() {
-        for (const auto& waiting_object : wait_objects) {
+        for (const auto& waiting_object : *wait_objects) {
             waiting_object->RemoveWaitingThread(SharedFrom(this));
         }
-        wait_objects.clear();
+        wait_objects->clear();
     }
 
     /// Determines whether all the objects this thread is waiting on are ready.
@@ -595,7 +595,7 @@ private:
 
     /// Objects that the thread is waiting on, in the same order as they were
     /// passed to WaitSynchronization.
-    ThreadSynchronizationObjects wait_objects;
+    ThreadSynchronizationObjects* wait_objects;
 
     SynchronizationObject* signaling_object;
     ResultCode signaling_result{RESULT_SUCCESS};

From 8f67aa7e448ed9a89f889ba13dbc110d55b44089 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 15:50:38 -0400
Subject: [PATCH 055/122] Kernel: Corrections to Scheduling.

---
 src/core/core_timing.cpp          | 11 +++++++----
 src/core/core_timing.h            |  2 +-
 src/core/hle/kernel/scheduler.cpp | 26 +++++++++++++-------------
 src/core/hle/kernel/scheduler.h   |  2 ++
 src/core/hle/kernel/svc.cpp       |  1 -
 5 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 5a7abcfca9..c91ae99759 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -154,7 +154,7 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
     basic_lock.unlock();
 }
 
-std::optional<u64> CoreTiming::Advance() {
+std::optional<s64> CoreTiming::Advance() {
     advance_lock.lock();
     basic_lock.lock();
     global_timer = GetGlobalTimeNs().count();
@@ -170,10 +170,11 @@ std::optional<u64> CoreTiming::Advance() {
         }
 
         basic_lock.lock();
+        global_timer = GetGlobalTimeNs().count();
     }
 
     if (!event_queue.empty()) {
-        const u64 next_time = event_queue.front().time - global_timer;
+        const s64 next_time = event_queue.front().time - global_timer;
         basic_lock.unlock();
         advance_lock.unlock();
         return next_time;
@@ -191,8 +192,10 @@ void CoreTiming::ThreadLoop() {
             paused_set = false;
             const auto next_time = Advance();
             if (next_time) {
-                std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
-                event.WaitFor(next_time_ns);
+                if (*next_time > 0) {
+                    std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                    event.WaitFor(next_time_ns);
+                }
             } else {
                 wait_set = true;
                 event.Wait();
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index c70b605c89..032eb08aad 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -110,7 +110,7 @@ public:
     std::chrono::nanoseconds GetGlobalTimeNs() const;
 
     /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
-    std::optional<u64> Advance();
+    std::optional<s64> Advance();
 
 private:
     struct Event;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index d67d3c5cdd..da77967dd8 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -47,13 +47,13 @@ u32 GlobalScheduler::SelectThreads() {
     ASSERT(is_locked);
     const auto update_thread = [](Thread* thread, Scheduler& sched) {
         sched.guard.lock();
-        if (thread != sched.selected_thread.get()) {
+        if (thread != sched.selected_thread_set.get()) {
             if (thread == nullptr) {
                 ++sched.idle_selection_count;
             }
-            sched.selected_thread = SharedFrom(thread);
+            sched.selected_thread_set = SharedFrom(thread);
         }
-        const bool reschedule_pending = sched.selected_thread != sched.current_thread;
+        const bool reschedule_pending = sched.selected_thread_set != sched.current_thread;
         sched.is_context_switch_pending = reschedule_pending;
         std::atomic_thread_fence(std::memory_order_seq_cst);
         sched.guard.unlock();
@@ -118,6 +118,8 @@ u32 GlobalScheduler::SelectThreads() {
                                        suggested);
                         top_threads[candidate_core] = next;
                         break;
+                    } else {
+                        suggested = nullptr;
                     }
                 }
             }
@@ -590,7 +592,7 @@ void Scheduler::OnThreadStart() {
 }
 
 void Scheduler::SwitchContextStep2() {
-    Thread* previous_thread = current_thread.get();
+    Thread* previous_thread = current_thread_prev.get();
     Thread* new_thread = selected_thread.get();
 
     // Load context of new thread
@@ -606,8 +608,6 @@ void Scheduler::SwitchContextStep2() {
                    "Thread must be ready to become running.");
 
         // Cancel any outstanding wakeup events for this thread
-        current_thread = SharedFrom(new_thread);
-        new_thread->SetStatus(ThreadStatus::Running);
         new_thread->SetIsRunning(true);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
@@ -622,21 +622,21 @@ void Scheduler::SwitchContextStep2() {
             cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
             cpu_core.ClearExclusiveState();
         }
-    } else {
-        current_thread = nullptr;
-        // Note: We do not reset the current process and current page table when idling because
-        // technically we haven't changed processes, our threads are just paused.
     }
-    guard.unlock();
+
+    TryDoContextSwitch();
 }
 
 void Scheduler::SwitchContext() {
-    Thread* previous_thread = current_thread.get();
+    current_thread_prev = current_thread;
+    selected_thread = selected_thread_set;
+    Thread* previous_thread = current_thread_prev.get();
     Thread* new_thread = selected_thread.get();
+    current_thread = selected_thread;
 
     is_context_switch_pending = false;
+    guard.unlock();
     if (new_thread == previous_thread) {
-        guard.unlock();
         return;
     }
 
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index f26a554f50..f73ca777e4 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -249,6 +249,8 @@ private:
 
     std::shared_ptr<Thread> current_thread = nullptr;
     std::shared_ptr<Thread> selected_thread = nullptr;
+    std::shared_ptr<Thread> current_thread_prev = nullptr;
+    std::shared_ptr<Thread> selected_thread_set = nullptr;
     std::shared_ptr<Thread> idle_thread = nullptr;
 
     Core::System& system;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9f46a17589..5e9dd43bf0 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -316,7 +316,6 @@ static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
 
 /// Makes a blocking IPC call to an OS service.
 static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
-    std::lock_guard lock{HLE::g_hle_lock};
     const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
     std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
     if (!session) {

From c574ad5f63181098f6ab03eb9608c19311bf5e50 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 15:59:09 -0400
Subject: [PATCH 056/122] Scheduler: Correct Select Threads Step 2.

---
 src/core/hle/kernel/scheduler.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index da77967dd8..9329202c68 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -103,6 +103,7 @@ u32 GlobalScheduler::SelectThreads() {
                     TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
                     break;
                 }
+                suggested = nullptr;
                 migration_candidates[num_candidates++] = suggested_core_id;
             }
             // Step 3: Select a suggested thread from another core

From 955823442f57157b7639185ea48b98c17fd43b91 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 3 Mar 2020 17:19:44 -0400
Subject: [PATCH 057/122] SVC: Correct svcWaitForAddress and
 svcSignalToAddress.

---
 src/core/hle/kernel/address_arbiter.cpp | 230 ++++++++++++++++--------
 src/core/hle/kernel/address_arbiter.h   |   3 -
 src/core/hle/kernel/svc.cpp             |   3 -
 src/core/hle/kernel/thread.h            |   9 +
 4 files changed, 169 insertions(+), 76 deletions(-)

diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 8475b698c6..ebabde9213 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -7,11 +7,15 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
 
@@ -20,6 +24,7 @@ namespace Kernel {
 // Wake up num_to_wake (or all) threads in a vector.
 void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
                                  s32 num_to_wake) {
+    auto& time_manager = system.Kernel().TimeManager();
     // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
     // them all.
     std::size_t last = waiting_threads.size();
@@ -29,12 +34,20 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai
 
     // Signal the waiting threads.
     for (std::size_t i = 0; i < last; i++) {
+        if (waiting_threads[i]->GetStatus() != ThreadStatus::WaitArb) {
+            last++;
+            last = std::min(waiting_threads.size(), last);
+            continue;
+        }
+
+        time_manager.CancelTimeEvent(waiting_threads[i].get());
+
         ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
-        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
         RemoveThread(waiting_threads[i]);
+        waiting_threads[i]->WaitForArbitration(false);
         waiting_threads[i]->SetArbiterWaitAddress(0);
         waiting_threads[i]->ResumeFromWait();
-        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
     }
 }
 
@@ -56,6 +69,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v
 }
 
 ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
+    SchedulerLock lock(system.Kernel());
     const std::vector<std::shared_ptr<Thread>> waiting_threads =
         GetThreadsWaitingOnAddress(address);
     WakeThreads(waiting_threads, num_to_wake);
@@ -64,6 +78,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
 
 ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
                                                               s32 num_to_wake) {
+    SchedulerLock lock(system.Kernel());
     auto& memory = system.Memory();
 
     // Ensure that we can write to the address.
@@ -71,16 +86,25 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
         return ERR_INVALID_ADDRESS_STATE;
     }
 
-    if (static_cast<s32>(memory.Read32(address)) != value) {
-        return ERR_INVALID_STATE;
-    }
+    const std::size_t current_core = system.CurrentCoreIndex();
+    auto& monitor = system.Monitor();
+    u32 current_value;
+    do {
+        monitor.SetExclusive(current_core, address);
+        current_value = memory.Read32(address);
+
+        if (current_value != value) {
+            return ERR_INVALID_STATE;
+        }
+        current_value++;
+    } while (!monitor.ExclusiveWrite32(current_core, address, current_value));
 
-    memory.Write32(address, static_cast<u32>(value + 1));
     return SignalToAddressOnly(address, num_to_wake);
 }
 
 ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
                                                                          s32 num_to_wake) {
+    SchedulerLock lock(system.Kernel());
     auto& memory = system.Memory();
 
     // Ensure that we can write to the address.
@@ -92,29 +116,34 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
     const std::vector<std::shared_ptr<Thread>> waiting_threads =
         GetThreadsWaitingOnAddress(address);
 
-    // Determine the modified value depending on the waiting count.
+    const std::size_t current_core = system.CurrentCoreIndex();
+    auto& monitor = system.Monitor();
     s32 updated_value;
-    if (num_to_wake <= 0) {
-        if (waiting_threads.empty()) {
-            updated_value = value + 1;
-        } else {
-            updated_value = value - 1;
-        }
-    } else {
-        if (waiting_threads.empty()) {
-            updated_value = value + 1;
-        } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
-            updated_value = value - 1;
-        } else {
-            updated_value = value;
-        }
-    }
+    do {
+        monitor.SetExclusive(current_core, address);
+        updated_value = memory.Read32(address);
 
-    if (static_cast<s32>(memory.Read32(address)) != value) {
-        return ERR_INVALID_STATE;
-    }
+        if (updated_value != value) {
+            return ERR_INVALID_STATE;
+        }
+        // Determine the modified value depending on the waiting count.
+        if (num_to_wake <= 0) {
+            if (waiting_threads.empty()) {
+                updated_value = value + 1;
+            } else {
+                updated_value = value - 1;
+            }
+        } else {
+            if (waiting_threads.empty()) {
+                updated_value = value + 1;
+            } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+                updated_value = value - 1;
+            } else {
+                updated_value = value;
+            }
+        }
+    } while (!monitor.ExclusiveWrite32(current_core, address, updated_value));
 
-    memory.Write32(address, static_cast<u32>(updated_value));
     WakeThreads(waiting_threads, num_to_wake);
     return RESULT_SUCCESS;
 }
@@ -136,60 +165,121 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s
 ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
                                                     bool should_decrement) {
     auto& memory = system.Memory();
+    auto& kernel = system.Kernel();
+    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
 
-    // Ensure that we can read the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
+    Handle event_handle = InvalidHandle;
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
+
+        // Ensure that we can read the address.
+        if (!memory.IsValidVirtualAddress(address)) {
+            lock.CancelSleep();
+            return ERR_INVALID_ADDRESS_STATE;
+        }
+
+        /// TODO(Blinkhawk): Check termination pending.
+
+        s32 current_value = static_cast<s32>(memory.Read32(address));
+        if (current_value >= value) {
+            lock.CancelSleep();
+            return ERR_INVALID_STATE;
+        }
+
+        s32 decrement_value;
+
+        const std::size_t current_core = system.CurrentCoreIndex();
+        auto& monitor = system.Monitor();
+        do {
+            monitor.SetExclusive(current_core, address);
+            current_value = static_cast<s32>(memory.Read32(address));
+            if (should_decrement) {
+                decrement_value = current_value - 1;
+            } else {
+                decrement_value = current_value;
+            }
+        } while (
+            !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value)));
+
+        // Short-circuit without rescheduling, if timeout is zero.
+        if (timeout == 0) {
+            lock.CancelSleep();
+            return RESULT_TIMEOUT;
+        }
+
+        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+        current_thread->SetArbiterWaitAddress(address);
+        InsertThread(SharedFrom(current_thread));
+        current_thread->SetStatus(ThreadStatus::WaitArb);
+        current_thread->WaitForArbitration(true);
     }
 
-    const s32 cur_value = static_cast<s32>(memory.Read32(address));
-    if (cur_value >= value) {
-        return ERR_INVALID_STATE;
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
     }
 
-    if (should_decrement) {
-        memory.Write32(address, static_cast<u32>(cur_value - 1));
+    {
+        SchedulerLock lock(kernel);
+        if (current_thread->IsWaitingForArbitration()) {
+            RemoveThread(SharedFrom(current_thread));
+            current_thread->WaitForArbitration(false);
+        }
     }
 
-    // Short-circuit without rescheduling, if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddressImpl(address, timeout);
+    return current_thread->GetSignalingResult();
 }
 
 ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
     auto& memory = system.Memory();
-
-    // Ensure that we can read the address.
-    if (!memory.IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    // Only wait for the address if equal.
-    if (static_cast<s32>(memory.Read32(address)) != value) {
-        return ERR_INVALID_STATE;
-    }
-
-    // Short-circuit without rescheduling if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddressImpl(address, timeout);
-}
-
-ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
+    auto& kernel = system.Kernel();
     Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
-    current_thread->SetArbiterWaitAddress(address);
-    InsertThread(SharedFrom(current_thread));
-    current_thread->SetStatus(ThreadStatus::WaitArb);
-    current_thread->InvalidateWakeupCallback();
-    current_thread->WakeAfterDelay(timeout);
 
-    system.PrepareReschedule(current_thread->GetProcessorID());
-    return RESULT_TIMEOUT;
+    Handle event_handle = InvalidHandle;
+    {
+        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
+
+        // Ensure that we can read the address.
+        if (!memory.IsValidVirtualAddress(address)) {
+            lock.CancelSleep();
+            return ERR_INVALID_ADDRESS_STATE;
+        }
+
+        /// TODO(Blinkhawk): Check termination pending.
+
+        s32 current_value = static_cast<s32>(memory.Read32(address));
+        if (current_value != value) {
+            lock.CancelSleep();
+            return ERR_INVALID_STATE;
+        }
+
+        // Short-circuit without rescheduling, if timeout is zero.
+        if (timeout == 0) {
+            lock.CancelSleep();
+            return RESULT_TIMEOUT;
+        }
+
+        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+        current_thread->SetArbiterWaitAddress(address);
+        InsertThread(SharedFrom(current_thread));
+        current_thread->SetStatus(ThreadStatus::WaitArb);
+        current_thread->WaitForArbitration(true);
+    }
+
+    if (event_handle != InvalidHandle) {
+        auto& time_manager = kernel.TimeManager();
+        time_manager.UnscheduleTimeEvent(event_handle);
+    }
+
+    {
+        SchedulerLock lock(kernel);
+        if (current_thread->IsWaitingForArbitration()) {
+            RemoveThread(SharedFrom(current_thread));
+            current_thread->WaitForArbitration(false);
+        }
+    }
+
+    return current_thread->GetSignalingResult();
 }
 
 void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
@@ -221,9 +311,9 @@ void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
     const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
                                    [&thread](const auto& entry) { return thread == entry; });
 
-    ASSERT(iter != thread_list.cend());
-
-    thread_list.erase(iter);
+    if (iter != thread_list.cend()) {
+        thread_list.erase(iter);
+    }
 }
 
 std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index f958eee5a9..0b05d533c7 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -73,9 +73,6 @@ private:
     /// Waits on an address if the value passed is equal to the argument value.
     ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
 
-    // Waits on the given address with a timeout in nanoseconds
-    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
-
     /// Wake up num_to_wake (or all) threads in a vector.
     void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 5e9dd43bf0..718462b2b1 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1691,7 +1691,6 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
     LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
               type, value, timeout);
 
-    UNIMPLEMENTED();
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Core::Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
@@ -1717,8 +1716,6 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
     LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
               address, type, value, num_to_wake);
 
-    UNIMPLEMENTED();
-
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Core::Memory::IsKernelVirtualAddress(address)) {
         LOG_ERROR(Kernel_SVC, "Address is a kernel virtual address, address={:016X}", address);
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 7b6d1b4ec7..e8355bbd13 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -548,6 +548,14 @@ public:
         return global_handle;
     }
 
+    bool IsWaitingForArbitration() const {
+        return waiting_for_arbitration;
+    }
+
+    void WaitForArbitration(bool set) {
+        waiting_for_arbitration = set;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
@@ -615,6 +623,7 @@ private:
 
     /// If waiting for an AddressArbiter, this is the address being waited on.
     VAddr arb_wait_address{0};
+    bool waiting_for_arbitration{};
 
     /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
     Handle global_handle = 0;

From 87a28a9b1a21c08101d0bdfb8659ca039ed5682b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 4 Mar 2020 22:46:22 -0400
Subject: [PATCH 058/122] Mutex: Correct Result writing to clear exclusivity.

---
 src/core/hle/kernel/mutex.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 18325db575..ebe3f6050e 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -9,6 +9,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/arm/exclusive_monitor.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -133,8 +134,12 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
      }
 
     auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
+    auto& monitor = system.Monitor();
+    const std::size_t current_core = system.CurrentCoreIndex();
     if (new_owner == nullptr) {
-        system.Memory().Write32(address, 0);
+        do {
+            monitor.SetExclusive(current_core, address);
+        } while (!monitor.ExclusiveWrite32(current_core, address, 0));
         return {RESULT_SUCCESS, nullptr};
     }
     // Transfer the ownership of the mutex from the previous owner to the new one.
@@ -145,9 +150,12 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
         mutex_value |= Mutex::MutexHasWaitersFlag;
     }
     new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
-    new_owner->ResumeFromWait();
     new_owner->SetLockOwner(nullptr);
-    system.Memory().Write32(address, mutex_value);
+    new_owner->ResumeFromWait();
+
+    do {
+        monitor.SetExclusive(current_core, address);
+    } while (!monitor.ExclusiveWrite32(current_core, address, mutex_value));
     return {RESULT_SUCCESS, new_owner};
 }
 

From f0c663592199bdff2ed20200b7aa7bfda90fd642 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 09:31:03 -0400
Subject: [PATCH 059/122] NVDRV: Remove frame limiting as Host Timing already
 takes care of it.

---
 src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 3f7b8e6704..19df0dca7c 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -41,7 +41,6 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
     system.GetPerfStats().EndGameFrame();
     system.GetPerfStats().EndSystemFrame();
     system.GPU().SwapBuffers(&framebuffer);
-    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
     system.GetPerfStats().BeginSystemFrame();
 }
 

From b1b90f792639dcacc8dd2d10f363d140e034f612 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 09:52:24 -0400
Subject: [PATCH 060/122] Scheduler: Release old thread fiber before trying to
 switch to the next thread fiber.

---
 src/core/hle/kernel/scheduler.cpp | 37 ++++++++++++++++++++++---------
 src/core/hle/kernel/scheduler.h   |  9 ++++++++
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 9329202c68..aa1f1a305a 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -53,7 +53,8 @@ u32 GlobalScheduler::SelectThreads() {
             }
             sched.selected_thread_set = SharedFrom(thread);
         }
-        const bool reschedule_pending = sched.selected_thread_set != sched.current_thread;
+        const bool reschedule_pending =
+            sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread);
         sched.is_context_switch_pending = reschedule_pending;
         std::atomic_thread_fence(std::memory_order_seq_cst);
         sched.guard.unlock();
@@ -552,7 +553,9 @@ void GlobalScheduler::Unlock() {
 }
 
 Scheduler::Scheduler(Core::System& system, std::size_t core_id)
-    : system{system}, core_id{core_id} {}
+    : system(system), core_id(core_id) {
+    switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
+}
 
 Scheduler::~Scheduler() = default;
 
@@ -636,8 +639,9 @@ void Scheduler::SwitchContext() {
     current_thread = selected_thread;
 
     is_context_switch_pending = false;
-    guard.unlock();
+
     if (new_thread == previous_thread) {
+        guard.unlock();
         return;
     }
 
@@ -669,20 +673,31 @@ void Scheduler::SwitchContext() {
     } else {
         old_context = idle_thread->GetHostContext();
     }
+    guard.unlock();
 
-    std::shared_ptr<Common::Fiber> next_context;
-    if (new_thread != nullptr) {
-        next_context = new_thread->GetHostContext();
-    } else {
-        next_context = idle_thread->GetHostContext();
-    }
-
-    Common::Fiber::YieldTo(old_context, next_context);
+    Common::Fiber::YieldTo(old_context, switch_fiber);
     /// When a thread wakes up, the scheduler may have changed to other in another core.
     auto& next_scheduler = system.Kernel().CurrentScheduler();
     next_scheduler.SwitchContextStep2();
 }
 
+void Scheduler::OnSwitch(void* this_scheduler) {
+    Scheduler* sched = static_cast<Scheduler*>(this_scheduler);
+    sched->SwitchToCurrent();
+}
+
+void Scheduler::SwitchToCurrent() {
+    while (true) {
+        std::shared_ptr<Common::Fiber> next_context;
+        if (current_thread != nullptr) {
+            next_context = current_thread->GetHostContext();
+        } else {
+            next_context = idle_thread->GetHostContext();
+        }
+        Common::Fiber::YieldTo(switch_fiber, next_context);
+    }
+}
+
 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
     const u64 prev_switch_ticks = last_context_switch_time;
     const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index f73ca777e4..728cca8020 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -15,6 +15,10 @@
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/thread.h"
 
+namespace Common {
+    class Fiber;
+}
+
 namespace Core {
 class ARM_Interface;
 class System;
@@ -247,12 +251,17 @@ private:
      */
     void UpdateLastContextSwitchTime(Thread* thread, Process* process);
 
+    static void OnSwitch(void* this_scheduler);
+    void SwitchToCurrent();
+
     std::shared_ptr<Thread> current_thread = nullptr;
     std::shared_ptr<Thread> selected_thread = nullptr;
     std::shared_ptr<Thread> current_thread_prev = nullptr;
     std::shared_ptr<Thread> selected_thread_set = nullptr;
     std::shared_ptr<Thread> idle_thread = nullptr;
 
+    std::shared_ptr<Common::Fiber> switch_fiber = nullptr;
+
     Core::System& system;
     u64 last_context_switch_time = 0;
     u64 idle_selection_count = 0;

From 27cef6fef59cccf67ae208d7a225117510c00a58 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 14:56:05 -0400
Subject: [PATCH 061/122] Core: Correct rebase.

---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 18 ++++++------------
 src/core/hle/kernel/scheduler.cpp         | 11 +++++------
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index a081680f1d..73d4a6ae59 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -8,6 +8,7 @@
 #include <dynarmic/A32/config.h>
 #include <dynarmic/A32/context.h>
 #include "common/microprofile.h"
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/arm/dynarmic/arm_dynarmic_cp15.h"
@@ -71,20 +72,13 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
-        // rough approximation of the amount of executed ticks in the system, it may be thrown off
-        // if not all cores are doing a similar amount of work. Instead of doing this, we should
-        // device a way so that timing is consistent across all cores without increasing the ticks 4
-        // times.
-        u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
-        // Always execute at least one tick.
-        amortized_ticks = std::max<u64>(amortized_ticks, 1);
-
-        parent.system.CoreTiming().AddTicks(amortized_ticks);
-        num_interpreted_instructions = 0;
+        /// We are using host timing, NOP
     }
     u64 GetTicksRemaining() override {
-        return std::max(parent.system.CoreTiming().GetDowncount(), {});
+        if (!parent.interrupt_handler.IsInterrupted()) {
+            return 1000ULL;
+        }
+        return 0ULL;
     }
 
     ARM_Dynarmic_32& parent;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index aa1f1a305a..ae89e908f5 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -552,8 +552,7 @@ void GlobalScheduler::Unlock() {
     EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
 }
 
-Scheduler::Scheduler(Core::System& system, std::size_t core_id)
-    : system(system), core_id(core_id) {
+Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) {
     switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
 }
 
@@ -601,9 +600,10 @@ void Scheduler::SwitchContextStep2() {
 
     // Load context of new thread
     Process* const previous_process =
-        previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr;
+        previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr;
 
     if (new_thread) {
+        auto& cpu_core = system.ArmInterface(core_id);
         new_thread->context_guard.lock();
         cpu_core.Lock();
         ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
@@ -619,7 +619,6 @@ void Scheduler::SwitchContextStep2() {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!new_thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
             cpu_core.LoadContext(new_thread->GetContext32());
             cpu_core.LoadContext(new_thread->GetContext64());
             cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
@@ -651,12 +650,12 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
+        auto& cpu_core = system.ArmInterface(core_id);
         if (!previous_thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
             cpu_core.SaveContext(previous_thread->GetContext32());
             cpu_core.SaveContext(previous_thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
-            previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+            previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
             cpu_core.ClearExclusiveState();
         }
         if (previous_thread->GetStatus() == ThreadStatus::Running) {

From 074704806ae3d959b2b00e3f140af5cf97a14042 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 19:30:37 -0400
Subject: [PATCH 062/122] Scheduler: Correct assert.

---
 src/core/hle/kernel/scheduler.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ae89e908f5..4e2a5adf38 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -606,10 +606,8 @@ void Scheduler::SwitchContextStep2() {
         auto& cpu_core = system.ArmInterface(core_id);
         new_thread->context_guard.lock();
         cpu_core.Lock();
-        ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
-                   "Thread must be assigned to this core.");
-        ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
-                   "Thread must be ready to become running.");
+        ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
+                   "Thread must be runnable.");
 
         // Cancel any outstanding wakeup events for this thread
         new_thread->SetIsRunning(true);

From 345710cc66a993d88efbc051bcd6c6fddf4d0f05 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 20:20:36 -0400
Subject: [PATCH 063/122] Scheduler: Protect on closed threads.

---
 src/core/hle/kernel/scheduler.cpp | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 4e2a5adf38..74d3731fc6 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -604,7 +604,6 @@ void Scheduler::SwitchContextStep2() {
 
     if (new_thread) {
         auto& cpu_core = system.ArmInterface(core_id);
-        new_thread->context_guard.lock();
         cpu_core.Lock();
         ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
                    "Thread must be runnable.");
@@ -685,13 +684,24 @@ void Scheduler::OnSwitch(void* this_scheduler) {
 
 void Scheduler::SwitchToCurrent() {
     while (true) {
-        std::shared_ptr<Common::Fiber> next_context;
-        if (current_thread != nullptr) {
-            next_context = current_thread->GetHostContext();
-        } else {
-            next_context = idle_thread->GetHostContext();
+        guard.lock();
+        selected_thread = selected_thread_set;
+        current_thread = selected_thread;
+        guard.unlock();
+        while (!is_context_switch_pending) {
+            current_thread->context_guard.lock();
+            if (current_thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
+                current_thread->context_guard.unlock();
+                break;
+            }
+            std::shared_ptr<Common::Fiber> next_context;
+            if (current_thread != nullptr) {
+                next_context = current_thread->GetHostContext();
+            } else {
+                next_context = idle_thread->GetHostContext();
+            }
+            Common::Fiber::YieldTo(switch_fiber, next_context);
         }
-        Common::Fiber::YieldTo(switch_fiber, next_context);
     }
 }
 

From 9a98f56580cc36dfa1f118bb83f8654ee3473b44 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 20:36:05 -0400
Subject: [PATCH 064/122] Scheduler: Fix HLE Threads on guard

---
 src/core/hle/kernel/scheduler.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 74d3731fc6..d7529360c0 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -689,10 +689,12 @@ void Scheduler::SwitchToCurrent() {
         current_thread = selected_thread;
         guard.unlock();
         while (!is_context_switch_pending) {
-            current_thread->context_guard.lock();
-            if (current_thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
-                current_thread->context_guard.unlock();
-                break;
+            if (current_thread != nullptr) {
+                current_thread->context_guard.lock();
+                if (current_thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
+                    current_thread->context_guard.unlock();
+                    break;
+                }
             }
             std::shared_ptr<Common::Fiber> next_context;
             if (current_thread != nullptr) {

From 940fb591b839a2901a38cf90e5b99112733eec75 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 6 Mar 2020 22:58:56 -0400
Subject: [PATCH 065/122] Scheduler: Correct locking for hle threads.

---
 src/core/hle/kernel/scheduler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index d7529360c0..f020438fb5 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -687,9 +687,10 @@ void Scheduler::SwitchToCurrent() {
         guard.lock();
         selected_thread = selected_thread_set;
         current_thread = selected_thread;
+        is_context_switch_pending = false;
         guard.unlock();
         while (!is_context_switch_pending) {
-            if (current_thread != nullptr) {
+            if (current_thread != nullptr && !current_thread->IsHLEThread()) {
                 current_thread->context_guard.lock();
                 if (current_thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
                     current_thread->context_guard.unlock();

From adede5b07ec57b326ded09e96fe10049894d7443 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 10:24:46 -0400
Subject: [PATCH 066/122] SCC: Small corrections to CancelSynchronization

---
 src/core/hle/kernel/synchronization.cpp | 2 ++
 src/core/hle/kernel/thread.cpp          | 5 +++--
 src/core/hle/kernel/thread.h            | 9 +++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index ac43a70946..a7e3fbe92f 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -74,7 +74,9 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
         thread->SetSynchronizationObjects(&sync_objects);
         thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
         thread->SetStatus(ThreadStatus::WaitSynch);
+        thread->SetWaitingSync(true);
     }
+    thread->SetWaitingSync(false);
 
     if (event_handle != InvalidHandle) {
         auto& time_manager = kernel.TimeManager();
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index fb17518605..e8962a0d8f 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -139,13 +139,14 @@ ResultCode Thread::Start() {
 
 void Thread::CancelWait() {
     SchedulerLock lock(kernel);
-    if (GetSchedulingStatus() != ThreadSchedStatus::Paused) {
+    if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
         is_sync_cancelled = true;
         return;
     }
+    //TODO(Blinkhawk): Implement cancel of server session
     is_sync_cancelled = false;
     SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
-    ResumeFromWait();
+    SetStatus(ThreadStatus::Ready);
 }
 
 static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index e8355bbd13..d8a983200c 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -556,6 +556,14 @@ public:
         waiting_for_arbitration = set;
     }
 
+    bool IsWaitingSync() const {
+        return is_waiting_on_sync;
+    }
+
+    void SetWaitingSync(bool is_waiting) {
+        is_waiting_on_sync = is_waiting;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
@@ -650,6 +658,7 @@ private:
 
     u32 scheduling_state = 0;
     bool is_running = false;
+    bool is_waiting_on_sync = false;
     bool is_sync_cancelled = false;
 
     bool will_be_terminated{};

From 599ef27cf0a221e39e58cdf126a04e5b55517e63 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 12:44:35 -0400
Subject: [PATCH 067/122] SVC: Correct SetThreadActivity.

---
 src/core/hle/kernel/scheduler.cpp |  9 ++---
 src/core/hle/kernel/svc.cpp       |  5 +--
 src/core/hle/kernel/thread.cpp    | 61 ++++++++++++++++++++-----------
 src/core/hle/kernel/thread.h      | 22 +++++++----
 4 files changed, 59 insertions(+), 38 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index f020438fb5..a37b992ec1 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -417,8 +417,7 @@ void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
     }
     ASSERT(is_locked);
 
-    if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
-        ThreadSchedStatus::Runnable) {
+    if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) {
         // In this case the thread was running, now it's pausing/exitting
         if (thread->processor_id >= 0) {
             Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
@@ -430,7 +429,7 @@ void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
                 Unsuggest(thread->current_priority, core, thread);
             }
         }
-    } else if (thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
+    } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) {
         // The thread is now set to running from being stopped
         if (thread->processor_id >= 0) {
             Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread);
@@ -448,7 +447,7 @@ void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
 }
 
 void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
-    if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
+    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) {
         return;
     }
     ASSERT(is_locked);
@@ -486,7 +485,7 @@ void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priorit
 
 void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
                                                  s32 old_core) {
-    if (thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
+    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) ||
         thread->current_priority >= THREADPRIO_COUNT) {
         return;
     }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 718462b2b1..da2f90a1d1 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1012,7 +1012,6 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
 /// Sets the thread activity
 static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
-    UNIMPLEMENTED();
     if (activity > static_cast<u32>(ThreadActivity::Paused)) {
         return ERR_INVALID_ENUM_VALUE;
     }
@@ -1039,9 +1038,7 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
         return ERR_BUSY;
     }
 
-    thread->SetActivity(static_cast<ThreadActivity>(activity));
-
-    return RESULT_SUCCESS;
+    return thread->SetActivity(static_cast<ThreadActivity>(activity));
 }
 
 /// Gets the thread context
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index e8962a0d8f..b99e3b7a55 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -113,20 +113,11 @@ void Thread::ResumeFromWait() {
         return;
     }
 
-    if (activity == ThreadActivity::Paused) {
-        SetStatus(ThreadStatus::Paused);
-        return;
-    }
-
     SetStatus(ThreadStatus::Ready);
 }
 
 void Thread::OnWakeUp() {
     SchedulerLock lock(kernel);
-    if (activity == ThreadActivity::Paused) {
-        SetStatus(ThreadStatus::Paused);
-        return;
-    }
 
     SetStatus(ThreadStatus::Ready);
 }
@@ -143,7 +134,7 @@ void Thread::CancelWait() {
         is_sync_cancelled = true;
         return;
     }
-    //TODO(Blinkhawk): Implement cancel of server session
+    // TODO(Blinkhawk): Implement cancel of server session
     is_sync_cancelled = false;
     SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
     SetStatus(ThreadStatus::Ready);
@@ -407,19 +398,31 @@ bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
     return hle_callback(std::move(thread));
 }
 
-void Thread::SetActivity(ThreadActivity value) {
-    activity = value;
+ResultCode Thread::SetActivity(ThreadActivity value) {
+    SchedulerLock lock(kernel);
+
+    auto sched_status = GetSchedulingStatus();
+
+    if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) {
+        return ERR_INVALID_STATE;
+    }
+
+    if (IsPendingTermination()) {
+        return RESULT_SUCCESS;
+    }
 
     if (value == ThreadActivity::Paused) {
-        // Set status if not waiting
-        if (status == ThreadStatus::Ready || status == ThreadStatus::Running) {
-            SetStatus(ThreadStatus::Paused);
-            kernel.PrepareReschedule(processor_id);
+        if (pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag) != 0) {
+            return ERR_INVALID_STATE;
         }
-    } else if (status == ThreadStatus::Paused) {
-        // Ready to reschedule
-        ResumeFromWait();
+        AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
+    } else {
+        if (pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag) == 0) {
+            return ERR_INVALID_STATE;
+        }
+        RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
     }
+    return RESULT_SUCCESS;
 }
 
 ResultCode Thread::Sleep(s64 nanoseconds) {
@@ -460,11 +463,27 @@ ResultCode Thread::YieldAndWaitForLoadBalancing() {
     return RESULT_SUCCESS;
 }
 
+void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
+    const u32 old_state = scheduling_state;
+    pausing_state |= static_cast<u32>(flag);
+    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
+    scheduling_state = base_scheduling | pausing_state;
+    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
+}
+
+void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
+    const u32 old_state = scheduling_state;
+    pausing_state &= ~static_cast<u32>(flag);
+    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
+    scheduling_state = base_scheduling | pausing_state;
+    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
+}
+
 void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
-    const u32 old_flags = scheduling_state;
+    const u32 old_state = scheduling_state;
     scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
                        static_cast<u32>(new_status);
-    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_flags);
+    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
 }
 
 void Thread::SetCurrentPriority(u32 new_priority) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index d8a983200c..0a8f7bb654 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -497,11 +497,7 @@ public:
         return affinity_mask;
     }
 
-    ThreadActivity GetActivity() const {
-        return activity;
-    }
-
-    void SetActivity(ThreadActivity value);
+    ResultCode SetActivity(ThreadActivity value);
 
     /// Sleeps this thread for the given amount of nanoseconds.
     ResultCode Sleep(s64 nanoseconds);
@@ -564,11 +560,22 @@ public:
         is_waiting_on_sync = is_waiting;
     }
 
+    bool IsPendingTermination() const {
+        return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited;
+    }
+
+    bool IsPaused() const {
+        return pausing_state != 0;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
 
     void SetSchedulingStatus(ThreadSchedStatus new_status);
+    void AddSchedulingFlag(ThreadSchedFlags flag);
+    void RemoveSchedulingFlag(ThreadSchedFlags flag);
+
     void SetCurrentPriority(u32 new_priority);
 
     void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
@@ -650,18 +657,17 @@ private:
     u32 ideal_core{0xFFFFFFFF};
     u64 affinity_mask{0x1};
 
-    ThreadActivity activity = ThreadActivity::Normal;
-
     s32 ideal_core_override = -1;
     u64 affinity_mask_override = 0x1;
     u32 affinity_override_count = 0;
 
     u32 scheduling_state = 0;
+    u32 pausing_state = 0;
     bool is_running = false;
     bool is_waiting_on_sync = false;
     bool is_sync_cancelled = false;
 
-    bool will_be_terminated{};
+    bool will_be_terminated = false;
 
     std::string name;
 };

From 97631eb155a52a8e1e983d0d6ec8b164a14d777b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 13:07:04 -0400
Subject: [PATCH 068/122] Yuzu/Debuggers: Correct Wait Tree for Paused threads.

---
 src/yuzu/debugger/wait_tree.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 765908c5a7..ab7b18abec 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -208,7 +208,11 @@ QString WaitTreeThread::GetText() const {
         status = tr("running");
         break;
     case Kernel::ThreadStatus::Ready:
-        status = tr("ready");
+        if (!thread.IsPaused()) {
+            status = tr("ready");
+        } else {
+            status = tr("paused");
+        }
         break;
     case Kernel::ThreadStatus::Paused:
         status = tr("paused");
@@ -256,7 +260,11 @@ QColor WaitTreeThread::GetColor() const {
     case Kernel::ThreadStatus::Running:
         return QColor(Qt::GlobalColor::darkGreen);
     case Kernel::ThreadStatus::Ready:
-        return QColor(Qt::GlobalColor::darkBlue);
+        if (!thread.IsPaused()) {
+            return QColor(Qt::GlobalColor::darkBlue);
+        } else {
+            return QColor(Qt::GlobalColor::lightGray);
+        }
     case Kernel::ThreadStatus::Paused:
         return QColor(Qt::GlobalColor::lightGray);
     case Kernel::ThreadStatus::WaitHLEEvent:

From 9818e76bf369841cbad31809f89343280d5d7562 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 13:27:27 -0400
Subject: [PATCH 069/122] Scheduler: Remove arm_interface lock and a few
 corrections.

---
 src/core/arm/arm_interface.h      | 10 ----------
 src/core/hle/kernel/scheduler.cpp | 10 +++-------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index be9f3703a6..87a1c29cc9 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -6,7 +6,6 @@
 
 #include <array>
 #include <vector>
-#include <mutex>
 #include "common/common_types.h"
 
 namespace Common {
@@ -165,14 +164,6 @@ public:
         std::string name;
     };
 
-    void Lock() {
-        guard.lock();
-    }
-
-    void Unlock() {
-        guard.unlock();
-    }
-
     std::vector<BacktraceEntry> GetBacktrace() const;
 
     /// fp (= r29) points to the last frame record.
@@ -187,7 +178,6 @@ protected:
     /// System context that this ARM interface is running under.
     System& system;
     CPUInterruptHandler& interrupt_handler;
-    std::mutex guard;
 };
 
 } // namespace Core
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index a37b992ec1..affc2fbedd 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -463,9 +463,7 @@ void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priorit
     }
 
     if (thread->processor_id >= 0) {
-        // TODO(Blinkhawk): compare it with current thread running on current core, instead of
-        // checking running
-        if (thread->IsRunning()) {
+        if (thread == kernel.CurrentScheduler().GetCurrentThread()) {
             SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id),
                             thread);
         } else {
@@ -602,8 +600,6 @@ void Scheduler::SwitchContextStep2() {
         previous_thread != nullptr ? previous_thread->GetOwnerProcess() : nullptr;
 
     if (new_thread) {
-        auto& cpu_core = system.ArmInterface(core_id);
-        cpu_core.Lock();
         ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
                    "Thread must be runnable.");
 
@@ -615,6 +611,7 @@ void Scheduler::SwitchContextStep2() {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!new_thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
             cpu_core.LoadContext(new_thread->GetContext32());
             cpu_core.LoadContext(new_thread->GetContext64());
             cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
@@ -646,8 +643,8 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
-        auto& cpu_core = system.ArmInterface(core_id);
         if (!previous_thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
             cpu_core.SaveContext(previous_thread->GetContext32());
             cpu_core.SaveContext(previous_thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
@@ -659,7 +656,6 @@ void Scheduler::SwitchContext() {
         }
         previous_thread->SetIsRunning(false);
         previous_thread->context_guard.unlock();
-        cpu_core.Unlock();
     }
 
     std::shared_ptr<Common::Fiber> old_context;

From 287204164793c345716393a44802e088d04574b4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 13:37:47 -0400
Subject: [PATCH 070/122] SVC: WaitSynchronization add Termination Pending
 Result.

---
 src/core/hle/kernel/errors.h            | 1 +
 src/core/hle/kernel/synchronization.cpp | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index 29bfa3621f..d4e5d88cff 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -12,6 +12,7 @@ namespace Kernel {
 
 constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
+constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
 constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index a7e3fbe92f..4323fc1208 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -59,7 +59,10 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
             return {RESULT_TIMEOUT, InvalidHandle};
         }
 
-        /// TODO(Blinkhawk): Check for termination pending
+        if (thread->IsPendingTermination()) {
+            lock.CancelSleep();
+            return {ERR_THREAD_TERMINATING, InvalidHandle};
+        }
 
         if (thread->IsSyncCancelled()) {
             thread->SetSyncCancelled(false);

From ded21533ae37edc382dd3276effd400c529044ea Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 18:59:42 -0400
Subject: [PATCH 071/122] ARM/Memory: Correct Exclusive Monitor and Implement
 Exclusive Memory Writes.

---
 src/common/CMakeLists.txt                 |  2 +
 src/common/atomic_ops.cpp                 | 70 ++++++++++++++++
 src/common/atomic_ops.h                   | 17 ++++
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 66 ++++++++++++---
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  6 +-
 src/core/arm/exclusive_monitor.h          |  6 +-
 src/core/hle/kernel/address_arbiter.cpp   |  6 +-
 src/core/hle/kernel/mutex.cpp             |  5 +-
 src/core/hle/kernel/svc.cpp               |  2 +-
 src/core/hle/kernel/thread.cpp            |  6 +-
 src/core/memory.cpp                       | 98 +++++++++++++++++++++++
 src/core/memory.h                         | 65 +++++++++++++++
 12 files changed, 325 insertions(+), 24 deletions(-)
 create mode 100644 src/common/atomic_ops.cpp
 create mode 100644 src/common/atomic_ops.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index f502354e54..f692cb4e9b 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -96,6 +96,8 @@ add_library(common STATIC
     algorithm.h
     alignment.h
     assert.h
+    atomic_ops.cpp
+    atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
     bit_field.h
diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
new file mode 100644
index 0000000000..65cdfb4fd5
--- /dev/null
+++ b/src/common/atomic_ops.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/atomic_ops.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+namespace Common {
+
+#if _MSC_VER
+
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+    u8 result = _InterlockedCompareExchange8((char*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+    u16 result = _InterlockedCompareExchange16((short*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+    u32 result = _InterlockedCompareExchange((long*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+    u64 result = _InterlockedCompareExchange64((__int64*)pointer, value, expected);
+    return result == expected;
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+    return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], (__int64*)expected.data()) != 0;
+}
+
+
+#else // GCC/Clang: __sync_bool_compare_and_swap(ptr, oldval, newval) - the comparand goes first
+
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
new file mode 100644
index 0000000000..22cb3a402b
--- /dev/null
+++ b/src/common/atomic_ops.h
@@ -0,0 +1,17 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+bool AtomicCompareAndSwap(u8 volatile * pointer, u8 value, u8 expected);
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected);
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected);
+
+} // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 2d0a9b6f09..f8db526ed6 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -66,6 +66,22 @@ public:
         memory.Write64(vaddr + 8, value[1]);
     }
 
+    bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
+        return parent.system.Memory().WriteExclusive8(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
+        return parent.system.Memory().WriteExclusive16(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
+        return parent.system.Memory().WriteExclusive32(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
+        return parent.system.Memory().WriteExclusive64(vaddr, value, expected);
+    }
+    bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
+        return parent.system.Memory().WriteExclusive128(vaddr, value, expected);
+    }
+
     void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
         LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
                  num_instructions, MemoryReadCode(pc));
@@ -284,9 +300,29 @@ DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::
 
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
-void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
-    // Size doesn't actually matter.
-    monitor.Mark(core_index, addr, 16);
+void DynarmicExclusiveMonitor::SetExclusive8(std::size_t core_index, VAddr addr) {
+    monitor.Mark<u8>(core_index, addr, 1, [&]() -> u8 { return memory.Read8(addr); });
+}
+
+void DynarmicExclusiveMonitor::SetExclusive16(std::size_t core_index, VAddr addr) {
+    monitor.Mark<u16>(core_index, addr, 2, [&]() -> u16 { return memory.Read16(addr); });
+}
+
+void DynarmicExclusiveMonitor::SetExclusive32(std::size_t core_index, VAddr addr) {
+    monitor.Mark<u32>(core_index, addr, 4, [&]() -> u32 { return memory.Read32(addr); });
+}
+
+void DynarmicExclusiveMonitor::SetExclusive64(std::size_t core_index, VAddr addr) {
+    monitor.Mark<u64>(core_index, addr, 8, [&]() -> u64 { return memory.Read64(addr); });
+}
+
+void DynarmicExclusiveMonitor::SetExclusive128(std::size_t core_index, VAddr addr) {
+    monitor.Mark<u128>(core_index, addr, 16, [&]() -> u128 {
+        u128 result;
+        result[0] = memory.Read64(addr);
+        result[1] = memory.Read64(addr + 8);
+        return result;
+    });
 }
 
 void DynarmicExclusiveMonitor::ClearExclusive() {
@@ -294,28 +330,32 @@ void DynarmicExclusiveMonitor::ClearExclusive() {
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); });
+    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, 1, [&](u8 expected) -> bool {
+        return memory.WriteExclusive8(vaddr, value, expected);
+    });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 2,
-                                        [&] { memory.Write16(vaddr, value); });
+    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, 2, [&](u16 expected) -> bool {
+        return memory.WriteExclusive16(vaddr, value, expected);
+    });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 4,
-                                        [&] { memory.Write32(vaddr, value); });
+    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, 4, [&](u32 expected) -> bool {
+        return memory.WriteExclusive32(vaddr, value, expected);
+    });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 8,
-                                        [&] { memory.Write64(vaddr, value); });
+    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, 8, [&](u64 expected) -> bool {
+        return memory.WriteExclusive64(vaddr, value, expected);
+    });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
-        memory.Write64(vaddr + 0, value[0]);
-        memory.Write64(vaddr + 8, value[1]);
+    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, 16, [&](u128 expected) -> bool {
+        return memory.WriteExclusive128(vaddr, value, expected);
     });
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 9e94b58c2d..3ead59f166 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -82,7 +82,11 @@ public:
     explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
     ~DynarmicExclusiveMonitor() override;
 
-    void SetExclusive(std::size_t core_index, VAddr addr) override;
+    void SetExclusive8(std::size_t core_index, VAddr addr) override;
+    void SetExclusive16(std::size_t core_index, VAddr addr) override;
+    void SetExclusive32(std::size_t core_index, VAddr addr) override;
+    void SetExclusive64(std::size_t core_index, VAddr addr) override;
+    void SetExclusive128(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
 
     bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index ccd73b80ff..2ee312eeea 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -18,7 +18,11 @@ class ExclusiveMonitor {
 public:
     virtual ~ExclusiveMonitor();
 
-    virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive8(std::size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive16(std::size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive32(std::size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive64(std::size_t core_index, VAddr addr) = 0;
+    virtual void SetExclusive128(std::size_t core_index, VAddr addr) = 0;
     virtual void ClearExclusive() = 0;
 
     virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index ebabde9213..07acabc1d5 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -90,7 +90,7 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
     auto& monitor = system.Monitor();
     u32 current_value;
     do {
-        monitor.SetExclusive(current_core, address);
+        monitor.SetExclusive32(current_core, address);
         current_value = memory.Read32(address);
 
         if (current_value != value) {
@@ -120,7 +120,7 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
     auto& monitor = system.Monitor();
     s32 updated_value;
     do {
-        monitor.SetExclusive(current_core, address);
+        monitor.SetExclusive32(current_core, address);
         updated_value = memory.Read32(address);
 
         if (updated_value != value) {
@@ -191,7 +191,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
         const std::size_t current_core = system.CurrentCoreIndex();
         auto& monitor = system.Monitor();
         do {
-            monitor.SetExclusive(current_core, address);
+            monitor.SetExclusive32(current_core, address);
             current_value = static_cast<s32>(memory.Read32(address));
             if (should_decrement) {
                 decrement_value = current_value - 1;
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index ebe3f6050e..16c95782a3 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -10,6 +10,7 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/arm/exclusive_monitor.h"
+#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -138,7 +139,7 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
     const std::size_t current_core = system.CurrentCoreIndex();
     if (new_owner == nullptr) {
         do {
-            monitor.SetExclusive(current_core, address);
+            monitor.SetExclusive32(current_core, address);
         } while (!monitor.ExclusiveWrite32(current_core, address, 0));
         return {RESULT_SUCCESS, nullptr};
     }
@@ -154,7 +155,7 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
     new_owner->ResumeFromWait();
 
     do {
-        monitor.SetExclusive(current_core, address);
+        monitor.SetExclusive32(current_core, address);
     } while (!monitor.ExclusiveWrite32(current_core, address, mutex_value));
     return {RESULT_SUCCESS, new_owner};
 }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index da2f90a1d1..371beed0d7 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1641,7 +1641,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
         u32 update_val = 0;
         const VAddr mutex_address = thread->GetMutexWaitAddress();
         do {
-            monitor.SetExclusive(current_core, mutex_address);
+            monitor.SetExclusive32(current_core, mutex_address);
 
             // If the mutex is not yet acquired, acquire it.
             mutex_val = memory.Read32(mutex_address);
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index b99e3b7a55..51cc5dcca6 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -236,7 +236,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
         ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
     }
     thread->host_context =
-        std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);
+        std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);
 
     return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
 }
@@ -412,12 +412,12 @@ ResultCode Thread::SetActivity(ThreadActivity value) {
     }
 
     if (value == ThreadActivity::Paused) {
-        if (pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag) != 0) {
+        if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) != 0) {
             return ERR_INVALID_STATE;
         }
         AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
     } else {
-        if (pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag) == 0) {
+        if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) == 0) {
             return ERR_INVALID_STATE;
         }
         RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 66634596d1..4cb5d05e59 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -8,6 +8,7 @@
 #include <utility>
 
 #include "common/assert.h"
+#include "common/atomic_ops.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/page_table.h"
@@ -176,6 +177,22 @@ struct Memory::Impl {
         }
     }
 
+    bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
+        return WriteExclusive<u8>(addr, data, expected);
+    }
+
+    bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
+        return WriteExclusive<u16_le>(addr, data, expected);
+    }
+
+    bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
+        return WriteExclusive<u32_le>(addr, data, expected);
+    }
+
+    bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
+        return WriteExclusive<u64_le>(addr, data, expected);
+    }
+
     std::string ReadCString(VAddr vaddr, std::size_t max_length) {
         std::string string;
         string.reserve(max_length);
@@ -679,6 +696,67 @@ struct Memory::Impl {
         }
     }
 
+    // Compare-and-swap on guest memory: stores data at vaddr only if it currently holds expected.
+    template <typename T>
+    bool WriteExclusive(const VAddr vaddr, const T data, const T expected) {
+        u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
+        if (page_pointer != nullptr) {
+            // NOTE: Avoid adding any extra logic to this fast-path block
+            T volatile* pointer = reinterpret_cast<T volatile*>(&page_pointer[vaddr]);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        switch (type) {
+        case Common::PageType::Unmapped:
+            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                      static_cast<u32>(data), vaddr);
+            return true;
+        case Common::PageType::Memory:
+            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
+            break;
+        case Common::PageType::RasterizerCachedMemory: {
+            u8* host_ptr{GetPointerFromVMA(vaddr)};
+            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            // Swap the memory host_ptr points at, not the bytes of the local pointer variable.
+            T volatile* pointer = reinterpret_cast<T volatile*>(host_ptr);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        default:
+            UNREACHABLE();
+        }
+        return true;
+    }
+
+    // 128-bit compare-and-swap on guest memory: stores data at vaddr only if it holds expected.
+    bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) {
+        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
+        if (page_pointer != nullptr) {
+            // NOTE: Avoid adding any extra logic to this fast-path block
+            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&page_pointer[vaddr]);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        switch (type) {
+        case Common::PageType::Unmapped:
+            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:016X}{:016X} @ 0x{:016X}", sizeof(data) * 8,
+                      static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
+            return true;
+        case Common::PageType::Memory:
+            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
+            break;
+        case Common::PageType::RasterizerCachedMemory: {
+            u8* host_ptr{GetPointerFromVMA(vaddr)};
+            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(u128));
+            // Swap the memory host_ptr points at, not the bytes of the local pointer variable.
+            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(host_ptr);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        default:
+            UNREACHABLE();
+        }
+        return true;
+    }
+
     Common::PageTable* current_page_table = nullptr;
     Core::System& system;
 };
@@ -761,6 +839,26 @@ void Memory::Write64(VAddr addr, u64 data) {
     impl->Write64(addr, data);
 }
 
+bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) {
+    return impl->WriteExclusive8(addr, data, expected); // forwards unchanged to impl
+}
+
+bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) {
+    return impl->WriteExclusive16(addr, data, expected); // forwards unchanged to impl
+}
+
+bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) {
+    return impl->WriteExclusive32(addr, data, expected); // forwards unchanged to impl
+}
+
+bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) {
+    return impl->WriteExclusive64(addr, data, expected); // forwards unchanged to impl
+}
+
+bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) {
+    return impl->WriteExclusive128(addr, data, expected); // forwards unchanged to impl
+}
+
 std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {
     return impl->ReadCString(vaddr, max_length);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 93f0c1d6c4..4a1cc63f4d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -244,6 +244,71 @@ public:
      */
     void Write64(VAddr addr, u64 data);
 
+    /**
+     * Writes an 8-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the 8-bit unsigned integer to.
+     * @param data The 8-bit unsigned integer to write to the given virtual address.
+     * @param expected The 8-bit unsigned integer to check against the given virtual address.
+     * @returns The compare-and-swap result; presumably true if the write happened (TODO confirm).
+     * @post If the value at addr matched expected, [addr, addr + sizeof(data)) contains data.
+     */
+    bool WriteExclusive8(VAddr addr, u8 data, u8 expected);
+
+    /**
+     * Writes a 16-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the 16-bit unsigned integer to.
+     * @param data The 16-bit unsigned integer to write to the given virtual address.
+     * @param expected The 16-bit unsigned integer to check against the given virtual address.
+     * @returns The compare-and-swap result; presumably true if the write happened (TODO confirm).
+     * @post If the value at addr matched expected, [addr, addr + sizeof(data)) contains data.
+     */
+    bool WriteExclusive16(VAddr addr, u16 data, u16 expected);
+
+    /**
+     * Writes a 32-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the 32-bit unsigned integer to.
+     * @param data The 32-bit unsigned integer to write to the given virtual address.
+     * @param expected The 32-bit unsigned integer to check against the given virtual address.
+     * @returns The compare-and-swap result; presumably true if the write happened (TODO confirm).
+     * @post If the value at addr matched expected, [addr, addr + sizeof(data)) contains data.
+     */
+    bool WriteExclusive32(VAddr addr, u32 data, u32 expected);
+
+    /**
+     * Writes a 64-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the 64-bit unsigned integer to.
+     * @param data The 64-bit unsigned integer to write to the given virtual address.
+     * @param expected The 64-bit unsigned integer to check against the given virtual address.
+     * @returns The compare-and-swap result; presumably true if the write happened (TODO confirm).
+     * @post If the value at addr matched expected, [addr, addr + sizeof(data)) contains data.
+     */
+    bool WriteExclusive64(VAddr addr, u64 data, u64 expected);
+
+    /**
+     * Writes a 128-bit unsigned integer to the given virtual address in
+     * the current process' address space if and only if the address contains
+     * the expected value. This operation is atomic.
+     *
+     * @param addr The virtual address to write the 128-bit unsigned integer to.
+     * @param data The 128-bit unsigned integer to write to the given virtual address.
+     * @param expected The 128-bit unsigned integer to check against the given virtual address.
+     * @returns The compare-and-swap result; presumably true if the write happened (TODO confirm).
+     * @post If the value at addr matched expected, [addr, addr + sizeof(data)) contains data.
+     */
+    bool WriteExclusive128(VAddr addr, u128 data, u128 expected);
+
     /**
      * Reads a null-terminated string from the given virtual address.
      * This function will continually read characters until either:

From 12d3b542f2b997e0fc7ee406a2eb33c6728eb6ef Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 19:04:02 -0400
Subject: [PATCH 072/122] Mutex: Revert workaround due to poor exclusive
 memory.

---
 src/core/hle/kernel/mutex.cpp | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 16c95782a3..5a96d5e90d 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -9,7 +9,6 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
@@ -135,12 +134,8 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
      }
 
     auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
-    auto& monitor = system.Monitor();
-    const std::size_t current_core = system.CurrentCoreIndex();
     if (new_owner == nullptr) {
-        do {
-            monitor.SetExclusive32(current_core, address);
-        } while (!monitor.ExclusiveWrite32(current_core, address, 0));
+        system.Memory().Write32(address, 0);
         return {RESULT_SUCCESS, nullptr};
     }
     // Transfer the ownership of the mutex from the previous owner to the new one.
@@ -154,9 +149,7 @@ std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thr
     new_owner->SetLockOwner(nullptr);
     new_owner->ResumeFromWait();
 
-    do {
-        monitor.SetExclusive32(current_core, address);
-    } while (!monitor.ExclusiveWrite32(current_core, address, mutex_value));
+    system.Memory().Write32(address, mutex_value);
     return {RESULT_SUCCESS, new_owner};
 }
 

From 05809e0fafa2c808777dfbf84076ecf9c7b84636 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 8 Mar 2020 11:25:50 -0400
Subject: [PATCH 073/122] Scheduler: Correct yields.

---
 src/core/hle/kernel/scheduler.cpp | 28 +++++++++++++++++++++-------
 src/core/hle/kernel/thread.h      |  4 ++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index affc2fbedd..ab17204bb4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -147,9 +147,11 @@ bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
     const u32 priority = yielding_thread->GetPriority();
 
     // Yield the thread
-    const Thread* const winner = scheduled_queue[core_id].front(priority);
-    ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front");
-    scheduled_queue[core_id].yield(priority);
+    Reschedule(priority, core_id, yielding_thread);
+    const Thread* const winner = scheduled_queue[core_id].front();
+    if (kernel.GetCurrentHostThreadID() != core_id) {
+        is_reselection_pending.store(true, std::memory_order_release);
+    }
 
     return AskForReselectionOrMarkRedundant(yielding_thread, winner);
 }
@@ -162,9 +164,7 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
     const u32 priority = yielding_thread->GetPriority();
 
     // Yield the thread
-    ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority),
-               "Thread yielding without being in front");
-    scheduled_queue[core_id].yield(priority);
+    Reschedule(priority, core_id, yielding_thread);
 
     std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
     for (std::size_t i = 0; i < current_threads.size(); i++) {
@@ -200,6 +200,10 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
         winner = next_thread;
     }
 
+    if (kernel.GetCurrentHostThreadID() != core_id) {
+        is_reselection_pending.store(true, std::memory_order_release);
+    }
+
     return AskForReselectionOrMarkRedundant(yielding_thread, winner);
 }
 
@@ -239,6 +243,12 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
         } else {
             winner = yielding_thread;
         }
+    } else {
+        winner = scheduled_queue[i].front();
+    }
+
+    if (kernel.GetCurrentHostThreadID() != core_id) {
+        is_reselection_pending.store(true, std::memory_order_release);
     }
 
     return AskForReselectionOrMarkRedundant(yielding_thread, winner);
@@ -687,7 +697,11 @@ void Scheduler::SwitchToCurrent() {
         while (!is_context_switch_pending) {
             if (current_thread != nullptr && !current_thread->IsHLEThread()) {
                 current_thread->context_guard.lock();
-                if (current_thread->GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
+                if (!current_thread->IsRunnable()) {
+                    current_thread->context_guard.unlock();
+                    break;
+                }
+                if (current_thread->GetProcessorID() != core_id) {
                     current_thread->context_guard.unlock();
                     break;
                 }
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 0a8f7bb654..953b023b5a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -524,6 +524,10 @@ public:
                                               static_cast<u32>(ThreadSchedMasks::LowMask));
     }
 
+    bool IsRunnable() const { // compares the whole scheduling_state value; no mask is applied
+        return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
+    }
+
     bool IsRunning() const {
         return is_running;
     }

From b2c283c80d53d71689186ceab623a29039150916 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 8 Mar 2020 12:51:24 -0400
Subject: [PATCH 074/122] Kernel: Fixes, corrections and asserts to scheduler
 and different svcs.

---
 src/core/hle/kernel/address_arbiter.cpp | 27 +++++++++++--------------
 src/core/hle/kernel/kernel.cpp          |  1 +
 src/core/hle/kernel/scheduler.cpp       |  3 ++-
 src/core/hle/kernel/scheduler.h         |  5 +++--
 src/core/hle/kernel/svc.cpp             | 27 +++++++++++++------------
 src/core/hle/kernel/synchronization.cpp | 10 +++++----
 src/core/hle/kernel/time_manager.cpp    |  2 --
 src/core/hle/kernel/time_manager.h      |  1 -
 8 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 07acabc1d5..e8f22b598e 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -34,19 +34,9 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai
 
     // Signal the waiting threads.
     for (std::size_t i = 0; i < last; i++) {
-        if (waiting_threads[i]->GetStatus() != ThreadStatus::WaitArb) {
-            last++;
-            last = std::min(waiting_threads.size(), last);
-            continue;
-        }
-
-        time_manager.CancelTimeEvent(waiting_threads[i].get());
-
-        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
         waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
         RemoveThread(waiting_threads[i]);
         waiting_threads[i]->WaitForArbitration(false);
-        waiting_threads[i]->SetArbiterWaitAddress(0);
         waiting_threads[i]->ResumeFromWait();
     }
 }
@@ -172,20 +162,25 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
     {
         SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
 
+        if (current_thread->IsPendingTermination()) {
+            lock.CancelSleep();
+            return ERR_THREAD_TERMINATING;
+        }
+
         // Ensure that we can read the address.
         if (!memory.IsValidVirtualAddress(address)) {
             lock.CancelSleep();
             return ERR_INVALID_ADDRESS_STATE;
         }
 
-        /// TODO(Blinkhawk): Check termination pending.
-
         s32 current_value = static_cast<s32>(memory.Read32(address));
         if (current_value >= value) {
             lock.CancelSleep();
             return ERR_INVALID_STATE;
         }
 
+        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+
         s32 decrement_value;
 
         const std::size_t current_core = system.CurrentCoreIndex();
@@ -207,7 +202,6 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
             return RESULT_TIMEOUT;
         }
 
-        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
         current_thread->SetArbiterWaitAddress(address);
         InsertThread(SharedFrom(current_thread));
         current_thread->SetStatus(ThreadStatus::WaitArb);
@@ -239,14 +233,17 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
     {
         SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
 
+        if (current_thread->IsPendingTermination()) {
+            lock.CancelSleep();
+            return ERR_THREAD_TERMINATING;
+        }
+
         // Ensure that we can read the address.
         if (!memory.IsValidVirtualAddress(address)) {
             lock.CancelSleep();
             return ERR_INVALID_ADDRESS_STATE;
         }
 
-        /// TODO(Blinkhawk): Check termination pending.
-
         s32 current_value = static_cast<s32>(memory.Read32(address));
         if (current_value != value) {
             lock.CancelSleep();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index ba051a7d80..721ab1e708 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -49,6 +49,7 @@ namespace Kernel {
  * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
  */
 static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
+    UNREACHABLE();
     const auto proper_handle = static_cast<Handle>(thread_handle);
     const auto& system = Core::System::GetInstance();
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ab17204bb4..5322f0aae3 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -133,6 +133,7 @@ u32 GlobalScheduler::SelectThreads() {
     u32 cores_needing_context_switch{};
     for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
         Scheduler& sched = kernel.Scheduler(core);
+        ASSERT(top_threads[core] == nullptr || top_threads[core]->GetProcessorID() == core);
         if (update_thread(top_threads[core], sched)) {
             cores_needing_context_switch |= (1ul << core);
         }
@@ -244,7 +245,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
             winner = yielding_thread;
         }
     } else {
-        winner = scheduled_queue[i].front();
+        winner = scheduled_queue[core_id].front();
     }
 
     if (kernel.GetCurrentHostThreadID() != core_id) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 728cca8020..5e062bf595 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -16,7 +16,7 @@
 #include "core/hle/kernel/thread.h"
 
 namespace Common {
-    class Fiber;
+class Fiber;
 }
 
 namespace Core {
@@ -133,7 +133,8 @@ private:
     /// and reschedules current core if needed.
     void Unlock();
 
-    void EnableInterruptAndSchedule(u32 cores_pending_reschedule, Core::EmuThreadHandle global_thread);
+    void EnableInterruptAndSchedule(u32 cores_pending_reschedule,
+                                    Core::EmuThreadHandle global_thread);
 
     /**
      * Add a thread to the suggested queue of a cpu core. Suggested threads may be
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 371beed0d7..aad2ac5493 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1562,6 +1562,11 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 
         current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
 
+        if (thread->IsPendingTermination()) {
+            lock.CancelSleep();
+            return ERR_THREAD_TERMINATING;
+        }
+
         const auto release_result = current_process->GetMutex().Release(mutex_addr);
         if (release_result.IsError()) {
             lock.CancelSleep();
@@ -1588,6 +1593,11 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
     {
         SchedulerLock lock(kernel);
 
+        auto* owner = current_thread->GetLockOwner();
+        if (owner != nullptr) {
+            owner->RemoveMutexWaiter(SharedFrom(current_thread));
+        }
+
         current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
     }
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
@@ -1618,19 +1628,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
     for (std::size_t index = 0; index < last; ++index) {
         auto& thread = waiting_threads[index];
 
-        if (thread->GetStatus() != ThreadStatus::WaitCondVar) {
-            last++;
-            last = std::min(waiting_threads.size(), last);
-            continue;
-        }
-
-        time_manager.CancelTimeEvent(thread.get());
-
         ASSERT(thread->GetCondVarWaitAddress() == condition_variable_addr);
 
         // liberate Cond Var Thread.
         current_process->RemoveConditionVariableThread(thread);
-        thread->SetCondVarWaitAddress(0);
 
         const std::size_t current_core = system.CurrentCoreIndex();
         auto& monitor = system.Monitor();
@@ -1655,9 +1656,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
         monitor.ClearExclusive();
         if (mutex_val == 0) {
             // We were able to acquire the mutex, resume this thread.
-            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
-            thread->ResumeFromWait();
-
             auto* const lock_owner = thread->GetLockOwner();
             if (lock_owner != nullptr) {
                 lock_owner->RemoveMutexWaiter(thread);
@@ -1665,13 +1663,16 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
 
             thread->SetLockOwner(nullptr);
             thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+            thread->ResumeFromWait();
         } else {
             // The mutex is already owned by some other thread, make this thread wait on it.
             const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
             const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
             auto owner = handle_table.Get<Thread>(owner_handle);
             ASSERT(owner);
-            thread->SetStatus(ThreadStatus::WaitMutex);
+            if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
+                thread->SetStatus(ThreadStatus::WaitMutex);
+            }
 
             owner->AddMutexWaiter(thread);
         }
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index 4323fc1208..275bf11cc0 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -23,9 +23,10 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const {
     if (obj.IsSignaled()) {
         for (auto thread : obj.GetWaitingThreads()) {
             if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
+                ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+                ASSERT(thread->IsWaitingSync());
                 thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
                 thread->ResumeFromWait();
-                time_manager.CancelTimeEvent(thread.get());
             }
         }
         obj.ClearWaitingThreads();
@@ -91,10 +92,11 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
         ResultCode signaling_result = thread->GetSignalingResult();
         SynchronizationObject* signaling_object = thread->GetSignalingObject();
         thread->SetSynchronizationObjects(nullptr);
+        auto shared_thread = SharedFrom(thread);
         for (auto& obj : sync_objects) {
-            obj->RemoveWaitingThread(SharedFrom(thread));
+            obj->RemoveWaitingThread(shared_thread);
         }
-        if (signaling_result == RESULT_SUCCESS) {
+        if (signaling_object != nullptr) {
             const auto itr = std::find_if(
                 sync_objects.begin(), sync_objects.end(),
                 [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
@@ -103,7 +105,7 @@ std::pair<ResultCode, Handle> Synchronization::WaitFor(
             ASSERT(itr != sync_objects.end());
             signaling_object->Acquire(thread);
             const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
-            return {RESULT_SUCCESS, index};
+            return {signaling_result, index};
         }
         return {signaling_result, -1};
     }
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index dab5fc4c6f..cc228f5f70 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -22,7 +22,6 @@ TimeManager::TimeManager(Core::System& system_) : system{system_} {
             if (cancelled_events[proper_handle]) {
                 return;
             }
-            event_fired[proper_handle] = true;
             std::shared_ptr<Thread> thread =
                 this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
             thread->OnWakeUp();
@@ -39,7 +38,6 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64
         event_handle = InvalidHandle;
     }
     cancelled_events[event_handle] = false;
-    event_fired[event_handle] = false;
 }
 
 void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
index 3080ac8383..307a187658 100644
--- a/src/core/hle/kernel/time_manager.h
+++ b/src/core/hle/kernel/time_manager.h
@@ -42,7 +42,6 @@ private:
     Core::System& system;
     std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
     std::unordered_map<Handle, bool> cancelled_events;
-    std::unordered_map<Handle, bool> event_fired;
 };
 
 } // namespace Kernel

From 264fe8825f01cce1f76f6b542925b8175aec8dd4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 8 Mar 2020 16:20:05 -0400
Subject: [PATCH 075/122] Kernel: Corrections to TimeManager, Scheduler and
 Mutex.

---
 src/core/hle/kernel/mutex.cpp        | 3 +--
 src/core/hle/kernel/scheduler.cpp    | 2 +-
 src/core/hle/kernel/time_manager.cpp | 5 +++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 5a96d5e90d..32dc1ffaef 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -35,8 +35,6 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
         if (thread->GetMutexWaitAddress() != mutex_addr)
             continue;
 
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
-
         ++num_waiters;
         if (highest_priority_thread == nullptr ||
             thread->GetPriority() < highest_priority_thread->GetPriority()) {
@@ -50,6 +48,7 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
 /// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.
 static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,
                                    std::shared_ptr<Thread> new_owner) {
+    current_thread->RemoveMutexWaiter(new_owner);
     const auto threads = current_thread->GetMutexWaitingThreads();
     for (const auto& thread : threads) {
         if (thread->GetMutexWaitAddress() != mutex_addr)
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5322f0aae3..98fbb8fe58 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -93,7 +93,7 @@ u32 GlobalScheduler::SelectThreads() {
                 iter++;
                 s32 suggested_core_id = suggested->GetProcessorID();
                 Thread* top_thread =
-                    suggested_core_id > 0 ? top_threads[suggested_core_id] : nullptr;
+                    suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
                 if (top_thread != suggested) {
                     if (top_thread != nullptr &&
                         top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index cc228f5f70..941305e8ee 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -32,8 +32,9 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64
     event_handle = timetask->GetGlobalHandle();
     if (nanoseconds > 0) {
         ASSERT(timetask);
-        const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
-        system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle);
+        ASSERT(timetask->GetStatus() != ThreadStatus::Ready);
+        ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex);
+        system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle);
     } else {
         event_handle = InvalidHandle;
     }

From fdf8810b07bec9cfa6532856782fe00a40d4c9c3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 8 Mar 2020 21:13:18 -0400
Subject: [PATCH 076/122] Scheduler: Set last running time on thread.

---
 src/core/hle/kernel/scheduler.cpp | 2 ++
 src/core/hle/kernel/thread.cpp    | 4 ----
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 98fbb8fe58..d68d86cdfc 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -616,6 +616,7 @@ void Scheduler::SwitchContextStep2() {
 
         // Cancel any outstanding wakeup events for this thread
         new_thread->SetIsRunning(true);
+        new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process && thread_owner_process != nullptr) {
@@ -654,6 +655,7 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
+        previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         if (!previous_thread->IsHLEThread()) {
             auto& cpu_core = system.ArmInterface(core_id);
             cpu_core.SaveContext(previous_thread->GetContext32());
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 51cc5dcca6..fc6c0bc857 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -295,10 +295,6 @@ void Thread::SetStatus(ThreadStatus new_status) {
         break;
     }
 
-    if (status == ThreadStatus::Running) {
-        last_running_ticks = Core::System::GetInstance().CoreTiming().GetCPUTicks();
-    }
-
     status = new_status;
 }
 

From 1c5c3692f91af5d1b963004d94e4d3e01cc35898 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 8 Mar 2020 22:39:41 -0400
Subject: [PATCH 077/122] General: Initial Setup for Single Core.

---
 src/core/core.cpp                            |   3 +
 src/core/cpu_manager.cpp                     | 194 ++++++++++++++++---
 src/core/cpu_manager.h                       |  30 ++-
 src/core/hle/kernel/kernel.cpp               |  19 ++
 src/core/hle/kernel/kernel.h                 |   3 +
 src/core/memory.cpp                          |   8 +-
 src/yuzu/configuration/configure_general.cpp |   6 +
 src/yuzu/configuration/configure_general.ui  |   7 +
 8 files changed, 232 insertions(+), 38 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 5d4ecdce5a..fd1bdcaf0a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -149,6 +149,9 @@ struct System::Impl {
 
         device_memory = std::make_unique<Core::DeviceMemory>(system);
 
+        kernel.SetMulticore(Settings::values.use_multi_core);
+        cpu_manager.SetMulticore(Settings::values.use_multi_core);
+
         core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
         cpu_manager.Initialize();
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 9a261968a7..e72f898083 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -26,9 +26,13 @@ void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
 
 void CpuManager::Initialize() {
     running_mode = true;
-    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        core_data[core].host_thread =
-            std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
+    if (is_multicore) { // one host thread per emulated core
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            core_data[core].host_thread =
+                std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
+        }
+    } else { // otherwise only core_data[0] gets a host thread
+        core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
     }
 }
 
@@ -41,26 +45,6 @@ void CpuManager::Shutdown() {
     }
 }
 
-void CpuManager::GuestThreadFunction(void* cpu_manager_) {
-    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
-    cpu_manager->RunGuestThread();
-}
-
-void CpuManager::GuestRewindFunction(void* cpu_manager_) {
-    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
-    cpu_manager->RunGuestLoop();
-}
-
-void CpuManager::IdleThreadFunction(void* cpu_manager_) {
-    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
-    cpu_manager->RunIdleThread();
-}
-
-void CpuManager::SuspendThreadFunction(void* cpu_manager_) {
-    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
-    cpu_manager->RunSuspendThread();
-}
-
 std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
     return std::function<void(void*)>(GuestThreadFunction);
 }
@@ -73,20 +57,60 @@ std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() {
     return std::function<void(void*)>(SuspendThreadFunction);
 }
 
+void CpuManager::GuestThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunGuestThread();
+    } else {
+        cpu_manager->SingleCoreRunGuestThread();
+    }
+}
+
+void CpuManager::GuestRewindFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunGuestLoop();
+    } else {
+        cpu_manager->SingleCoreRunGuestLoop();
+    }
+}
+
+void CpuManager::IdleThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunIdleThread();
+    } else {
+        cpu_manager->SingleCoreRunIdleThread();
+    }
+}
+
+void CpuManager::SuspendThreadFunction(void* cpu_manager_) {
+    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_);
+    if (cpu_manager->is_multicore) {
+        cpu_manager->MultiCoreRunSuspendThread();
+    } else {
+        cpu_manager->SingleCoreRunSuspendThread();
+    }
+}
+
 void* CpuManager::GetStartFuncParamater() {
     return static_cast<void*>(this);
 }
 
-void CpuManager::RunGuestThread() {
+///////////////////////////////////////////////////////////////////////////////
+///                             MultiCore                                   ///
+///////////////////////////////////////////////////////////////////////////////
+
+void CpuManager::MultiCoreRunGuestThread() {
     auto& kernel = system.Kernel();
     {
         auto& sched = kernel.CurrentScheduler();
         sched.OnThreadStart();
     }
-    RunGuestLoop();
+    MultiCoreRunGuestLoop();
 }
 
-void CpuManager::RunGuestLoop() {
+void CpuManager::MultiCoreRunGuestLoop() {
     auto& kernel = system.Kernel();
     auto* thread = kernel.CurrentScheduler().GetCurrentThread();
     auto host_context = thread->GetHostContext();
@@ -103,7 +127,7 @@ void CpuManager::RunGuestLoop() {
     }
 }
 
-void CpuManager::RunIdleThread() {
+void CpuManager::MultiCoreRunIdleThread() {
     auto& kernel = system.Kernel();
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
@@ -113,7 +137,7 @@ void CpuManager::RunIdleThread() {
     }
 }
 
-void CpuManager::RunSuspendThread() {
+void CpuManager::MultiCoreRunSuspendThread() {
     auto& kernel = system.Kernel();
     {
         auto& sched = kernel.CurrentScheduler();
@@ -130,7 +154,7 @@ void CpuManager::RunSuspendThread() {
     }
 }
 
-void CpuManager::Pause(bool paused) {
+void CpuManager::MultiCorePause(bool paused) {
     if (!paused) {
         bool all_not_barrier = false;
         while (!all_not_barrier) {
@@ -171,10 +195,120 @@ void CpuManager::Pause(bool paused) {
     paused_state = paused;
 }
 
+///////////////////////////////////////////////////////////////////////////////
+///                             SingleCore                                   ///
+///////////////////////////////////////////////////////////////////////////////
+
+void CpuManager::SingleCoreRunGuestThread() {
+    auto& kernel = system.Kernel();
+    {
+        auto& sched = kernel.CurrentScheduler();
+        sched.OnThreadStart();
+    }
+    SingleCoreRunGuestLoop();
+}
+
+void CpuManager::SingleCoreRunGuestLoop() {
+    auto& kernel = system.Kernel();
+    auto* thread = kernel.CurrentScheduler().GetCurrentThread();
+    auto host_context = thread->GetHostContext();
+    host_context->SetRewindPoint(std::function<void(void*)>(GuestRewindFunction), this);
+    host_context.reset();
+    while (true) {
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        while (!physical_core.IsInterrupted()) {
+            physical_core.Run();
+            preemption_count++;
+            if (preemption_count % max_cycle_runs == 0) {
+                break;
+            }
+        }
+        physical_core.ClearExclusive();
+        PreemptSingleCore();
+        auto& scheduler = physical_core.Scheduler();
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::SingleCoreRunIdleThread() {
+    auto& kernel = system.Kernel();
+    while (true) {
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        PreemptSingleCore();
+        auto& scheduler = physical_core.Scheduler();
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::SingleCoreRunSuspendThread() {
+    auto& kernel = system.Kernel();
+    {
+        auto& sched = kernel.CurrentScheduler();
+        sched.OnThreadStart();
+    }
+    while (true) {
+        auto core = kernel.GetCurrentHostThreadID();
+        auto& scheduler = kernel.CurrentScheduler();
+        Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+        Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context);
+        ASSERT(scheduler.ContextSwitchPending());
+        ASSERT(core == kernel.GetCurrentHostThreadID());
+        scheduler.TryDoContextSwitch();
+    }
+}
+
+void CpuManager::PreemptSingleCore() {
+    preemption_count = 0;
+    std::size_t old_core = current_core;
+    current_core = (current_core + 1) % Core::Hardware::NUM_CPU_CORES;
+    auto& scheduler = system.Kernel().Scheduler(old_core);
+    Kernel::Thread* current_thread = system.Kernel().Scheduler(old_core).GetCurrentThread();
+    Kernel::Thread* next_thread = system.Kernel().Scheduler(current_core).GetCurrentThread();
+    Common::Fiber::YieldTo(current_thread->GetHostContext(), next_thread->GetHostContext());
+}
+
+void CpuManager::SingleCorePause(bool paused) {
+    if (!paused) {
+        bool all_not_barrier = false;
+        while (!all_not_barrier) {
+            all_not_barrier = !core_data[0].is_running.load() && core_data[0].initialized.load();
+        }
+        core_data[0].enter_barrier->Set();
+        if (paused_state.load()) {
+            bool all_barrier = false;
+            while (!all_barrier) {
+                all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
+            }
+            core_data[0].exit_barrier->Set();
+        }
+    } else {
+        /// Wait until all cores are paused.
+        bool all_barrier = false;
+        while (!all_barrier) {
+            all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
+        }
+        /// Don't release the barrier
+    }
+    paused_state = paused;
+}
+
+void CpuManager::Pause(bool paused) {
+    if (is_multicore) {
+        MultiCorePause(paused);
+    } else {
+        SingleCorePause(paused);
+    }
+}
+
 void CpuManager::RunThread(std::size_t core) {
     /// Initialization
     system.RegisterCoreThread(core);
-    std::string name = "yuzu:CoreHostThread_" + std::to_string(core);
+    std::string name;
+    if (is_multicore) {
+        name = "yuzu:CoreCPUThread_" + std::to_string(core);
+    } else {
+        name = "yuzu:CPUThread";
+    }
     MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
     auto& data = core_data[core];
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index e83ab20f9c..1e81481ec2 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -30,6 +30,10 @@ public:
     CpuManager& operator=(const CpuManager&) = delete;
     CpuManager& operator=(CpuManager&&) = delete;
 
+    /// Sets if emulation is multicore or single core, must be set before Initialize
+    void SetMulticore(bool is_multicore) {
+        this->is_multicore = is_multicore;
+    }
     void Initialize();
     void Shutdown();
 
@@ -40,21 +44,34 @@ public:
     std::function<void(void*)> GetSuspendThreadStartFunc();
     void* GetStartFuncParamater();
 
+    std::size_t CurrentCore() const {
+        return current_core;
+    }
+
 private:
     static void GuestThreadFunction(void* cpu_manager);
     static void GuestRewindFunction(void* cpu_manager);
     static void IdleThreadFunction(void* cpu_manager);
     static void SuspendThreadFunction(void* cpu_manager);
 
-    void RunGuestThread();
-    void RunGuestLoop();
-    void RunIdleThread();
-    void RunSuspendThread();
+    void MultiCoreRunGuestThread();
+    void MultiCoreRunGuestLoop();
+    void MultiCoreRunIdleThread();
+    void MultiCoreRunSuspendThread();
+    void MultiCorePause(bool paused);
+
+    void SingleCoreRunGuestThread();
+    void SingleCoreRunGuestLoop();
+    void SingleCoreRunIdleThread();
+    void SingleCoreRunSuspendThread();
+    void SingleCorePause(bool paused);
 
     static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
 
     void RunThread(std::size_t core);
 
+    void PreemptSingleCore();
+
     struct CoreData {
         std::shared_ptr<Common::Fiber> host_context;
         std::unique_ptr<Common::Event> enter_barrier;
@@ -70,6 +87,11 @@ private:
 
     std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
 
+    bool is_multicore{};
+    std::size_t current_core{};
+    std::size_t preemption_count{};
+    static constexpr std::size_t max_cycle_runs = 5;
+
     System& system;
 };
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 721ab1e708..4a091ea381 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -113,6 +113,10 @@ struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
         : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
 
+    void SetMulticore(bool is_multicore) {
+        this->is_multicore = is_multicore;
+    }
+
     void Initialize(KernelCore& kernel) {
         Shutdown();
 
@@ -237,6 +241,9 @@ struct KernelCore::Impl {
 
     void RegisterCoreThread(std::size_t core_id) {
         std::unique_lock lock{register_thread_mutex};
+        if (!is_multicore) {
+            single_core_thread_id = std::this_thread::get_id();
+        }
         const std::thread::id this_id = std::this_thread::get_id();
         const auto it = host_thread_ids.find(this_id);
         ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
@@ -258,6 +265,11 @@ struct KernelCore::Impl {
 
     u32 GetCurrentHostThreadID() const {
         const std::thread::id this_id = std::this_thread::get_id();
+        if (!is_multicore) {
+            if (single_core_thread_id == this_id) {
+                return static_cast<u32>(system.GetCpuManager().CurrentCore());
+            }
+        }
         const auto it = host_thread_ids.find(this_id);
         if (it == host_thread_ids.end()) {
             return Core::INVALID_HOST_THREAD_ID;
@@ -378,6 +390,9 @@ struct KernelCore::Impl {
 
     std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
 
+    bool is_multicore{};
+    std::thread::id single_core_thread_id{};
+
     // System context
     Core::System& system;
 };
@@ -387,6 +402,10 @@ KernelCore::~KernelCore() {
     Shutdown();
 }
 
+void KernelCore::SetMulticore(bool is_multicore) {
+    impl->SetMulticore(is_multicore);
+}
+
 void KernelCore::Initialize() {
     impl->Initialize(*this);
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 5d32a83294..162bbd2f8d 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -65,6 +65,9 @@ public:
     KernelCore(KernelCore&&) = delete;
     KernelCore& operator=(KernelCore&&) = delete;
 
+    /// Sets if emulation is multicore or single core, must be set before Initialize
+    void SetMulticore(bool is_multicore);
+
     /// Resets the kernel to a clean slate for use.
     void Initialize();
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4cb5d05e59..7def007689 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -715,8 +715,8 @@ struct Memory::Impl {
             ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
             break;
         case Common::PageType::RasterizerCachedMemory: {
-            u8* host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
             T volatile* pointer = reinterpret_cast<T volatile*>(&host_ptr);
             return Common::AtomicCompareAndSwap(pointer, data, expected);
             break;
@@ -745,8 +745,8 @@ struct Memory::Impl {
             ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
             break;
         case Common::PageType::RasterizerCachedMemory: {
-            u8* host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(u128));
+            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
+            system.GPU().InvalidateRegion(vaddr, sizeof(u128));
             u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&host_ptr);
             return Common::AtomicCompareAndSwap(pointer, data, expected);
             break;
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index cb95423e09..74b2ad537b 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -23,6 +23,11 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
 ConfigureGeneral::~ConfigureGeneral() = default;
 
 void ConfigureGeneral::SetConfiguration() {
+    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
+
+    ui->use_multi_core->setEnabled(runtime_lock);
+    ui->use_multi_core->setChecked(Settings::values.use_multi_core);
+
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
     ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background);
@@ -41,6 +46,7 @@ void ConfigureGeneral::ApplyConfiguration() {
 
     Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
     Settings::values.frame_limit = ui->frame_limit->value();
+    Settings::values.use_multi_core = ui->use_multi_core->isChecked();
 }
 
 void ConfigureGeneral::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index fc3b7e65a5..f872bddecf 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -51,6 +51,13 @@
             </item>
            </layout>
           </item>
+          <item>
+           <widget class="QCheckBox" name="use_multi_core">
+            <property name="text">
+             <string>Emulate CPU in Multiple Cores</string>
+            </property>
+           </widget>
+          </item>
           <item>
            <widget class="QCheckBox" name="toggle_check_exit">
             <property name="text">

From 3a8dc7ec8cab1aa9ad6abab002ea02d2ea1db0fd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 9 Mar 2020 10:51:05 -0400
Subject: [PATCH 078/122] Synchronization: Correct wide Assertion.

---
 src/core/hle/kernel/synchronization.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index 275bf11cc0..851b702a55 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -23,8 +23,10 @@ void Synchronization::SignalObject(SynchronizationObject& obj) const {
     if (obj.IsSignaled()) {
         for (auto thread : obj.GetWaitingThreads()) {
             if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
-                ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-                ASSERT(thread->IsWaitingSync());
+                if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) {
+                    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+                    ASSERT(thread->IsWaitingSync());
+                }
                 thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
                 thread->ResumeFromWait();
             }

From 9e057b467f9644f5b46292c8944cf1dcfffb8d2e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 10 Mar 2020 11:50:33 -0400
Subject: [PATCH 079/122] CPU_Manager: Unload/Reload threads on preemption on
 SingleCore

---
 src/core/cpu_manager.cpp          | 14 +++++++----
 src/core/cpu_manager.h            |  5 ++--
 src/core/hle/kernel/scheduler.cpp | 42 +++++++++++++++++++++++++++++++
 src/core/hle/kernel/scheduler.h   | 10 ++++++++
 4 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index e72f898083..95842aad10 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -225,7 +225,7 @@ void CpuManager::SingleCoreRunGuestLoop() {
         }
         physical_core.ClearExclusive();
         PreemptSingleCore();
-        auto& scheduler = physical_core.Scheduler();
+        auto& scheduler = kernel.Scheduler(current_core);
         scheduler.TryDoContextSwitch();
     }
 }
@@ -260,11 +260,15 @@ void CpuManager::SingleCoreRunSuspendThread() {
 void CpuManager::PreemptSingleCore() {
     preemption_count = 0;
     std::size_t old_core = current_core;
-    current_core = (current_core + 1) % Core::Hardware::NUM_CPU_CORES;
+    current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
     auto& scheduler = system.Kernel().Scheduler(old_core);
-    Kernel::Thread* current_thread = system.Kernel().Scheduler(old_core).GetCurrentThread();
-    Kernel::Thread* next_thread = system.Kernel().Scheduler(current_core).GetCurrentThread();
-    Common::Fiber::YieldTo(current_thread->GetHostContext(), next_thread->GetHostContext());
+    Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+    scheduler.Unload();
+    auto& next_scheduler = system.Kernel().Scheduler(current_core);
+    Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());
+    /// May have changed scheduler
+    auto& current_scheduler = system.Kernel().Scheduler(current_core);
+    current_scheduler.Reload();
 }
 
 void CpuManager::SingleCorePause(bool paused) {
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 1e81481ec2..ff1935d5c7 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <atomic>
 #include <functional>
 #include <memory>
 #include <thread>
@@ -45,7 +46,7 @@ public:
     void* GetStartFuncParamater();
 
     std::size_t CurrentCore() const {
-        return current_core;
+        return current_core.load();
     }
 
 private:
@@ -88,7 +89,7 @@ private:
     std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
 
     bool is_multicore{};
-    std::size_t current_core{};
+    std::atomic<std::size_t> current_core{};
     std::size_t preemption_count{};
     static constexpr std::size_t max_cycle_runs = 5;
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index d68d86cdfc..00322d997b 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -602,6 +602,48 @@ void Scheduler::OnThreadStart() {
     SwitchContextStep2();
 }
 
+void Scheduler::Unload() {
+    Thread* thread = current_thread.get();
+    if (thread) {
+        thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+        thread->SetIsRunning(false);
+        if (!thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
+            cpu_core.SaveContext(thread->GetContext32());
+            cpu_core.SaveContext(thread->GetContext64());
+            // Save the TPIDR_EL0 system register in case it was modified.
+            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
+        }
+        thread->context_guard.unlock();
+    }
+}
+
+void Scheduler::Reload() {
+    Thread* thread = current_thread.get();
+    if (thread) {
+        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
+                   "Thread must be runnable.");
+
+        // Cancel any outstanding wakeup events for this thread
+        thread->SetIsRunning(true);
+        thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+
+        auto* const thread_owner_process = thread->GetOwnerProcess();
+        if (thread_owner_process != nullptr) {
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+        }
+        if (!thread->IsHLEThread()) {
+            auto& cpu_core = system.ArmInterface(core_id);
+            cpu_core.LoadContext(thread->GetContext32());
+            cpu_core.LoadContext(thread->GetContext64());
+            cpu_core.SetTlsAddress(thread->GetTLSAddress());
+            cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
+        }
+    }
+}
+
 void Scheduler::SwitchContextStep2() {
     Thread* previous_thread = current_thread_prev.get();
     Thread* new_thread = selected_thread.get();
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 5e062bf595..f63cc50859 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -210,6 +210,12 @@ public:
     /// Reschedules to the next available thread (call after current thread is suspended)
     void TryDoContextSwitch();
 
+    /// The next two are for SingleCore Only.
+    /// Unload current thread before preempting core.
+    void Unload();
+    /// Reload current thread after core preemption.
+    void Reload();
+
     /// Gets the current running thread
     Thread* GetCurrentThread() const;
 
@@ -230,6 +236,10 @@ public:
 
     void OnThreadStart();
 
+    std::shared_ptr<Common::Fiber> ControlContext() {
+        return switch_fiber;
+    }
+
 private:
     friend class GlobalScheduler;
 

From a958ed9bbd1cef95fe074c9f33acdcdddb0dbe0d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 10 Mar 2020 13:13:39 -0400
Subject: [PATCH 080/122] Kernel: Preempt Single core on redundant yields.

---
 src/core/cpu_manager.h         |  4 ++--
 src/core/hle/kernel/kernel.cpp |  4 ++++
 src/core/hle/kernel/kernel.h   |  2 ++
 src/core/hle/kernel/svc.cpp    | 23 +++++++++++++++++------
 src/core/hle/kernel/thread.cpp | 21 ++++++++++++---------
 src/core/hle/kernel/thread.h   |  9 +++++----
 6 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index ff1935d5c7..c0e454a7dd 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -45,6 +45,8 @@ public:
     std::function<void(void*)> GetSuspendThreadStartFunc();
     void* GetStartFuncParamater();
 
+    void PreemptSingleCore();
+
     std::size_t CurrentCore() const {
         return current_core.load();
     }
@@ -71,8 +73,6 @@ private:
 
     void RunThread(std::size_t core);
 
-    void PreemptSingleCore();
-
     struct CoreData {
         std::shared_ptr<Common::Fiber> host_context;
         std::unique_ptr<Common::Event> enter_barrier;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4a091ea381..2a1b917527 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -642,6 +642,10 @@ void KernelCore::Suspend(bool in_suspention) {
     }
 }
 
+bool KernelCore::IsMulticore() const {
+    return impl->is_multicore;
+}
+
 void KernelCore::ExceptionalExit() {
     exception_exited = true;
     Suspend(true);
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 162bbd2f8d..50eeb50ec1 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -212,6 +212,8 @@ public:
     /// Exceptional exit the OS.
     void ExceptionalExit();
 
+    bool IsMulticore() const;
+
 private:
     friend class Object;
     friend class Process;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index aad2ac5493..eca92b3567 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -19,6 +19,7 @@
 #include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -1509,21 +1510,31 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
 
     if (nanoseconds <= 0) {
         switch (static_cast<SleepType>(nanoseconds)) {
-        case SleepType::YieldWithoutLoadBalancing:
-            current_thread->YieldSimple();
+        case SleepType::YieldWithoutLoadBalancing: {
+            auto pair = current_thread->YieldSimple();
+            is_redundant = pair.second;
             break;
-        case SleepType::YieldWithLoadBalancing:
-            current_thread->YieldAndBalanceLoad();
+        }
+        case SleepType::YieldWithLoadBalancing: {
+            auto pair = current_thread->YieldAndBalanceLoad();
+            is_redundant = pair.second;
             break;
-        case SleepType::YieldAndWaitForLoadBalancing:
-            current_thread->YieldAndWaitForLoadBalancing();
+        }
+        case SleepType::YieldAndWaitForLoadBalancing: {
+            auto pair = current_thread->YieldAndWaitForLoadBalancing();
+            is_redundant = pair.second;
             break;
+        }
         default:
             UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
         }
     } else {
         current_thread->Sleep(nanoseconds);
     }
+
+    if (is_redundant && !system.Kernel().IsMulticore()) {
+        system.GetCpuManager().PreemptSingleCore();
+    }
 }
 
 /// Wait process wide key atomic
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index fc6c0bc857..1c32552b12 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -435,28 +435,31 @@ ResultCode Thread::Sleep(s64 nanoseconds) {
     return RESULT_SUCCESS;
 }
 
-ResultCode Thread::YieldSimple() {
+std::pair<ResultCode, bool> Thread::YieldSimple() {
+    bool is_redundant = false;
     {
         SchedulerLock lock(kernel);
-        kernel.GlobalScheduler().YieldThread(this);
+        is_redundant = kernel.GlobalScheduler().YieldThread(this);
     }
-    return RESULT_SUCCESS;
+    return {RESULT_SUCCESS, is_redundant};
 }
 
-ResultCode Thread::YieldAndBalanceLoad() {
+std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() {
+    bool is_redundant = false;
     {
         SchedulerLock lock(kernel);
-        kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
+        is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
     }
-    return RESULT_SUCCESS;
+    return {RESULT_SUCCESS, is_redundant};
 }
 
-ResultCode Thread::YieldAndWaitForLoadBalancing() {
+std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() {
+    bool is_redundant = false;
     {
         SchedulerLock lock(kernel);
-        kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
+        is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
     }
-    return RESULT_SUCCESS;
+    return {RESULT_SUCCESS, is_redundant};
 }
 
 void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 953b023b5a..9a29875ac0 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -6,6 +6,7 @@
 
 #include <functional>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "common/common_types.h"
@@ -503,13 +504,13 @@ public:
     ResultCode Sleep(s64 nanoseconds);
 
     /// Yields this thread without rebalancing loads.
-    ResultCode YieldSimple();
+    std::pair<ResultCode, bool> YieldSimple();
 
     /// Yields this thread and does a load rebalancing.
-    ResultCode YieldAndBalanceLoad();
+    std::pair<ResultCode, bool> YieldAndBalanceLoad();
 
     /// Yields this thread and if the core is left idle, loads are rebalanced
-    ResultCode YieldAndWaitForLoadBalancing();
+    std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();
 
     void IncrementYieldCount() {
         yield_count++;
@@ -587,7 +588,7 @@ private:
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
     Common::SpinLock context_guard{};
-    std::shared_ptr<Common::Fiber> host_context{};
+    std::shared_ptr<Common::Fiber> host_context{};
 
     u64 thread_id = 0;
 

From 3bc98d1133948b045d5504da20f87e072506e4ff Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 10 Mar 2020 18:41:11 -0400
Subject: [PATCH 081/122] Kernel: Rewind on SVC change.

---
 src/core/hle/kernel/scheduler.cpp |  2 ++
 src/core/hle/kernel/svc.cpp       |  9 ++++-----
 src/core/hle/kernel/thread.h      | 10 ++++++++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 00322d997b..25fc8a3e8b 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -605,6 +605,7 @@ void Scheduler::OnThreadStart() {
 void Scheduler::Unload() {
     Thread* thread = current_thread.get();
     if (thread) {
+        thread->SetContinuousOnSVC(false);
         thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         thread->SetIsRunning(false);
         if (!thread->IsHLEThread()) {
@@ -697,6 +698,7 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
+        previous_thread->SetContinuousOnSVC(false);
         previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         if (!previous_thread->IsHLEThread()) {
             auto& cpu_core = system.ArmInterface(core_id);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index eca92b3567..d3d4e7bf9f 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2459,7 +2459,8 @@ MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 void Call(Core::System& system, u32 immediate) {
     MICROPROFILE_SCOPE(Kernel_SVC);
 
-    auto& physical_core = system.CurrentPhysicalCore();
+    auto* thread = system.CurrentScheduler().GetCurrentThread();
+    thread->SetContinuousOnSVC(true);
 
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
@@ -2472,10 +2473,8 @@ void Call(Core::System& system, u32 immediate) {
     } else {
         LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);
     }
-    auto& physical_core_2 = system.CurrentPhysicalCore();
-    if (physical_core.CoreIndex() != physical_core_2.CoreIndex()) {
-        LOG_CRITICAL(Kernel_SVC, "Rewinding");
-        auto* thread = physical_core_2.Scheduler().GetCurrentThread();
+
+    if (!thread->IsContinuousOnSVC()) {
         auto* host_context = thread->GetHostContext().get();
         host_context->Rewind();
     }
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 9a29875ac0..168828ab0f 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -573,6 +573,14 @@ public:
         return pausing_state != 0;
     }
 
+    bool IsContinuousOnSVC() const {
+        return is_continuous_on_svc;
+    }
+
+    void SetContinuousOnSVC(bool is_continuous) {
+        is_continuous_on_svc = is_continuous;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
@@ -672,6 +680,8 @@ private:
     bool is_waiting_on_sync = false;
     bool is_sync_cancelled = false;
 
+    bool is_continuous_on_svc = false;
+
     bool will_be_terminated = false;
 
     std::string name;

From b27e93bf78faf69db01f5315abb78ed3baf93787 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 11 Mar 2020 20:44:53 -0400
Subject: [PATCH 082/122] General: Fix Stop function

---
 src/core/core_timing.cpp       |  1 +
 src/core/hle/kernel/kernel.cpp | 15 +++++++++++++++
 src/core/hle/kernel/thread.cpp |  8 +++++---
 src/yuzu/bootmanager.cpp       |  6 ++++--
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index c91ae99759..3438f79cee 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -61,6 +61,7 @@ void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
 void CoreTiming::Shutdown() {
     paused = true;
     shutting_down = true;
+    pause_event.Set();
     event.Set();
     timer_thread->join();
     ClearPendingEvents();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 2a1b917527..24da4367e4 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -119,6 +119,7 @@ struct KernelCore::Impl {
 
     void Initialize(KernelCore& kernel) {
         Shutdown();
+        RegisterHostThread();
 
         InitializePhysicalCores();
         InitializeSystemResourceLimit(kernel);
@@ -135,6 +136,19 @@ struct KernelCore::Impl {
         next_user_process_id = Process::ProcessIDMin;
         next_thread_id = 1;
 
+        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+            if (suspend_threads[i]) {
+                suspend_threads[i].reset();
+            }
+        }
+
+        for (std::size_t i = 0; i < cores.size(); i++) {
+            cores[i].Shutdown();
+        }
+        cores.clear();
+
+        registered_core_threads.reset();
+
         process_list.clear();
         current_process = nullptr;
 
@@ -154,6 +168,7 @@ struct KernelCore::Impl {
         cores.clear();
 
         exclusive_monitor.reset();
+        host_thread_ids.clear();
     }
 
     void InitializePhysicalCores() {
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 1c32552b12..6f8e7a070a 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -56,10 +56,12 @@ void Thread::Stop() {
         Signal();
         kernel.GlobalHandleTable().Close(global_handle);
 
-        owner_process->UnregisterThread(this);
+        if (owner_process) {
+            owner_process->UnregisterThread(this);
 
-        // Mark the TLS slot in the thread's page as free.
-        owner_process->FreeTLSRegion(tls_address);
+            // Mark the TLS slot in the thread's page as free.
+            owner_process->FreeTLSRegion(tls_address);
+        }
     }
     global_handle = 0;
 }
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index fcac0db911..bce66cf076 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -86,8 +86,10 @@ void EmuThread::run() {
             }
             running_guard = false;
 
-            was_active = true;
-            emit DebugModeEntered();
+            if (!stop_run) {
+                was_active = true;
+                emit DebugModeEntered();
+            }
         } else if (exec_step) {
             UNIMPLEMENTED();
         } else {

From 7f9c52ae4cb0b604ee4049aa393b392d094fcf4f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 12 Mar 2020 16:48:43 -0400
Subject: [PATCH 083/122] General: Fix microprofile on dynarmic/svc, fix wait
 tree showing which threads were running.

---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  4 ----
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  5 +----
 src/core/core.cpp                         | 15 +++++++++++++++
 src/core/core.h                           |  6 ++++++
 src/core/cpu_manager.cpp                  |  4 ++++
 src/core/hle/kernel/kernel.cpp            | 15 +++++++++++++++
 src/core/hle/kernel/kernel.h              |  4 ++++
 src/core/hle/kernel/scheduler.cpp         |  7 +++++++
 src/core/hle/kernel/svc.cpp               | 10 +++++++---
 src/core/hle/kernel/thread.h              | 18 ++++++++++++++++++
 src/yuzu/debugger/wait_tree.cpp           | 12 ++++++++++--
 11 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 73d4a6ae59..1f19be36ed 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -7,7 +7,6 @@
 #include <dynarmic/A32/a32.h>
 #include <dynarmic/A32/config.h>
 #include <dynarmic/A32/context.h>
-#include "common/microprofile.h"
 #include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
@@ -98,10 +97,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
     return std::make_unique<Dynarmic::A32::Jit>(config);
 }
 
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
-
 void ARM_Dynarmic_32::Run() {
-    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);
     jit->Run();
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index f8db526ed6..d685459d56 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -7,7 +7,6 @@
 #include <dynarmic/A64/a64.h>
 #include <dynarmic/A64/config.h>
 #include "common/logging/log.h"
-#include "common/microprofile.h"
 #include "common/page_table.h"
 #include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
@@ -181,11 +180,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
+
 
 void ARM_Dynarmic_64::Run() {
-    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64);
-
     jit->Run();
 }
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fd1bdcaf0a..032da7aa58 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -8,6 +8,7 @@
 
 #include "common/file_util.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "common/string_util.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
@@ -50,6 +51,8 @@
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
+
 namespace Core {
 
 namespace {
@@ -391,6 +394,8 @@ struct System::Impl {
 
     std::unique_ptr<Core::PerfStats> perf_stats;
     Core::FrameLimiter frame_limiter;
+
+    std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
 };
 
 System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -736,4 +741,14 @@ void System::RegisterHostThread() {
     impl->kernel.RegisterHostThread();
 }
 
+void System::EnterDynarmicProfile() {
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    impl->dynarmic_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(ARM_Jit_Dynarmic));
+}
+
+void System::ExitDynarmicProfile() {
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    MicroProfileLeave(MICROPROFILE_TOKEN(ARM_Jit_Dynarmic), impl->dynarmic_ticks[core]);
+}
+
 } // namespace Core
diff --git a/src/core/core.h b/src/core/core.h
index 9a0dd10753..87df79d573 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -377,6 +377,12 @@ public:
     /// Register a host thread as an auxiliary thread.
     void RegisterHostThread();
 
+    /// Enter Dynarmic Microprofile
+    void EnterDynarmicProfile();
+
+    /// Exit Dynarmic Microprofile
+    void ExitDynarmicProfile();
+
 private:
     System();
 
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 95842aad10..9e2e6d49fb 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -118,9 +118,11 @@ void CpuManager::MultiCoreRunGuestLoop() {
     host_context.reset();
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
+        system.EnterDynarmicProfile();
         while (!physical_core.IsInterrupted()) {
             physical_core.Run();
         }
+        system.ExitDynarmicProfile();
         physical_core.ClearExclusive();
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
@@ -216,6 +218,7 @@ void CpuManager::SingleCoreRunGuestLoop() {
     host_context.reset();
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
+        system.EnterDynarmicProfile();
         while (!physical_core.IsInterrupted()) {
             physical_core.Run();
             preemption_count++;
@@ -224,6 +227,7 @@ void CpuManager::SingleCoreRunGuestLoop() {
             }
         }
         physical_core.ClearExclusive();
+        system.ExitDynarmicProfile();
         PreemptSingleCore();
         auto& scheduler = kernel.Scheduler(current_core);
         scheduler.TryDoContextSwitch();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 24da4367e4..d2f5f9bf23 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -13,6 +13,7 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "common/thread.h"
 #include "core/arm/arm_interface.h"
 #include "core/arm/exclusive_monitor.h"
@@ -41,6 +42,8 @@
 #include "core/hle/result.h"
 #include "core/memory.h"
 
+MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
+
 namespace Kernel {
 
 /**
@@ -408,6 +411,8 @@ struct KernelCore::Impl {
     bool is_multicore{};
     std::thread::id single_core_thread_id{};
 
+    std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
+
     // System context
     Core::System& system;
 };
@@ -666,4 +671,14 @@ void KernelCore::ExceptionalExit() {
     Suspend(true);
 }
 
+void KernelCore::EnterSVCProfile() {
+    std::size_t core = impl->GetCurrentHostThreadID();
+    impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));
+}
+
+void KernelCore::ExitSVCProfile() {
+    std::size_t core = impl->GetCurrentHostThreadID();
+    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 50eeb50ec1..1eb6ede73e 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -214,6 +214,10 @@ public:
 
     bool IsMulticore() const;
 
+    void EnterSVCProfile();
+
+    void ExitSVCProfile();
+
 private:
     friend class Object;
     friend class Process;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 25fc8a3e8b..2ad380b175 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -354,7 +354,9 @@ void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
     }
     if (must_context_switch) {
         auto& core_scheduler = kernel.CurrentScheduler();
+        kernel.ExitSVCProfile();
         core_scheduler.TryDoContextSwitch();
+        kernel.EnterSVCProfile();
     }
 }
 
@@ -628,6 +630,7 @@ void Scheduler::Reload() {
 
         // Cancel any outstanding wakeup events for this thread
         thread->SetIsRunning(true);
+        thread->SetWasRunning(false);
         thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
 
         auto* const thread_owner_process = thread->GetOwnerProcess();
@@ -660,6 +663,7 @@ void Scheduler::SwitchContextStep2() {
         // Cancel any outstanding wakeup events for this thread
         new_thread->SetIsRunning(true);
         new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+        new_thread->SetWasRunning(false);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
         if (previous_process != thread_owner_process && thread_owner_process != nullptr) {
@@ -698,6 +702,9 @@ void Scheduler::SwitchContext() {
 
     // Save context for previous thread
     if (previous_thread) {
+        if (new_thread != nullptr && new_thread->IsSuspendThread()) {
+            previous_thread->SetWasRunning(true);
+        }
         previous_thread->SetContinuousOnSVC(false);
         previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         if (!previous_thread->IsHLEThread()) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index d3d4e7bf9f..9b9f9402ea 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -2454,10 +2454,10 @@ static const FunctionDef* GetSVCInfo64(u32 func_num) {
     return &SVC_Table_64[func_num];
 }
 
-MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
-
 void Call(Core::System& system, u32 immediate) {
-    MICROPROFILE_SCOPE(Kernel_SVC);
+    system.ExitDynarmicProfile();
+    auto& kernel = system.Kernel();
+    kernel.EnterSVCProfile();
 
     auto* thread = system.CurrentScheduler().GetCurrentThread();
     thread->SetContinuousOnSVC(true);
@@ -2474,10 +2474,14 @@ void Call(Core::System& system, u32 immediate) {
         LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);
     }
 
+    kernel.ExitSVCProfile();
+
     if (!thread->IsContinuousOnSVC()) {
         auto* host_context = thread->GetHostContext().get();
         host_context->Rewind();
     }
+
+    system.EnterDynarmicProfile();
 }
 
 } // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 168828ab0f..f42d7bd136 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -350,6 +350,22 @@ public:
         return (type & THREADTYPE_HLE) != 0;
     }
 
+    bool IsSuspendThread() const {
+        return (type & THREADTYPE_SUSPEND) != 0;
+    }
+
+    bool IsIdleThread() const {
+        return (type & THREADTYPE_IDLE) != 0;
+    }
+
+    bool WasRunning() const {
+        return was_running;
+    }
+
+    void SetWasRunning(bool value) {
+        was_running = value;
+    }
+
     std::shared_ptr<Common::Fiber> GetHostContext() const;
 
     ThreadStatus GetStatus() const {
@@ -684,6 +700,8 @@ private:
 
     bool will_be_terminated = false;
 
+    bool was_running = false;
+
     std::string name;
 };
 
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index ab7b18abec..fa091f4573 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -209,7 +209,11 @@ QString WaitTreeThread::GetText() const {
         break;
     case Kernel::ThreadStatus::Ready:
         if (!thread.IsPaused()) {
-            status = tr("ready");
+            if (thread.WasRunning()) {
+                status = tr("running");
+            } else {
+                status = tr("ready");
+            }
         } else {
             status = tr("paused");
         }
@@ -261,7 +265,11 @@ QColor WaitTreeThread::GetColor() const {
         return QColor(Qt::GlobalColor::darkGreen);
     case Kernel::ThreadStatus::Ready:
         if (!thread.IsPaused()) {
-            return QColor(Qt::GlobalColor::darkBlue);
+            if (thread.WasRunning()) {
+                return QColor(Qt::GlobalColor::darkGreen);
+            } else {
+                return QColor(Qt::GlobalColor::darkBlue);
+            }
         } else {
             return QColor(Qt::GlobalColor::lightGray);
         }

From 4fdd77fc05e1cb6cef51b285ac517532c5ba7a5e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 12 Mar 2020 19:53:54 -0400
Subject: [PATCH 084/122] Scheduler: Correct yielding interaction with
 SetThreadActivity.

---
 src/core/hle/kernel/scheduler.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 2ad380b175..8d56b49ce6 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -144,6 +144,11 @@ u32 GlobalScheduler::SelectThreads() {
 bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
     ASSERT(is_locked);
     // Note: caller should use critical section, etc.
+    if (!yielding_thread->IsRunnable()) {
+        // Normally this case shouldn't happen except for SetThreadActivity.
+        is_reselection_pending.store(true, std::memory_order_release);
+        return false;
+    }
     const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
     const u32 priority = yielding_thread->GetPriority();
 
@@ -161,6 +166,11 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
     ASSERT(is_locked);
     // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
     // etc.
+    if (!yielding_thread->IsRunnable()) {
+        // Normally this case shouldn't happen except for SetThreadActivity.
+        is_reselection_pending.store(true, std::memory_order_release);
+        return false;
+    }
     const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
     const u32 priority = yielding_thread->GetPriority();
 
@@ -212,6 +222,11 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
     ASSERT(is_locked);
     // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
     // etc.
+    if (!yielding_thread->IsRunnable()) {
+        // Normally this case shouldn't happen except for SetThreadActivity.
+        is_reselection_pending.store(true, std::memory_order_release);
+        return false;
+    }
     Thread* winner = nullptr;
     const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
 

From 906b11549b2e71a2988bf6838a74cfd96d725563 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 12 Mar 2020 19:55:53 -0400
Subject: [PATCH 085/122] CPU_Manager: Correct stopping on SingleCore.

---
 src/core/cpu_manager.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 9e2e6d49fb..e92b0fb376 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -39,9 +39,14 @@ void CpuManager::Initialize() {
 void CpuManager::Shutdown() {
     running_mode = false;
     Pause(false);
-    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
-        core_data[core].host_thread->join();
-        core_data[core].host_thread.reset();
+    if (is_multicore) {
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            core_data[core].host_thread->join();
+            core_data[core].host_thread.reset();
+        }
+    } else {
+        core_data[0].host_thread->join();
+        core_data[0].host_thread.reset();
     }
 }
 

From a65d772faf605435eedeb69656409e6e9974706c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 15 Mar 2020 15:54:40 -0400
Subject: [PATCH 086/122] ARM: Adapt to new Exclusive Monitor Interface.

---
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 22 ++++++++++------------
 src/core/arm/dynarmic/arm_dynarmic_64.h   | 10 +++++-----
 src/core/arm/exclusive_monitor.h          | 10 +++++-----
 src/core/hle/kernel/address_arbiter.cpp   |  9 +++------
 src/core/hle/kernel/svc.cpp               |  4 +---
 5 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index d685459d56..724c68c391 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -180,8 +180,6 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 
-
-
 void ARM_Dynarmic_64::Run() {
     jit->Run();
 }
@@ -297,24 +295,24 @@ DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::
 
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
-void DynarmicExclusiveMonitor::SetExclusive8(std::size_t core_index, VAddr addr) {
-    monitor.Mark<u8>(core_index, addr, 1, [&]() -> u8 { return memory.Read8(addr); });
+u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
 }
 
-void DynarmicExclusiveMonitor::SetExclusive16(std::size_t core_index, VAddr addr) {
-    monitor.Mark<u16>(core_index, addr, 2, [&]() -> u16 { return memory.Read16(addr); });
+u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
 }
 
-void DynarmicExclusiveMonitor::SetExclusive32(std::size_t core_index, VAddr addr) {
-    monitor.Mark<u32>(core_index, addr, 4, [&]() -> u32 { return memory.Read32(addr); });
+u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
 }
 
-void DynarmicExclusiveMonitor::SetExclusive64(std::size_t core_index, VAddr addr) {
-    monitor.Mark<u64>(core_index, addr, 8, [&]() -> u64 { return memory.Read64(addr); });
+u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
 }
 
-void DynarmicExclusiveMonitor::SetExclusive128(std::size_t core_index, VAddr addr) {
-    monitor.Mark<u128>(core_index, addr, 16, [&]() -> u128 {
+u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) {
+    return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 {
         u128 result;
         result[0] = memory.Read64(addr);
         result[1] = memory.Read64(addr + 8);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 3ead59f166..5560578add 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -82,11 +82,11 @@ public:
     explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
     ~DynarmicExclusiveMonitor() override;
 
-    void SetExclusive8(std::size_t core_index, VAddr addr) override;
-    void SetExclusive16(std::size_t core_index, VAddr addr) override;
-    void SetExclusive32(std::size_t core_index, VAddr addr) override;
-    void SetExclusive64(std::size_t core_index, VAddr addr) override;
-    void SetExclusive128(std::size_t core_index, VAddr addr) override;
+    u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
+    u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
+    u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
+    u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
+    u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
     void ClearExclusive() override;
 
     bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 2ee312eeea..62f6e60231 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -18,11 +18,11 @@ class ExclusiveMonitor {
 public:
     virtual ~ExclusiveMonitor();
 
-    virtual void SetExclusive8(std::size_t core_index, VAddr addr) = 0;
-    virtual void SetExclusive16(std::size_t core_index, VAddr addr) = 0;
-    virtual void SetExclusive32(std::size_t core_index, VAddr addr) = 0;
-    virtual void SetExclusive64(std::size_t core_index, VAddr addr) = 0;
-    virtual void SetExclusive128(std::size_t core_index, VAddr addr) = 0;
+    virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
+    virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
+    virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
+    virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
+    virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
     virtual void ClearExclusive() = 0;
 
     virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index e8f22b598e..4d2a9b35d3 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -80,8 +80,7 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
     auto& monitor = system.Monitor();
     u32 current_value;
     do {
-        monitor.SetExclusive32(current_core, address);
-        current_value = memory.Read32(address);
+        current_value = monitor.ExclusiveRead32(current_core, address);
 
         if (current_value != value) {
             return ERR_INVALID_STATE;
@@ -110,8 +109,7 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
     auto& monitor = system.Monitor();
     s32 updated_value;
     do {
-        monitor.SetExclusive32(current_core, address);
-        updated_value = memory.Read32(address);
+        updated_value = monitor.ExclusiveRead32(current_core, address);
 
         if (updated_value != value) {
             return ERR_INVALID_STATE;
@@ -186,8 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
         const std::size_t current_core = system.CurrentCoreIndex();
         auto& monitor = system.Monitor();
         do {
-            monitor.SetExclusive32(current_core, address);
-            current_value = static_cast<s32>(memory.Read32(address));
+            current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
             if (should_decrement) {
                 decrement_value = current_value - 1;
             } else {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9b9f9402ea..36e9c48f98 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1653,10 +1653,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
         u32 update_val = 0;
         const VAddr mutex_address = thread->GetMutexWaitAddress();
         do {
-            monitor.SetExclusive32(current_core, mutex_address);
-
             // If the mutex is not yet acquired, acquire it.
-            mutex_val = memory.Read32(mutex_address);
+            mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);
 
             if (mutex_val != 0) {
                 update_val = mutex_val | Mutex::MutexHasWaitersFlag;

From e061a1d9862a627bcb79fe335014f71a9d79b90b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 15 Mar 2020 21:34:22 -0400
Subject: [PATCH 087/122] GUI: Make multicore only work with Async and add GUI
 for multicore.

---
 src/core/core.cpp                           | 11 ++++++--
 src/core/cpu_manager.cpp                    | 11 ++++++++
 src/core/cpu_manager.h                      | 14 ++++++++++
 src/yuzu/configuration/configure_general.ui |  2 +-
 src/yuzu/main.cpp                           | 29 +++++++++++++++++++--
 src/yuzu/main.h                             |  1 +
 6 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 032da7aa58..0f0eb885ad 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -152,8 +152,12 @@ struct System::Impl {
 
         device_memory = std::make_unique<Core::DeviceMemory>(system);
 
-        kernel.SetMulticore(Settings::values.use_multi_core);
-        cpu_manager.SetMulticore(Settings::values.use_multi_core);
+        is_multicore = Settings::values.use_multi_core;
+        is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation;
+
+        kernel.SetMulticore(is_multicore);
+        cpu_manager.SetMulticore(is_multicore);
+        cpu_manager.SetAsyncGpu(is_async_gpu);
 
         core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
@@ -395,6 +399,9 @@ struct System::Impl {
     std::unique_ptr<Core::PerfStats> perf_stats;
     Core::FrameLimiter frame_limiter;
 
+    bool is_multicore{};
+    bool is_async_gpu{};
+
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
 };
 
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index e92b0fb376..d7bd162bc5 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -9,6 +9,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
+#include "core/frontend/emu_window.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
@@ -21,7 +22,17 @@ CpuManager::CpuManager(System& system) : system{system} {}
 CpuManager::~CpuManager() = default;
 
 void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
+    if (!cpu_manager.is_async_gpu && !cpu_manager.is_multicore) {
+        cpu_manager.render_window->MakeCurrent();
+    }
     cpu_manager.RunThread(core);
+    if (!cpu_manager.is_async_gpu && !cpu_manager.is_multicore) {
+        cpu_manager.render_window->DoneCurrent();
+    }
+}
+
+void CpuManager::SetRenderWindow(Core::Frontend::EmuWindow& render_window) {
+    this->render_window = &render_window;
 }
 
 void CpuManager::Initialize() {
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index c0e454a7dd..37cef2b122 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -16,6 +16,10 @@ class Event;
 class Fiber;
 } // namespace Common
 
+namespace Core::Frontend {
+class EmuWindow;
+} // namespace Core::Frontend
+
 namespace Core {
 
 class System;
@@ -35,6 +39,12 @@ public:
     void SetMulticore(bool is_multicore) {
         this->is_multicore = is_multicore;
     }
+
+    /// Sets if emulation is using an asynchronous GPU.
+    void SetAsyncGpu(bool is_async_gpu) {
+        this->is_async_gpu = is_async_gpu;
+    }
+
     void Initialize();
     void Shutdown();
 
@@ -51,6 +61,8 @@ public:
         return current_core.load();
     }
 
+    void SetRenderWindow(Core::Frontend::EmuWindow& render_window);
+
 private:
     static void GuestThreadFunction(void* cpu_manager);
     static void GuestRewindFunction(void* cpu_manager);
@@ -88,10 +100,12 @@ private:
 
     std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
 
+    bool is_async_gpu{};
     bool is_multicore{};
     std::atomic<std::size_t> current_core{};
     std::size_t preemption_count{};
     static constexpr std::size_t max_cycle_runs = 5;
+    Core::Frontend::EmuWindow* render_window;
 
     System& system;
 };
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index f872bddecf..2711116a23 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -54,7 +54,7 @@
           <item>
            <widget class="QCheckBox" name="use_multi_core">
             <property name="text">
-             <string>Emulate CPU in Multiple Cores</string>
+             <string>Multicore CPU Emulation</string>
             </property>
            </widget>
           </item>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 2d0d535c97..0e4e4b83f0 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -520,14 +520,34 @@ void GMainWindow::InitializeWidgets() {
         if (emulation_running) {
             return;
         }
-        Settings::values.use_asynchronous_gpu_emulation =
-            !Settings::values.use_asynchronous_gpu_emulation;
+        bool is_async = !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        Settings::values.use_asynchronous_gpu_emulation = is_async;
         async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
         Settings::Apply();
     });
     async_status_button->setText(tr("ASYNC"));
     async_status_button->setCheckable(true);
     async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
+    // Setup Multicore button
+    multicore_status_button = new QPushButton();
+    multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+    multicore_status_button->setFocusPolicy(Qt::NoFocus);
+    connect(multicore_status_button, &QPushButton::clicked, [&] {
+        if (emulation_running) {
+            return;
+        }
+        Settings::values.use_multi_core = !Settings::values.use_multi_core;
+        bool is_async = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        Settings::values.use_asynchronous_gpu_emulation = is_async;
+        async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+        multicore_status_button->setChecked(Settings::values.use_multi_core);
+        Settings::Apply();
+    });
+    multicore_status_button->setText(tr("MULTICORE"));
+    multicore_status_button->setCheckable(true);
+    multicore_status_button->setChecked(Settings::values.use_multi_core);
+    statusBar()->insertPermanentWidget(0, multicore_status_button);
     statusBar()->insertPermanentWidget(0, async_status_button);
 
     // Setup Renderer API button
@@ -1028,6 +1048,7 @@ void GMainWindow::BootGame(const QString& filename) {
     }
     status_bar_update_timer.start(2000);
     async_status_button->setDisabled(true);
+    multicore_status_button->setDisabled(true);
     renderer_status_button->setDisabled(true);
 
     if (UISettings::values.hide_mouse) {
@@ -1115,6 +1136,7 @@ void GMainWindow::ShutdownGame() {
     game_fps_label->setVisible(false);
     emu_frametime_label->setVisible(false);
     async_status_button->setEnabled(true);
+    multicore_status_button->setEnabled(true);
 #ifdef HAS_VULKAN
     renderer_status_button->setEnabled(true);
 #endif
@@ -1910,7 +1932,10 @@ void GMainWindow::OnConfigure() {
     }
 
     dock_status_button->setChecked(Settings::values.use_docked_mode);
+    multicore_status_button->setChecked(Settings::values.use_multi_core);
+    Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
     async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
 #ifdef HAS_VULKAN
     renderer_status_button->setChecked(Settings::values.renderer_backend ==
                                        Settings::RendererBackend::Vulkan);
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 4f4c8ddbee..cd0f0b8927 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -234,6 +234,7 @@ private:
     QLabel* game_fps_label = nullptr;
     QLabel* emu_frametime_label = nullptr;
     QPushButton* async_status_button = nullptr;
+    QPushButton* multicore_status_button = nullptr;
     QPushButton* renderer_status_button = nullptr;
     QPushButton* dock_status_button = nullptr;
     QTimer status_bar_update_timer;

From 8c4006dd0510da21663c8b9ea7ea26b6043c49ba Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 19 Mar 2020 13:09:32 -0400
Subject: [PATCH 088/122] SingleCore: Move Host Timing from a separate thread
 to main cpu thread.

---
 src/core/core.cpp              |  2 ++
 src/core/core_timing.cpp       | 20 +++++++++++++-------
 src/core/core_timing.h         |  7 +++++++
 src/core/cpu_manager.cpp       | 17 +++++++++++++++--
 src/core/cpu_manager.h         |  1 +
 src/core/hle/kernel/kernel.cpp |  2 +-
 src/core/hle/kernel/thread.h   |  9 +++++++++
 7 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 0f0eb885ad..2ca9c0be54 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -158,6 +158,8 @@ struct System::Impl {
         kernel.SetMulticore(is_multicore);
         cpu_manager.SetMulticore(is_multicore);
         cpu_manager.SetAsyncGpu(is_async_gpu);
+        core_timing.SetMulticore(is_multicore);
+        cpu_manager.SetRenderWindow(emu_window);
 
         core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 3438f79cee..189d4aa34d 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -55,7 +55,9 @@ void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
     event_fifo_id = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+    if (is_multicore) {
+        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+    }
 }
 
 void CoreTiming::Shutdown() {
@@ -63,7 +65,9 @@ void CoreTiming::Shutdown() {
     shutting_down = true;
     pause_event.Set();
     event.Set();
-    timer_thread->join();
+    if (timer_thread) {
+        timer_thread->join();
+    }
     ClearPendingEvents();
     timer_thread.reset();
     has_started = false;
@@ -78,12 +82,14 @@ void CoreTiming::SyncPause(bool is_paused) {
         return;
     }
     Pause(is_paused);
-    if (!is_paused) {
-        pause_event.Set();
+    if (timer_thread) {
+        if (!is_paused) {
+            pause_event.Set();
+        }
+        event.Set();
+        while (paused_set != is_paused)
+            ;
     }
-    event.Set();
-    while (paused_set != is_paused)
-        ;
 }
 
 bool CoreTiming::IsRunning() const {
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 032eb08aad..03f9a5c764 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -67,6 +67,11 @@ public:
     /// Tears down all timing related functionality.
     void Shutdown();
 
+    /// Sets if emulation is multicore or single core, must be set before Initialize
+    void SetMulticore(bool is_multicore) {
+        this->is_multicore = is_multicore;
+    }
+
     /// Pauses/Unpauses the execution of the timer thread.
     void Pause(bool is_paused);
 
@@ -147,6 +152,8 @@ private:
     std::atomic<bool> has_started{};
     std::function<void(void)> on_thread_init{};
 
+    bool is_multicore{};
+
     std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
 };
 
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index d7bd162bc5..2aea95a257 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -242,8 +242,11 @@ void CpuManager::SingleCoreRunGuestLoop() {
                 break;
             }
         }
-        physical_core.ClearExclusive();
         system.ExitDynarmicProfile();
+        thread->SetPhantomMode(true);
+        system.CoreTiming().Advance();
+        thread->SetPhantomMode(false);
+        physical_core.ClearExclusive();
         PreemptSingleCore();
         auto& scheduler = kernel.Scheduler(current_core);
         scheduler.TryDoContextSwitch();
@@ -255,6 +258,7 @@ void CpuManager::SingleCoreRunIdleThread() {
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
         PreemptSingleCore();
+        idle_count++;
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
     }
@@ -280,15 +284,24 @@ void CpuManager::SingleCoreRunSuspendThread() {
 void CpuManager::PreemptSingleCore() {
     preemption_count = 0;
     std::size_t old_core = current_core;
-    current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
     auto& scheduler = system.Kernel().Scheduler(old_core);
     Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+    if (idle_count >= 4) {
+        current_thread->SetPhantomMode(true);
+        system.CoreTiming().Advance();
+        current_thread->SetPhantomMode(false);
+    }
+    current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
     scheduler.Unload();
     auto& next_scheduler = system.Kernel().Scheduler(current_core);
     Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());
     /// May have changed scheduler
     auto& current_scheduler = system.Kernel().Scheduler(current_core);
     current_scheduler.Reload();
+    auto* currrent_thread2 = current_scheduler.GetCurrentThread();
+    if (!currrent_thread2->IsIdleThread()) {
+        idle_count = 0;
+    }
 }
 
 void CpuManager::SingleCorePause(bool paused) {
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 37cef2b122..e6b8612f0e 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -104,6 +104,7 @@ private:
     bool is_multicore{};
     std::atomic<std::size_t> current_core{};
     std::size_t preemption_count{};
+    std::size_t idle_count{};
     static constexpr std::size_t max_cycle_runs = 5;
     Core::Frontend::EmuWindow* render_window;
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index d2f5f9bf23..a19cd7a1ff 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -303,7 +303,7 @@ struct KernelCore::Impl {
         }
         const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();
         const Kernel::Thread* current = sched.GetCurrentThread();
-        if (current != nullptr) {
+        if (current != nullptr && !current->IsPhantomMode()) {
             result.guest_handle = current->GetGlobalHandle();
         } else {
             result.guest_handle = InvalidHandle;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index f42d7bd136..f998890c4b 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -597,6 +597,14 @@ public:
         is_continuous_on_svc = is_continuous;
     }
 
+    bool IsPhantomMode() const {
+        return is_phantom_mode;
+    }
+
+    void SetPhantomMode(bool phantom) {
+        is_phantom_mode = phantom;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
@@ -699,6 +707,7 @@ private:
     bool is_continuous_on_svc = false;
 
     bool will_be_terminated = false;
+    bool is_phantom_mode = false;
 
     bool was_running = false;
 

From 81677c61fbac51fcc6ebd5880a8829355562973a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 20 Mar 2020 12:36:01 -0400
Subject: [PATCH 089/122] SVC/ARM: Correct svcSendSyncRequest and cache ticks
 on arm interface.

---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 11 +++++++++--
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 12 ++++++++++--
 src/core/hle/kernel/svc.cpp               |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 1f19be36ed..6f142b75b7 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -71,19 +71,25 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        /// We are using host timing, NOP
+        this->ticks -= ticks;
     }
+
     u64 GetTicksRemaining() override {
         if (!parent.interrupt_handler.IsInterrupted()) {
-            return 1000ULL;
+            return std::max<s64>(ticks, 0);
         }
         return 0ULL;
     }
 
+    void ResetTicks() {
+        ticks = 1000LL;
+    }
+
     ARM_Dynarmic_32& parent;
     std::size_t num_interpreted_instructions{};
     u64 tpidrro_el0{};
     u64 tpidr_el0{};
+    s64 ticks{};
 };
 
 std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -98,6 +104,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
 }
 
 void ARM_Dynarmic_32::Run() {
+    cb->ResetTicks();
     jit->Run();
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 724c68c391..f1f41fc395 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -124,22 +124,29 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        /// We are using host timing, NOP
+        this->ticks -= ticks;
     }
+
     u64 GetTicksRemaining() override {
         if (!parent.interrupt_handler.IsInterrupted()) {
-            return 1000ULL;
+            return std::max<s64>(ticks, 0);
         }
         return 0ULL;
     }
+
     u64 GetCNTPCT() override {
         return parent.system.CoreTiming().GetClockTicks();
     }
 
+    void ResetTicks() {
+        ticks = 1000LL;
+    }
+
     ARM_Dynarmic_64& parent;
     std::size_t num_interpreted_instructions = 0;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
+    s64 ticks{};
 };
 
 std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@@ -181,6 +188,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
 }
 
 void ARM_Dynarmic_64::Run() {
+    cb->ResetTicks();
     jit->Run();
 }
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 36e9c48f98..f087452262 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -342,7 +342,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
         thread->InvokeHLECallback(SharedFrom(thread));
     }
 
-    return RESULT_SUCCESS;
+    return thread->GetSignalingResult();
 }
 
 static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {

From f24c834a7ddf4d3ed564ed41ad76b80700893644 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 20 Mar 2020 14:05:47 -0400
Subject: [PATCH 090/122] ARM/WaitTree: Better track the CallStack for each
 thread.

---
 src/core/arm/arm_interface.cpp  | 57 +++++++++++++++++++++++++++++++++
 src/core/arm/arm_interface.h    |  3 ++
 src/yuzu/debugger/wait_tree.cpp | 25 ++++++++-------
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index d079a1bc8a..d2295ed900 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -139,6 +139,63 @@ std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_addr
 
 constexpr u64 SEGMENT_BASE = 0x7100000000ull;
 
+std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext(
+    System& system, const ThreadContext64& ctx) {
+    std::vector<BacktraceEntry> out;
+    auto& memory = system.Memory();
+
+    auto fp = ctx.cpu_registers[29];
+    auto lr = ctx.cpu_registers[30];
+    while (true) {
+        out.push_back({"", 0, lr, 0});
+        if (!fp) {
+            break;
+        }
+        lr = memory.Read64(fp + 8) - 4;
+        fp = memory.Read64(fp);
+    }
+
+    std::map<VAddr, std::string> modules;
+    auto& loader{system.GetAppLoader()};
+    if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) {
+        return {};
+    }
+
+    std::map<std::string, Symbols> symbols;
+    for (const auto& module : modules) {
+        symbols.insert_or_assign(module.second, GetSymbols(module.first, memory));
+    }
+
+    for (auto& entry : out) {
+        VAddr base = 0;
+        for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) {
+            const auto& module{*iter};
+            if (entry.original_address >= module.first) {
+                entry.module = module.second;
+                base = module.first;
+                break;
+            }
+        }
+
+        entry.offset = entry.original_address - base;
+        entry.address = SEGMENT_BASE + entry.offset;
+
+        if (entry.module.empty())
+            entry.module = "unknown";
+
+        const auto symbol_set = symbols.find(entry.module);
+        if (symbol_set != symbols.end()) {
+            const auto symbol = GetSymbolName(symbol_set->second, entry.offset);
+            if (symbol.has_value()) {
+                // TODO(DarkLordZach): Add demangling of symbol names.
+                entry.name = *symbol;
+            }
+        }
+    }
+
+    return out;
+}
+
 std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {
     std::vector<BacktraceEntry> out;
     auto& memory = system.Memory();
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 87a1c29cc9..e701ddf218 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -164,6 +164,9 @@ public:
         std::string name;
     };
 
+    static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system,
+                                                               const ThreadContext64& ctx);
+
     std::vector<BacktraceEntry> GetBacktrace() const;
 
     /// fp (= r29) points to the last frame record.
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index fa091f4573..d2dbb259c9 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -2,10 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <fmt/format.h>
+
 #include "yuzu/debugger/wait_tree.h"
 #include "yuzu/util/util.h"
 
 #include "common/assert.h"
+#include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/mutex.h"
@@ -116,20 +119,20 @@ QString WaitTreeCallstack::GetText() const {
 std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
     std::vector<std::unique_ptr<WaitTreeItem>> list;
 
-    constexpr std::size_t BaseRegister = 29;
-    auto& memory = Core::System::GetInstance().Memory();
-    u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister];
+    if (thread.IsHLEThread()) {
+        return list;
+    }
 
-    while (base_pointer != 0) {
-        const u64 lr = memory.Read64(base_pointer + sizeof(u64));
-        if (lr == 0) {
-            break;
-        }
+    if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) {
+        return list;
+    }
 
-        list.push_back(std::make_unique<WaitTreeText>(
-            tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'})));
+    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), thread.GetContext64());
 
-        base_pointer = memory.Read64(base_pointer);
+    for (auto& entry : backtrace) {
+        std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address,
+                  entry.original_address, entry.offset, entry.name);
+        list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s)));
     }
 
     return list;

From 403659b16eb8a8e420d02e7350e281ded3a80880 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 21 Mar 2020 12:23:13 -0400
Subject: [PATCH 091/122] X64 Clock: Reduce accuracy to be less or equal to
 guest accuracy.

---
 src/common/x64/native_clock.cpp           | 3 ++-
 src/common/x64/native_clock.h             | 5 +++++
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 3 +++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 926f92ff86..f1bc60fd20 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -62,7 +62,8 @@ u64 NativeClock::GetRTSC() {
     }
     accumulated_ticks += diff;
     rtsc_serialize.unlock();
-    return accumulated_ticks;
+    /// The clock cannot be more precise than the guest timer, remove the lower bits
+    return accumulated_ticks & inaccuracy_mask;
 }
 
 void NativeClock::Pause(bool is_paused) {
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 3851f8fc22..e853094d20 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -31,6 +31,11 @@ public:
 private:
     u64 GetRTSC();
 
+    /// Mask used to reduce the native clock's accuracy, as some apps rely on
+    /// undefined behavior where the level of accuracy in the clock shouldn't
+    /// be higher. Clearing the low 8 bits coarsens the reported tick count.
+    static constexpr u64 inaccuracy_mask = ~(0x100 - 1);
+
     SpinLock rtsc_serialize{};
     u64 last_measure{};
     u64 accumulated_ticks{};
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index f1f41fc395..cf5ff1bc88 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -184,6 +184,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
         config.enable_fast_dispatch = false;
     }
 
+    // CNTPCT uses wall clock.
+    config.wall_clock_cntpct = true;
+
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 

From 3c45c412f93083427517ffd9eeb00f7df5dc4c84 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 29 Feb 2020 13:58:50 -0400
Subject: [PATCH 092/122] Core: Refactor ARM Interface.

---
 src/core/arm/arm_interface.h              |  9 +++++---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  6 ++---
 src/core/arm/dynarmic/arm_dynarmic_32.h   |  2 +-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 13 +++++++----
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  2 +-
 src/core/arm/unicorn/arm_unicorn.cpp      |  7 +++---
 src/core/arm/unicorn/arm_unicorn.h        |  5 ++--
 src/core/hle/kernel/kernel.cpp            | 28 ++++++++++++++++++++++-
 src/core/hle/kernel/physical_core.cpp     | 25 +++++++-------------
 src/core/hle/kernel/physical_core.h       | 14 +++++++-----
 10 files changed, 69 insertions(+), 42 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index e701ddf218..dc9b2ff7b9 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <vector>
 #include "common/common_types.h"
+#include "core/hardware_properties.h"
 
 namespace Common {
 struct PageTable;
@@ -20,11 +21,13 @@ namespace Core {
 class System;
 class CPUInterruptHandler;
 
+using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;
+
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_, CPUInterruptHandler& interrupt_handler)
-        : system{system_}, interrupt_handler{interrupt_handler} {}
+    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers)
+        : system{system_}, interrupt_handlers{interrupt_handlers} {}
     virtual ~ARM_Interface() = default;
 
     struct ThreadContext32 {
@@ -180,7 +183,7 @@ public:
 protected:
     /// System context that this ARM interface is running under.
     System& system;
-    CPUInterruptHandler& interrupt_handler;
+    CPUInterrupts& interrupt_handlers;
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 6f142b75b7..eeff870146 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -75,7 +75,7 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handler.IsInterrupted()) {
+        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
             return std::max<s64>(ticks, 0);
         }
         return 0ULL;
@@ -112,9 +112,9 @@ void ARM_Dynarmic_32::Step() {
     cb->InterpreterFallback(jit->Regs()[15], 1);
 }
 
-ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler,
+ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
                                  ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler},
+    : ARM_Interface{system, interrupt_handlers},
       cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index e1e4882223..077b180309 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -28,7 +28,7 @@ class System;
 
 class ARM_Dynarmic_32 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_32(System& system, CPUInterruptHandler& interrupt_handler,
+    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
                     ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_32() override;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index cf5ff1bc88..e85f3a3049 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -128,7 +128,7 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handler.IsInterrupted()) {
+        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
             return std::max<s64>(ticks, 0);
         }
         return 0ULL;
@@ -199,11 +199,14 @@ void ARM_Dynarmic_64::Step() {
     cb->InterpreterFallback(jit->GetPC(), 1);
 }
 
-ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler,
+ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
                                  ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
-      inner_unicorn{system, interrupt_handler, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
-      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
+    : ARM_Interface{system, interrupt_handlers},
+      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers,
+                                                                      ARM_Unicorn::Arch::AArch64,
+                                                                      core_index},
+      core_index{core_index}, exclusive_monitor{
+                                  dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
 ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 5560578add..1c6791d4e5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -28,7 +28,7 @@ class System;
 
 class ARM_Dynarmic_64 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_64(System& system, CPUInterruptHandler& interrupt_handler,
+    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
                     ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_64() override;
 
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 0393fe641d..d81d1b5b08 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -63,8 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture)
-    : ARM_Interface{system, interrupt_handler} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
+                         std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler}, core_index{core_index} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
 
@@ -163,7 +164,7 @@ void ARM_Unicorn::Run() {
         ExecuteInstructions(std::max(4000000U, 0U));
     } else {
         while (true) {
-            if (interrupt_handler.IsInterrupted()) {
+            if (interrupt_handlers[core_index].IsInterrupted()) {
                 return;
             }
             ExecuteInstructions(10);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 0a4c087cd8..e3da368de9 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -11,7 +11,6 @@
 
 namespace Core {
 
-class CPUInterruptHandler;
 class System;
 
 class ARM_Unicorn final : public ARM_Interface {
@@ -21,7 +20,8 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture);
+    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
+                         std::size_t core_index);
     ~ARM_Unicorn() override;
 
     void SetPC(u64 pc) override;
@@ -56,6 +56,7 @@ private:
     uc_engine* uc{};
     GDBStub::BreakpointAddress last_bkpt{};
     bool last_bkpt_hit = false;
+    std::size_t core_index;
 };
 
 } // namespace Core
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index a19cd7a1ff..e33ef53238 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <atomic>
 #include <bitset>
 #include <functional>
@@ -16,7 +17,9 @@
 #include "common/microprofile.h"
 #include "common/thread.h"
 #include "core/arm/arm_interface.h"
+#include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/exclusive_monitor.h"
+#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
@@ -42,6 +45,11 @@
 #include "core/hle/result.h"
 #include "core/memory.h"
 
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#endif
+
 MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 
 namespace Kernel {
@@ -178,7 +186,20 @@ struct KernelCore::Impl {
         exclusive_monitor =
             Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-            cores.emplace_back(system, i, *exclusive_monitor);
+#ifdef ARCHITECTURE_x86_64 // Dynarmic backends: one 32-bit and one 64-bit interface per core
+            arm_interfaces_32[i] =
+                std::make_unique<Core::ARM_Dynarmic_32>(system, interrupts, *exclusive_monitor, i);
+            arm_interfaces_64[i] =
+                std::make_unique<Core::ARM_Dynarmic_64>(system, interrupts, *exclusive_monitor, i);
+#else // Dynarmic is not built for this host: fall back to ARM_Unicorn for both widths
+            arm_interfaces_32[i] = std::make_unique<Core::ARM_Unicorn>(
+                system, interrupts, Core::ARM_Unicorn::Arch::AArch32, i);
+            arm_interfaces_64[i] = std::make_unique<Core::ARM_Unicorn>(
+                system, interrupts, Core::ARM_Unicorn::Arch::AArch64, i);
+            LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif // ARCHITECTURE_x86_64
+            cores.emplace_back(system, i, *exclusive_monitor, interrupts[i], *arm_interfaces_32[i],
+                               *arm_interfaces_64[i]);
         }
     }
 
@@ -407,6 +428,11 @@ struct KernelCore::Impl {
     std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
 
     std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
+    std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
+    std::array<std::unique_ptr<Core::ARM_Interface>, Core::Hardware::NUM_CPU_CORES>
+        arm_interfaces_32{};
+    std::array<std::unique_ptr<Core::ARM_Interface>, Core::Hardware::NUM_CPU_CORES>
+        arm_interfaces_64{};
 
     bool is_multicore{};
     std::thread::id single_core_thread_id{};
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index ff14fcb424..9146b331d4 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -21,21 +21,12 @@
 namespace Kernel {
 
 PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
-                           Core::ExclusiveMonitor& exclusive_monitor)
-    : interrupt_handler{}, core_index{id} {
-#ifdef ARCHITECTURE_x86_64
-    arm_interface_32 = std::make_unique<Core::ARM_Dynarmic_32>(system, interrupt_handler,
-                                                               exclusive_monitor, core_index);
-    arm_interface_64 = std::make_unique<Core::ARM_Dynarmic_64>(system, interrupt_handler,
-                                                               exclusive_monitor, core_index);
-#else
-    using Core::ARM_Unicorn;
-    arm_interface_32 =
-        std::make_unique<ARM_Unicorn>(system, interrupt_handler, ARM_Unicorn::Arch::AArch32);
-    arm_interface_64 =
-        std::make_unique<ARM_Unicorn>(system, interrupt_handler, ARM_Unicorn::Arch::AArch64);
-    LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
+                           Core::ExclusiveMonitor& exclusive_monitor,
+                           Core::CPUInterruptHandler& interrupt_handler,
+                           Core::ARM_Interface& arm_interface32,
+                           Core::ARM_Interface& arm_interface64)
+    : interrupt_handler{interrupt_handler}, core_index{id}, arm_interface_32{arm_interface32},
+      arm_interface_64{arm_interface64} {
 
     scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
     guard = std::make_unique<Common::SpinLock>();
@@ -69,9 +60,9 @@ void PhysicalCore::Shutdown() {
 
 void PhysicalCore::SetIs64Bit(bool is_64_bit) {
     if (is_64_bit) {
-        arm_interface = arm_interface_64.get();
+        arm_interface = &arm_interface_64;
     } else {
-        arm_interface = arm_interface_32.get();
+        arm_interface = &arm_interface_32;
     }
 }
 
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index cd2e42fc3c..2673d90f28 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -10,7 +10,7 @@
 #include "core/arm/cpu_interrupt_handler.h"
 
 namespace Common {
-    class SpinLock;
+class SpinLock;
 }
 
 namespace Kernel {
@@ -27,7 +27,9 @@ namespace Kernel {
 
 class PhysicalCore {
 public:
-    PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor);
+    PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor,
+                 Core::CPUInterruptHandler& interrupt_handler, Core::ARM_Interface& arm_interface32,
+                 Core::ARM_Interface& arm_interface64);
     ~PhysicalCore();
 
     PhysicalCore(const PhysicalCore&) = delete;
@@ -92,13 +94,13 @@ public:
     void SetIs64Bit(bool is_64_bit);
 
 private:
-    Core::CPUInterruptHandler interrupt_handler;
+    Core::CPUInterruptHandler& interrupt_handler;
     std::size_t core_index;
-    std::unique_ptr<Core::ARM_Interface> arm_interface_32;
-    std::unique_ptr<Core::ARM_Interface> arm_interface_64;
+    Core::ARM_Interface& arm_interface_32;
+    Core::ARM_Interface& arm_interface_64;
     std::unique_ptr<Kernel::Scheduler> scheduler;
     Core::ARM_Interface* arm_interface{};
-    std::unique_ptr<Common::SpinLock> guard;
+    std::unique_ptr<Common::SpinLock> guard;
 };
 
 } // namespace Kernel

From 1a4cd6b1a5866e474ad220e173aa173a27769d02 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 1 Mar 2020 12:14:17 -0400
Subject: [PATCH 093/122] General: Move ARM_Interface into Threads.

---
 src/core/arm/arm_interface.h              |  2 +
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  4 ++
 src/core/arm/dynarmic/arm_dynarmic_32.h   |  1 +
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  4 ++
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  1 +
 src/core/arm/unicorn/arm_unicorn.cpp      |  4 ++
 src/core/arm/unicorn/arm_unicorn.h        |  1 +
 src/core/core.cpp                         | 34 ++++++-------
 src/core/core_manager.cpp                 | 18 +------
 src/core/cpu_manager.cpp                  | 30 ++++++------
 src/core/hle/kernel/kernel.cpp            | 59 ++++++++---------------
 src/core/hle/kernel/kernel.h              |  8 ++-
 src/core/hle/kernel/physical_core.cpp     | 37 ++------------
 src/core/hle/kernel/physical_core.h       | 37 +++-----------
 src/core/hle/kernel/scheduler.cpp         | 12 ++---
 src/core/hle/kernel/svc.cpp               | 11 +----
 src/core/hle/kernel/thread.cpp            | 35 ++++++++++++++
 src/core/hle/kernel/thread.h              |  8 ++-
 18 files changed, 136 insertions(+), 170 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index dc9b2ff7b9..e5c4843364 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -148,6 +148,8 @@ public:
      */
     virtual void SetTPIDR_EL0(u64 value) = 0;
 
+    virtual void ChangeProcessorId(std::size_t new_core_id) = 0;
+
     virtual void SaveContext(ThreadContext32& ctx) = 0;
     virtual void SaveContext(ThreadContext64& ctx) = 0;
     virtual void LoadContext(const ThreadContext32& ctx) = 0;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index eeff870146..d45772576b 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -166,6 +166,10 @@ void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
     cb->tpidr_el0 = value;
 }
 
+void ARM_Dynarmic_32::ChangeProcessorId(std::size_t new_core_id) {
+    // jit->ChangeProcessorId(new_core_id);
+}
+
 void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
     Dynarmic::A32::Context context;
     jit->SaveContext(context);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 077b180309..937dffee9f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -46,6 +46,7 @@ public:
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
+    void ChangeProcessorId(std::size_t new_core_id) override;
 
     void SaveContext(ThreadContext32& ctx) override;
     void SaveContext(ThreadContext64& ctx) override {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index e85f3a3049..5f6906bec4 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -258,6 +258,10 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
     cb->tpidr_el0 = value;
 }
 
+void ARM_Dynarmic_64::ChangeProcessorId(std::size_t new_core_id) {
+    jit->ChangeProcessorId(new_core_id);
+}
+
 void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
     ctx.cpu_registers = jit->GetRegisters();
     ctx.sp = jit->GetSP();
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 1c6791d4e5..c26b47249c 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -46,6 +46,7 @@ public:
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
+    void ChangeProcessorId(std::size_t new_core_id) override;
 
     void SaveContext(ThreadContext32& ctx) override {}
     void SaveContext(ThreadContext64& ctx) override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index d81d1b5b08..099229c8d4 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -159,6 +159,10 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) {
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value));
 }
 
+void ARM_Unicorn::ChangeProcessorId(std::size_t new_core_id) {
+    core_index = new_core_id;
+}
+
 void ARM_Unicorn::Run() {
     if (GDBStub::IsServerEnabled()) {
         ExecuteInstructions(std::max(4000000U, 0U));
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index e3da368de9..f09b24a850 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -36,6 +36,7 @@ public:
     void SetTlsAddress(VAddr address) override;
     void SetTPIDR_EL0(u64 value) override;
     u64 GetTPIDR_EL0() const override;
+    void ChangeProcessorId(std::size_t new_core_id) override;
     void PrepareReschedule() override;
     void ClearExclusiveState() override;
     void ExecuteInstructions(std::size_t num_instructions);
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 2ca9c0be54..40eea297ef 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -119,14 +119,6 @@ struct System::Impl {
         : kernel{system}, fs_controller{system}, memory{system},
           cpu_manager{system}, reporter{system}, applet_manager{system} {}
 
-    Kernel::PhysicalCore& CurrentPhysicalCore() {
-        return kernel.CurrentPhysicalCore();
-    }
-
-    Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) {
-        return kernel.PhysicalCore(index);
-    }
-
     ResultStatus Run() {
         status = ResultStatus::Success;
 
@@ -443,7 +435,7 @@ bool System::IsPoweredOn() const {
 }
 
 void System::PrepareReschedule() {
-    impl->CurrentPhysicalCore().Stop();
+    //impl->CurrentPhysicalCore().Stop();
 }
 
 void System::PrepareReschedule(const u32 core_index) {
@@ -463,11 +455,11 @@ const TelemetrySession& System::TelemetrySession() const {
 }
 
 ARM_Interface& System::CurrentArmInterface() {
-    return impl->CurrentPhysicalCore().ArmInterface();
+    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
 }
 
 const ARM_Interface& System::CurrentArmInterface() const {
-    return impl->CurrentPhysicalCore().ArmInterface();
+    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
 }
 
 std::size_t System::CurrentCoreIndex() const {
@@ -477,27 +469,27 @@ std::size_t System::CurrentCoreIndex() const {
 }
 
 Kernel::Scheduler& System::CurrentScheduler() {
-    return impl->CurrentPhysicalCore().Scheduler();
+    return impl->kernel.CurrentScheduler();
 }
 
 const Kernel::Scheduler& System::CurrentScheduler() const {
-    return impl->CurrentPhysicalCore().Scheduler();
+    return impl->kernel.CurrentScheduler();
 }
 
 Kernel::PhysicalCore& System::CurrentPhysicalCore() {
-    return impl->CurrentPhysicalCore();
+    return impl->kernel.CurrentPhysicalCore();
 }
 
 const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
-    return impl->CurrentPhysicalCore();
+    return impl->kernel.CurrentPhysicalCore();
 }
 
 Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
-    return impl->GetPhysicalCore(core_index).Scheduler();
+    return impl->kernel.Scheduler(core_index);
 }
 
 const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
-    return impl->GetPhysicalCore(core_index).Scheduler();
+    return impl->kernel.Scheduler(core_index);
 }
 
 /// Gets the global scheduler
@@ -527,11 +519,15 @@ const Kernel::Process* System::CurrentProcess() const {
 }
 
 ARM_Interface& System::ArmInterface(std::size_t core_index) {
-    return impl->GetPhysicalCore(core_index).ArmInterface();
+    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+    ASSERT(thread && !thread->IsHLEThread());
+    return thread->ArmInterface();
 }
 
 const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
-    return impl->GetPhysicalCore(core_index).ArmInterface();
+    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+    ASSERT(thread && !thread->IsHLEThread());
+    return thread->ArmInterface();
 }
 
 ExclusiveMonitor& System::Monitor() {
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
index 45f0bb5470..82d7acb406 100644
--- a/src/core/core_manager.cpp
+++ b/src/core/core_manager.cpp
@@ -28,21 +28,7 @@ CoreManager::CoreManager(System& system, std::size_t core_index)
 CoreManager::~CoreManager() = default;
 
 void CoreManager::RunLoop(bool tight_loop) {
-    Reschedule();
-
-    // If we don't have a currently active thread then don't execute instructions,
-    // instead advance to the next event and try to yield to the next thread
-    if (Kernel::GetCurrentThread() == nullptr) {
-        LOG_TRACE(Core, "Core-{} idling", core_index);
-    } else {
-        if (tight_loop) {
-            physical_core.Run();
-        } else {
-            physical_core.Step();
-        }
-    }
-
-    Reschedule();
+    /// Deprecated
 }
 
 void CoreManager::SingleStep() {
@@ -50,7 +36,7 @@ void CoreManager::SingleStep() {
 }
 
 void CoreManager::PrepareReschedule() {
-    physical_core.Stop();
+    //physical_core.Stop();
 }
 
 void CoreManager::Reschedule() {
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 2aea95a257..2e9dc9dc3d 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -129,18 +129,17 @@ void CpuManager::MultiCoreRunGuestThread() {
 void CpuManager::MultiCoreRunGuestLoop() {
     auto& kernel = system.Kernel();
     auto* thread = kernel.CurrentScheduler().GetCurrentThread();
-    auto host_context = thread->GetHostContext();
-    host_context->SetRewindPoint(std::function<void(void*)>(GuestRewindFunction), this);
-    host_context.reset();
     while (true) {
-        auto& physical_core = kernel.CurrentPhysicalCore();
+        auto* physical_core = &kernel.CurrentPhysicalCore();
+        auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
-        while (!physical_core.IsInterrupted()) {
-            physical_core.Run();
+        while (!physical_core->IsInterrupted()) {
+            arm_interface.Run();
+            physical_core = &kernel.CurrentPhysicalCore();
         }
         system.ExitDynarmicProfile();
-        physical_core.ClearExclusive();
-        auto& scheduler = physical_core.Scheduler();
+        arm_interface.ClearExclusiveState();
+        auto& scheduler = kernel.CurrentScheduler();
         scheduler.TryDoContextSwitch();
     }
 }
@@ -150,7 +149,7 @@ void CpuManager::MultiCoreRunIdleThread() {
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
         physical_core.Idle();
-        auto& scheduler = physical_core.Scheduler();
+        auto& scheduler = kernel.CurrentScheduler();
         scheduler.TryDoContextSwitch();
     }
 }
@@ -229,14 +228,13 @@ void CpuManager::SingleCoreRunGuestThread() {
 void CpuManager::SingleCoreRunGuestLoop() {
     auto& kernel = system.Kernel();
     auto* thread = kernel.CurrentScheduler().GetCurrentThread();
-    auto host_context = thread->GetHostContext();
-    host_context->SetRewindPoint(std::function<void(void*)>(GuestRewindFunction), this);
-    host_context.reset();
     while (true) {
-        auto& physical_core = kernel.CurrentPhysicalCore();
+        auto* physical_core = &kernel.CurrentPhysicalCore();
+        auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
-        while (!physical_core.IsInterrupted()) {
-            physical_core.Run();
+        while (!physical_core->IsInterrupted()) {
+            arm_interface.Run();
+            physical_core = &kernel.CurrentPhysicalCore();
             preemption_count++;
             if (preemption_count % max_cycle_runs == 0) {
                 break;
@@ -246,7 +244,7 @@ void CpuManager::SingleCoreRunGuestLoop() {
         thread->SetPhantomMode(true);
         system.CoreTiming().Advance();
         thread->SetPhantomMode(false);
-        physical_core.ClearExclusive();
+        arm_interface.ClearExclusiveState();
         PreemptSingleCore();
         auto& scheduler = kernel.Scheduler(current_core);
         scheduler.TryDoContextSwitch();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e33ef53238..3feddd9ad8 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -19,7 +19,6 @@
 #include "core/arm/arm_interface.h"
 #include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/exclusive_monitor.h"
-#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
@@ -45,11 +44,6 @@
 #include "core/hle/result.h"
 #include "core/memory.h"
 
-#ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic_32.h"
-#include "core/arm/dynarmic/arm_dynarmic_64.h"
-#endif
-
 MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 
 namespace Kernel {
@@ -186,20 +180,8 @@ struct KernelCore::Impl {
         exclusive_monitor =
             Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
         for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
-#ifdef ARCHITECTURE_x86_64
-            arm_interfaces_32[i] =
-                std::make_unique<Core::ARM_Dynarmic_32>(system, interrupts, *exclusive_monitor, i);
-            arm_interfaces_64[i] =
-                std::make_unique<Core::ARM_Dynarmic_64>(system, interrupts, *exclusive_monitor, i);
-#else
-            arm_interfaces_32[i] = std::make_shared<Core::ARM_Unicorn>(
-                system, interrupts, ARM_Unicorn::Arch::AArch32, i);
-            arm_interfaces_64[i] = std::make_shared<Core::ARM_Unicorn>(
-                system, interrupts, ARM_Unicorn::Arch::AArch64, i);
-            LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
-            cores.emplace_back(system, i, *exclusive_monitor, interrupts[i], *arm_interfaces_32[i],
-                               *arm_interfaces_64[i]);
+            schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i);
+            cores.emplace_back(system, i, *schedulers[i], interrupts[i]);
         }
     }
 
@@ -268,10 +250,6 @@ struct KernelCore::Impl {
             return;
         }
 
-        for (auto& core : cores) {
-            core.SetIs64Bit(process->Is64BitProcess());
-        }
-
         u32 core_id = GetCurrentHostThreadID();
         if (core_id < Core::Hardware::NUM_CPU_CORES) {
             system.Memory().SetCurrentPageTable(*process, core_id);
@@ -429,10 +407,7 @@ struct KernelCore::Impl {
 
     std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
     std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
-    std::array<std::unique_ptr<Core::ARM_Interface>, Core::Hardware::NUM_CPU_CORES>
-        arm_interfaces_32{};
-    std::array<std::unique_ptr<Core::ARM_Interface>, Core::Hardware::NUM_CPU_CORES>
-        arm_interfaces_64{};
+    std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
 
     bool is_multicore{};
     std::thread::id single_core_thread_id{};
@@ -497,11 +472,11 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
 }
 
 Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) {
-    return impl->cores[id].Scheduler();
+    return *impl->schedulers[id];
 }
 
 const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const {
-    return impl->cores[id].Scheduler();
+    return *impl->schedulers[id];
 }
 
 Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {
@@ -525,11 +500,23 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
 }
 
 Kernel::Scheduler& KernelCore::CurrentScheduler() {
-    return CurrentPhysicalCore().Scheduler();
+    u32 core_id = impl->GetCurrentHostThreadID();
+    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+    return *impl->schedulers[core_id];
 }
 
 const Kernel::Scheduler& KernelCore::CurrentScheduler() const {
-    return CurrentPhysicalCore().Scheduler();
+    u32 core_id = impl->GetCurrentHostThreadID();
+    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+    return *impl->schedulers[core_id];
+}
+
+std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() {
+    return impl->interrupts;
+}
+
+const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() const {
+    return impl->interrupts;
 }
 
 Kernel::Synchronization& KernelCore::Synchronization() {
@@ -557,15 +544,11 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
 }
 
 void KernelCore::InvalidateAllInstructionCaches() {
-    for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) {
-        PhysicalCore(i).ArmInterface().ClearInstructionCache();
-    }
+    //TODO: Reimplement, this
 }
 
 void KernelCore::PrepareReschedule(std::size_t id) {
-    if (id < impl->global_scheduler.CpuCoresCount()) {
-        impl->cores[id].Stop();
-    }
+    // TODO: Reimplement, this
 }
 
 void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 1eb6ede73e..846056b85a 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -4,15 +4,17 @@
 
 #pragma once
 
+#include <array>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/memory/memory_types.h"
 #include "core/hle/kernel/object.h"
 
 namespace Core {
-struct EmuThreadHandle;
+class CPUInterruptHandler;
 class ExclusiveMonitor;
 class System;
 } // namespace Core
@@ -144,6 +146,10 @@ public:
 
     const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
 
+    std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts();
+
+    const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts() const;
+
     void InvalidateAllInstructionCaches();
 
     /// Adds a port to the named port table
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 9146b331d4..c82c60a16d 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -20,50 +20,21 @@
 
 namespace Kernel {
 
-PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
-                           Core::ExclusiveMonitor& exclusive_monitor,
-                           Core::CPUInterruptHandler& interrupt_handler,
-                           Core::ARM_Interface& arm_interface32,
-                           Core::ARM_Interface& arm_interface64)
-    : interrupt_handler{interrupt_handler}, core_index{id}, arm_interface_32{arm_interface32},
-      arm_interface_64{arm_interface64} {
+PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
+                           Core::CPUInterruptHandler& interrupt_handler)
+    : interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} {
 
-    scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
     guard = std::make_unique<Common::SpinLock>();
 }
 
 PhysicalCore::~PhysicalCore() = default;
 
-void PhysicalCore::Run() {
-    arm_interface->Run();
-}
-
-void PhysicalCore::ClearExclusive() {
-    arm_interface->ClearExclusiveState();
-}
-
-void PhysicalCore::Step() {
-    arm_interface->Step();
-}
-
 void PhysicalCore::Idle() {
     interrupt_handler.AwaitInterrupt();
 }
 
-void PhysicalCore::Stop() {
-    arm_interface->PrepareReschedule();
-}
-
 void PhysicalCore::Shutdown() {
-    scheduler->Shutdown();
-}
-
-void PhysicalCore::SetIs64Bit(bool is_64_bit) {
-    if (is_64_bit) {
-        arm_interface = &arm_interface_64;
-    } else {
-        arm_interface = &arm_interface_32;
-    }
+    scheduler.Shutdown();
 }
 
 void PhysicalCore::Interrupt() {
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 2673d90f28..751b994a78 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -10,7 +10,7 @@
 #include "core/arm/cpu_interrupt_handler.h"
 
 namespace Common {
-class SpinLock;
+    class SpinLock;
 }
 
 namespace Kernel {
@@ -27,9 +27,9 @@ namespace Kernel {
 
 class PhysicalCore {
 public:
-    PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor,
-                 Core::CPUInterruptHandler& interrupt_handler, Core::ARM_Interface& arm_interface32,
-                 Core::ARM_Interface& arm_interface64);
+    PhysicalCore(Core::System& system, std::size_t id,
+                               Kernel::Scheduler& scheduler,
+                               Core::CPUInterruptHandler& interrupt_handler);
     ~PhysicalCore();
 
     PhysicalCore(const PhysicalCore&) = delete;
@@ -38,17 +38,7 @@ public:
     PhysicalCore(PhysicalCore&&) = default;
     PhysicalCore& operator=(PhysicalCore&&) = default;
 
-    /// Execute current jit state
-    void Run();
-    /// Clear Exclusive state.
-    void ClearExclusive();
-    /// Set this core in IdleState.
     void Idle();
-    /// Execute a single instruction in current jit.
-    void Step();
-    /// Stop JIT execution/exit
-    void Stop();
-
     /// Interrupt this physical core.
     void Interrupt();
 
@@ -63,14 +53,6 @@ public:
     // Shutdown this physical core.
     void Shutdown();
 
-    Core::ARM_Interface& ArmInterface() {
-        return *arm_interface;
-    }
-
-    const Core::ARM_Interface& ArmInterface() const {
-        return *arm_interface;
-    }
-
     bool IsMainCore() const {
         return core_index == 0;
     }
@@ -84,22 +66,17 @@ public:
     }
 
     Kernel::Scheduler& Scheduler() {
-        return *scheduler;
+        return scheduler;
     }
 
     const Kernel::Scheduler& Scheduler() const {
-        return *scheduler;
+        return scheduler;
     }
 
-    void SetIs64Bit(bool is_64_bit);
-
 private:
     Core::CPUInterruptHandler& interrupt_handler;
     std::size_t core_index;
-    Core::ARM_Interface& arm_interface_32;
-    Core::ARM_Interface& arm_interface_64;
-    std::unique_ptr<Kernel::Scheduler> scheduler;
-    Core::ARM_Interface* arm_interface{};
+    Kernel::Scheduler& scheduler;
     std::unique_ptr<Common::SpinLock> guard;
 };
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 8d56b49ce6..a5083ae7c4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -681,15 +681,16 @@ void Scheduler::SwitchContextStep2() {
         new_thread->SetWasRunning(false);
 
         auto* const thread_owner_process = current_thread->GetOwnerProcess();
-        if (previous_process != thread_owner_process && thread_owner_process != nullptr) {
+        if (thread_owner_process != nullptr) {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!new_thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
+            Core::ARM_Interface& cpu_core = new_thread->ArmInterface();
             cpu_core.LoadContext(new_thread->GetContext32());
             cpu_core.LoadContext(new_thread->GetContext64());
             cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
             cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+            cpu_core.ChangeProcessorId(this->core_id);
             cpu_core.ClearExclusiveState();
         }
     }
@@ -722,18 +723,15 @@ void Scheduler::SwitchContext() {
         }
         previous_thread->SetContinuousOnSVC(false);
         previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+        previous_thread->SetIsRunning(false);
         if (!previous_thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
+            Core::ARM_Interface& cpu_core = previous_thread->ArmInterface();
             cpu_core.SaveContext(previous_thread->GetContext32());
             cpu_core.SaveContext(previous_thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
             previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
             cpu_core.ClearExclusiveState();
         }
-        if (previous_thread->GetStatus() == ThreadStatus::Running) {
-            previous_thread->SetStatus(ThreadStatus::Ready);
-        }
-        previous_thread->SetIsRunning(false);
         previous_thread->context_guard.unlock();
     }
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index f087452262..5999722119 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1533,7 +1533,9 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
     }
 
     if (is_redundant && !system.Kernel().IsMulticore()) {
+        system.Kernel().ExitSVCProfile();
         system.GetCpuManager().PreemptSingleCore();
+        system.Kernel().EnterSVCProfile();
     }
 }
 
@@ -2457,9 +2459,6 @@ void Call(Core::System& system, u32 immediate) {
     auto& kernel = system.Kernel();
     kernel.EnterSVCProfile();
 
-    auto* thread = system.CurrentScheduler().GetCurrentThread();
-    thread->SetContinuousOnSVC(true);
-
     const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
                                                                         : GetSVCInfo32(immediate);
     if (info) {
@@ -2473,12 +2472,6 @@ void Call(Core::System& system, u32 immediate) {
     }
 
     kernel.ExitSVCProfile();
-
-    if (!thread->IsContinuousOnSVC()) {
-        auto* host_context = thread->GetHostContext().get();
-        host_context->Rewind();
-    }
-
     system.EnterDynarmicProfile();
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 6f8e7a070a..58b06aa9ec 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -13,6 +13,13 @@
 #include "common/logging/log.h"
 #include "common/thread_queue_list.h"
 #include "core/arm/arm_interface.h"
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#endif
+#include "core/arm/cpu_interrupt_handler.h"
+#include "core/arm/exclusive_monitor.h"
+#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
@@ -232,7 +239,27 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
     }
     // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
     // to initialize the context
+    thread->arm_interface.reset();
     if ((type_flags & THREADTYPE_HLE) == 0) {
+#ifdef ARCHITECTURE_x86_64
+        if (owner_process && !owner_process->Is64BitProcess()) {
+            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
+                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+        } else {
+            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
+                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+        }
+
+#else
+        if (owner_process && !owner_process->Is64BitProcess()) {
+            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
+                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch32, processor_id);
+        } else {
+            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
+                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch64, processor_id);
+        }
+        LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif
         ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
                              static_cast<u32>(entry_point), static_cast<u32>(arg));
         ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
@@ -276,6 +303,14 @@ VAddr Thread::GetCommandBufferAddress() const {
     return GetTLSAddress() + command_header_offset;
 }
 
+Core::ARM_Interface& Thread::ArmInterface() {
+    return *arm_interface;
+}
+
+const Core::ARM_Interface& Thread::ArmInterface() const {
+    return *arm_interface;
+}
+
 void Thread::SetStatus(ThreadStatus new_status) {
     if (new_status == status) {
         return;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index f998890c4b..c08fc3a894 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -21,6 +21,7 @@ class Fiber;
 }
 
 namespace Core {
+class ARM_Interface;
 class System;
 } // namespace Core
 
@@ -271,6 +272,10 @@ public:
 
     void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
 
+    Core::ARM_Interface& ArmInterface();
+
+    const Core::ARM_Interface& ArmInterface() const;
+
     SynchronizationObject* GetSignalingObject() const {
         return signaling_object;
     }
@@ -617,9 +622,10 @@ private:
 
     void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
 
+    Common::SpinLock context_guard{};
     ThreadContext32 context_32{};
     ThreadContext64 context_64{};
-    Common::SpinLock context_guard{};
+    std::unique_ptr<Core::ARM_Interface> arm_interface{};
     std::shared_ptr<Common::Fiber> host_context{};
 
     u64 thread_id = 0;

From a5a5447fb30b04be54f33fce9ec22de1d8d80f0f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 7 Mar 2020 14:16:25 -0400
Subject: [PATCH 094/122] Thread: Release the ARM Interface on exiting.

---
 src/core/hle/kernel/scheduler.cpp | 2 +-
 src/core/hle/kernel/thread.cpp    | 2 ++
 src/core/hle/kernel/thread.h      | 5 +++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index a5083ae7c4..ce7e1986d4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -724,7 +724,7 @@ void Scheduler::SwitchContext() {
         previous_thread->SetContinuousOnSVC(false);
         previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         previous_thread->SetIsRunning(false);
-        if (!previous_thread->IsHLEThread()) {
+        if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) {
             Core::ARM_Interface& cpu_core = previous_thread->ArmInterface();
             cpu_core.SaveContext(previous_thread->GetContext32());
             cpu_core.SaveContext(previous_thread->GetContext64());
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 58b06aa9ec..65fedfc9b9 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -69,6 +69,8 @@ void Thread::Stop() {
             // Mark the TLS slot in the thread's page as free.
             owner_process->FreeTLSRegion(tls_address);
         }
+        arm_interface.reset();
+        has_exited = true;
     }
     global_handle = 0;
 }
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c08fc3a894..f651d78229 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -610,6 +610,10 @@ public:
         is_phantom_mode = phantom;
     }
 
+    bool HasExited() const {
+        return has_exited;
+    }
+
 private:
     friend class GlobalScheduler;
     friend class Scheduler;
@@ -714,6 +718,7 @@ private:
 
     bool will_be_terminated = false;
     bool is_phantom_mode = false;
+    bool has_exited = false;
 
     bool was_running = false;
 

From e50f584fa273fbcbae03356b37c69785ebb8ee53 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 10 Mar 2020 13:24:52 -0400
Subject: [PATCH 095/122] Scheduler: Correct Reload/Unload

---
 src/core/hle/kernel/kernel.cpp    | 1 +
 src/core/hle/kernel/scheduler.cpp | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 3feddd9ad8..739205eca3 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -149,6 +149,7 @@ struct KernelCore::Impl {
 
         for (std::size_t i = 0; i < cores.size(); i++) {
             cores[i].Shutdown();
+            schedulers[i].reset();
         }
         cores.clear();
 
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ce7e1986d4..43c924fa02 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -625,8 +625,8 @@ void Scheduler::Unload() {
         thread->SetContinuousOnSVC(false);
         thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
         thread->SetIsRunning(false);
-        if (!thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
+        if (!thread->IsHLEThread() && !thread->HasExited()) {
+            Core::ARM_Interface& cpu_core = thread->ArmInterface();
             cpu_core.SaveContext(thread->GetContext32());
             cpu_core.SaveContext(thread->GetContext64());
             // Save the TPIDR_EL0 system register in case it was modified.
@@ -653,11 +653,12 @@ void Scheduler::Reload() {
             system.Kernel().MakeCurrentProcess(thread_owner_process);
         }
         if (!thread->IsHLEThread()) {
-            auto& cpu_core = system.ArmInterface(core_id);
+            Core::ARM_Interface& cpu_core = thread->ArmInterface();
             cpu_core.LoadContext(thread->GetContext32());
             cpu_core.LoadContext(thread->GetContext64());
             cpu_core.SetTlsAddress(thread->GetTLSAddress());
             cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+            cpu_core.ChangeProcessorId(this->core_id);
             cpu_core.ClearExclusiveState();
         }
     }

From d8da19a8fb9a1145cba096f5ecac15501fd8cc80 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 28 Mar 2020 15:23:28 -0400
Subject: [PATCH 096/122] SingleCore: Use Cycle Timing instead of Host Timing.

---
 src/core/arm/arm_interface.h              |  6 ++--
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 36 +++++++++++++-------
 src/core/arm/dynarmic/arm_dynarmic_32.h   |  2 +-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 39 ++++++++++++++-------
 src/core/arm/dynarmic/arm_dynarmic_64.h   |  2 +-
 src/core/arm/unicorn/arm_unicorn.cpp      |  6 ++--
 src/core/arm/unicorn/arm_unicorn.h        |  4 +--
 src/core/core_timing.cpp                  | 41 ++++++++++++++++++-----
 src/core/core_timing.h                    | 14 ++++++--
 src/core/core_timing_util.cpp             | 29 +++++++++++-----
 src/core/core_timing_util.h               | 15 ++-------
 src/core/cpu_manager.cpp                  | 18 +++++-----
 src/core/cpu_manager.h                    |  2 +-
 src/core/hle/kernel/svc.cpp               |  5 +++
 src/core/hle/kernel/thread.cpp            | 12 ++++---
 15 files changed, 151 insertions(+), 80 deletions(-)

diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index e5c4843364..fbdce4134d 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -26,8 +26,9 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers)
-        : system{system_}, interrupt_handlers{interrupt_handlers} {}
+    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
+        : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
+                                                                       uses_wall_clock} {}
     virtual ~ARM_Interface() = default;
 
     struct ThreadContext32 {
@@ -186,6 +187,7 @@ protected:
     /// System context that this ARM interface is running under.
     System& system;
     CPUInterrupts& interrupt_handlers;
+    bool uses_wall_clock;
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index d45772576b..5d78726fa8 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -71,25 +71,37 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        this->ticks -= ticks;
+        if (parent.uses_wall_clock) {
+            return;
+        }
+        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
+        // rough approximation of the amount of executed ticks in the system, it may be thrown off
+        // if not all cores are doing a similar amount of work. Instead of doing this, we should
+        // devise a way so that timing is consistent across all cores without increasing the ticks 4
+        // times.
+        u64 amortized_ticks =
+            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
+        // Always execute at least one tick.
+        amortized_ticks = std::max<u64>(amortized_ticks, 1);
+
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
+        num_interpreted_instructions = 0;
     }
 
     u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-            return std::max<s64>(ticks, 0);
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return std::max<s64>(1000U, 0);
+            }
+            return 0ULL;
         }
-        return 0ULL;
-    }
-
-    void ResetTicks() {
-        ticks = 1000LL;
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
     }
 
     ARM_Dynarmic_32& parent;
     std::size_t num_interpreted_instructions{};
     u64 tpidrro_el0{};
     u64 tpidr_el0{};
-    s64 ticks{};
 };
 
 std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -104,7 +116,6 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
 }
 
 void ARM_Dynarmic_32::Run() {
-    cb->ResetTicks();
     jit->Run();
 }
 
@@ -113,8 +124,9 @@ void ARM_Dynarmic_32::Step() {
 }
 
 ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
-                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handlers},
+                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
+                                 std::size_t core_index)
+    : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
       cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
       exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 937dffee9f..ae3e7fc8c2 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -28,7 +28,7 @@ class System;
 
 class ARM_Dynarmic_32 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
+    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
                     ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_32() override;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 5f6906bec4..713a8869e9 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -124,29 +124,41 @@ public:
     }
 
     void AddTicks(u64 ticks) override {
-        this->ticks -= ticks;
+        if (parent.uses_wall_clock) {
+            return;
+        }
+        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
+        // rough approximation of the amount of executed ticks in the system, it may be thrown off
+        // if not all cores are doing a similar amount of work. Instead of doing this, we should
+        // devise a way so that timing is consistent across all cores without increasing the ticks 4
+        // times.
+        u64 amortized_ticks =
+            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
+        // Always execute at least one tick.
+        amortized_ticks = std::max<u64>(amortized_ticks, 1);
+
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
+        num_interpreted_instructions = 0;
     }
 
     u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-            return std::max<s64>(ticks, 0);
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return std::max<s64>(1000U, 0);
+            }
+            return 0ULL;
         }
-        return 0ULL;
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
     }
 
     u64 GetCNTPCT() override {
         return parent.system.CoreTiming().GetClockTicks();
     }
 
-    void ResetTicks() {
-        ticks = 1000LL;
-    }
-
     ARM_Dynarmic_64& parent;
     std::size_t num_interpreted_instructions = 0;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
-    s64 ticks{};
 };
 
 std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@@ -185,13 +197,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     }
 
     // CNTPCT uses wall clock.
-    config.wall_clock_cntpct = true;
+    config.wall_clock_cntpct = uses_wall_clock;
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 
 void ARM_Dynarmic_64::Run() {
-    cb->ResetTicks();
     jit->Run();
 }
 
@@ -200,9 +211,11 @@ void ARM_Dynarmic_64::Step() {
 }
 
 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
-                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler},
+                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
+                                 std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler, uses_wall_clock},
       cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handler,
+                                                                      uses_wall_clock,
                                                                       ARM_Unicorn::Arch::AArch64,
                                                                       core_index},
       core_index{core_index}, exclusive_monitor{
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index c26b47249c..31ec165210 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -28,7 +28,7 @@ class System;
 
 class ARM_Dynarmic_64 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
+    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
                     ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic_64() override;
 
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 099229c8d4..1cb71942b9 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -63,9 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
-                         std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler}, core_index{core_index} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+                         bool uses_wall_clock, Arch architecture, std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler, uses_wall_clock}, core_index{core_index} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
 
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index f09b24a850..a01751e651 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -20,8 +20,8 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
-                         std::size_t core_index);
+    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+                         bool uses_wall_clock, Arch architecture, std::size_t core_index);
     ~ARM_Unicorn() override;
 
     void SetPC(u64 pc) override;
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 189d4aa34d..12e9e60a4a 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -14,6 +14,8 @@
 
 namespace Core::Timing {
 
+constexpr u64 MAX_SLICE_LENGTH = 4000;
+
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
     return std::make_shared<EventType>(std::move(callback), std::move(name));
 }
@@ -53,6 +55,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
 void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
     on_thread_init = std::move(on_thread_init_);
     event_fifo_id = 0;
+    ticks = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
     if (is_multicore) {
@@ -126,20 +129,36 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
     basic_lock.unlock();
 }
 
-void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
-    ticks_count[core_index] += ticks;
+void CoreTiming::AddTicks(u64 ticks) {
+    this->ticks += ticks;
+    downcount -= ticks;
 }
 
-void CoreTiming::ResetTicks(std::size_t core_index) {
-    ticks_count[core_index] = 0;
+void CoreTiming::Idle() {
+    if (!event_queue.empty()) {
+        u64 next_event_time = event_queue.front().time;
+        ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        return;
+    }
+    ticks += 1000U;
+}
+
+void CoreTiming::ResetTicks() {
+    downcount = MAX_SLICE_LENGTH;
 }
 
 u64 CoreTiming::GetCPUTicks() const {
-    return clock->GetCPUCycles();
+    if (is_multicore) {
+        return clock->GetCPUCycles();
+    }
+    return ticks;
 }
 
 u64 CoreTiming::GetClockTicks() const {
-    return clock->GetClockCycles();
+    if (is_multicore) {
+        return clock->GetClockCycles();
+    }
+    return CpuCyclesToClockCycles(ticks);
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -217,11 +236,17 @@ void CoreTiming::ThreadLoop() {
 }
 
 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    return clock->GetTimeNS();
+    if (is_multicore) {
+        return clock->GetTimeNS();
+    }
+    return CyclesToNs(ticks);
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return clock->GetTimeUS();
+    if (is_multicore) {
+        return clock->GetTimeUS();
+    }
+    return CyclesToUs(ticks);
 }
 
 } // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 03f9a5c764..ed5de9b97a 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -98,9 +98,15 @@ public:
     /// We only permit one event of each type in the queue at a time.
     void RemoveEvent(const std::shared_ptr<EventType>& event_type);
 
-    void AddTicks(std::size_t core_index, u64 ticks);
+    void AddTicks(u64 ticks);
 
-    void ResetTicks(std::size_t core_index);
+    void ResetTicks();
+
+    void Idle();
+
+    s64 GetDowncount() const {
+        return downcount;
+    }
 
     /// Returns current time in emulated CPU cycles
     u64 GetCPUTicks() const;
@@ -154,7 +160,9 @@ private:
 
     bool is_multicore{};
 
-    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
+    /// Cycle timing
+    u64 ticks{};
+    s64 downcount{};
 };
 
 /// Creates a core timing event with the given name and callback.
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index be34b26fe4..aefc636638 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -38,15 +38,8 @@ s64 usToCycles(std::chrono::microseconds us) {
 }
 
 s64 nsToCycles(std::chrono::nanoseconds ns) {
-    if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
+    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
+    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
 }
 
 u64 msToClockCycles(std::chrono::milliseconds ns) {
@@ -69,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {
     return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
 }
 
+std::chrono::milliseconds CyclesToMs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000);
+    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::milliseconds(ms);
+}
+
+std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
+    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::nanoseconds(ns);
+}
+
+std::chrono::microseconds CyclesToUs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
+    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::microseconds(us);
+}
+
 } // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index b3c58447d5..2ed979e14c 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -16,18 +16,9 @@ s64 nsToCycles(std::chrono::nanoseconds ns);
 u64 msToClockCycles(std::chrono::milliseconds ns);
 u64 usToClockCycles(std::chrono::microseconds ns);
 u64 nsToClockCycles(std::chrono::nanoseconds ns);
-
-inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::microseconds CyclesToUs(s64 cycles) {
-    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
-}
+std::chrono::milliseconds CyclesToMs(s64 cycles);
+std::chrono::nanoseconds CyclesToNs(s64 cycles);
+std::chrono::microseconds CyclesToUs(s64 cycles);
 
 u64 CpuCyclesToClockCycles(u64 ticks);
 
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 2e9dc9dc3d..6044050607 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -232,13 +232,10 @@ void CpuManager::SingleCoreRunGuestLoop() {
         auto* physical_core = &kernel.CurrentPhysicalCore();
         auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
-        while (!physical_core->IsInterrupted()) {
+        if (!physical_core->IsInterrupted()) {
+            system.CoreTiming().ResetTicks();
             arm_interface.Run();
             physical_core = &kernel.CurrentPhysicalCore();
-            preemption_count++;
-            if (preemption_count % max_cycle_runs == 0) {
-                break;
-            }
         }
         system.ExitDynarmicProfile();
         thread->SetPhantomMode(true);
@@ -255,7 +252,7 @@ void CpuManager::SingleCoreRunIdleThread() {
     auto& kernel = system.Kernel();
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
-        PreemptSingleCore();
+        PreemptSingleCore(false);
         idle_count++;
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
@@ -279,12 +276,15 @@ void CpuManager::SingleCoreRunSuspendThread() {
     }
 }
 
-void CpuManager::PreemptSingleCore() {
-    preemption_count = 0;
+void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
     std::size_t old_core = current_core;
     auto& scheduler = system.Kernel().Scheduler(old_core);
     Kernel::Thread* current_thread = scheduler.GetCurrentThread();
-    if (idle_count >= 4) {
+    if (idle_count >= 4 || from_running_enviroment) {
+        if (!from_running_enviroment) {
+            system.CoreTiming().Idle();
+            idle_count = 0;
+        }
         current_thread->SetPhantomMode(true);
         system.CoreTiming().Advance();
         current_thread->SetPhantomMode(false);
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index e6b8612f0e..ae55d6427e 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -55,7 +55,7 @@ public:
     std::function<void(void*)> GetSuspendThreadStartFunc();
     void* GetStartFuncParamater();
 
-    void PreemptSingleCore();
+    void PreemptSingleCore(bool from_running_enviroment = true);
 
     std::size_t CurrentCore() const {
         return current_core.load();
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 5999722119..c47fa91671 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1534,6 +1534,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
 
     if (is_redundant && !system.Kernel().IsMulticore()) {
         system.Kernel().ExitSVCProfile();
+        system.CoreTiming().AddTicks(1000U);
         system.GetCpuManager().PreemptSingleCore();
         system.Kernel().EnterSVCProfile();
     }
@@ -1762,6 +1763,10 @@ static u64 GetSystemTick(Core::System& system) {
     // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
     const u64 result{system.CoreTiming().GetClockTicks()};
 
+    if (!system.Kernel().IsMulticore()) {
+        core_timing.AddTicks(400U);
+    }
+
     return result;
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 65fedfc9b9..d88039a162 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -246,19 +246,23 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
 #ifdef ARCHITECTURE_x86_64
         if (owner_process && !owner_process->Is64BitProcess()) {
             thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
-                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+                processor_id);
         } else {
             thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
-                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+                processor_id);
         }
 
 #else
         if (owner_process && !owner_process->Is64BitProcess()) {
             thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
-                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch32, processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32,
+                processor_id);
         } else {
             thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
-                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch64, processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64,
+                processor_id);
         }
         LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif

From 877ce5b14e24e6772ae6e292eb0905cb246916fa Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 28 Mar 2020 15:40:57 -0400
Subject: [PATCH 097/122] FrameLimiting: Enable frame limiting for single core.

---
 src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 1 +
 src/core/perf_stats.cpp                             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 19df0dca7c..3f7b8e6704 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -41,6 +41,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
     system.GetPerfStats().EndGameFrame();
     system.GetPerfStats().EndSystemFrame();
     system.GPU().SwapBuffers(&framebuffer);
+    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
     system.GetPerfStats().BeginSystemFrame();
 }
 
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index f1ae9d4df5..9f3a6b8113 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -119,7 +119,7 @@ double PerfStats::GetLastFrameTimeScale() {
 }
 
 void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
-    if (!Settings::values.use_frame_limit) {
+    if (!Settings::values.use_frame_limit || Settings::values.use_multi_core) {
         return;
     }
 

From 26dddfda52c6bf862f87f0a2d74c7220d65aad30 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 29 Mar 2020 17:06:46 -0400
Subject: [PATCH 098/122] SingleCore: Improve Cycle timing Behavior and replace
 mutex in global scheduler with spinlock.

---
 src/core/cpu_manager.cpp               | 1 +
 src/core/hle/kernel/scheduler.h        | 2 +-
 src/core/hle/kernel/server_session.cpp | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 6044050607..d604aa4464 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -253,6 +253,7 @@ void CpuManager::SingleCoreRunIdleThread() {
     while (true) {
         auto& physical_core = kernel.CurrentPhysicalCore();
         PreemptSingleCore(false);
+        system.CoreTiming().AddTicks(1000U);
         idle_count++;
         auto& scheduler = physical_core.Scheduler();
         scheduler.TryDoContextSwitch();
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index f63cc50859..10dc4b832a 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -188,7 +188,7 @@ private:
 
     /// Scheduler lock mechanisms.
     bool is_locked{};
-    std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock
+    Common::SpinLock inner_lock{};
     std::atomic<s64> scope_lock{};
     Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
 
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 05516a453f..e988a3f222 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -185,7 +185,8 @@ ResultCode ServerSession::CompleteSyncRequest() {
 ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
                                             Core::Memory::Memory& memory) {
     ResultCode result = QueueSyncRequest(std::move(thread), memory);
-    Core::System::GetInstance().CoreTiming().ScheduleEvent(0, request_event, {});
+    const u64 delay = kernel.IsMulticore() ? 0U : 20000U;
+    Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {});
     return result;
 }
 

From e4638c567cee727c99a86c48d5f7b057d73960e4 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 30 Mar 2020 21:50:05 -0400
Subject: [PATCH 099/122] HLE_IPC: Correct HLE Event behavior on timeout.

---
 src/core/hle/kernel/hle_ipc.cpp |  1 +
 src/core/hle/kernel/svc.cpp     | 10 +++++++++-
 src/core/hle/kernel/thread.h    |  9 +++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index c3d612f341..396e493cf0 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -70,6 +70,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
             });
         const auto readable_event{writable_event->GetReadableEvent()};
         writable_event->Clear();
+        thread->SetHLESyncObject(readable_event.get());
         thread->SetStatus(ThreadStatus::WaitHLEEvent);
         thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
         readable_event->AddWaitingThread(thread);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c47fa91671..37e893c841 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -333,13 +333,21 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
         thread->SetStatus(ThreadStatus::WaitIPC);
         session->SendSyncRequest(SharedFrom(thread), system.Memory());
     }
+
     if (thread->HasHLECallback()) {
         Handle event_handle = thread->GetHLETimeEvent();
         if (event_handle != InvalidHandle) {
             auto& time_manager = system.Kernel().TimeManager();
             time_manager.UnscheduleTimeEvent(event_handle);
         }
-        thread->InvokeHLECallback(SharedFrom(thread));
+
+        {
+            SchedulerLock lock(system.Kernel());
+            auto* sync_object = thread->GetHLESyncObject();
+            sync_object->RemoveWaitingThread(SharedFrom(thread));
+
+            thread->InvokeHLECallback(SharedFrom(thread));
+        }
     }
 
     return thread->GetSignalingResult();
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index f651d78229..61963148db 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -489,10 +489,18 @@ public:
         hle_time_event = time_event;
     }
 
+    void SetHLESyncObject(SynchronizationObject* object) {
+        hle_object = object;
+    }
+
     Handle GetHLETimeEvent() const {
         return hle_time_event;
     }
 
+    SynchronizationObject* GetHLESyncObject() const {
+        return hle_object;
+    }
+
     void InvalidateWakeupCallback() {
         SetWakeupCallback(nullptr);
     }
@@ -698,6 +706,7 @@ private:
     /// Callback for HLE Events
     HLECallback hle_callback;
     Handle hle_time_event;
+    SynchronizationObject* hle_object;
 
     Scheduler* scheduler = nullptr;
 

From 83adf675c09a9afaac4a726b006e48692a91aa27 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 31 Mar 2020 13:52:07 -0400
Subject: [PATCH 100/122] Bootmanager/CPU_Manager: Correct shader caches and
 sync GPU on OpenGL.

---
 src/core/cpu_manager.cpp | 15 +++++++++------
 src/yuzu/bootmanager.cpp |  4 ++++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index d604aa4464..c0974ee383 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -22,13 +22,7 @@ CpuManager::CpuManager(System& system) : system{system} {}
 CpuManager::~CpuManager() = default;
 
 void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
-    if (!cpu_manager.is_async_gpu && !cpu_manager.is_multicore) {
-        cpu_manager.render_window->MakeCurrent();
-    }
     cpu_manager.RunThread(core);
-    if (!cpu_manager.is_async_gpu && !cpu_manager.is_multicore) {
-        cpu_manager.render_window->DoneCurrent();
-    }
 }
 
 void CpuManager::SetRenderWindow(Core::Frontend::EmuWindow& render_window) {
@@ -353,10 +347,16 @@ void CpuManager::RunThread(std::size_t core) {
     data.host_context = Common::Fiber::ThreadToFiber();
     data.is_running = false;
     data.initialized = true;
+    const bool sc_sync = !is_async_gpu && !is_multicore;
+    bool sc_sync_first_use = sc_sync;
     /// Running
     while (running_mode) {
         data.is_running = false;
         data.enter_barrier->Wait();
+        if (sc_sync_first_use) {
+            render_window->MakeCurrent();
+            sc_sync_first_use = false;
+        }
         auto& scheduler = system.Kernel().CurrentScheduler();
         Kernel::Thread* current_thread = scheduler.GetCurrentThread();
         data.is_running = true;
@@ -366,6 +366,9 @@ void CpuManager::RunThread(std::size_t core) {
         data.exit_barrier->Wait();
         data.is_paused = false;
     }
+    if (sc_sync) {
+        render_window->DoneCurrent();
+    }
     /// Time to cleanup
     data.host_context->Exit();
     data.enter_barrier.reset();
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index bce66cf076..b1305a9392 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -53,6 +53,8 @@ void EmuThread::run() {
 
     Core::System::GetInstance().RegisterHostThread();
 
+    context.MakeCurrent();
+
     Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
         stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
             emit LoadProgress(stage, value, total);
@@ -60,6 +62,8 @@ void EmuThread::run() {
 
     emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
 
+    context.DoneCurrent();
+
     // Holds whether the cpu was running during the last iteration,
     // so that the DebugModeLeft signal can be emitted before the
     // next execution step

From 223799f7a7b4533db4dbed51b93a8683cd00e33f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 31 Mar 2020 15:12:41 -0400
Subject: [PATCH 101/122] Kernel/svcBreak: Implement CacheInvalidation for
 Singlecore and correct svcBreak.

---
 src/core/hle/kernel/kernel.cpp | 12 +++++++++++-
 src/core/hle/kernel/svc.cpp    |  4 ++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 739205eca3..1f230fc4a0 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -545,7 +545,17 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
 }
 
 void KernelCore::InvalidateAllInstructionCaches() {
-    //TODO: Reimplement, this
+    if (!IsMulticore()) {
+        auto& threads = GlobalScheduler().GetThreadList();
+        for (auto& thread : threads) {
+            if (!thread->IsHLEThread()) {
+                auto& arm_interface = thread->ArmInterface();
+                arm_interface.ClearInstructionCache();
+            }
+        }
+    } else {
+        UNIMPLEMENTED_MSG("Cache Invalidation unimplemented for multicore");
+    }
 }
 
 void KernelCore::PrepareReschedule(std::size_t id) {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 37e893c841..dbd35580e9 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -622,6 +622,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
         info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt);
 
     if (!break_reason.signal_debugger) {
+        SchedulerLock lock(system.Kernel());
         LOG_CRITICAL(
             Debug_Emulated,
             "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
@@ -633,9 +634,8 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
         const auto thread_processor_id = current_thread->GetProcessorID();
         system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
 
-        system.Kernel().CurrentProcess()->PrepareForTermination();
-
         // Kill the current thread
+        system.Kernel().ExceptionalExit();
         current_thread->Stop();
     }
 }

From 29d6be18592e5a532d4183857dafabc7d3aca057 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 1 Apr 2020 17:28:49 -0400
Subject: [PATCH 102/122] General: Cleanup legacy code.

---
 src/core/CMakeLists.txt                       |   2 -
 src/core/arm/dynarmic/arm_dynarmic_32.cpp     |   1 -
 src/core/arm/dynarmic/arm_dynarmic_64.cpp     |   1 -
 src/core/core_manager.cpp                     |  51 -----
 src/core/core_manager.h                       |  63 ------
 src/core/gdbstub/gdbstub.cpp                  |   1 -
 src/core/hle/kernel/client_port.cpp           |   2 +-
 src/core/hle/kernel/kernel.cpp                |  81 +------
 src/core/hle/kernel/kernel.h                  |   3 -
 src/core/hle/kernel/svc.cpp                   |   3 +-
 .../hle/kernel/synchronization_object.cpp     |  64 ------
 src/core/hle/kernel/synchronization_object.h  |  15 --
 src/core/hle/kernel/thread.cpp                |  34 ---
 src/core/hle/kernel/thread.h                  |  56 +----
 src/core/hle/service/sm/sm.cpp                |   2 +-
 src/core/host_timing.cpp                      | 206 ------------------
 src/core/host_timing.h                        | 160 --------------
 src/tests/core/core_timing.cpp                |   1 +
 src/yuzu/debugger/wait_tree.cpp               |   2 +-
 19 files changed, 8 insertions(+), 740 deletions(-)
 delete mode 100644 src/core/core_manager.cpp
 delete mode 100644 src/core/core_manager.h
 delete mode 100644 src/core/host_timing.cpp
 delete mode 100644 src/core/host_timing.h

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e65524b8a9..1527f280b8 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -17,8 +17,6 @@ add_library(core STATIC
     constants.h
     core.cpp
     core.h
-    core_manager.cpp
-    core_manager.h
     core_timing.cpp
     core_timing.h
     core_timing_util.cpp
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 5d78726fa8..b094812318 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -12,7 +12,6 @@
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/arm/dynarmic/arm_dynarmic_cp15.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/svc.h"
 #include "core/memory.h"
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 713a8869e9..986b4ce92a 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -11,7 +11,6 @@
 #include "core/arm/cpu_interrupt_handler.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
deleted file mode 100644
index 82d7acb406..0000000000
--- a/src/core/core_manager.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <condition_variable>
-#include <mutex>
-
-#include "common/logging/log.h"
-#include "core/arm/exclusive_monitor.h"
-#include "core/arm/unicorn/arm_unicorn.h"
-#include "core/core.h"
-#include "core/core_manager.h"
-#include "core/core_timing.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/scheduler.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/lock.h"
-#include "core/settings.h"
-
-namespace Core {
-
-CoreManager::CoreManager(System& system, std::size_t core_index)
-    : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore(
-                                                      core_index)},
-      core_timing{system.CoreTiming()}, core_index{core_index} {}
-
-CoreManager::~CoreManager() = default;
-
-void CoreManager::RunLoop(bool tight_loop) {
-    /// Deprecated
-}
-
-void CoreManager::SingleStep() {
-    return RunLoop(false);
-}
-
-void CoreManager::PrepareReschedule() {
-    //physical_core.Stop();
-}
-
-void CoreManager::Reschedule() {
-    // Lock the global kernel mutex when we manipulate the HLE state
-    std::lock_guard lock(HLE::g_hle_lock);
-
-    // global_scheduler.SelectThread(core_index);
-
-    physical_core.Scheduler().TryDoContextSwitch();
-}
-
-} // namespace Core
diff --git a/src/core/core_manager.h b/src/core/core_manager.h
deleted file mode 100644
index d525de00ab..0000000000
--- a/src/core/core_manager.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <cstddef>
-#include <memory>
-#include "common/common_types.h"
-
-namespace Kernel {
-class GlobalScheduler;
-class PhysicalCore;
-} // namespace Kernel
-
-namespace Core {
-class System;
-}
-
-namespace Core::Timing {
-class CoreTiming;
-}
-
-namespace Core::Memory {
-class Memory;
-}
-
-namespace Core {
-
-constexpr unsigned NUM_CPU_CORES{4};
-
-class CoreManager {
-public:
-    CoreManager(System& system, std::size_t core_index);
-    ~CoreManager();
-
-    void RunLoop(bool tight_loop = true);
-
-    void SingleStep();
-
-    void PrepareReschedule();
-
-    bool IsMainCore() const {
-        return core_index == 0;
-    }
-
-    std::size_t CoreIndex() const {
-        return core_index;
-    }
-
-private:
-    void Reschedule();
-
-    Kernel::GlobalScheduler& global_scheduler;
-    Kernel::PhysicalCore& physical_core;
-    Timing::CoreTiming& core_timing;
-
-    std::atomic<bool> reschedule_pending = false;
-    std::size_t core_index;
-};
-
-} // namespace Core
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 70c0f8b80b..79f22a4031 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -35,7 +35,6 @@
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process.h"
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index 5498fd313b..8aff2227a0 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
     }
 
     // Wake the threads waiting on the ServerPort
-    server_port->WakeupAllWaitingThreads();
+    server_port->Signal();
 
     return MakeResult(std::move(client));
 }
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 1f230fc4a0..dbb75416d7 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -48,72 +48,6 @@ MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
 
 namespace Kernel {
 
-/**
- * Callback that will wake up the thread it was scheduled for
- * @param thread_handle The handle of the thread that's been awoken
- * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
- */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
-    UNREACHABLE();
-    const auto proper_handle = static_cast<Handle>(thread_handle);
-    const auto& system = Core::System::GetInstance();
-
-    // Lock the global kernel mutex when we enter the kernel HLE.
-    std::lock_guard lock{HLE::g_hle_lock};
-
-    std::shared_ptr<Thread> thread =
-        system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
-    if (thread == nullptr) {
-        LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
-        return;
-    }
-
-    bool resume = true;
-
-    if (thread->GetStatus() == ThreadStatus::WaitSynch ||
-        thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
-        // Remove the thread from each of its waiting objects' waitlists
-        for (const auto& object : thread->GetSynchronizationObjects()) {
-            object->RemoveWaitingThread(thread);
-        }
-        thread->ClearSynchronizationObjects();
-
-        // Invoke the wakeup callback before clearing the wait objects
-        if (thread->HasWakeupCallback()) {
-            resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
-        }
-    } else if (thread->GetStatus() == ThreadStatus::WaitMutex ||
-               thread->GetStatus() == ThreadStatus::WaitCondVar) {
-        thread->SetMutexWaitAddress(0);
-        thread->SetWaitHandle(0);
-        if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
-            thread->GetOwnerProcess()->RemoveConditionVariableThread(thread);
-            thread->SetCondVarWaitAddress(0);
-        }
-
-        auto* const lock_owner = thread->GetLockOwner();
-        // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
-        // and don't have a lock owner unless SignalProcessWideKey was called first and the thread
-        // wasn't awakened due to the mutex already being acquired.
-        if (lock_owner != nullptr) {
-            lock_owner->RemoveMutexWaiter(thread);
-        }
-    }
-
-    if (thread->GetStatus() == ThreadStatus::WaitArb) {
-        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
-        address_arbiter.HandleWakeupThread(thread);
-    }
-
-    if (resume) {
-        if (thread->GetStatus() == ThreadStatus::WaitCondVar ||
-            thread->GetStatus() == ThreadStatus::WaitArb) {
-            thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
-        }
-        thread->ResumeFromWait();
-    }
-}
-
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
         : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
@@ -129,7 +63,6 @@ struct KernelCore::Impl {
         InitializePhysicalCores();
         InitializeSystemResourceLimit(kernel);
         InitializeMemoryLayout();
-        InitializeThreads();
         InitializePreemption(kernel);
         InitializeSchedulers();
         InitializeSuspendThreads();
@@ -161,7 +94,6 @@ struct KernelCore::Impl {
         system_resource_limit = nullptr;
 
         global_handle_table.Clear();
-        thread_wakeup_event_type = nullptr;
         preemption_event = nullptr;
 
         global_scheduler.Shutdown();
@@ -210,11 +142,6 @@ struct KernelCore::Impl {
         }
     }
 
-    void InitializeThreads() {
-        thread_wakeup_event_type =
-            Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback);
-    }
-
     void InitializePreemption(KernelCore& kernel) {
         preemption_event = Core::Timing::CreateEvent(
             "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) {
@@ -376,7 +303,6 @@ struct KernelCore::Impl {
 
     std::shared_ptr<ResourceLimit> system_resource_limit;
 
-    std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;
     std::shared_ptr<Core::Timing::EventType> preemption_event;
 
     // This is the kernel's handle table or supervisor handle table which
@@ -516,7 +442,8 @@ std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore
     return impl->interrupts;
 }
 
-const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() const {
+const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts()
+    const {
     return impl->interrupts;
 }
 
@@ -595,10 +522,6 @@ u64 KernelCore::CreateNewUserProcessID() {
     return impl->next_user_process_id++;
 }
 
-const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const {
-    return impl->thread_wakeup_event_type;
-}
-
 Kernel::HandleTable& KernelCore::GlobalHandleTable() {
     return impl->global_handle_table;
 }
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 846056b85a..49bd47e89b 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -241,9 +241,6 @@ private:
     /// Creates a new thread ID, incrementing the internal thread ID counter.
     u64 CreateNewThreadID();
 
-    /// Retrieves the event type used for thread wakeup callbacks.
-    const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const;
-
     /// Provides a reference to the global handle table.
     Kernel::HandleTable& GlobalHandleTable();
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index dbd35580e9..781032cd18 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -16,7 +16,6 @@
 #include "common/string_util.h"
 #include "core/arm/exclusive_monitor.h"
 #include "core/core.h"
-#include "core/core_manager.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/cpu_manager.h"
@@ -1909,7 +1908,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
             return ERR_INVALID_COMBINATION;
         }
 
-        if (core < Core::NUM_CPU_CORES) {
+        if (core < Core::Hardware::NUM_CPU_CORES) {
             if ((affinity_mask & (1ULL << core)) == 0) {
                 LOG_ERROR(Kernel_SVC,
                           "Core is not enabled for the current mask, core={}, mask={:016X}", core,
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
index be9e091068..ba4d39157e 100644
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -38,70 +38,6 @@ void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread)
         waiting_threads.erase(itr);
 }
 
-std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
-    Thread* candidate = nullptr;
-    u32 candidate_priority = THREADPRIO_LOWEST + 1;
-
-    for (const auto& thread : waiting_threads) {
-        const ThreadStatus thread_status = thread->GetStatus();
-
-        // The list of waiting threads must not contain threads that are not waiting to be awakened.
-        ASSERT_MSG(thread_status == ThreadStatus::WaitSynch ||
-                       thread_status == ThreadStatus::WaitHLEEvent,
-                   "Inconsistent thread statuses in waiting_threads");
-
-        if (thread->GetPriority() >= candidate_priority)
-            continue;
-
-        if (ShouldWait(thread.get()))
-            continue;
-
-        candidate = thread.get();
-        candidate_priority = thread->GetPriority();
-    }
-
-    return SharedFrom(candidate);
-}
-
-void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
-    ASSERT(!ShouldWait(thread.get()));
-
-    if (!thread) {
-        return;
-    }
-
-    if (thread->IsSleepingOnWait()) {
-        for (const auto& object : thread->GetSynchronizationObjects()) {
-            ASSERT(!object->ShouldWait(thread.get()));
-            object->Acquire(thread.get());
-        }
-    } else {
-        Acquire(thread.get());
-    }
-
-    const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));
-
-    thread->ClearSynchronizationObjects();
-
-    thread->CancelWakeupTimer();
-
-    bool resume = true;
-    if (thread->HasWakeupCallback()) {
-        resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, SharedFrom(this),
-                                              index);
-    }
-    if (resume) {
-        thread->ResumeFromWait();
-        kernel.PrepareReschedule(thread->GetProcessorID());
-    }
-}
-
-void SynchronizationObject::WakeupAllWaitingThreads() {
-    while (auto thread = GetHighestPriorityReadyThread()) {
-        WakeupWaitingThread(thread);
-    }
-}
-
 void SynchronizationObject::ClearWaitingThreads() {
     waiting_threads.clear();
 }
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index a35544ac13..f89b242040 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -50,21 +50,6 @@ public:
      */
     void RemoveWaitingThread(std::shared_ptr<Thread> thread);
 
-    /**
-     * Wake up all threads waiting on this object that can be awoken, in priority order,
-     * and set the synchronization result and output of the thread.
-     */
-    void /* deprecated */ WakeupAllWaitingThreads();
-
-    /**
-     * Wakes up a single thread waiting on this object.
-     * @param thread Thread that is waiting on this object to wakeup.
-     */
-    void WakeupWaitingThread(std::shared_ptr<Thread> thread);
-
-    /// Obtains the highest priority thread that is ready to run from this object's waiting list.
-    std::shared_ptr<Thread> /* deprecated */ GetHighestPriorityReadyThread() const;
-
     /// Get a const reference to the waiting threads list for debug use
     const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index d88039a162..fba2a9c85a 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -56,9 +56,6 @@ Thread::~Thread() = default;
 void Thread::Stop() {
     {
         SchedulerLock lock(kernel);
-        // Cancel any outstanding wakeup events for this thread
-        Core::System::GetInstance().CoreTiming().UnscheduleEvent(
-            kernel.ThreadWakeupCallbackEventType(), global_handle);
         SetStatus(ThreadStatus::Dead);
         Signal();
         kernel.GlobalHandleTable().Close(global_handle);
@@ -75,22 +72,6 @@ void Thread::Stop() {
     global_handle = 0;
 }
 
-void Thread::WakeAfterDelay(s64 nanoseconds) {
-    // Don't schedule a wakeup if the thread wants to wait forever
-    if (nanoseconds == -1)
-        return;
-
-    // This function might be called from any thread so we have to be cautious and use the
-    // thread-safe version of ScheduleEvent.
-    Core::System::GetInstance().CoreTiming().ScheduleEvent(
-        nanoseconds, kernel.ThreadWakeupCallbackEventType(), global_handle);
-}
-
-void Thread::CancelWakeupTimer() {
-    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
-                                                             global_handle);
-}
-
 void Thread::ResumeFromWait() {
     SchedulerLock lock(kernel);
     switch (status) {
@@ -284,14 +265,6 @@ void Thread::SetPriority(u32 priority) {
     UpdatePriority();
 }
 
-void Thread::SetWaitSynchronizationResult(ResultCode result) {
-    UNREACHABLE();
-}
-
-void Thread::SetWaitSynchronizationOutput(s32 output) {
-    UNREACHABLE();
-}
-
 void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
     signaling_object = object;
     signaling_result = result;
@@ -425,13 +398,6 @@ bool Thread::AllSynchronizationObjectsReady() const {
                         });
 }
 
-bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                  std::shared_ptr<SynchronizationObject> object,
-                                  std::size_t index) {
-    ASSERT(wakeup_callback);
-    return wakeup_callback(reason, std::move(thread), std::move(object), index);
-}
-
 bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
     ASSERT(hle_callback);
     return hle_callback(std::move(thread));
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 61963148db..3ae0df6ef8 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -128,9 +128,6 @@ public:
 
     using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
 
-    using WakeupCallback =
-        std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                           std::shared_ptr<SynchronizationObject> object, std::size_t index)>;
     using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
 
     /**
@@ -235,7 +232,7 @@ public:
     }
 
     /// Resumes a thread from waiting
-    void /* deprecated */ ResumeFromWait();
+    void ResumeFromWait();
 
     void OnWakeUp();
 
@@ -249,27 +246,6 @@ public:
     ///
     void CancelWait();
 
-    /**
-     * Schedules an event to wake up the specified thread after the specified delay
-     * @param nanoseconds The time this thread will be allowed to sleep for
-     */
-    void /* deprecated */ WakeAfterDelay(s64 nanoseconds);
-
-    /// Cancel any outstanding wakeup events for this thread
-    void /* deprecated */ CancelWakeupTimer();
-
-    /**
-     * Sets the result after the thread awakens (from svcWaitSynchronization)
-     * @param result Value to set to the returned result
-     */
-    void /*deprecated*/ SetWaitSynchronizationResult(ResultCode result);
-
-    /**
-     * Sets the output parameter value after the thread awakens (from svcWaitSynchronization)
-     * @param output Value to set to the output parameter
-     */
-    void /*deprecated*/ SetWaitSynchronizationOutput(s32 output);
-
     void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
 
     Core::ARM_Interface& ArmInterface();
@@ -330,11 +306,6 @@ public:
      */
     VAddr GetCommandBufferAddress() const;
 
-    /// Returns whether this thread is waiting on objects from a WaitSynchronization call.
-    bool IsSleepingOnWait() const {
-        return status == ThreadStatus::WaitSynch;
-    }
-
     ThreadContext32& GetContext32() {
         return context_32;
     }
@@ -469,18 +440,10 @@ public:
         arb_wait_address = address;
     }
 
-    bool HasWakeupCallback() const {
-        return wakeup_callback != nullptr;
-    }
-
     bool HasHLECallback() const {
         return hle_callback != nullptr;
     }
 
-    void SetWakeupCallback(WakeupCallback callback) {
-        wakeup_callback = std::move(callback);
-    }
-
     void SetHLECallback(HLECallback callback) {
         hle_callback = std::move(callback);
     }
@@ -501,22 +464,10 @@ public:
         return hle_object;
     }
 
-    void InvalidateWakeupCallback() {
-        SetWakeupCallback(nullptr);
-    }
-
     void InvalidateHLECallback() {
         SetHLECallback(nullptr);
     }
 
-    /**
-     * Invokes the thread's wakeup callback.
-     *
-     * @pre A valid wakeup callback has been set. Violating this precondition
-     *      will cause an assertion to trigger.
-     */
-    bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                              std::shared_ptr<SynchronizationObject> object, std::size_t index);
     bool InvokeHLECallback(std::shared_ptr<Thread> thread);
 
     u32 GetIdealCore() const {
@@ -698,11 +649,6 @@ private:
     /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
     Handle global_handle = 0;
 
-    /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
-    /// was waiting via WaitSynchronization then the object will be the last object that became
-    /// available. In case of a timeout, the object will be nullptr. DEPRECATED
-    WakeupCallback wakeup_callback;
-
     /// Callback for HLE Events
     HLECallback hle_callback;
     Handle hle_time_event;
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 6ada13be44..d872de16c0 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -142,7 +142,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
     }
 
     // Wake the threads waiting on the ServerPort
-    server_port->WakeupAllWaitingThreads();
+    server_port->Signal();
 
     LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
     IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
deleted file mode 100644
index 2f40de1a1c..0000000000
--- a/src/core/host_timing.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "core/host_timing.h"
-
-#include <algorithm>
-#include <mutex>
-#include <string>
-#include <tuple>
-
-#include "common/assert.h"
-#include "core/core_timing_util.h"
-
-namespace Core::HostTiming {
-
-std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
-    return std::make_shared<EventType>(std::move(callback), std::move(name));
-}
-
-struct CoreTiming::Event {
-    u64 time;
-    u64 fifo_order;
-    u64 userdata;
-    std::weak_ptr<EventType> type;
-
-    // Sort by time, unless the times are the same, in which case sort by
-    // the order added to the queue
-    friend bool operator>(const Event& left, const Event& right) {
-        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
-    }
-
-    friend bool operator<(const Event& left, const Event& right) {
-        return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
-    }
-};
-
-CoreTiming::CoreTiming() {
-    clock =
-        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
-}
-
-CoreTiming::~CoreTiming() = default;
-
-void CoreTiming::ThreadEntry(CoreTiming& instance) {
-    instance.ThreadLoop();
-}
-
-void CoreTiming::Initialize() {
-    event_fifo_id = 0;
-    const auto empty_timed_callback = [](u64, s64) {};
-    ev_lost = CreateEvent("_lost_event", empty_timed_callback);
-    timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
-}
-
-void CoreTiming::Shutdown() {
-    paused = true;
-    shutting_down = true;
-    event.Set();
-    timer_thread->join();
-    ClearPendingEvents();
-    timer_thread.reset();
-    has_started = false;
-}
-
-void CoreTiming::Pause(bool is_paused) {
-    paused = is_paused;
-}
-
-void CoreTiming::SyncPause(bool is_paused) {
-    if (is_paused == paused && paused_set == paused) {
-        return;
-    }
-    Pause(is_paused);
-    event.Set();
-    while (paused_set != is_paused)
-        ;
-}
-
-bool CoreTiming::IsRunning() const {
-    return !paused_set;
-}
-
-bool CoreTiming::HasPendingEvents() const {
-    return !(wait_set && event_queue.empty());
-}
-
-void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
-                               u64 userdata) {
-    basic_lock.lock();
-    const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
-
-    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
-
-    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    basic_lock.unlock();
-    event.Set();
-}
-
-void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    basic_lock.lock();
-    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
-        return e.type.lock().get() == event_type.get() && e.userdata == userdata;
-    });
-
-    // Removing random items breaks the invariant so we have to re-establish it.
-    if (itr != event_queue.end()) {
-        event_queue.erase(itr, event_queue.end());
-        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    }
-    basic_lock.unlock();
-}
-
-void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
-    ticks_count[core_index] += ticks;
-}
-
-void CoreTiming::ResetTicks(std::size_t core_index) {
-    ticks_count[core_index] = 0;
-}
-
-u64 CoreTiming::GetCPUTicks() const {
-    return clock->GetCPUCycles();
-}
-
-u64 CoreTiming::GetClockTicks() const {
-    return clock->GetClockCycles();
-}
-
-void CoreTiming::ClearPendingEvents() {
-    event_queue.clear();
-}
-
-void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    basic_lock.lock();
-
-    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
-        return e.type.lock().get() == event_type.get();
-    });
-
-    // Removing random items breaks the invariant so we have to re-establish it.
-    if (itr != event_queue.end()) {
-        event_queue.erase(itr, event_queue.end());
-        std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-    }
-    basic_lock.unlock();
-}
-
-std::optional<u64> CoreTiming::Advance() {
-    advance_lock.lock();
-    basic_lock.lock();
-    global_timer = GetGlobalTimeNs().count();
-
-    while (!event_queue.empty() && event_queue.front().time <= global_timer) {
-        Event evt = std::move(event_queue.front());
-        std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
-        event_queue.pop_back();
-        basic_lock.unlock();
-
-        if (auto event_type{evt.type.lock()}) {
-            event_type->callback(evt.userdata, global_timer - evt.time);
-        }
-
-        basic_lock.lock();
-    }
-
-    if (!event_queue.empty()) {
-        const u64 next_time = event_queue.front().time - global_timer;
-        basic_lock.unlock();
-        advance_lock.unlock();
-        return next_time;
-    } else {
-        basic_lock.unlock();
-        advance_lock.unlock();
-        return std::nullopt;
-    }
-}
-
-void CoreTiming::ThreadLoop() {
-    has_started = true;
-    while (!shutting_down) {
-        while (!paused) {
-            paused_set = false;
-            const auto next_time = Advance();
-            if (next_time) {
-                std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
-                event.WaitFor(next_time_ns);
-            } else {
-                wait_set = true;
-                event.Wait();
-            }
-            wait_set = false;
-        }
-        paused_set = true;
-    }
-}
-
-std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    return clock->GetTimeNS();
-}
-
-std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return clock->GetTimeUS();
-}
-
-} // namespace Core::HostTiming
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
deleted file mode 100644
index be6b68d7cc..0000000000
--- a/src/core/host_timing.h
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <chrono>
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <optional>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include "common/common_types.h"
-#include "common/spin_lock.h"
-#include "common/thread.h"
-#include "common/threadsafe_queue.h"
-#include "common/wall_clock.h"
-#include "core/hardware_properties.h"
-
-namespace Core::HostTiming {
-
-/// A callback that may be scheduled for a particular core timing event.
-using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
-
-/// Contains the characteristics of a particular event.
-struct EventType {
-    EventType(TimedCallback&& callback, std::string&& name)
-        : callback{std::move(callback)}, name{std::move(name)} {}
-
-    /// The event's callback function.
-    TimedCallback callback;
-    /// A pointer to the name of the event.
-    const std::string name;
-};
-
-/**
- * This is a system to schedule events into the emulated machine's future. Time is measured
- * in main CPU clock cycles.
- *
- * To schedule an event, you first have to register its type. This is where you pass in the
- * callback. You then schedule events using the type id you get back.
- *
- * The int cyclesLate that the callbacks get is how many cycles late it was.
- * So to schedule a new event on a regular basis:
- * inside callback:
- *   ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
- */
-class CoreTiming {
-public:
-    CoreTiming();
-    ~CoreTiming();
-
-    CoreTiming(const CoreTiming&) = delete;
-    CoreTiming(CoreTiming&&) = delete;
-
-    CoreTiming& operator=(const CoreTiming&) = delete;
-    CoreTiming& operator=(CoreTiming&&) = delete;
-
-    /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
-    /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
-    void Initialize();
-
-    /// Tears down all timing related functionality.
-    void Shutdown();
-
-    /// Pauses/Unpauses the execution of the timer thread.
-    void Pause(bool is_paused);
-
-    /// Pauses/Unpauses the execution of the timer thread and waits until paused.
-    void SyncPause(bool is_paused);
-
-    /// Checks if core timing is running.
-    bool IsRunning() const;
-
-    /// Checks if the timer thread has started.
-    bool HasStarted() const {
-        return has_started;
-    }
-
-    /// Checks if there are any pending time events.
-    bool HasPendingEvents() const;
-
-    /// Schedules an event in core timing
-    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
-                       u64 userdata = 0);
-
-    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
-
-    /// We only permit one event of each type in the queue at a time.
-    void RemoveEvent(const std::shared_ptr<EventType>& event_type);
-
-    void AddTicks(std::size_t core_index, u64 ticks);
-
-    void ResetTicks(std::size_t core_index);
-
-    /// Returns current time in emulated CPU cycles
-    u64 GetCPUTicks() const;
-
-    /// Returns current time in emulated in Clock cycles
-    u64 GetClockTicks() const;
-
-    /// Returns current time in microseconds.
-    std::chrono::microseconds GetGlobalTimeUs() const;
-
-    /// Returns current time in nanoseconds.
-    std::chrono::nanoseconds GetGlobalTimeNs() const;
-
-    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
-    std::optional<u64> Advance();
-
-private:
-    struct Event;
-
-    /// Clear all pending events. This should ONLY be done on exit.
-    void ClearPendingEvents();
-
-    static void ThreadEntry(CoreTiming& instance);
-    void ThreadLoop();
-
-    std::unique_ptr<Common::WallClock> clock;
-
-    u64 global_timer = 0;
-
-    std::chrono::nanoseconds start_point;
-
-    // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
-    // We don't use std::priority_queue because we need to be able to serialize, unserialize and
-    // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
-    // accomodated by the standard adaptor class.
-    std::vector<Event> event_queue;
-    u64 event_fifo_id = 0;
-
-    std::shared_ptr<EventType> ev_lost;
-    Common::Event event{};
-    Common::SpinLock basic_lock{};
-    Common::SpinLock advance_lock{};
-    std::unique_ptr<std::thread> timer_thread;
-    std::atomic<bool> paused{};
-    std::atomic<bool> paused_set{};
-    std::atomic<bool> wait_set{};
-    std::atomic<bool> shutting_down{};
-    std::atomic<bool> has_started{};
-
-    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
-};
-
-/// Creates a core timing event with the given name and callback.
-///
-/// @param name     The name of the core timing event to create.
-/// @param callback The callback to execute for the event.
-///
-/// @returns An EventType instance representing the created event.
-///
-std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
-
-} // namespace Core::HostTiming
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 795f3da099..21a5840fb5 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -39,6 +39,7 @@ u64 callbacks_done = 0;
 
 struct ScopeInit final {
     ScopeInit() {
+        core_timing.SetMulticore(true);
         core_timing.Initialize([]() {});
     }
     ~ScopeInit() {
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index d2dbb259c9..0226ae2e28 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -340,7 +340,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
 
     if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
         list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
-                                                            thread.IsSleepingOnWait()));
+                                                            thread.IsWaitingSync()));
     }
 
     list.push_back(std::make_unique<WaitTreeCallstack>(thread));

From 1b7e8e0224b16fa25fe95e239ddee4f351955280 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 2 Apr 2020 13:24:39 -0400
Subject: [PATCH 103/122] SingleCore: Correct ticks reset to be on preemption.

---
 src/core/cpu_manager.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index c0974ee383..b7c2a78322 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -227,7 +227,6 @@ void CpuManager::SingleCoreRunGuestLoop() {
         auto& arm_interface = thread->ArmInterface();
         system.EnterDynarmicProfile();
         if (!physical_core->IsInterrupted()) {
-            system.CoreTiming().ResetTicks();
             arm_interface.Run();
             physical_core = &kernel.CurrentPhysicalCore();
         }
@@ -285,6 +284,7 @@ void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
         current_thread->SetPhantomMode(false);
     }
     current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
+    system.CoreTiming().ResetTicks();
     scheduler.Unload();
     auto& next_scheduler = system.Kernel().Scheduler(current_core);
     Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext());

From 99fd99eaa2000d84bae89bfe35499dcf5c101548 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 2 Apr 2020 13:27:08 -0400
Subject: [PATCH 104/122] CoreTiming/CycleTimer: Correct Idling.

---
 src/core/core_timing.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 12e9e60a4a..b02119494e 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -136,8 +136,11 @@ void CoreTiming::AddTicks(u64 ticks) {
 
 void CoreTiming::Idle() {
     if (!event_queue.empty()) {
-        u64 next_event_time = event_queue.front().time;
-        ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        const u64 next_event_time = event_queue.front().time;
+        const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        if (next_ticks > ticks) {
+            ticks = next_ticks;
+        }
         return;
     }
     ticks += 1000U;

From 6b7649128563a69afa08cac34c14a53404d4f1dd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 3 Apr 2020 11:58:43 -0400
Subject: [PATCH 105/122] General: Correct rebase, sync gpu and context
 management.

---
 src/core/core.cpp             |  3 +--
 src/core/cpu_manager.cpp      | 11 ++---------
 src/core/cpu_manager.h        |  7 -------
 src/video_core/gpu.h          |  6 ++++++
 src/video_core/gpu_asynch.cpp |  9 ++++++++-
 src/video_core/gpu_asynch.h   |  2 ++
 src/video_core/gpu_synch.cpp  |  8 +++++++-
 src/video_core/gpu_synch.h    |  2 ++
 src/yuzu/bootmanager.cpp      | 29 +++++++++++++++++------------
 9 files changed, 45 insertions(+), 32 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 40eea297ef..3393c33eb6 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -151,7 +151,6 @@ struct System::Impl {
         cpu_manager.SetMulticore(is_multicore);
         cpu_manager.SetAsyncGpu(is_async_gpu);
         core_timing.SetMulticore(is_multicore);
-        cpu_manager.SetRenderWindow(emu_window);
 
         core_timing.Initialize([&system]() { system.RegisterHostThread(); });
         kernel.Initialize();
@@ -435,7 +434,7 @@ bool System::IsPoweredOn() const {
 }
 
 void System::PrepareReschedule() {
-    //impl->CurrentPhysicalCore().Stop();
+    // impl->CurrentPhysicalCore().Stop();
 }
 
 void System::PrepareReschedule(const u32 core_index) {
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index b7c2a78322..63c5788520 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -9,12 +9,12 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/cpu_manager.h"
-#include "core/frontend/emu_window.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/physical_core.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
+#include "video_core/gpu.h"
 
 namespace Core {
 
@@ -25,10 +25,6 @@ void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
     cpu_manager.RunThread(core);
 }
 
-void CpuManager::SetRenderWindow(Core::Frontend::EmuWindow& render_window) {
-    this->render_window = &render_window;
-}
-
 void CpuManager::Initialize() {
     running_mode = true;
     if (is_multicore) {
@@ -354,7 +350,7 @@ void CpuManager::RunThread(std::size_t core) {
         data.is_running = false;
         data.enter_barrier->Wait();
         if (sc_sync_first_use) {
-            render_window->MakeCurrent();
+            system.GPU().ObtainContext();
             sc_sync_first_use = false;
         }
         auto& scheduler = system.Kernel().CurrentScheduler();
@@ -366,9 +362,6 @@ void CpuManager::RunThread(std::size_t core) {
         data.exit_barrier->Wait();
         data.is_paused = false;
     }
-    if (sc_sync) {
-        render_window->DoneCurrent();
-    }
     /// Time to cleanup
     data.host_context->Exit();
     data.enter_barrier.reset();
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index ae55d6427e..35929ed94f 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -16,10 +16,6 @@ class Event;
 class Fiber;
 } // namespace Common
 
-namespace Core::Frontend {
-class EmuWindow;
-} // namespace Core::Frontend
-
 namespace Core {
 
 class System;
@@ -61,8 +57,6 @@ public:
         return current_core.load();
     }
 
-    void SetRenderWindow(Core::Frontend::EmuWindow& render_window);
-
 private:
     static void GuestThreadFunction(void* cpu_manager);
     static void GuestRewindFunction(void* cpu_manager);
@@ -106,7 +100,6 @@ private:
     std::size_t preemption_count{};
     std::size_t idle_count{};
     static constexpr std::size_t max_cycle_runs = 5;
-    Core::Frontend::EmuWindow* render_window;
 
     System& system;
 };
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305ca..2c42483bd8 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -284,6 +284,12 @@ public:
     /// core timing events.
     virtual void Start() = 0;
 
+    /// Obtain the CPU context (implementations call MakeCurrent() on their context)
+    virtual void ObtainContext() = 0;
+
+    /// Release the CPU context (implementations call DoneCurrent() on their context)
+    virtual void ReleaseContext() = 0;
+
     /// Push GPU command entries to be processed
     virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
 
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab436..7b855f63e6 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
 GPUAsynch::~GPUAsynch() = default;
 
 void GPUAsynch::Start() {
-    cpu_context->MakeCurrent();
     gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
 }
 
+void GPUAsynch::ObtainContext() {
+    cpu_context->MakeCurrent();
+}
+
+void GPUAsynch::ReleaseContext() {
+    cpu_context->DoneCurrent();
+}
+
 void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
     gpu_thread.SubmitList(std::move(entries));
 }
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 5176586127..15e9f1d380 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
     ~GPUAsynch() override;
 
     void Start() override;
+    void ObtainContext() override;
+    void ReleaseContext() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
     void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a2..aaeb9811d6 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase
 
 GPUSynch::~GPUSynch() = default;
 
-void GPUSynch::Start() {
+void GPUSynch::Start() {}
+
+void GPUSynch::ObtainContext() {
     context->MakeCurrent();
 }
 
+void GPUSynch::ReleaseContext() {
+    context->DoneCurrent();
+}
+
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
     dma_pusher->Push(std::move(entries));
     dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d7..762c20aa55 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
     ~GPUSynch() override;
 
     void Start() override;
+    void ObtainContext() override;
+    void ReleaseContext() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
     void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index b1305a9392..079a254e0d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -45,24 +45,29 @@ void EmuThread::run() {
     MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
 
+    auto& system = Core::System::GetInstance();
+
+    system.RegisterHostThread();
+
+    auto& gpu = system.GPU();
+
     // Main process has been loaded. Make the context current to this thread and begin GPU and CPU
     // execution.
-    Core::System::GetInstance().GPU().Start();
+    gpu.Start();
+
+    gpu.ObtainContext();
 
     emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
 
-    Core::System::GetInstance().RegisterHostThread();
-
-    context.MakeCurrent();
-
-    Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
+    system.Renderer().Rasterizer().LoadDiskResources(
         stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
             emit LoadProgress(stage, value, total);
         });
 
     emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
 
-    context.DoneCurrent();
+    gpu.ReleaseContext();
+
 
     // Holds whether the cpu was running during the last iteration,
     // so that the DebugModeLeft signal can be emitted before the
@@ -75,18 +80,18 @@ void EmuThread::run() {
             }
 
             running_guard = true;
-            Core::System::ResultStatus result = Core::System::GetInstance().Run();
+            Core::System::ResultStatus result = system.Run();
             if (result != Core::System::ResultStatus::Success) {
                 running_guard = false;
                 this->SetRunning(false);
-                emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
+                emit ErrorThrown(result, system.GetStatusDetails());
             }
             running_wait.Wait();
-            result = Core::System::GetInstance().Pause();
+            result = system.Pause();
             if (result != Core::System::ResultStatus::Success) {
                 running_guard = false;
                 this->SetRunning(false);
-                emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
+                emit ErrorThrown(result, system.GetStatusDetails());
             }
             running_guard = false;
 
@@ -103,7 +108,7 @@ void EmuThread::run() {
     }
 
     // Shutdown the core emulation
-    Core::System::GetInstance().Shutdown();
+    system.Shutdown();
 
 #if MICROPROFILE_ENABLED
     MicroProfileOnThreadExit();

From a7db2b07fcaeeed20536d5de2b88fde0dc0c937c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 3 Apr 2020 14:11:04 -0400
Subject: [PATCH 106/122] Dynarmic Interface: don't clear cache if JIT has not
 been created.

---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 3 +++
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index b094812318..bd0aa3a7b9 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -200,6 +200,9 @@ void ARM_Dynarmic_32::PrepareReschedule() {
 }
 
 void ARM_Dynarmic_32::ClearInstructionCache() {
+    if (!jit) {
+        return;
+    }
     jit->ClearCache();
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 986b4ce92a..56386c1d05 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -301,6 +301,9 @@ void ARM_Dynarmic_64::PrepareReschedule() {
 }
 
 void ARM_Dynarmic_64::ClearInstructionCache() {
+    if (!jit) {
+        return;
+    }
     jit->ClearCache();
 }
 

From 323740f0e7aa2dbbe8c33eb4cb1ad350fbd0b398 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 5 Apr 2020 09:48:53 -0400
Subject: [PATCH 107/122] General: Tune the priority of main emulation threads
 so they have higher priority than less important helper threads.

---
 src/common/thread.cpp                         | 46 +++++++++++++++++++
 src/common/thread.h                           |  9 ++++
 src/core/core_timing.cpp                      |  1 +
 src/core/cpu_manager.cpp                      |  1 +
 src/video_core/gpu_thread.cpp                 |  1 +
 .../renderer_vulkan/vk_scheduler.cpp          |  2 +
 6 files changed, 60 insertions(+)

diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index c9684aed93..33c8437f52 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -25,6 +25,52 @@
 
 namespace Common {
 
+#ifdef _WIN32
+
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+    auto handle = GetCurrentThread();
+    int windows_priority = 0;
+    switch (new_priority) {
+        case ThreadPriority::Low:
+            windows_priority = THREAD_PRIORITY_BELOW_NORMAL;
+            break;
+        case ThreadPriority::Normal:
+            windows_priority = THREAD_PRIORITY_NORMAL;
+            break;
+        case ThreadPriority::High:
+            windows_priority = THREAD_PRIORITY_ABOVE_NORMAL;
+            break;
+        case ThreadPriority::VeryHigh:
+            windows_priority = THREAD_PRIORITY_HIGHEST;
+            break;
+        default:
+            windows_priority = THREAD_PRIORITY_NORMAL;
+            break;
+    }
+    SetThreadPriority(handle, windows_priority);
+}
+
+#else
+
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+    pthread_t this_thread = pthread_self();
+
+    s32 max_prio = sched_get_priority_max(SCHED_OTHER);
+    s32 min_prio = sched_get_priority_min(SCHED_OTHER);
+    u32 level = static_cast<u32>(new_priority) + 1;
+
+    struct sched_param params;
+    if (max_prio > min_prio) {
+        params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4;
+    } else {
+        params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;
+    }
+
+    pthread_setschedparam(this_thread, SCHED_OTHER, &params);
+}
+
+#endif
+
 #ifdef _MSC_VER
 
 // Sets the debugger-visible name of the current thread.
diff --git a/src/common/thread.h b/src/common/thread.h
index 127cc7e233..52b3594131 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -86,6 +86,15 @@ private:
     std::size_t generation = 0; // Incremented once each time the barrier is used
 };
 
+enum class ThreadPriority : u32 {
+    Low = 0,
+    Normal = 1,
+    High = 2,
+    VeryHigh = 3,
+};
+
+void SetCurrentThreadPriority(ThreadPriority new_priority);
+
 void SetCurrentThreadName(const char* name);
 
 } // namespace Common
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index b02119494e..032b29e339 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -48,6 +48,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
     std::string name = "yuzu:HostTiming";
     MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
     instance.on_thread_init();
     instance.ThreadLoop();
 }
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 63c5788520..32afcf3ae9 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -337,6 +337,7 @@ void CpuManager::RunThread(std::size_t core) {
     }
     MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     auto& data = core_data[core];
     data.enter_barrier = std::make_unique<Common::Event>();
     data.exit_barrier = std::make_unique<Common::Event>();
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 323185bfcd..738c6f0c1f 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -22,6 +22,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
     std::string name = "yuzu:GPU";
     MicroProfileOnThreadCreate(name.c_str());
     Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     system.RegisterHostThread();
 
     // Wait for first GPU command before acquiring the window context
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e5..56524e6f30 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
 #include <utility>
 
 #include "common/microprofile.h"
+#include "common/thread.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
 }
 
 void VKScheduler::WorkerThread() {
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     std::unique_lock lock{mutex};
     do {
         cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });

From 2a40500903fc9684ee2b07c0c757fa07ee81b374 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 12 Apr 2020 19:25:53 -0400
Subject: [PATCH 108/122] Core: Split Microprofile Dynarmic timing per Core

---
 src/core/core.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 3393c33eb6..50656697fe 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,7 +51,10 @@
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU0, "ARM JIT", "Dynarmic CPU 0", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU1, "ARM JIT", "Dynarmic CPU 1", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU2, "ARM JIT", "Dynarmic CPU 2", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU3, "ARM JIT", "Dynarmic CPU 3", MP_RGB(255, 64, 64));
 
 namespace Core {
 
@@ -189,6 +192,11 @@ struct System::Impl {
         is_powered_on = true;
         exit_lock = false;
 
+        microprofile_dynarmic[0] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU0);
+        microprofile_dynarmic[1] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU1);
+        microprofile_dynarmic[2] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU2);
+        microprofile_dynarmic[3] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU3);
+
         LOG_DEBUG(Core, "Initialized OK");
 
         return ResultStatus::Success;
@@ -396,6 +404,7 @@ struct System::Impl {
     bool is_async_gpu{};
 
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
+    std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_dynarmic{};
 };
 
 System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -747,12 +756,12 @@ void System::RegisterHostThread() {
 
 void System::EnterDynarmicProfile() {
     std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    impl->dynarmic_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(ARM_Jit_Dynarmic));
+    impl->dynarmic_ticks[core] = MicroProfileEnter(impl->microprofile_dynarmic[core]);
 }
 
 void System::ExitDynarmicProfile() {
     std::size_t core = impl->kernel.GetCurrentHostThreadID();
-    MicroProfileLeave(MICROPROFILE_TOKEN(ARM_Jit_Dynarmic), impl->dynarmic_ticks[core]);
+    MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]);
 }
 
 } // namespace Core

From 6a9c53912ac397387f1a7ae3804d5bb6d76bdcac Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 23 Apr 2020 12:58:41 -0400
Subject: [PATCH 109/122] CoreTiming: Correct rebase bugs and other
 miscellaneous things.

---
 src/core/core_timing.cpp       |  2 ++
 src/tests/core/core_timing.cpp | 10 ++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 032b29e339..1aa89a1ccd 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -56,6 +56,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
 void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
     on_thread_init = std::move(on_thread_init_);
     event_fifo_id = 0;
+    shutting_down = false;
     ticks = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
@@ -79,6 +80,7 @@ void CoreTiming::Shutdown() {
 
 void CoreTiming::Pause(bool is_paused) {
     paused = is_paused;
+    pause_event.Set();
 }
 
 void CoreTiming::SyncPause(bool is_paused) {
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 21a5840fb5..5aa339731d 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -16,10 +16,10 @@
 
 namespace {
 // Numbers are chosen randomly to make sure the correct one is given.
-static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
-static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
-static std::array<s64, 5> delays{};
+constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
+std::array<s64, 5> delays{};
 
 std::bitset<CB_IDS.size()> callbacks_ran_flags;
 u64 expected_callback = 0;
@@ -49,6 +49,8 @@ struct ScopeInit final {
     Core::Timing::CoreTiming core_timing;
 };
 
+} // Anonymous namespace
+
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;

From 50a25daf1d5e8a358cab1a1a54f76a487e795653 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 28 Apr 2020 23:15:04 -0400
Subject: [PATCH 110/122] ARMInterface: Correct rebase errors.

---
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 4 ++--
 src/core/arm/unicorn/arm_unicorn.cpp      | 4 ++--
 src/core/arm/unicorn/arm_unicorn.h        | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 56386c1d05..94d96d4d8e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -212,8 +212,8 @@ void ARM_Dynarmic_64::Step() {
 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
                                  bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
                                  std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler, uses_wall_clock},
-      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handler,
+    : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
+      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers,
                                                                       uses_wall_clock,
                                                                       ARM_Unicorn::Arch::AArch64,
                                                                       core_index},
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 1cb71942b9..9f9690454a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -63,9 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers,
                          bool uses_wall_clock, Arch architecture, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler, uses_wall_clock}, core_index{core_index} {
+    : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
 
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index a01751e651..9b7d7f6c25 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -20,7 +20,7 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+    explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers,
                          bool uses_wall_clock, Arch architecture, std::size_t core_index);
     ~ARM_Unicorn() override;
 

From 4bf50eb240d1191705f7e67e2d74dd3cd636ad88 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 2 May 2020 22:03:09 -0400
Subject: [PATCH 111/122] ARMInterface/Externals: Update dynarmic and fit to
 latest version.

---
 externals/dynarmic                        |  2 +-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/externals/dynarmic b/externals/dynarmic
index e7166e8ba7..f4922a97f6 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit e7166e8ba74d7b9c85e87afc0aaf667e7e84cfe0
+Subproject commit f4922a97f6eb4b93decfbd80a881a7eac89d6890
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 94d96d4d8e..ad0d481738 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -195,7 +195,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
         config.enable_fast_dispatch = false;
     }
 
-    // CNTPCT uses wall clock.
+    // Timing
     config.wall_clock_cntpct = uses_wall_clock;
 
     return std::make_shared<Dynarmic::A64::Jit>(config);
@@ -271,7 +271,7 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
 }
 
 void ARM_Dynarmic_64::ChangeProcessorId(std::size_t new_core_id) {
-    jit->ChangeProcessorId(new_core_id);
+    jit->ChangeProcessorID(new_core_id);
 }
 
 void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
@@ -358,31 +358,31 @@ void DynarmicExclusiveMonitor::ClearExclusive() {
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
-    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, 1, [&](u8 expected) -> bool {
+    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
         return memory.WriteExclusive8(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
-    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, 2, [&](u16 expected) -> bool {
+    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
         return memory.WriteExclusive16(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
-    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, 4, [&](u32 expected) -> bool {
+    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
         return memory.WriteExclusive32(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
-    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, 8, [&](u64 expected) -> bool {
+    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
         return memory.WriteExclusive64(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
-    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, 16, [&](u128 expected) -> bool {
+    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool {
         return memory.WriteExclusive128(vaddr, value, expected);
     });
 }

From 1be066e06bfa64304917ef9ebeadfda7184ddd30 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 8 May 2020 18:53:13 -0400
Subject: [PATCH 112/122] Clang Format.

---
 src/common/atomic_ops.cpp              | 14 ++++++------
 src/common/atomic_ops.h                |  2 +-
 src/common/thread.cpp                  | 30 +++++++++++++-------------
 src/core/arm/unicorn/arm_unicorn.cpp   |  4 ++--
 src/core/arm/unicorn/arm_unicorn.h     |  4 ++--
 src/core/core.h                        |  2 --
 src/core/hle/kernel/mutex.cpp          | 11 +++++-----
 src/core/hle/kernel/mutex.h            |  3 ++-
 src/core/hle/kernel/physical_core.h    |  7 +++---
 src/core/hle/kernel/server_session.cpp |  2 +-
 src/yuzu/bootmanager.cpp               |  1 -
 src/yuzu/bootmanager.h                 |  3 ++-
 src/yuzu/debugger/wait_tree.cpp        |  5 +++--
 src/yuzu/main.cpp                      |  9 +++++---
 14 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
index 65cdfb4fd5..6b22361141 100644
--- a/src/common/atomic_ops.cpp
+++ b/src/common/atomic_ops.cpp
@@ -35,26 +35,26 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
 }
 
 bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
-    return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], (__int64*)expected.data()) != 0;
+    return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0],
+                                          (__int64*)expected.data()) != 0;
 }
 
-
 #else
 
 bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
-    return __sync_bool_compare_and_swap (pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, value, expected);
 }
 
 bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
-    return __sync_bool_compare_and_swap (pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, value, expected);
 }
 
 bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
-    return __sync_bool_compare_and_swap (pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, value, expected);
 }
 
 bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
-    return __sync_bool_compare_and_swap (pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, value, expected);
 }
 
 bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
@@ -62,7 +62,7 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
     unsigned __int128 expected_a;
     std::memcpy(&value_a, value.data(), sizeof(u128));
     std::memcpy(&expected_a, expected.data(), sizeof(u128));
-    return __sync_bool_compare_and_swap ((unsigned __int128*)pointer, value_a, expected_a);
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, value_a, expected_a);
 }
 
 #endif
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
index 22cb3a402b..e6181d5218 100644
--- a/src/common/atomic_ops.h
+++ b/src/common/atomic_ops.h
@@ -8,7 +8,7 @@
 
 namespace Common {
 
-bool AtomicCompareAndSwap(u8 volatile * pointer, u8 value, u8 expected);
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected);
 bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected);
 bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected);
 bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected);
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 33c8437f52..8e5935e6aa 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -31,21 +31,21 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
     auto handle = GetCurrentThread();
     int windows_priority = 0;
     switch (new_priority) {
-        case ThreadPriority::Low:
-            windows_priority = THREAD_PRIORITY_BELOW_NORMAL;
-            break;
-        case ThreadPriority::Normal:
-            windows_priority = THREAD_PRIORITY_NORMAL;
-            break;
-        case ThreadPriority::High:
-            windows_priority = THREAD_PRIORITY_ABOVE_NORMAL;
-            break;
-        case ThreadPriority::VeryHigh:
-            windows_priority = THREAD_PRIORITY_HIGHEST;
-            break;
-        default:
-            windows_priority = THREAD_PRIORITY_NORMAL;
-            break;
+    case ThreadPriority::Low:
+        windows_priority = THREAD_PRIORITY_BELOW_NORMAL;
+        break;
+    case ThreadPriority::Normal:
+        windows_priority = THREAD_PRIORITY_NORMAL;
+        break;
+    case ThreadPriority::High:
+        windows_priority = THREAD_PRIORITY_ABOVE_NORMAL;
+        break;
+    case ThreadPriority::VeryHigh:
+        windows_priority = THREAD_PRIORITY_HIGHEST;
+        break;
+    default:
+        windows_priority = THREAD_PRIORITY_NORMAL;
+        break;
     }
     SetThreadPriority(handle, windows_priority);
 }
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 9f9690454a..35e8f42e86 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -63,8 +63,8 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
     return false;
 }
 
-ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers,
-                         bool uses_wall_clock, Arch architecture, std::size_t core_index)
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+                         Arch architecture, std::size_t core_index)
     : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {
     const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
     CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 9b7d7f6c25..8ace8b86fa 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -20,8 +20,8 @@ public:
         AArch64, // 64-bit ARM
     };
 
-    explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers,
-                         bool uses_wall_clock, Arch architecture, std::size_t core_index);
+    explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+                         Arch architecture, std::size_t core_index);
     ~ARM_Unicorn() override;
 
     void SetPC(u64 pc) override;
diff --git a/src/core/core.h b/src/core/core.h
index 87df79d573..d2d1fcc5be 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -148,8 +148,6 @@ public:
      */
     ResultStatus Pause();
 
-
-
     /**
      * Step the CPU one instruction
      * @return Result status, indicating whether or not the operation succeeded.
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 32dc1ffaef..8f6c944d17 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -9,7 +9,6 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -126,11 +125,11 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
 
 std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
                                                              VAddr address) {
-     // The mutex address must be 4-byte aligned
-     if ((address % sizeof(u32)) != 0) {
-         LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
-         return {ERR_INVALID_ADDRESS, nullptr};
-     }
+    // The mutex address must be 4-byte aligned
+    if ((address % sizeof(u32)) != 0) {
+        LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
+        return {ERR_INVALID_ADDRESS, nullptr};
+    }
 
     auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
     if (new_owner == nullptr) {
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index bce06ecea9..3b81dc3dfa 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -29,7 +29,8 @@ public:
                           Handle requesting_thread_handle);
 
     /// Unlocks a mutex for owner at address
-    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner, VAddr address);
+    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner,
+                                                          VAddr address);
 
     /// Releases the mutex at the specified address.
     ResultCode Release(VAddr address);
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 751b994a78..85f6dec055 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -10,7 +10,7 @@
 #include "core/arm/cpu_interrupt_handler.h"
 
 namespace Common {
-    class SpinLock;
+class SpinLock;
 }
 
 namespace Kernel {
@@ -27,9 +27,8 @@ namespace Kernel {
 
 class PhysicalCore {
 public:
-    PhysicalCore(Core::System& system, std::size_t id,
-                               Kernel::Scheduler& scheduler,
-                               Core::CPUInterruptHandler& interrupt_handler);
+    PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
+                 Core::CPUInterruptHandler& interrupt_handler);
     ~PhysicalCore();
 
     PhysicalCore(const PhysicalCore&) = delete;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index e988a3f222..7b23a6889e 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -17,9 +17,9 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
-#include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/memory.h"
 
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 079a254e0d..2467c3e159 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -68,7 +68,6 @@ void EmuThread::run() {
 
     gpu.ReleaseContext();
 
-
     // Holds whether the cpu was running during the last iteration,
     // so that the DebugModeLeft signal can be emitted before the
     // next execution step
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 768568b3e9..6c59b4d5c7 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -62,7 +62,8 @@ public:
         if (!running) {
             running_wait.Set();
             /// Wait until effectively paused
-            while (running_guard);
+            while (running_guard)
+                ;
         }
     }
 
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 0226ae2e28..9bb0a01097 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -127,11 +127,12 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
         return list;
     }
 
-    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), thread.GetContext64());
+    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(),
+                                                                  thread.GetContext64());
 
     for (auto& entry : backtrace) {
         std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address,
-                  entry.original_address, entry.offset, entry.name);
+                                    entry.original_address, entry.offset, entry.name);
         list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s)));
     }
 
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 0e4e4b83f0..c187a18ad4 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -520,7 +520,8 @@ void GMainWindow::InitializeWidgets() {
         if (emulation_running) {
             return;
         }
-        bool is_async = !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        bool is_async =
+            !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
         Settings::values.use_asynchronous_gpu_emulation = is_async;
         async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
         Settings::Apply();
@@ -538,7 +539,8 @@ void GMainWindow::InitializeWidgets() {
             return;
         }
         Settings::values.use_multi_core = !Settings::values.use_multi_core;
-        bool is_async = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+        bool is_async =
+            Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
         Settings::values.use_asynchronous_gpu_emulation = is_async;
         async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
         multicore_status_button->setChecked(Settings::values.use_multi_core);
@@ -1933,7 +1935,8 @@ void GMainWindow::OnConfigure() {
 
     dock_status_button->setChecked(Settings::values.use_docked_mode);
     multicore_status_button->setChecked(Settings::values.use_multi_core);
-    Settings::values.use_asynchronous_gpu_emulation = Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+    Settings::values.use_asynchronous_gpu_emulation =
+        Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
     async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
 
 #ifdef HAS_VULKAN

From 57f04b5931e5536b7702f4c695ae5c1092d02718 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 9 May 2020 15:02:23 -0400
Subject: [PATCH 113/122] YuzuQT: Hide Speed UI on Multicore.

---
 src/yuzu/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index c187a18ad4..dc8e0ffa24 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -2065,7 +2065,7 @@ void GMainWindow::UpdateStatusBar() {
     game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0));
     emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
 
-    emu_speed_label->setVisible(true);
+    emu_speed_label->setVisible(!Settings::values.use_multi_core);
     game_fps_label->setVisible(true);
     emu_frametime_label->setVisible(true);
 }

From 8b7e0c345ff1e5c9baeb45c9a71e6f8ca4ea0167 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 13 May 2020 14:17:34 -0400
Subject: [PATCH 114/122] Kernel: Correct Host Context on Threads and
 Scheduler.

---
 src/core/hle/kernel/scheduler.cpp | 16 ++++++++--------
 src/core/hle/kernel/scheduler.h   |  2 +-
 src/core/hle/kernel/thread.cpp    |  2 +-
 src/core/hle/kernel/thread.h      |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 43c924fa02..61b8a396a8 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -736,15 +736,15 @@ void Scheduler::SwitchContext() {
         previous_thread->context_guard.unlock();
     }
 
-    std::shared_ptr<Common::Fiber> old_context;
+    std::shared_ptr<Common::Fiber>* old_context;
     if (previous_thread != nullptr) {
-        old_context = previous_thread->GetHostContext();
+        old_context = &previous_thread->GetHostContext();
     } else {
-        old_context = idle_thread->GetHostContext();
+        old_context = &idle_thread->GetHostContext();
     }
     guard.unlock();
 
-    Common::Fiber::YieldTo(old_context, switch_fiber);
+    Common::Fiber::YieldTo(*old_context, switch_fiber);
     /// When a thread wakes up, the scheduler may have changed to other in another core.
     auto& next_scheduler = system.Kernel().CurrentScheduler();
     next_scheduler.SwitchContextStep2();
@@ -774,13 +774,13 @@ void Scheduler::SwitchToCurrent() {
                     break;
                 }
             }
-            std::shared_ptr<Common::Fiber> next_context;
+            std::shared_ptr<Common::Fiber>* next_context;
             if (current_thread != nullptr) {
-                next_context = current_thread->GetHostContext();
+                next_context = &current_thread->GetHostContext();
             } else {
-                next_context = idle_thread->GetHostContext();
+                next_context = &idle_thread->GetHostContext();
             }
-            Common::Fiber::YieldTo(switch_fiber, next_context);
+            Common::Fiber::YieldTo(switch_fiber, *next_context);
         }
     }
 }
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 10dc4b832a..3481071603 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -236,7 +236,7 @@ public:
 
     void OnThreadStart();
 
-    std::shared_ptr<Common::Fiber> ControlContext() {
+    std::shared_ptr<Common::Fiber>& ControlContext() {
         return switch_fiber;
     }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index fba2a9c85a..2b10926978 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -150,7 +150,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
     context.fpcr = 0;
 }
 
-std::shared_ptr<Common::Fiber> Thread::GetHostContext() const {
+std::shared_ptr<Common::Fiber>& Thread::GetHostContext() {
     return host_context;
 }
 
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 3ae0df6ef8..c0342c4625 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -342,7 +342,7 @@ public:
         was_running = value;
     }
 
-    std::shared_ptr<Common::Fiber> GetHostContext() const;
+    std::shared_ptr<Common::Fiber>& GetHostContext();
 
     ThreadStatus GetStatus() const {
         return status;

From 5e3b524e869efd269effa1f47759e205c7cc1af0 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 14 May 2020 14:44:03 -0400
Subject: [PATCH 115/122] Common/AtomicOps: Correct GCC Intrinsic argument
 ordering.

---
 src/common/atomic_ops.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
index 6b22361141..1098e21ffe 100644
--- a/src/common/atomic_ops.cpp
+++ b/src/common/atomic_ops.cpp
@@ -42,19 +42,19 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
 #else
 
 bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
-    return __sync_bool_compare_and_swap(pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, expected, value);
 }
 
 bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
-    return __sync_bool_compare_and_swap(pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, expected, value);
 }
 
 bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
-    return __sync_bool_compare_and_swap(pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, expected, value);
 }
 
 bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
-    return __sync_bool_compare_and_swap(pointer, value, expected);
+    return __sync_bool_compare_and_swap(pointer, expected, value);
 }
 
 bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
@@ -62,7 +62,7 @@ bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
     unsigned __int128 expected_a;
     std::memcpy(&value_a, value.data(), sizeof(u128));
     std::memcpy(&expected_a, expected.data(), sizeof(u128));
-    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, value_a, expected_a);
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
 }
 
 #endif

From 2b183f879832a918c9a6a01acb394632012a549d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 14 May 2020 14:48:50 -0400
Subject: [PATCH 116/122] ARMDynarmicInterface: Correct GCC Build Errors.

---
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +++---
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index bd0aa3a7b9..2dd913077e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -90,11 +90,11 @@ public:
     u64 GetTicksRemaining() override {
         if (parent.uses_wall_clock) {
             if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return std::max<s64>(1000U, 0);
+                return 1000U;
             }
-            return 0ULL;
+            return 0U;
         }
-        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
+        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
     ARM_Dynarmic_32& parent;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index ad0d481738..987c633af1 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -143,11 +143,11 @@ public:
     u64 GetTicksRemaining() override {
         if (parent.uses_wall_clock) {
             if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return std::max<s64>(1000U, 0);
+                return 1000U;
             }
-            return 0ULL;
+            return 0U;
         }
-        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
+        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
     u64 GetCNTPCT() override {

From 19ccc9ffa1b1026cce0efd891a0532abc129ce0e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 14 May 2020 16:17:44 -0400
Subject: [PATCH 117/122] Tests/CoreTiming: Correct host timing tests.

---
 src/tests/core/core_timing.cpp |  29 ++++---
 src/tests/core/host_timing.cpp | 142 ---------------------------------
 2 files changed, 14 insertions(+), 157 deletions(-)
 delete mode 100644 src/tests/core/host_timing.cpp

diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 5aa339731d..e66db1940e 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -23,7 +23,6 @@ std::array<s64, 5> delays{};
 
 std::bitset<CB_IDS.size()> callbacks_ran_flags;
 u64 expected_callback = 0;
-s64 lateness = 0;
 
 template <unsigned int IDX>
 void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
@@ -35,8 +34,6 @@ void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
     ++expected_callback;
 }
 
-u64 callbacks_done = 0;
-
 struct ScopeInit final {
     ScopeInit() {
         core_timing.SetMulticore(true);
@@ -49,6 +46,20 @@ struct ScopeInit final {
     Core::Timing::CoreTiming core_timing;
 };
 
+#pragma optimize("", off)
+
+u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) {
+    u64 start = core_timing.GetGlobalTimeNs().count();
+    u64 placebo = 0;
+    for (std::size_t i = 0; i < 1000; i++) {
+        placebo += core_timing.GetGlobalTimeNs().count();
+    }
+    u64 end = core_timing.GetGlobalTimeNs().count();
+    return (end - start);
+}
+
+#pragma optimize("", on)
+
 } // Anonymous namespace
 
 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
@@ -89,18 +100,6 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
     }
 }
 
-#pragma optimize("", off)
-u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) {
-    u64 start = core_timing.GetGlobalTimeNs().count();
-    u64 placebo = 0;
-    for (std::size_t i = 0; i < 1000; i++) {
-        placebo += core_timing.GetGlobalTimeNs().count();
-    }
-    u64 end = core_timing.GetGlobalTimeNs().count();
-    return (end - start);
-}
-#pragma optimize("", on)
-
 TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") {
     ScopeInit guard;
     auto& core_timing = guard.core_timing;
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
deleted file mode 100644
index 5562540983..0000000000
--- a/src/tests/core/host_timing.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include <catch2/catch.hpp>
-
-#include <array>
-#include <bitset>
-#include <cstdlib>
-#include <memory>
-#include <string>
-
-#include "common/file_util.h"
-#include "core/core.h"
-#include "core/host_timing.h"
-
-// Numbers are chosen randomly to make sure the correct one is given.
-static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
-static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
-static std::array<s64, 5> delays{};
-
-static std::bitset<CB_IDS.size()> callbacks_ran_flags;
-static u64 expected_callback = 0;
-
-template <unsigned int IDX>
-void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
-    static_assert(IDX < CB_IDS.size(), "IDX out of range");
-    callbacks_ran_flags.set(IDX);
-    REQUIRE(CB_IDS[IDX] == userdata);
-    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
-    delays[IDX] = nanoseconds_late;
-    ++expected_callback;
-}
-
-struct ScopeInit final {
-    ScopeInit() {
-        core_timing.Initialize();
-    }
-    ~ScopeInit() {
-        core_timing.Shutdown();
-    }
-
-    Core::HostTiming::CoreTiming core_timing;
-};
-
-#pragma optimize("", off)
-
-static u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
-    u64 start = core_timing.GetGlobalTimeNs().count();
-    u64 placebo = 0;
-    for (std::size_t i = 0; i < 1000; i++) {
-        placebo += core_timing.GetGlobalTimeNs().count();
-    }
-    u64 end = core_timing.GetGlobalTimeNs().count();
-    return (end - start);
-}
-
-#pragma optimize("", on)
-
-TEST_CASE("HostTiming[BasicOrder]", "[core]") {
-    ScopeInit guard;
-    auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
-    };
-
-    expected_callback = 0;
-
-    core_timing.SyncPause(true);
-
-    u64 one_micro = 1000U;
-    for (std::size_t i = 0; i < events.size(); i++) {
-        u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
-    }
-    /// test pause
-    REQUIRE(callbacks_ran_flags.none());
-
-    core_timing.Pause(false); // No need to sync
-
-    while (core_timing.HasPendingEvents())
-        ;
-
-    REQUIRE(callbacks_ran_flags.all());
-
-    for (std::size_t i = 0; i < delays.size(); i++) {
-        const double delay = static_cast<double>(delays[i]);
-        const double micro = delay / 1000.0f;
-        const double mili = micro / 1000.0f;
-        printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
-    }
-}
-
-TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
-    ScopeInit guard;
-    auto& core_timing = guard.core_timing;
-    std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
-        Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
-        Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
-        Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
-        Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
-        Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
-    };
-
-    core_timing.SyncPause(true);
-    core_timing.SyncPause(false);
-
-    expected_callback = 0;
-
-    u64 start = core_timing.GetGlobalTimeNs().count();
-    u64 one_micro = 1000U;
-    for (std::size_t i = 0; i < events.size(); i++) {
-        u64 order = calls_order[i];
-        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
-    }
-    u64 end = core_timing.GetGlobalTimeNs().count();
-    const double scheduling_time = static_cast<double>(end - start);
-    const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
-
-    while (core_timing.HasPendingEvents())
-        ;
-
-    REQUIRE(callbacks_ran_flags.all());
-
-    for (std::size_t i = 0; i < delays.size(); i++) {
-        const double delay = static_cast<double>(delays[i]);
-        const double micro = delay / 1000.0f;
-        const double mili = micro / 1000.0f;
-        printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
-    }
-
-    const double micro = scheduling_time / 1000.0f;
-    const double mili = micro / 1000.0f;
-    printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
-    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
-           timer_time / 1000000.f);
-}

From 3c2924e4d2d5d085bf2dbce78ce04f231981a2c8 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 18 May 2020 13:08:53 -0400
Subject: [PATCH 118/122] Common/NativeClockx86: Reduce native clock accuracy
 further.

---
 src/common/x64/native_clock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index e853094d20..891a3bbfd8 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -34,7 +34,7 @@ private:
     /// value used to reduce the native clocks accuracy as some apss rely on
     /// undefined behavior where the level of accuracy in the clock shouldn't
     /// be higher.
-    static constexpr u64 inaccuracy_mask = ~(0x100 - 1);
+    static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
 
     SpinLock rtsc_serialize{};
     u64 last_measure{};

From 4ea6508657f588e312749dc7881832bb5afd2e74 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 29 May 2020 13:31:47 -0400
Subject: [PATCH 119/122] Externals: Update Dynarmic.

---
 externals/dynarmic | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/externals/dynarmic b/externals/dynarmic
index f4922a97f6..3a50d444dc 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit f4922a97f6eb4b93decfbd80a881a7eac89d6890
+Subproject commit 3a50d444dcb66c868528dd12057f63dc623d09a5

From b0517b0e7ab0acd847eedca6411d08800d57ea71 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 29 May 2020 15:00:17 -0400
Subject: [PATCH 120/122] Services/NvFlinger: Do vSync in a separate thread on
 Multicore.

---
 src/core/core.cpp                            |  8 +++-
 src/core/core.h                              |  3 ++
 src/core/hle/service/nvflinger/nvflinger.cpp | 49 ++++++++++++++++++--
 src/core/hle/service/nvflinger/nvflinger.h   | 14 ++++++
 4 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 50656697fe..8256ec0fc5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -294,8 +294,6 @@ struct System::Impl {
         service_manager.reset();
         cheat_engine.reset();
         telemetry_session.reset();
-        perf_stats.reset();
-        gpu_core.reset();
         device_memory.reset();
 
         // Close all CPU/threading state
@@ -307,6 +305,8 @@ struct System::Impl {
 
         // Close app loader
         app_loader.reset();
+        gpu_core.reset();
+        perf_stats.reset();
 
         // Clear all applets
         applet_manager.ClearAll();
@@ -764,4 +764,8 @@ void System::ExitDynarmicProfile() {
     MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]);
 }
 
+bool System::IsMulticore() const {
+    return impl->is_multicore;
+}
+
 } // namespace Core
diff --git a/src/core/core.h b/src/core/core.h
index d2d1fcc5be..133ecb8e1c 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -381,6 +381,9 @@ public:
     /// Exit Dynarmic Microprofile
     void ExitDynarmicProfile();
 
+    /// Tells if system is running on multicore.
+    bool IsMulticore() const;
+
 private:
     System();
 
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index b97f713500..2f44d37794 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
+#include "common/thread.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
@@ -30,6 +31,33 @@ namespace Service::NVFlinger {
 constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60);
 constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30);
 
+void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
+    nv_flinger.SplitVSync();
+}
+
+void NVFlinger::SplitVSync() {
+    system.RegisterHostThread();
+    std::string name = "yuzu:VSyncThread";
+    MicroProfileOnThreadCreate(name.c_str());
+    Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+    s64 delay = 0;
+    while (is_running) {
+        guard->lock();
+        const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
+        Compose();
+        const auto ticks = GetNextTicks();
+        const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count();
+        const s64 time_passed = time_end - time_start;
+        const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
+        guard->unlock();
+        if (next_time > 0) {
+            wait_event->WaitFor(std::chrono::nanoseconds{next_time});
+        }
+        delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time;
+    }
+}
+
 NVFlinger::NVFlinger(Core::System& system) : system(system) {
     displays.emplace_back(0, "Default", system);
     displays.emplace_back(1, "External", system);
@@ -47,12 +75,25 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
             this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),
                                                     composition_event);
         });
-
-    system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
+    if (system.IsMulticore()) {
+        is_running = true;
+        wait_event = std::make_unique<Common::Event>();
+        vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
+    } else {
+        system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
+    }
 }
 
 NVFlinger::~NVFlinger() {
-    system.CoreTiming().UnscheduleEvent(composition_event, 0);
+    if (system.IsMulticore()) {
+        is_running = false;
+        wait_event->Set();
+        vsync_thread->join();
+        vsync_thread.reset();
+        wait_event.reset();
+    } else {
+        system.CoreTiming().UnscheduleEvent(composition_event, 0);
+    }
 }
 
 void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -200,10 +241,12 @@ void NVFlinger::Compose() {
 
         auto& gpu = system.GPU();
         const auto& multi_fence = buffer->get().multi_fence;
+        guard->unlock();
         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
             const auto& fence = multi_fence.fences[fence_id];
             gpu.WaitFence(fence.id, fence.value);
         }
+        guard->lock();
 
         MicroProfileFlip();
 
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 02c081494d..4bc3d7ab2e 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,16 +4,22 @@
 
 #pragma once
 
+#include <atomic>
 #include <memory>
 #include <mutex>
 #include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
+#include <thread>
 
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 
+namespace Common {
+class Event;
+} // namespace Common
+
 namespace Core::Timing {
 class CoreTiming;
 struct EventType;
@@ -97,6 +103,10 @@ private:
     /// Finds the layer identified by the specified ID in the desired display.
     const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
 
+    static void VSyncThread(NVFlinger& nv_flinger);
+
+    void SplitVSync();
+
     std::shared_ptr<Nvidia::Module> nvdrv;
 
     std::vector<VI::Display> displays;
@@ -116,6 +126,10 @@ private:
     std::shared_ptr<std::mutex> guard;
 
     Core::System& system;
+
+    std::unique_ptr<std::thread> vsync_thread;
+    std::unique_ptr<Common::Event> wait_event;
+    std::atomic<bool> is_running{};
 };
 
 } // namespace Service::NVFlinger

From 155de533e653a146aa7ead6084656b68760846ff Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 29 May 2020 17:37:37 -0400
Subject: [PATCH 121/122] Common/Kernel: Corrections and small bug fixing.

---
 src/common/wall_clock.cpp   | 7 +------
 src/core/hle/kernel/svc.cpp | 4 ++--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index a46db6bbfb..3afbdb898d 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -68,12 +68,7 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
     const auto& caps = GetCPUCaps();
     u64 rtsc_frequency = 0;
     if (caps.invariant_tsc) {
-        if (caps.base_frequency != 0) {
-            rtsc_frequency = static_cast<u64>(caps.base_frequency) * 1000000U;
-        }
-        if (rtsc_frequency == 0) {
-            rtsc_frequency = EstimateRDTSCFrequency();
-        }
+        rtsc_frequency = EstimateRDTSCFrequency();
     }
     if (rtsc_frequency == 0) {
         return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 781032cd18..013ae9e34b 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -344,9 +344,9 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
             SchedulerLock lock(system.Kernel());
             auto* sync_object = thread->GetHLESyncObject();
             sync_object->RemoveWaitingThread(SharedFrom(thread));
-
-            thread->InvokeHLECallback(SharedFrom(thread));
         }
+
+        thread->InvokeHLECallback(SharedFrom(thread));
     }
 
     return thread->GetSignalingResult();

From d2243eec14c2b214cb133dbd58894205631cb719 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 29 May 2020 17:37:57 -0400
Subject: [PATCH 122/122] Audio: Correct buffer release for host timing.

---
 src/audio_core/stream.cpp | 15 ++++++++++++++-
 src/audio_core/stream.h   |  3 +++
 src/core/core_timing.h    |  5 +++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index ad3c27e69b..92e19c5376 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -66,6 +66,15 @@ s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const {
     return ns.count();
 }
 
+s64 Stream::GetBufferReleaseNSHostTiming(const Buffer& buffer) const {
+    const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
+    /// The DSP signals before playing the last sample; HLE emulates this by dropping one sample.
+    s64 base_samples = std::max<s64>(static_cast<s64>(num_samples) - 1, 0);
+    const auto ns =
+        std::chrono::nanoseconds((static_cast<u64>(base_samples) * 1000000000ULL) / sample_rate);
+    return ns.count();
+}
+
 static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
     const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)};
 
@@ -105,7 +114,11 @@ void Stream::PlayNextBuffer() {
 
     sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
 
-    core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {});
+    if (core_timing.IsHostTiming()) {
+        core_timing.ScheduleEvent(GetBufferReleaseNSHostTiming(*active_buffer), release_event, {});
+    } else {
+        core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {});
+    }
 }
 
 void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 0663ce4354..e309d60fe3 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -98,6 +98,9 @@ private:
     /// Gets the number of core cycles when the specified buffer will be released
     s64 GetBufferReleaseNS(const Buffer& buffer) const;
 
+    /// Gets the delay in nanoseconds before the specified buffer is released under host timing
+    s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const;
+
     u32 sample_rate;                  ///< Sample rate of the stream
     Format format;                    ///< Format of the stream
     float game_volume = 1.0f;         ///< The volume the game currently has set
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index ed5de9b97a..72faaab647 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -72,6 +72,11 @@ public:
         this->is_multicore = is_multicore;
     }
 
+    /// Check if it's using host timing.
+    bool IsHostTiming() const {
+        return is_multicore;
+    }
+
     /// Pauses/Unpauses the execution of the timer thread.
     void Pause(bool is_paused);