Texture Cache: Improve documentation

Texture Cache: Address Feedback
Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios.
2019-12-22 12:29:23 -04:00 · 2019-12-22 12:24:34 -04:00 · 2019-12-22 12:24:34 -04:00 · 2019-12-22 11:23:09 -04:00 · 2019-12-22 11:20:55 -04:00 · 2019-12-21 22:50:28 -05:00
142 changed files with 5518 additions and 2847 deletions
--- a/.appveyor/UtilityFunctions.ps1
+++ b/.appveyor/UtilityFunctions.ps1
@@ -1,39 +0,0 @@
-# Set-up Visual Studio Command Prompt environment for PowerShell
-pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\"
-cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach {
-    if ($_ -match "=") {
-        $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])"
-    }
-}
-popd
-
-function Which ($search_path, $name) {
-    ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName
-}
-
-function GetDeps ($search_path, $binary) {
-    ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach {
-        Which $search_path $_.Value
-    }
-}
-
-function RecursivelyGetDeps ($search_path, $binary) {
-    $final_deps = @()
-    $deps_to_process = GetDeps $search_path $binary
-    while ($deps_to_process.Count -gt 0) {
-        $current, $deps_to_process = $deps_to_process
-        if ($final_deps -contains $current) { continue }
-
-        # Is this a system dll file?
-        # We use the same algorithm that cmake uses to determine this.
-        if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue }
-        if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue }
-        if ($current -match "\\msvc[^\\]+dll") { continue }
-        if ($current -match "\\api-ms-win-[^\\]+dll") { continue }
-
-        $final_deps += $current
-        $new_deps = GetDeps $search_path $current
-        $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)})
-    }
-    return $final_deps
-}
--- a/.ci/scripts/windows/upload.ps1
+++ b/.ci/scripts/windows/upload.ps1
@@ -39,6 +39,7 @@ mkdir "artifacts"
 # Build a tar.xz for the source of the release
 Copy-Item .\license.txt -Destination $MSVC_SOURCE
 Copy-Item .\README.md -Destination $MSVC_SOURCE
+Copy-Item .\CMakeLists.txt -Destination $MSVC_SOURCE
 Copy-Item .\src -Recurse -Destination $MSVC_SOURCE
 Copy-Item .\externals -Recurse -Destination $MSVC_SOURCE
 Copy-Item .\dist -Recurse -Destination $MSVC_SOURCE
@@ -60,4 +61,4 @@ Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Ite

 Get-ChildItem . -Filter "*.zip" | Copy-Item -destination "artifacts"
 Get-ChildItem . -Filter "*.7z" | Copy-Item -destination "artifacts"
-Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
+Get-ChildItem . -Filter "*.tar.xz" | Copy-Item -destination "artifacts"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ option(ENABLE_VULKAN "Enables Vulkan backend" ON)

 option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)

-if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
+if(EXISTS ${PROJECT_SOURCE_DIR}/hooks/pre-commit AND NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
    message(STATUS "Copying pre-commit hook")
    file(COPY hooks/pre-commit
        DESTINATION ${PROJECT_SOURCE_DIR}/.git/hooks)
@@ -49,7 +49,10 @@ function(check_submodules_present)
        endif()
    endforeach()
 endfunction()
-check_submodules_present()
+
+if(EXISTS ${PROJECT_SOURCE_DIR}/.gitmodules)
+    check_submodules_present()
+endif()

 configure_file(${PROJECT_SOURCE_DIR}/dist/compatibility_list/compatibility_list.qrc
               ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,178 +0,0 @@
-# shallow clone
-clone_depth: 10
-
-cache:
-  - C:\ProgramData\chocolatey\bin -> appveyor.yml
-  - C:\ProgramData\chocolatey\lib -> appveyor.yml
-
-os: Visual Studio 2017
-
-environment:
-  # Tell msys2 to add mingw64 to the path
-  MSYSTEM: MINGW64
-  # Tell msys2 to inherit the current directory when starting the shell
-  CHERE_INVOKING: 1
-  matrix:
-    - BUILD_TYPE: msvc
-    - BUILD_TYPE: mingw
-
-platform:
-  - x64
-
-configuration:
-  - Release
-
-install:
-  - git submodule update --init --recursive
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'mingw') {
-          $dependencies = "mingw64/mingw-w64-x86_64-cmake",
-                          "mingw64/mingw-w64-x86_64-qt5",
-                          "mingw64/mingw-w64-x86_64-SDL2"
-          # redirect err to null to prevent warnings from becoming errors
-          # workaround to prevent pacman from failing due to cyclical dependencies
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S mingw64/mingw-w64-x86_64-freetype mingw64/mingw-w64-x86_64-fontconfig" 2> $null
-          C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S $dependencies" 2> $null
-        }
-
-before_build:
-  - mkdir %BUILD_TYPE%_build
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1 && exit 0'
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON .. 2>&1"
-        }
-  - cd ..
-
-build_script:
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # https://www.appveyor.com/docs/build-phase
-          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
-        }
-
-after_build:
-  - ps: |
-        $GITDATE = $(git show -s --date=short --format='%ad') -replace "-",""
-        $GITREV = $(git show -s --format='%h')
-
-        # Find out which kind of release we are producing by tag name
-        if ($env:APPVEYOR_REPO_TAG_NAME) {
-          $RELEASE_DIST, $RELEASE_VERSION = $env:APPVEYOR_REPO_TAG_NAME.split('-')
-        } else {
-          # There is no repo tag - make assumptions
-          $RELEASE_DIST = "head"
-        }
-
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          # Where are these spaces coming from? Regardless, let's remove them
-          $MSVC_BUILD_ZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace " ", ""
-          $MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
-
-          # set the build names as env vars so the artifacts can upload them
-          $env:BUILD_ZIP = $MSVC_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
-          $env:BUILD_UPDATE = $MSVC_SEVENZIP
-
-          $BUILD_DIR = ".\msvc_build\bin\Release"
-
-          # Make a debug symbol upload
-          mkdir pdb
-          Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
-          7z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
-          rm "$BUILD_DIR\*.pdb"
-
-          mkdir $RELEASE_DIST
-          # get rid of extra exes by copying everything over, then deleting all the exes, then copying just the exes we want
-          Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
-          rm "$RELEASE_DIST\*.exe"
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item .\license.txt -Destination $RELEASE_DIST
-          Copy-Item .\README.md -Destination $RELEASE_DIST
-          7z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MSVC_SEVENZIP $RELEASE_DIST
-        } else {
-          $MINGW_BUILD_ZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.zip" -replace " ", ""
-          $MINGW_SEVENZIP = "yuzu-windows-mingw-$GITDATE-$GITREV.7z" -replace " ", ""
-          # not going to bother adding separate debug symbols for mingw, so just upload a README for it
-          # if someone wants to add them, change mingw to compile with -g and use objdump and strip to separate the symbols from the binary
-          $MINGW_NO_DEBUG_SYMBOLS = "README_No_Debug_Symbols.txt"
-          Set-Content -Path $MINGW_NO_DEBUG_SYMBOLS -Value "This is a workaround for Appveyor since msvc has debug symbols but mingw doesnt" -Force
-
-          # store the build information in env vars so we can use them as artifacts
-          $env:BUILD_ZIP = $MINGW_BUILD_ZIP
-          $env:BUILD_SYMBOLS = $MINGW_NO_DEBUG_SYMBOLS
-          $env:BUILD_UPDATE = $MINGW_SEVENZIP
-
-          $CMAKE_SOURCE_DIR = "$env:APPVEYOR_BUILD_FOLDER"
-          $CMAKE_BINARY_DIR = "$CMAKE_SOURCE_DIR/mingw_build/bin"
-          $RELEASE_DIST = $RELEASE_DIST + "-mingw"
-
-          mkdir $RELEASE_DIST
-          mkdir $RELEASE_DIST/platforms
-          mkdir $RELEASE_DIST/styles
-          mkdir $RELEASE_DIST/imageformats
-
-          # copy the compiled binaries and other release files to the release folder
-          Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST
-          Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST
-
-          # copy the qt windows plugin dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/platforms/qwindows.dll" -force -destination "$RELEASE_DIST/platforms"
-
-          # copy the qt windows vista style dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/styles/qwindowsvistastyle.dll" -force -destination "$RELEASE_DIST/styles"
-
-          # copy the qt jpeg imageformat dll to platforms
-          Copy-Item -path "C:/msys64/mingw64/share/qt5/plugins/imageformats/qjpeg.dll" -force -destination "$RELEASE_DIST/imageformats"
-
-          # copy all the dll dependencies to the release folder
-          . "./.appveyor/UtilityFunctions.ps1"
-          $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH"
-          $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\yuzu_cmd.exe"
-          $MingwDLLs += RecursivelyGetDeps $DLLSearchPath  "$RELEASE_DIST\imageformats\qjpeg.dll"
-          Write-Host "Detected the following dependencies:"
-          Write-Host $MingwDLLs
-          foreach ($file in $MingwDLLs) {
-            Copy-Item -path "$file" -force -destination "$RELEASE_DIST"
-          }
-
-          7z a -tzip $MINGW_BUILD_ZIP $RELEASE_DIST\*
-          7z a $MINGW_SEVENZIP $RELEASE_DIST
-        }
-
-test_script:
-  - cd %BUILD_TYPE%_build
-  - ps: |
-        if ($env:BUILD_TYPE -eq 'msvc') {
-          ctest -VV -C Release
-        } else {
-          C:\msys64\usr\bin\bash.exe -lc "ctest -VV -C Release"
-        }
-  - cd ..
-
-artifacts:
-  - path: $(BUILD_ZIP)
-    name: build
-    type: zip
-
-deploy:
-  provider: GitHub
-  release: $(appveyor_repo_tag_name)
-  auth_token:
-    secure: QqePPnXbkzmXct5c8hZ2X5AbsthbI6cS1Sr+VBzcD8oUOIjfWJJKXVAQGUbQAbb0
-  artifact: update,build
-  draft: false
-  prerelease: false
-  on:
-    appveyor_repo_tag: true
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
--- a/externals/boost
+++ b/externals/boost
--- a/externals/sirit
+++ b/externals/sirit
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -36,9 +36,9 @@ public:
    }

    void SetWaveIndex(std::size_t index);
-    std::vector<s16> DequeueSamples(std::size_t sample_count);
+    std::vector<s16> DequeueSamples(std::size_t sample_count, Memory::Memory& memory);
    void UpdateState();
-    void RefreshBuffer();
+    void RefreshBuffer(Memory::Memory& memory);

 private:
    bool is_in_use{};
@@ -66,17 +66,18 @@ public:
        return info;
    }

-    void UpdateState();
+    void UpdateState(Memory::Memory& memory);

 private:
    EffectOutStatus out_status{};
    EffectInStatus info{};
 };
-AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
+AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Memory::Memory& memory_,
+                             AudioRendererParameter params,
                             std::shared_ptr<Kernel::WritableEvent> buffer_event,
                             std::size_t instance_number)
    : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
-      effects(params.effect_count) {
+      effects(params.effect_count), memory{memory_} {

    audio_out = std::make_unique<AudioCore::AudioOut>();
    stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
@@ -162,7 +163,7 @@ std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_
    }

    for (auto& effect : effects) {
-        effect.UpdateState();
+        effect.UpdateState(memory);
    }

    // Release previous buffers and queue next ones for playback
@@ -206,13 +207,14 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
    is_refresh_pending = true;
 }

-std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count) {
+std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count,
+                                                           Memory::Memory& memory) {
    if (!IsPlaying()) {
        return {};
    }

    if (is_refresh_pending) {
-        RefreshBuffer();
+        RefreshBuffer(memory);
    }

    const std::size_t max_size{samples.size() - offset};
@@ -256,10 +258,11 @@ void AudioRenderer::VoiceState::UpdateState() {
    is_in_use = info.is_in_use;
 }

-void AudioRenderer::VoiceState::RefreshBuffer() {
-    std::vector<s16> new_samples(info.wave_buffer[wave_index].buffer_sz / sizeof(s16));
-    Memory::ReadBlock(info.wave_buffer[wave_index].buffer_addr, new_samples.data(),
-                      info.wave_buffer[wave_index].buffer_sz);
+void AudioRenderer::VoiceState::RefreshBuffer(Memory::Memory& memory) {
+    const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr;
+    const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz;
+    std::vector<s16> new_samples(wave_buffer_size / sizeof(s16));
+    memory.ReadBlock(wave_buffer_address, new_samples.data(), wave_buffer_size);

    switch (static_cast<Codec::PcmFormat>(info.sample_format)) {
    case Codec::PcmFormat::Int16: {
@@ -269,7 +272,7 @@ void AudioRenderer::VoiceState::RefreshBuffer() {
    case Codec::PcmFormat::Adpcm: {
        // Decode ADPCM to PCM16
        Codec::ADPCM_Coeff coeffs;
-        Memory::ReadBlock(info.additional_params_addr, coeffs.data(), sizeof(Codec::ADPCM_Coeff));
+        memory.ReadBlock(info.additional_params_addr, coeffs.data(), sizeof(Codec::ADPCM_Coeff));
        new_samples = Codec::DecodeADPCM(reinterpret_cast<u8*>(new_samples.data()),
                                         new_samples.size() * sizeof(s16), coeffs, adpcm_state);
        break;
@@ -307,18 +310,18 @@ void AudioRenderer::VoiceState::RefreshBuffer() {
    is_refresh_pending = false;
 }

-void AudioRenderer::EffectState::UpdateState() {
+void AudioRenderer::EffectState::UpdateState(Memory::Memory& memory) {
    if (info.is_new) {
        out_status.state = EffectStatus::New;
    } else {
        if (info.type == Effect::Aux) {
-            ASSERT_MSG(Memory::Read32(info.aux_info.return_buffer_info) == 0,
+            ASSERT_MSG(memory.Read32(info.aux_info.return_buffer_info) == 0,
                       "Aux buffers tried to update");
-            ASSERT_MSG(Memory::Read32(info.aux_info.send_buffer_info) == 0,
+            ASSERT_MSG(memory.Read32(info.aux_info.send_buffer_info) == 0,
                       "Aux buffers tried to update");
-            ASSERT_MSG(Memory::Read32(info.aux_info.return_buffer_base) == 0,
+            ASSERT_MSG(memory.Read32(info.aux_info.return_buffer_base) == 0,
                       "Aux buffers tried to update");
-            ASSERT_MSG(Memory::Read32(info.aux_info.send_buffer_base) == 0,
+            ASSERT_MSG(memory.Read32(info.aux_info.send_buffer_base) == 0,
                       "Aux buffers tried to update");
        }
    }
@@ -340,7 +343,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
        std::size_t offset{};
        s64 samples_remaining{BUFFER_SIZE};
        while (samples_remaining > 0) {
-            const std::vector<s16> samples{voice.DequeueSamples(samples_remaining)};
+            const std::vector<s16> samples{voice.DequeueSamples(samples_remaining, memory)};

            if (samples.empty()) {
                break;
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -22,6 +22,10 @@ namespace Kernel {
 class WritableEvent;
 }

+namespace Memory {
+class Memory;
+}
+
 namespace AudioCore {

 class AudioOut;
@@ -217,7 +221,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size

 class AudioRenderer {
 public:
-    AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
+    AudioRenderer(Core::Timing::CoreTiming& core_timing, Memory::Memory& memory_,
+                  AudioRendererParameter params,
                  std::shared_ptr<Kernel::WritableEvent> buffer_event, std::size_t instance_number);
    ~AudioRenderer();

@@ -238,7 +243,8 @@ private:
    std::vector<VoiceState> voices;
    std::vector<EffectState> effects;
    std::unique_ptr<AudioOut> audio_out;
-    AudioCore::StreamPtr stream;
+    StreamPtr stream;
+    Memory::Memory& memory;
 };

 } // namespace AudioCore
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -37,7 +37,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo
    : sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
      sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {

-    release_event = core_timing.RegisterEvent(
+    release_event = Core::Timing::CreateEvent(
        name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(); });
 }

--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -98,18 +98,19 @@ private:
    /// Gets the number of core cycles when the specified buffer will be released
    s64 GetBufferReleaseCycles(const Buffer& buffer) const;

-    u32 sample_rate;                          ///< Sample rate of the stream
-    Format format;                            ///< Format of the stream
-    float game_volume = 1.0f;                 ///< The volume the game currently has set
-    ReleaseCallback release_callback;         ///< Buffer release callback for the stream
-    State state{State::Stopped};              ///< Playback state of the stream
-    Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
-    BufferPtr active_buffer;                  ///< Actively playing buffer in the stream
-    std::queue<BufferPtr> queued_buffers;     ///< Buffers queued to be played in the stream
-    std::queue<BufferPtr> released_buffers;   ///< Buffers recently released from the stream
-    SinkStream& sink_stream;                  ///< Output sink for the stream
-    Core::Timing::CoreTiming& core_timing;    ///< Core timing instance.
-    std::string name;                         ///< Name of the stream, must be unique
+    u32 sample_rate;                  ///< Sample rate of the stream
+    Format format;                    ///< Format of the stream
+    float game_volume = 1.0f;         ///< The volume the game currently has set
+    ReleaseCallback release_callback; ///< Buffer release callback for the stream
+    State state{State::Stopped};      ///< Playback state of the stream
+    std::shared_ptr<Core::Timing::EventType>
+        release_event;                      ///< Core timing release event for the stream
+    BufferPtr active_buffer;                ///< Actively playing buffer in the stream
+    std::queue<BufferPtr> queued_buffers;   ///< Buffers queued to be played in the stream
+    std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
+    SinkStream& sink_stream;                ///< Output sink for the stream
+    Core::Timing::CoreTiming& core_timing;  ///< Core timing instance.
+    std::string name;                       ///< Name of the stream, must be unique
 };

 using StreamPtr = std::shared_ptr<Stream>;
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,9 +46,16 @@ public:
        ElementPtr* new_ptr = new ElementPtr();
        write_ptr->next.store(new_ptr, std::memory_order_release);
        write_ptr = new_ptr;
-        cv.notify_one();

-        ++size;
+        const size_t previous_size{size++};
+
+        // Acquire the mutex and then immediately release it as a fence.
+        // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
+        // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
+        if (previous_size == 0) {
+            std::lock_guard lock{cv_mutex};
+        }
+        cv.notify_one();
    }

    void Pop() {
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -170,6 +170,7 @@ add_library(core STATIC
    hle/kernel/server_port.h
    hle/kernel/server_session.cpp
    hle/kernel/server_session.h
+    hle/kernel/session.cpp
    hle/kernel/session.h
    hle/kernel/shared_memory.cpp
    hle/kernel/shared_memory.h
@@ -509,7 +510,6 @@ add_library(core STATIC
    memory/dmnt_cheat_vm.h
    memory.cpp
    memory.h
-    memory_setup.h
    perf_stats.cpp
    perf_stats.h
    reporter.cpp
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -13,7 +13,6 @@
 #include "core/memory.h"

 namespace Core {
-
 namespace {

 constexpr u64 ELF_DYNAMIC_TAG_NULL = 0;
@@ -61,15 +60,15 @@ static_assert(sizeof(ELFSymbol) == 0x18, "ELFSymbol has incorrect size.");

 using Symbols = std::vector<std::pair<ELFSymbol, std::string>>;

-Symbols GetSymbols(VAddr text_offset) {
-    const auto mod_offset = text_offset + Memory::Read32(text_offset + 4);
+Symbols GetSymbols(VAddr text_offset, Memory::Memory& memory) {
+    const auto mod_offset = text_offset + memory.Read32(text_offset + 4);

    if (mod_offset < text_offset || (mod_offset & 0b11) != 0 ||
-        Memory::Read32(mod_offset) != Common::MakeMagic('M', 'O', 'D', '0')) {
+        memory.Read32(mod_offset) != Common::MakeMagic('M', 'O', 'D', '0')) {
        return {};
    }

-    const auto dynamic_offset = Memory::Read32(mod_offset + 0x4) + mod_offset;
+    const auto dynamic_offset = memory.Read32(mod_offset + 0x4) + mod_offset;

    VAddr string_table_offset{};
    VAddr symbol_table_offset{};
@@ -77,8 +76,8 @@ Symbols GetSymbols(VAddr text_offset) {

    VAddr dynamic_index = dynamic_offset;
    while (true) {
-        const auto tag = Memory::Read64(dynamic_index);
-        const auto value = Memory::Read64(dynamic_index + 0x8);
+        const u64 tag = memory.Read64(dynamic_index);
+        const u64 value = memory.Read64(dynamic_index + 0x8);
        dynamic_index += 0x10;

        if (tag == ELF_DYNAMIC_TAG_NULL) {
@@ -106,11 +105,11 @@ Symbols GetSymbols(VAddr text_offset) {
    VAddr symbol_index = symbol_table_address;
    while (symbol_index < string_table_address) {
        ELFSymbol symbol{};
-        Memory::ReadBlock(symbol_index, &symbol, sizeof(ELFSymbol));
+        memory.ReadBlock(symbol_index, &symbol, sizeof(ELFSymbol));

        VAddr string_offset = string_table_address + symbol.name_index;
        std::string name;
-        for (u8 c = Memory::Read8(string_offset); c != 0; c = Memory::Read8(++string_offset)) {
+        for (u8 c = memory.Read8(string_offset); c != 0; c = memory.Read8(++string_offset)) {
            name += static_cast<char>(c);
        }

@@ -142,28 +141,28 @@ constexpr u64 SEGMENT_BASE = 0x7100000000ull;

 std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {
    std::vector<BacktraceEntry> out;
+    auto& memory = system.Memory();

    auto fp = GetReg(29);
    auto lr = GetReg(30);
-
    while (true) {
        out.push_back({"", 0, lr, 0});
        if (!fp) {
            break;
        }
-        lr = Memory::Read64(fp + 8) - 4;
-        fp = Memory::Read64(fp);
+        lr = memory.Read64(fp + 8) - 4;
+        fp = memory.Read64(fp);
    }

    std::map<VAddr, std::string> modules;
-    auto& loader{System::GetInstance().GetAppLoader()};
+    auto& loader{system.GetAppLoader()};
    if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) {
        return {};
    }

    std::map<std::string, Symbols> symbols;
    for (const auto& module : modules) {
-        symbols.insert_or_assign(module.second, GetSymbols(module.first));
+        symbols.insert_or_assign(module.second, GetSymbols(module.first, memory));
    }

    for (auto& entry : out) {
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -17,11 +17,13 @@ enum class VMAPermission : u8;
 }

 namespace Core {
+class System;

 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    virtual ~ARM_Interface() {}
+    explicit ARM_Interface(System& system_) : system{system_} {}
+    virtual ~ARM_Interface() = default;

    struct ThreadContext {
        std::array<u64, 31> cpu_registers;
@@ -163,6 +165,10 @@ public:
    /// fp+0 : pointer to previous frame record
    /// fp+8 : value of lr for frame
    void LogBacktrace() const;
+
+protected:
+    /// System context that this ARM interface is running under.
+    System& system;
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -28,36 +28,38 @@ public:
    explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}

    u8 MemoryRead8(u64 vaddr) override {
-        return Memory::Read8(vaddr);
+        return parent.system.Memory().Read8(vaddr);
    }
    u16 MemoryRead16(u64 vaddr) override {
-        return Memory::Read16(vaddr);
+        return parent.system.Memory().Read16(vaddr);
    }
    u32 MemoryRead32(u64 vaddr) override {
-        return Memory::Read32(vaddr);
+        return parent.system.Memory().Read32(vaddr);
    }
    u64 MemoryRead64(u64 vaddr) override {
-        return Memory::Read64(vaddr);
+        return parent.system.Memory().Read64(vaddr);
    }
    Vector MemoryRead128(u64 vaddr) override {
-        return {Memory::Read64(vaddr), Memory::Read64(vaddr + 8)};
+        auto& memory = parent.system.Memory();
+        return {memory.Read64(vaddr), memory.Read64(vaddr + 8)};
    }

    void MemoryWrite8(u64 vaddr, u8 value) override {
-        Memory::Write8(vaddr, value);
+        parent.system.Memory().Write8(vaddr, value);
    }
    void MemoryWrite16(u64 vaddr, u16 value) override {
-        Memory::Write16(vaddr, value);
+        parent.system.Memory().Write16(vaddr, value);
    }
    void MemoryWrite32(u64 vaddr, u32 value) override {
-        Memory::Write32(vaddr, value);
+        parent.system.Memory().Write32(vaddr, value);
    }
    void MemoryWrite64(u64 vaddr, u64 value) override {
-        Memory::Write64(vaddr, value);
+        parent.system.Memory().Write64(vaddr, value);
    }
    void MemoryWrite128(u64 vaddr, Vector value) override {
-        Memory::Write64(vaddr, value[0]);
-        Memory::Write64(vaddr + 8, value[1]);
+        auto& memory = parent.system.Memory();
+        memory.Write64(vaddr, value[0]);
+        memory.Write64(vaddr + 8, value[1]);
    }

    void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
@@ -171,9 +173,10 @@ void ARM_Dynarmic::Step() {

 ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
                           std::size_t core_index)
-    : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
-      core_index{core_index}, system{system},
-      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
+    : ARM_Interface{system},
+      cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
+      core_index{core_index}, exclusive_monitor{
+                                  dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

 ARM_Dynarmic::~ARM_Dynarmic() = default;

@@ -264,7 +267,9 @@ void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table,
    jit = MakeJit(page_table, new_address_space_size_in_bits);
 }

-DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count)
+    : monitor(core_count), memory{memory_} {}
+
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;

 void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
@@ -277,29 +282,28 @@ void DynarmicExclusiveMonitor::ClearExclusive() {
 }

 bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
-    return monitor.DoExclusiveOperation(core_index, vaddr, 1,
-                                        [&] { Memory::Write8(vaddr, value); });
+    return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); });
 }

 bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
    return monitor.DoExclusiveOperation(core_index, vaddr, 2,
-                                        [&] { Memory::Write16(vaddr, value); });
+                                        [&] { memory.Write16(vaddr, value); });
 }

 bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
    return monitor.DoExclusiveOperation(core_index, vaddr, 4,
-                                        [&] { Memory::Write32(vaddr, value); });
+                                        [&] { memory.Write32(vaddr, value); });
 }

 bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
    return monitor.DoExclusiveOperation(core_index, vaddr, 8,
-                                        [&] { Memory::Write64(vaddr, value); });
+                                        [&] { memory.Write64(vaddr, value); });
 }

 bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
    return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
-        Memory::Write64(vaddr + 0, value[0]);
-        Memory::Write64(vaddr + 8, value[1]);
+        memory.Write64(vaddr + 0, value[0]);
+        memory.Write64(vaddr + 8, value[1]);
    });
 }

--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,6 +12,10 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"

+namespace Memory {
+class Memory;
+}
+
 namespace Core {

 class ARM_Dynarmic_Callbacks;
@@ -58,13 +62,12 @@ private:
    ARM_Unicorn inner_unicorn;

    std::size_t core_index;
-    System& system;
    DynarmicExclusiveMonitor& exclusive_monitor;
 };

 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
 public:
-    explicit DynarmicExclusiveMonitor(std::size_t core_count);
+    explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count);
    ~DynarmicExclusiveMonitor() override;

    void SetExclusive(std::size_t core_index, VAddr addr) override;
@@ -79,6 +82,7 @@ public:
 private:
    friend class ARM_Dynarmic;
    Dynarmic::A64::ExclusiveMonitor monitor;
+    Memory::Memory& memory;
 };

 } // namespace Core
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -60,7 +60,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
    return false;
 }

-ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
+ARM_Unicorn::ARM_Unicorn(System& system) : ARM_Interface{system} {
    CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));

    auto fpv = 3 << 20;
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -45,7 +45,6 @@ private:
    static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);

    uc_engine* uc{};
-    System& system;
    GDBStub::BreakpointAddress last_bkpt{};
    bool last_bkpt_hit = false;
 };
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -39,6 +39,7 @@
 #include "core/hle/service/service.h"
 #include "core/hle/service/sm/sm.h"
 #include "core/loader/loader.h"
+#include "core/memory.h"
 #include "core/memory/cheat_engine.h"
 #include "core/perf_stats.h"
 #include "core/reporter.h"
@@ -112,8 +113,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
 }
 struct System::Impl {
    explicit Impl(System& system)
-        : kernel{system}, fs_controller{system}, cpu_core_manager{system}, reporter{system},
-          applet_manager{system} {}
+        : kernel{system}, fs_controller{system}, memory{system},
+          cpu_core_manager{system}, reporter{system}, applet_manager{system} {}

    Cpu& CurrentCpuCore() {
        return cpu_core_manager.GetCurrentCore();
@@ -341,7 +342,8 @@ struct System::Impl {
    std::unique_ptr<VideoCore::RendererBase> renderer;
    std::unique_ptr<Tegra::GPU> gpu_core;
    std::shared_ptr<Tegra::DebugContext> debug_context;
-    std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager;
+    std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
+    Memory::Memory memory;
    CpuCoreManager cpu_core_manager;
    bool is_powered_on = false;
    bool exit_lock = false;
@@ -498,6 +500,14 @@ const ExclusiveMonitor& System::Monitor() const {
    return impl->cpu_core_manager.GetExclusiveMonitor();
 }

+Memory::Memory& System::Memory() {
+    return impl->memory;
+}
+
+const Memory::Memory& System::Memory() const {
+    return impl->memory;
+}
+
 Tegra::GPU& System::GPU() {
    return *impl->gpu_core;
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -86,6 +86,10 @@ namespace Core::Hardware {
 class InterruptManager;
 }

+namespace Memory {
+class Memory;
+}
+
 namespace Core {

 class ARM_Interface;
@@ -225,6 +229,12 @@ public:
    /// Gets a constant reference to the exclusive monitor
    const ExclusiveMonitor& Monitor() const;

+    /// Gets a mutable reference to the system memory instance.
+    Memory::Memory& Memory();
+
+    /// Gets a constant reference to the system memory instance.
+    const Memory::Memory& Memory() const;
+
    /// Gets a mutable reference to the GPU interface
    Tegra::GPU& GPU();

--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -66,9 +66,10 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba

 Cpu::~Cpu() = default;

-std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
+std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(
+    [[maybe_unused]] Memory::Memory& memory, [[maybe_unused]] std::size_t num_cores) {
 #ifdef ARCHITECTURE_x86_64
-    return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
+    return std::make_unique<DynarmicExclusiveMonitor>(memory, num_cores);
 #else
    // TODO(merry): Passthrough exclusive monitor
    return nullptr;
@@ -95,6 +96,8 @@ void Cpu::RunLoop(bool tight_loop) {
        } else {
            arm_interface->Step();
        }
+        // We are stopping a run, exclusive state must be cleared
+        arm_interface->ClearExclusiveState();
    }
    core_timing.Advance();

--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -24,6 +24,10 @@ namespace Core::Timing {
 class CoreTiming;
 }

+namespace Memory {
+class Memory;
+}
+
 namespace Core {

 class ARM_Interface;
@@ -86,7 +90,19 @@ public:

    void Shutdown();

-    static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(std::size_t num_cores);
+    /**
+     * Creates an exclusive monitor to handle exclusive reads/writes.
+     *
+     * @param memory The current memory subsystem that the monitor may wish
+     *               to keep track of.
+     *
+     * @param num_cores The number of cores to assume about the CPU.
+     *
+     * @returns The constructed exclusive monitor instance, or nullptr if the current
+     *          CPU backend is unable to use an exclusive monitor.
+     */
+    static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory,
+                                                                  std::size_t num_cores);

 private:
    void Reschedule();
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -17,11 +17,15 @@ namespace Core::Timing {

 constexpr int MAX_SLICE_LENGTH = 10000;

+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
+    return std::make_shared<EventType>(std::move(callback), std::move(name));
+}
+
 struct CoreTiming::Event {
    s64 time;
    u64 fifo_order;
    u64 userdata;
-    const EventType* type;
+    std::weak_ptr<EventType> type;

    // Sort by time, unless the times are the same, in which case sort by
    // the order added to the queue
@@ -54,36 +58,15 @@ void CoreTiming::Initialize() {
    event_fifo_id = 0;

    const auto empty_timed_callback = [](u64, s64) {};
-    ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
+    ev_lost = CreateEvent("_lost_event", empty_timed_callback);
 }

 void CoreTiming::Shutdown() {
    ClearPendingEvents();
-    UnregisterAllEvents();
 }

-EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
-    std::lock_guard guard{inner_mutex};
-    // check for existing type with same name.
-    // we want event type names to remain unique so that we can use them for serialization.
-    ASSERT_MSG(event_types.find(name) == event_types.end(),
-               "CoreTiming Event \"{}\" is already registered. Events should only be registered "
-               "during Init to avoid breaking save states.",
-               name.c_str());
-
-    auto info = event_types.emplace(name, EventType{callback, nullptr});
-    EventType* event_type = &info.first->second;
-    event_type->name = &info.first->first;
-    return event_type;
-}
-
-void CoreTiming::UnregisterAllEvents() {
-    ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
-    event_types.clear();
-}
-
-void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
-    ASSERT(event_type != nullptr);
+void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
    std::lock_guard guard{inner_mutex};
    const s64 timeout = GetTicks() + cycles_into_future;

@@ -93,13 +76,15 @@ void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_ty
    }

    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+
    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
 }

-void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
    std::lock_guard guard{inner_mutex};
+
    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
-        return e.type == event_type && e.userdata == userdata;
+        return e.type.lock().get() == event_type.get() && e.userdata == userdata;
    });

    // Removing random items breaks the invariant so we have to re-establish it.
@@ -130,10 +115,12 @@ void CoreTiming::ClearPendingEvents() {
    event_queue.clear();
 }

-void CoreTiming::RemoveEvent(const EventType* event_type) {
+void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
    std::lock_guard guard{inner_mutex};
-    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
-                                    [&](const Event& e) { return e.type == event_type; });
+
+    const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+        return e.type.lock().get() == event_type.get();
+    });

    // Removing random items breaks the invariant so we have to re-establish it.
    if (itr != event_queue.end()) {
@@ -181,7 +168,11 @@ void CoreTiming::Advance() {
        std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
        event_queue.pop_back();
        inner_mutex.unlock();
-        evt.type->callback(evt.userdata, global_timer - evt.time);
+
+        if (auto event_type{evt.type.lock()}) {
+            event_type->callback(evt.userdata, global_timer - evt.time);
+        }
+
        inner_mutex.lock();
    }

--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -6,11 +6,12 @@

 #include <chrono>
 #include <functional>
+#include <memory>
 #include <mutex>
 #include <optional>
 #include <string>
-#include <unordered_map>
 #include <vector>
+
 #include "common/common_types.h"
 #include "common/threadsafe_queue.h"

@@ -21,10 +22,13 @@ using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;

 /// Contains the characteristics of a particular event.
 struct EventType {
+    EventType(TimedCallback&& callback, std::string&& name)
+        : callback{std::move(callback)}, name{std::move(name)} {}
+
    /// The event's callback function.
    TimedCallback callback;
-    /// A pointer to the name of the event.
+    /// The name of the event.
-    const std::string* name;
+    const std::string name;
 };

 /**
@@ -57,31 +61,17 @@ public:
    /// Tears down all timing related functionality.
    void Shutdown();

-    /// Registers a core timing event with the given name and callback.
-    ///
-    /// @param name     The name of the core timing event to register.
-    /// @param callback The callback to execute for the event.
-    ///
-    /// @returns An EventType instance representing the registered event.
-    ///
-    /// @pre The name of the event being registered must be unique among all
-    ///      registered events.
-    ///
-    EventType* RegisterEvent(const std::string& name, TimedCallback callback);
-
-    /// Unregisters all registered events thus far. Note: not thread unsafe
-    void UnregisterAllEvents();
-
    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
    /// event is scheduled earlier than the current values.
    ///
    /// Scheduling from a callback will not update the downcount until the Advance() completes.
-    void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
+    void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
+                       u64 userdata = 0);

-    void UnscheduleEvent(const EventType* event_type, u64 userdata);
+    void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);

    /// We only permit one event of each type in the queue at a time.
-    void RemoveEvent(const EventType* event_type);
+    void RemoveEvent(const std::shared_ptr<EventType>& event_type);

    void ForceExceptionCheck(s64 cycles);

@@ -148,13 +138,18 @@ private:
    std::vector<Event> event_queue;
    u64 event_fifo_id = 0;

-    // Stores each element separately as a linked list node so pointers to elements
-    // remain stable regardless of rehashes/resizing.
-    std::unordered_map<std::string, EventType> event_types;
-
-    EventType* ev_lost = nullptr;
+    std::shared_ptr<EventType> ev_lost;

    std::mutex inner_mutex;
 };

+/// Creates a core timing event with the given name and callback.
+///
+/// @param name     The name of the core timing event to create.
+/// @param callback The callback to execute for the event.
+///
+/// @returns A shared_ptr to the created EventType. Scheduled events only hold a weak reference
+///          to it, so the caller must keep it alive for the callback to run.
+std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
+
 } // namespace Core::Timing
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -25,7 +25,7 @@ CpuCoreManager::~CpuCoreManager() = default;

 void CpuCoreManager::Initialize() {
    barrier = std::make_unique<CpuBarrier>();
-    exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
+    exclusive_monitor = Cpu::MakeExclusiveMonitor(system.Memory(), cores.size());

    for (std::size_t index = 0; index < cores.size(); ++index) {
        cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
--- a/src/core/file_sys/directory.h
+++ b/src/core/file_sys/directory.h
@@ -15,7 +15,7 @@

 namespace FileSys {

-enum EntryType : u8 {
+enum class EntryType : u8 {
    Directory = 0,
    File = 1,
 };
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -76,7 +76,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {

    const auto& disabled = Settings::values.disabled_addons[title_id];
    const auto update_disabled =
-        std::find(disabled.begin(), disabled.end(), "Update") != disabled.end();
+        std::find(disabled.cbegin(), disabled.cend(), "Update") != disabled.cend();

    // Game Updates
    const auto update_tid = GetUpdateTitleID(title_id);
@@ -127,7 +127,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
    std::vector<VirtualFile> out;
    out.reserve(patch_dirs.size());
    for (const auto& subdir : patch_dirs) {
-        if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
+        if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
            continue;

        auto exefs_dir = subdir->GetSubdirectory("exefs");
@@ -284,12 +284,17 @@ std::vector<Memory::CheatEntry> PatchManager::CreateCheatList(
        return {};
    }

+    const auto& disabled = Settings::values.disabled_addons[title_id];
    auto patch_dirs = load_dir->GetSubdirectories();
    std::sort(patch_dirs.begin(), patch_dirs.end(),
              [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });

    std::vector<Memory::CheatEntry> out;
    for (const auto& subdir : patch_dirs) {
+        if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) {
+            continue;
+        }
+
        auto cheats_dir = subdir->GetSubdirectory("cheats");
        if (cheats_dir != nullptr) {
            auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
@@ -331,8 +336,9 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
    layers.reserve(patch_dirs.size() + 1);
    layers_ext.reserve(patch_dirs.size() + 1);
    for (const auto& subdir : patch_dirs) {
-        if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
+        if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) {
            continue;
+        }

        auto romfs_dir = subdir->GetSubdirectory("romfs");
        if (romfs_dir != nullptr)
@@ -381,7 +387,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content

    const auto& disabled = Settings::values.disabled_addons[title_id];
    const auto update_disabled =
-        std::find(disabled.begin(), disabled.end(), "Update") != disabled.end();
+        std::find(disabled.cbegin(), disabled.cend(), "Update") != disabled.cend();

    if (!update_disabled && update != nullptr) {
        const auto new_nca = std::make_shared<NCA>(update, romfs, ivfc_offset);
@@ -431,7 +437,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
    auto [nacp, discard_icon_file] = update.GetControlMetadata();

    const auto update_disabled =
-        std::find(disabled.begin(), disabled.end(), "Update") != disabled.end();
+        std::find(disabled.cbegin(), disabled.cend(), "Update") != disabled.cend();
    const auto update_label = update_disabled ? "[D] Update" : "Update";

    if (nacp != nullptr) {
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <memory>
+
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
@@ -12,7 +14,7 @@
 #include "core/file_sys/vfs_vector.h"

 namespace FileSys {
-
+namespace {
 constexpr u32 ROMFS_ENTRY_EMPTY = 0xFFFFFFFF;

 struct TableLocation {
@@ -51,7 +53,7 @@ struct FileEntry {
 static_assert(sizeof(FileEntry) == 0x20, "FileEntry has incorrect size.");

 template <typename Entry>
-static std::pair<Entry, std::string> GetEntry(const VirtualFile& file, std::size_t offset) {
+std::pair<Entry, std::string> GetEntry(const VirtualFile& file, std::size_t offset) {
    Entry entry{};
    if (file->ReadObject(&entry, offset) != sizeof(Entry))
        return {};
@@ -99,6 +101,7 @@ void ProcessDirectory(VirtualFile file, std::size_t dir_offset, std::size_t file
        this_dir_offset = entry.first.sibling;
    }
 }
+} // Anonymous namespace

 VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
    RomFSHeader header{};
--- a/src/core/file_sys/romfs.h
+++ b/src/core/file_sys/romfs.h
@@ -5,10 +5,6 @@
 #pragma once

 #include <array>
-#include <map>
-#include "common/common_funcs.h"
-#include "common/common_types.h"
-#include "common/swap.h"
 #include "core/file_sys/vfs.h"

 namespace FileSys {
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -508,8 +508,9 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
              bp->second.len, bp->second.addr, static_cast<int>(type));

    if (type == BreakpointType::Execute) {
-        Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
-        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+        auto& system = Core::System::GetInstance();
+        system.Memory().WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
+        system.InvalidateCpuInstructionCaches();
    }
    p.erase(addr);
 }
@@ -969,12 +970,13 @@ static void ReadMemory() {
        SendReply("E01");
    }

-    if (!Memory::IsValidVirtualAddress(addr)) {
+    auto& memory = Core::System::GetInstance().Memory();
+    if (!memory.IsValidVirtualAddress(addr)) {
        return SendReply("E00");
    }

    std::vector<u8> data(len);
-    Memory::ReadBlock(addr, data.data(), len);
+    memory.ReadBlock(addr, data.data(), len);

    MemToGdbHex(reply, data.data(), len);
    reply[len * 2] = '\0';
@@ -984,22 +986,23 @@ static void ReadMemory() {
 /// Modify location in memory with data received from the gdb client.
 static void WriteMemory() {
    auto start_offset = command_buffer + 1;
-    auto addr_pos = std::find(start_offset, command_buffer + command_length, ',');
-    VAddr addr = HexToLong(start_offset, static_cast<u64>(addr_pos - start_offset));
+    const auto addr_pos = std::find(start_offset, command_buffer + command_length, ',');
+    const VAddr addr = HexToLong(start_offset, static_cast<u64>(addr_pos - start_offset));

    start_offset = addr_pos + 1;
-    auto len_pos = std::find(start_offset, command_buffer + command_length, ':');
-    u64 len = HexToLong(start_offset, static_cast<u64>(len_pos - start_offset));
+    const auto len_pos = std::find(start_offset, command_buffer + command_length, ':');
+    const u64 len = HexToLong(start_offset, static_cast<u64>(len_pos - start_offset));

-    if (!Memory::IsValidVirtualAddress(addr)) {
+    auto& system = Core::System::GetInstance();
+    auto& memory = system.Memory();
+    if (!memory.IsValidVirtualAddress(addr)) {
        return SendReply("E00");
    }

    std::vector<u8> data(len);
-
    GdbHexToMem(data.data(), len_pos + 1, len);
-    Memory::WriteBlock(addr, data.data(), len);
-    Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    memory.WriteBlock(addr, data.data(), len);
+    system.InvalidateCpuInstructionCaches();
    SendReply("OK");
 }

@@ -1055,12 +1058,15 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
    breakpoint.active = true;
    breakpoint.addr = addr;
    breakpoint.len = len;
-    Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
+
+    auto& system = Core::System::GetInstance();
+    auto& memory = system.Memory();
+    memory.ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());

    static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
    if (type == BreakpointType::Execute) {
-        Memory::WriteBlock(addr, btrap.data(), btrap.size());
-        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+        memory.WriteBlock(addr, btrap.data(), btrap.size());
+        system.InvalidateCpuInstructionCaches();
    }
    p.insert({addr, breakpoint});

--- a/src/core/hardware_interrupt_manager.cpp
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -11,13 +11,12 @@
 namespace Core::Hardware {

 InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
-    gpu_interrupt_event =
-        system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) {
-            auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
-            const u32 syncpt = static_cast<u32>(message >> 32);
-            const u32 value = static_cast<u32>(message);
-            nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
-        });
+    gpu_interrupt_event = Core::Timing::CreateEvent("GPUInterrupt", [this](u64 message, s64) {
+        auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
+        const u32 syncpt = static_cast<u32>(message >> 32);
+        const u32 value = static_cast<u32>(message);
+        nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
+    });
 }

 InterruptManager::~InterruptManager() = default;
--- a/src/core/hardware_interrupt_manager.h
+++ b/src/core/hardware_interrupt_manager.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <memory>
+
 #include "common/common_types.h"

 namespace Core {
@@ -25,7 +27,7 @@ public:

 private:
    Core::System& system;
-    Core::Timing::EventType* gpu_interrupt_event{};
+    std::shared_ptr<Core::Timing::EventType> gpu_interrupt_event;
 };

 } // namespace Core::Hardware
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,6 +19,7 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/session.h"
 #include "core/hle/result.h"

 namespace IPC {
@@ -139,10 +140,9 @@ public:
            context->AddDomainObject(std::move(iface));
        } else {
            auto& kernel = Core::System::GetInstance().Kernel();
-            auto [server, client] =
-                Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
-            iface->ClientConnected(server);
+            auto [client, server] = Kernel::Session::Create(kernel, iface->GetServiceName());
            context->AddMoveObject(std::move(client));
+            iface->ClientConnected(std::move(server));
        }
    }

--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -11,18 +11,16 @@
 #include "core/core_cpu.h"
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/object.h"
-#include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/result.h"
 #include "core/memory.h"

 namespace Kernel {
-namespace {
+
 // Wake up num_to_wake (or all) threads in a vector.
-void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake) {
-    auto& system = Core::System::GetInstance();
+void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
+                                 s32 num_to_wake) {
    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
    // them all.
    std::size_t last = waiting_threads.size();
@@ -34,12 +32,12 @@ void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s3
    for (std::size_t i = 0; i < last; i++) {
        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        RemoveThread(waiting_threads[i]);
        waiting_threads[i]->SetArbiterWaitAddress(0);
        waiting_threads[i]->ResumeFromWait();
        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
    }
 }
-} // Anonymous namespace

 AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
 AddressArbiter::~AddressArbiter() = default;
@@ -67,23 +65,27 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {

 ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
                                                              s32 num_to_wake) {
+    auto& memory = system.Memory();
+
    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
+    if (!memory.IsValidVirtualAddress(address)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

-    if (static_cast<s32>(Memory::Read32(address)) != value) {
+    if (static_cast<s32>(memory.Read32(address)) != value) {
        return ERR_INVALID_STATE;
    }

-    Memory::Write32(address, static_cast<u32>(value + 1));
+    memory.Write32(address, static_cast<u32>(value + 1));
    return SignalToAddressOnly(address, num_to_wake);
 }

 ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
                                                                         s32 num_to_wake) {
+    auto& memory = system.Memory();
+
    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
+    if (!memory.IsValidVirtualAddress(address)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

@@ -109,11 +111,11 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
        }
    }

-    if (static_cast<s32>(Memory::Read32(address)) != value) {
+    if (static_cast<s32>(memory.Read32(address)) != value) {
        return ERR_INVALID_STATE;
    }

-    Memory::Write32(address, static_cast<u32>(updated_value));
+    memory.Write32(address, static_cast<u32>(updated_value));
    WakeThreads(waiting_threads, num_to_wake);
    return RESULT_SUCCESS;
 }
@@ -134,18 +136,20 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s

 ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
                                                    bool should_decrement) {
+    auto& memory = system.Memory();
+
    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
+    if (!memory.IsValidVirtualAddress(address)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

-    const s32 cur_value = static_cast<s32>(Memory::Read32(address));
+    const s32 cur_value = static_cast<s32>(memory.Read32(address));
    if (cur_value >= value) {
        return ERR_INVALID_STATE;
    }

    if (should_decrement) {
-        Memory::Write32(address, static_cast<u32>(cur_value - 1));
+        memory.Write32(address, static_cast<u32>(cur_value - 1));
    }

    // Short-circuit without rescheduling, if timeout is zero.
@@ -157,15 +161,19 @@ ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s6
 }

 ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+    auto& memory = system.Memory();
+
    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
+    if (!memory.IsValidVirtualAddress(address)) {
        return ERR_INVALID_ADDRESS_STATE;
    }
+
    // Only wait for the address if equal.
-    if (static_cast<s32>(Memory::Read32(address)) != value) {
+    if (static_cast<s32>(memory.Read32(address)) != value) {
        return ERR_INVALID_STATE;
    }
-    // Short-circuit without rescheduling, if timeout is zero.
+
+    // Short-circuit without rescheduling if timeout is zero.
    if (timeout == 0) {
        return RESULT_TIMEOUT;
    }
@@ -176,6 +184,7 @@ ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 t
 ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
+    InsertThread(SharedFrom(current_thread));
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();
    current_thread->WakeAfterDelay(timeout);
@@ -184,26 +193,51 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
    return RESULT_TIMEOUT;
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
-    VAddr address) const {
+void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
+    RemoveThread(thread);
+    thread->SetArbiterWaitAddress(0);
+}

-    // Retrieve all threads that are waiting for this address.
-    std::vector<std::shared_ptr<Thread>> threads;
-    const auto& scheduler = system.GlobalScheduler();
-    const auto& thread_list = scheduler.GetThreadList();
-
-    for (const auto& thread : thread_list) {
-        if (thread->GetArbiterWaitAddress() == address) {
-            threads.push_back(thread);
+void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread->GetPriority() >= thread->GetPriority()) {
+            thread_list.insert(it, thread);
+            return;
        }
+        ++it;
    }
+    thread_list.push_back(std::move(thread));
+}

-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const std::shared_ptr<Thread>& lhs, const std::shared_ptr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
+void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
+    const VAddr arb_addr = thread->GetArbiterWaitAddress();
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        const std::shared_ptr<Thread>& current_thread = *it;
+        if (current_thread.get() == thread.get()) {
+            thread_list.erase(it);
+            return;
+        }
+        ++it;
+    }
+    UNREACHABLE();
+}

-    return threads;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
+    std::vector<std::shared_ptr<Thread>> result;
+    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
+    auto it = thread_list.begin();
+    while (it != thread_list.end()) {
+        std::shared_ptr<Thread> current_thread = *it;
+        result.push_back(std::move(current_thread));
+        ++it;
+    }
+    return result;
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,10 +4,12 @@

 #pragma once

+#include <list>
+#include <memory>
+#include <unordered_map>
 #include <vector>

 #include "common/common_types.h"
-#include "core/hle/kernel/object.h"

 union ResultCode;

@@ -48,6 +50,9 @@ public:
    /// Waits on an address with a particular arbitration type.
    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);

+    /// Removes a thread from the container and resets its address arbiter address to 0
+    void HandleWakeupThread(std::shared_ptr<Thread> thread);
+
 private:
    /// Signals an address being waited on.
    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
@@ -71,8 +76,20 @@ private:
    // Waits on the given address with a timeout in nanoseconds
    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);

+    /// Wake up num_to_wake (or all) threads in a vector.
+    void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
+
+    /// Insert a thread into the address arbiter container
+    void InsertThread(std::shared_ptr<Thread> thread);
+
+    /// Removes a thread from the address arbiter container
+    void RemoveThread(std::shared_ptr<Thread> thread);
+
    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+
+    /// Lists of threads waiting on the address arbiter, keyed by the address they wait on
+    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;

    Core::System& system;
 };
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -8,7 +8,7 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
-#include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/session.h"

 namespace Kernel {

@@ -20,28 +20,23 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {
 }

 ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
-    // Note: Threads do not wait for the server endpoint to call
-    // AcceptSession before returning from this call.
-
    if (active_sessions >= max_sessions) {
        return ERR_MAX_CONNECTIONS_REACHED;
    }
    active_sessions++;

-    // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
-    auto [server, client] =
-        ServerSession::CreateSessionPair(kernel, server_port->GetName(), SharedFrom(this));
+    auto [client, server] = Kernel::Session::Create(kernel, name);

    if (server_port->HasHLEHandler()) {
-        server_port->GetHLEHandler()->ClientConnected(server);
+        server_port->GetHLEHandler()->ClientConnected(std::move(server));
    } else {
-        server_port->AppendPendingSession(server);
+        server_port->AppendPendingSession(std::move(server));
    }

    // Wake the threads waiting on the ServerPort
    server_port->WakeupAllWaitingThreads();

-    return MakeResult(client);
+    return MakeResult(std::move(client));
 }

 void ClientPort::ConnectionClosed() {
--- a/src/core/hle/kernel/client_port.h
+++ b/src/core/hle/kernel/client_port.h
@@ -4,7 +4,9 @@

 #pragma once

+#include <memory>
 #include <string>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/result.h"
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -1,4 +1,4 @@
-// Copyright 2016 Citra Emulator Project
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -12,24 +12,44 @@

 namespace Kernel {

-ClientSession::ClientSession(KernelCore& kernel) : Object{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {}
+
 ClientSession::~ClientSession() {
    // This destructor will be called automatically when the last ClientSession handle is closed by
    // the emulated application.
-    if (parent->server) {
-        parent->server->ClientDisconnected();
+    if (parent->Server()) {
+        parent->Server()->ClientDisconnected();
    }
-
-    parent->client = nullptr;
 }

-ResultCode ClientSession::SendSyncRequest(Thread* thread) {
+bool ClientSession::ShouldWait(const Thread* thread) const {
+    UNIMPLEMENTED();
+    return {};
+}
+
+void ClientSession::Acquire(Thread* thread) {
+    UNIMPLEMENTED();
+}
+
+ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel,
+                                                                std::shared_ptr<Session> parent,
+                                                                std::string name) {
+    std::shared_ptr<ClientSession> client_session{std::make_shared<ClientSession>(kernel)};
+
+    client_session->name = std::move(name);
+    client_session->parent = std::move(parent);
+
+    return MakeResult(std::move(client_session));
+}
+
+ResultCode ClientSession::SendSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory) {
    // Keep ServerSession alive until we're done working with it.
-    if (parent->server == nullptr)
+    if (!parent->Server()) {
        return ERR_SESSION_CLOSED_BY_REMOTE;
+    }

    // Signal the server session that new data is available
-    return parent->server->HandleSyncRequest(SharedFrom(thread));
+    return parent->Server()->HandleSyncRequest(std::move(thread), memory);
 }

 } // namespace Kernel
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -1,4 +1,4 @@
-// Copyright 2016 Citra Emulator Project
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -6,23 +6,28 @@

 #include <memory>
 #include <string>
-#include "core/hle/kernel/object.h"
+
+#include "core/hle/kernel/wait_object.h"
+#include "core/hle/result.h"

 union ResultCode;

+namespace Memory {
+class Memory;
+}
+
 namespace Kernel {

 class KernelCore;
 class Session;
-class ServerSession;
 class Thread;

-class ClientSession final : public Object {
+class ClientSession final : public WaitObject {
 public:
    explicit ClientSession(KernelCore& kernel);
    ~ClientSession() override;

-    friend class ServerSession;
+    friend class Session;

    std::string GetTypeName() const override {
        return "ClientSession";
@@ -37,9 +42,17 @@ public:
        return HANDLE_TYPE;
    }

-    ResultCode SendSyncRequest(Thread* thread);
+    ResultCode SendSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory);
+
+    bool ShouldWait(const Thread* thread) const override;
+
+    void Acquire(Thread* thread) override;

 private:
+    static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel,
+                                                            std::shared_ptr<Session> parent,
+                                                            std::string name = "Unknown");
+
    /// The parent session, which links to the server endpoint.
    std::shared_ptr<Session> parent;

--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -6,6 +6,8 @@

 #include <array>
 #include <cstddef>
+#include <memory>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/result.h"
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -74,6 +74,8 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
        thread->WakeAfterDelay(timeout);
    }

+    is_thread_waiting = true;
+
    return writable_event;
 }

@@ -214,10 +216,11 @@ ResultCode HLERequestContext::PopulateFromIncomingCommandBuffer(const HandleTabl
 ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
    auto& owner_process = *thread.GetOwnerProcess();
    auto& handle_table = owner_process.GetHandleTable();
+    auto& memory = Core::System::GetInstance().Memory();

    std::array<u32, IPC::COMMAND_BUFFER_LENGTH> dst_cmdbuf;
-    Memory::ReadBlock(owner_process, thread.GetTLSAddress(), dst_cmdbuf.data(),
-                      dst_cmdbuf.size() * sizeof(u32));
+    memory.ReadBlock(owner_process, thread.GetTLSAddress(), dst_cmdbuf.data(),
+                     dst_cmdbuf.size() * sizeof(u32));

    // The header was already built in the internal command buffer. Attempt to parse it to verify
    // the integrity and then copy it over to the target command buffer.
@@ -273,8 +276,8 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
    }

    // Copy the translated command buffer back into the thread's command buffer area.
-    Memory::WriteBlock(owner_process, thread.GetTLSAddress(), dst_cmdbuf.data(),
-                       dst_cmdbuf.size() * sizeof(u32));
+    memory.WriteBlock(owner_process, thread.GetTLSAddress(), dst_cmdbuf.data(),
+                      dst_cmdbuf.size() * sizeof(u32));

    return RESULT_SUCCESS;
 }
@@ -282,15 +285,14 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
 std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
    std::vector<u8> buffer;
    const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
+    auto& memory = Core::System::GetInstance().Memory();

    if (is_buffer_a) {
        buffer.resize(BufferDescriptorA()[buffer_index].Size());
-        Memory::ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(),
-                          buffer.size());
+        memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
    } else {
        buffer.resize(BufferDescriptorX()[buffer_index].Size());
-        Memory::ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(),
-                          buffer.size());
+        memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
    }

    return buffer;
@@ -311,10 +313,11 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
        size = buffer_size; // TODO(bunnei): This needs to be HW tested
    }

+    auto& memory = Core::System::GetInstance().Memory();
    if (is_buffer_b) {
-        Memory::WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
+        memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
    } else {
-        Memory::WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
+        memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
    }

    return size;
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -264,6 +264,18 @@ public:

    std::string Description() const;

+    Thread& GetThread() {
+        return *thread;
+    }
+
+    const Thread& GetThread() const {
+        return *thread;
+    }
+
+    bool IsThreadWaiting() const {
+        return is_thread_waiting;
+    }
+
 private:
    void ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, bool incoming);

@@ -290,6 +302,7 @@ private:
    u32_le command{};

    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+    bool is_thread_waiting{};
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -13,7 +13,6 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
-#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
@@ -79,9 +78,9 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
        }
    }

-    if (thread->GetArbiterWaitAddress() != 0) {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitArb);
-        thread->SetArbiterWaitAddress(0);
+    if (thread->GetStatus() == ThreadStatus::WaitArb) {
+        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
+        address_arbiter.HandleWakeupThread(thread);
    }

    if (resume) {
@@ -139,12 +138,12 @@ struct KernelCore::Impl {

    void InitializeThreads() {
        thread_wakeup_event_type =
-            system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback);
    }

    void InitializePreemption() {
-        preemption_event = system.CoreTiming().RegisterEvent(
-            "PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
+        preemption_event =
+            Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
                global_scheduler.PreemptThreads();
                s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
                system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
@@ -154,6 +153,16 @@ struct KernelCore::Impl {
        system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
    }

+    void MakeCurrentProcess(Process* process) {
+        current_process = process;
+
+        if (process == nullptr) {
+            return;
+        }
+
+        system.Memory().SetCurrentPageTable(*process);
+    }
+
    std::atomic<u32> next_object_id{0};
    std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin};
    std::atomic<u64> next_user_process_id{Process::ProcessIDMin};
@@ -166,8 +175,9 @@ struct KernelCore::Impl {

    std::shared_ptr<ResourceLimit> system_resource_limit;

-    Core::Timing::EventType* thread_wakeup_event_type = nullptr;
-    Core::Timing::EventType* preemption_event = nullptr;
+    std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;
+    std::shared_ptr<Core::Timing::EventType> preemption_event;
+
    // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
    // allowing us to simply use a pool index or similar.
    Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -207,13 +217,7 @@ void KernelCore::AppendNewProcess(std::shared_ptr<Process> process) {
 }

 void KernelCore::MakeCurrentProcess(Process* process) {
-    impl->current_process = process;
-
-    if (process == nullptr) {
-        return;
-    }
-
-    Memory::SetCurrentPageTable(*process);
+    impl->MakeCurrentProcess(process);
 }

 Process* KernelCore::CurrentProcess() {
@@ -269,7 +273,7 @@ u64 KernelCore::CreateNewUserProcessID() {
    return impl->next_user_process_id++;
 }

-Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
+const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const {
    return impl->thread_wakeup_event_type;
 }

--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -113,7 +114,7 @@ private:
    u64 CreateNewThreadID();

    /// Retrieves the event type used for thread wakeup callbacks.
-    Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
+    const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const;

    /// Provides a reference to the thread wakeup callback handle table.
    Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <memory>
 #include <utility>
 #include <vector>

@@ -79,7 +80,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
    // thread.
    ASSERT(requesting_thread == current_thread);

-    const u32 addr_value = Memory::Read32(address);
+    const u32 addr_value = system.Memory().Read32(address);

    // If the mutex isn't being held, just return success.
    if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
@@ -117,7 +118,7 @@ ResultCode Mutex::Release(VAddr address) {

    // There are no more threads waiting for the mutex, release it completely.
    if (thread == nullptr) {
-        Memory::Write32(address, 0);
+        system.Memory().Write32(address, 0);
        return RESULT_SUCCESS;
    }

@@ -132,7 +133,7 @@ ResultCode Mutex::Release(VAddr address) {
    }

    // Grant the mutex to the next waiting thread and resume it.
-    Memory::Write32(address, mutex_value);
+    system.Memory().Write32(address, mutex_value);

    ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
    thread->ResumeFromWait();
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -27,6 +27,7 @@ bool Object::IsWaitable() const {
    case HandleType::ResourceLimit:
    case HandleType::ClientPort:
    case HandleType::ClientSession:
+    case HandleType::Session:
        return false;
    }

--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -29,6 +29,7 @@ enum class HandleType : u32 {
    ServerPort,
    ClientSession,
    ServerSession,
+    Session,
 };

 class Object : NonCopyable, public std::enable_shared_from_this<Object> {
--- a/src/core/hle/kernel/resource_limit.h
+++ b/src/core/hle/kernel/resource_limit.h
@@ -5,6 +5,8 @@
 #pragma once

 #include <array>
+#include <memory>
+
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -458,7 +458,6 @@ void Scheduler::SwitchContext() {
        cpu_core.LoadContext(new_thread->GetContext());
        cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
        cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
-        cpu_core.ClearExclusiveState();
    } else {
        current_thread = nullptr;
        // Note: We do not reset the current process and current page table when idling because
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -4,11 +4,12 @@

 #pragma once

-#include <mutex>
+#include <atomic>
+#include <memory>
 #include <vector>
+
 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
-#include "core/hle/kernel/object.h"
 #include "core/hle/kernel/thread.h"

 namespace Core {
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -1,4 +1,4 @@
-// Copyright 2016 Citra Emulator Project
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/core_timing.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -19,36 +20,32 @@
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
 #include "core/hle/kernel/thread.h"
+#include "core/memory.h"

 namespace Kernel {

 ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {}
-ServerSession::~ServerSession() {
-    // This destructor will be called automatically when the last ServerSession handle is closed by
-    // the emulated application.
-
-    // Decrease the port's connection count.
-    if (parent->port) {
-        parent->port->ConnectionClosed();
-    }
-
-    parent->server = nullptr;
-}
+ServerSession::~ServerSession() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
+                                                                std::shared_ptr<Session> parent,
                                                                std::string name) {
-    std::shared_ptr<ServerSession> server_session = std::make_shared<ServerSession>(kernel);
+    std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)};

-    server_session->name = std::move(name);
-    server_session->parent = nullptr;
+    session->request_event = Core::Timing::CreateEvent(
+        name, [session](u64 userdata, s64 cycles_late) { session->CompleteSyncRequest(); });
+    session->name = std::move(name);
+    session->parent = std::move(parent);

-    return MakeResult(std::move(server_session));
+    return MakeResult(std::move(session));
 }

 bool ServerSession::ShouldWait(const Thread* thread) const {
    // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
-    if (parent->client == nullptr)
+    if (!parent->Client()) {
        return false;
+    }
+
    // Wait if we have no pending requests, or if we're currently handling a request.
    return pending_requesting_threads.empty() || currently_handling != nullptr;
 }
@@ -127,13 +124,21 @@ ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& con
    return RESULT_SUCCESS;
 }

-ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread) {
-    // The ServerSession received a sync request, this means that there's new data available
-    // from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
-    // similar.
-    Kernel::HLERequestContext context(SharedFrom(this), thread);
-    u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
-    context.PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
+ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory) {
+    u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(thread->GetTLSAddress()))};
+    std::shared_ptr<Kernel::HLERequestContext> context{
+        std::make_shared<Kernel::HLERequestContext>(SharedFrom(this), std::move(thread))};
+
+    context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
+    request_queue.Push(std::move(context));
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode ServerSession::CompleteSyncRequest() {
+    ASSERT(!request_queue.Empty());
+
+    auto& context = *request_queue.Front();

    ResultCode result = RESULT_SUCCESS;
    // If the session has been converted to a domain, handle the domain request
@@ -145,61 +150,27 @@ ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread) {
        result = hle_handler->HandleSyncRequest(context);
    }

-    if (thread->GetStatus() == ThreadStatus::Running) {
-        // Put the thread to sleep until the server replies, it will be awoken in
-        // svcReplyAndReceive for LLE servers.
-        thread->SetStatus(ThreadStatus::WaitIPC);
-
-        if (hle_handler != nullptr) {
-            // For HLE services, we put the request threads to sleep for a short duration to
-            // simulate IPC overhead, but only if the HLE handler didn't put the thread to sleep for
-            // other reasons like an async callback. The IPC overhead is needed to prevent
-            // starvation when a thread only does sync requests to HLE services while a
-            // lower-priority thread is waiting to run.
-
-            // This delay was approximated in a homebrew application by measuring the average time
-            // it takes for svcSendSyncRequest to return when performing the SetLcdForceBlack IPC
-            // request to the GSP:GPU service in a n3DS with firmware 11.6. The measured values have
-            // a high variance and vary between models.
-            static constexpr u64 IPCDelayNanoseconds = 39000;
-            thread->WakeAfterDelay(IPCDelayNanoseconds);
-        } else {
-            // Add the thread to the list of threads that have issued a sync request with this
-            // server.
-            pending_requesting_threads.push_back(std::move(thread));
-        }
-    }
-
-    // If this ServerSession does not have an HLE implementation, just wake up the threads waiting
-    // on it.
-    WakeupAllWaitingThreads();
-
-    // Handle scenario when ConvertToDomain command was issued, as we must do the conversion at the
-    // end of the command such that only commands following this one are handled as domains
    if (convert_to_domain) {
        ASSERT_MSG(IsSession(), "ServerSession is already a domain instance.");
        domain_request_handlers = {hle_handler};
        convert_to_domain = false;
    }

+    // Some service requests require the thread to block
+    if (!context.IsThreadWaiting()) {
+        context.GetThread().ResumeFromWait();
+        context.GetThread().SetWaitSynchronizationResult(result);
+    }
+
+    request_queue.Pop();
+
    return result;
 }

-ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
-                                                            const std::string& name,
-                                                            std::shared_ptr<ClientPort> port) {
-    auto server_session = ServerSession::Create(kernel, name + "_Server").Unwrap();
-    std::shared_ptr<ClientSession> client_session = std::make_shared<ClientSession>(kernel);
-    client_session->name = name + "_Client";
-
-    std::shared_ptr<Session> parent(new Session);
-    parent->client = client_session.get();
-    parent->server = server_session.get();
-    parent->port = std::move(port);
-
-    client_session->parent = parent;
-    server_session->parent = parent;
-
-    return std::make_pair(std::move(server_session), std::move(client_session));
+ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
+                                            Memory::Memory& memory) {
+    Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {});
+    return QueueSyncRequest(std::move(thread), memory);
 }
+
 } // namespace Kernel
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -1,4 +1,4 @@
-// Copyright 2014 Citra Emulator Project
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -9,17 +9,22 @@
 #include <utility>
 #include <vector>

-#include "core/hle/kernel/object.h"
+#include "common/threadsafe_queue.h"
 #include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"

+namespace Memory {
+class Memory;
+}
+
+namespace Core::Timing {
+struct EventType;
+}
+
 namespace Kernel {

-class ClientPort;
-class ClientSession;
 class HLERequestContext;
 class KernelCore;
-class ServerSession;
 class Session;
 class SessionRequestHandler;
 class Thread;
@@ -41,6 +46,12 @@ public:
    explicit ServerSession(KernelCore& kernel);
    ~ServerSession() override;

+    friend class Session;
+
+    static ResultVal<std::shared_ptr<ServerSession>> Create(KernelCore& kernel,
+                                                            std::shared_ptr<Session> parent,
+                                                            std::string name = "Unknown");
+
    std::string GetTypeName() const override {
        return "ServerSession";
    }
@@ -62,18 +73,6 @@ public:
        return parent.get();
    }

-    using SessionPair = std::pair<std::shared_ptr<ServerSession>, std::shared_ptr<ClientSession>>;
-
-    /**
-     * Creates a pair of ServerSession and an associated ClientSession.
-     * @param kernel      The kernal instance to create the session pair under.
-     * @param name        Optional name of the ports.
-     * @param client_port Optional The ClientPort that spawned this session.
-     * @return The created session tuple
-     */
-    static SessionPair CreateSessionPair(KernelCore& kernel, const std::string& name = "Unknown",
-                                         std::shared_ptr<ClientPort> client_port = nullptr);
-
    /**
     * Sets the HLE handler for the session. This handler will be called to service IPC requests
     * instead of the regular IPC machinery. (The regular IPC machinery is currently not
@@ -85,10 +84,13 @@ public:

    /**
     * Handle a sync request from the emulated application.
+     *
     * @param thread Thread that initiated the request.
+     * @param memory Memory context to handle the sync request under.
+     *
     * @returns ResultCode from the operation.
     */
-    ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread);
+    ResultCode HandleSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory);

    bool ShouldWait(const Thread* thread) const override;

@@ -121,15 +123,11 @@ public:
    }

 private:
-    /**
-     * Creates a server session. The server session can have an optional HLE handler,
-     * which will be invoked to handle the IPC requests that this session receives.
-     * @param kernel The kernel instance to create this server session under.
-     * @param name Optional name of the server session.
-     * @return The created server session
-     */
-    static ResultVal<std::shared_ptr<ServerSession>> Create(KernelCore& kernel,
-                                                            std::string name = "Unknown");
+    /// Queues a sync request from the emulated application.
+    ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Memory::Memory& memory);
+
+    /// Completes a sync request from the emulated application.
+    ResultCode CompleteSyncRequest();

    /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an
    /// object handle.
@@ -159,6 +157,12 @@ private:

    /// The name of this session (optional)
    std::string name;
+
+    /// Core timing event used to schedule the service request at some point in the future
+    std::shared_ptr<Core::Timing::EventType> request_event;
+
+    /// Queue of scheduled service requests
+    Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -1,12 +1,36 @@
-// Copyright 2015 Citra Emulator Project
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
+#include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
-#include "core/hle/kernel/thread.h"

 namespace Kernel {

-Session::Session() {}
-Session::~Session() {}
+Session::Session(KernelCore& kernel) : WaitObject{kernel} {}
+Session::~Session() = default;
+
+Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
+    auto session{std::make_shared<Session>(kernel)};
+    auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
+    auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
+
+    session->name = std::move(name);
+    session->client = client_session;
+    session->server = server_session;
+
+    return std::make_pair(std::move(client_session), std::move(server_session));
+}
+
+bool Session::ShouldWait(const Thread* thread) const {
+    UNIMPLEMENTED();
+    return {};
+}
+
+void Session::Acquire(Thread* thread) {
+    UNIMPLEMENTED();
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -1,27 +1,64 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2019 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

 #pragma once

-#include "core/hle/kernel/object.h"
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "core/hle/kernel/wait_object.h"

 namespace Kernel {

 class ClientSession;
-class ClientPort;
 class ServerSession;

 /**
 * Parent structure to link the client and server endpoints of a session with their associated
- * client port. The client port need not exist, as is the case for portless sessions like the
- * FS File and Directory sessions. When one of the endpoints of a session is destroyed, its
- * corresponding field in this structure will be set to nullptr.
+ * client port. The endpoints are held as weak references, so either may have expired.
 */
-class Session final {
+class Session final : public WaitObject {
 public:
-    ClientSession* client = nullptr;  ///< The client endpoint of the session.
-    ServerSession* server = nullptr;  ///< The server endpoint of the session.
-    std::shared_ptr<ClientPort> port; ///< The port that this session is associated with (optional).
+    explicit Session(KernelCore& kernel);
+    ~Session() override;
+
+    using SessionPair = std::pair<std::shared_ptr<ClientSession>, std::shared_ptr<ServerSession>>;
+
+    static SessionPair Create(KernelCore& kernel, std::string name = "Unknown");
+
+    std::string GetName() const override {
+        return name;
+    }
+
+    static constexpr HandleType HANDLE_TYPE = HandleType::Session;
+    HandleType GetHandleType() const override {
+        return HANDLE_TYPE;
+    }
+
+    bool ShouldWait(const Thread* thread) const override;
+
+    void Acquire(Thread* thread) override;
+
+    /// Returns the client endpoint, or nullptr if it no longer exists.
+    std::shared_ptr<ClientSession> Client() {
+        // weak_ptr::lock() already yields an empty shared_ptr once the endpoint has
+        // expired, so no explicit fallback branch is needed.
+        return client.lock();
+    }
+
+    /// Returns the server endpoint, or nullptr if it no longer exists.
+    std::shared_ptr<ServerSession> Server() {
+        // weak_ptr::lock() already yields an empty shared_ptr once the endpoint has
+        // expired, so no explicit fallback branch is needed.
+        return server.lock();
+    }
+
+private:
+    std::string name;
+    std::weak_ptr<ClientSession> client;
+    std::weak_ptr<ServerSession> server;
 };
+
 } // namespace Kernel
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -6,7 +6,6 @@

 #include <memory>
 #include <string>
-#include <vector>

 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -332,7 +332,9 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
 /// Connect to an OS service given the port name, returns the handle to the port to out
 static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
                                     VAddr port_name_address) {
-    if (!Memory::IsValidVirtualAddress(port_name_address)) {
+    auto& memory = system.Memory();
+
+    if (!memory.IsValidVirtualAddress(port_name_address)) {
        LOG_ERROR(Kernel_SVC,
                  "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
                  port_name_address);
@@ -341,7 +343,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,

    static constexpr std::size_t PortNameMaxLength = 11;
    // Read 1 char beyond the max allowed port name to detect names that are too long.
-    std::string port_name = Memory::ReadCString(port_name_address, PortNameMaxLength + 1);
+    const std::string port_name = memory.ReadCString(port_name_address, PortNameMaxLength + 1);
    if (port_name.size() > PortNameMaxLength) {
        LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
                  port_name.size());
@@ -379,11 +381,12 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {

    LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());

-    system.PrepareReschedule();
+    auto thread = system.CurrentScheduler().GetCurrentThread();
+    thread->InvalidateWakeupCallback();
+    thread->SetStatus(ThreadStatus::WaitIPC);
+    system.PrepareReschedule(thread->GetProcessorID());

-    // TODO(Subv): svcSendSyncRequest should put the caller thread to sleep while the server
-    // responds and cause a reschedule.
-    return session->SendSyncRequest(system.CurrentScheduler().GetCurrentThread());
+    return session->SendSyncRequest(SharedFrom(thread), system.Memory());
 }

 /// Get the ID for the specified thread.
@@ -452,7 +455,8 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
    LOG_TRACE(Kernel_SVC, "called handles_address=0x{:X}, handle_count={}, nano_seconds={}",
              handles_address, handle_count, nano_seconds);

-    if (!Memory::IsValidVirtualAddress(handles_address)) {
+    auto& memory = system.Memory();
+    if (!memory.IsValidVirtualAddress(handles_address)) {
        LOG_ERROR(Kernel_SVC,
                  "Handle address is not a valid virtual address, handle_address=0x{:016X}",
                  handles_address);
@@ -474,7 +478,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();

    for (u64 i = 0; i < handle_count; ++i) {
-        const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle));
+        const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
        const auto object = handle_table.Get<WaitObject>(handle);

        if (object == nullptr) {
@@ -616,13 +620,15 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
            return;
        }

+        auto& memory = system.Memory();
+
        // This typically is an error code so we're going to assume this is the case
        if (sz == sizeof(u32)) {
-            LOG_CRITICAL(Debug_Emulated, "debug_buffer_err_code={:X}", Memory::Read32(addr));
+            LOG_CRITICAL(Debug_Emulated, "debug_buffer_err_code={:X}", memory.Read32(addr));
        } else {
            // We don't know what's in here so we'll hexdump it
            debug_buffer.resize(sz);
-            Memory::ReadBlock(addr, debug_buffer.data(), sz);
+            memory.ReadBlock(addr, debug_buffer.data(), sz);
            std::string hexdump;
            for (std::size_t i = 0; i < debug_buffer.size(); i++) {
                hexdump += fmt::format("{:02X} ", debug_buffer[i]);
@@ -712,7 +718,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre
    }

    std::string str(len, '\0');
-    Memory::ReadBlock(address, str.data(), str.size());
+    system.Memory().ReadBlock(address, str.data(), str.size());
    LOG_DEBUG(Debug_Emulated, "{}", str);
 }

@@ -1115,7 +1121,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
        std::fill(ctx.vector_registers.begin() + 16, ctx.vector_registers.end(), u128{});
    }

-    Memory::WriteBlock(thread_context, &ctx, sizeof(ctx));
+    system.Memory().WriteBlock(thread_context, &ctx, sizeof(ctx));
    return RESULT_SUCCESS;
 }

@@ -1275,20 +1281,21 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
        return ERR_INVALID_HANDLE;
    }

+    auto& memory = system.Memory();
    const auto& vm_manager = process->VMManager();
    const MemoryInfo memory_info = vm_manager.QueryMemory(address);

-    Memory::Write64(memory_info_address, memory_info.base_address);
-    Memory::Write64(memory_info_address + 8, memory_info.size);
-    Memory::Write32(memory_info_address + 16, memory_info.state);
-    Memory::Write32(memory_info_address + 20, memory_info.attributes);
-    Memory::Write32(memory_info_address + 24, memory_info.permission);
-    Memory::Write32(memory_info_address + 32, memory_info.ipc_ref_count);
-    Memory::Write32(memory_info_address + 28, memory_info.device_ref_count);
-    Memory::Write32(memory_info_address + 36, 0);
+    memory.Write64(memory_info_address, memory_info.base_address);
+    memory.Write64(memory_info_address + 8, memory_info.size);
+    memory.Write32(memory_info_address + 16, memory_info.state);
+    memory.Write32(memory_info_address + 20, memory_info.attributes);
+    memory.Write32(memory_info_address + 24, memory_info.permission);
+    memory.Write32(memory_info_address + 32, memory_info.ipc_ref_count);
+    memory.Write32(memory_info_address + 28, memory_info.device_ref_count);
+    memory.Write32(memory_info_address + 36, 0);

    // Page info appears to be currently unused by the kernel and is always set to zero.
-    Memory::Write32(page_info_address, 0);
+    memory.Write32(page_info_address, 0);

    return RESULT_SUCCESS;
 }
@@ -1643,8 +1650,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
 }

 /// Signal process wide key
-static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr,
-                                       s32 target) {
+static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_addr, s32 target) {
    LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
              condition_variable_addr, target);

@@ -1672,6 +1678,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var

        const std::size_t current_core = system.CurrentCoreIndex();
        auto& monitor = system.Monitor();
+        auto& memory = system.Memory();

        // Atomically read the value of the mutex.
        u32 mutex_val = 0;
@@ -1681,7 +1688,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
            monitor.SetExclusive(current_core, mutex_address);

            // If the mutex is not yet acquired, acquire it.
-            mutex_val = Memory::Read32(mutex_address);
+            mutex_val = memory.Read32(mutex_address);

            if (mutex_val != 0) {
                update_val = mutex_val | Mutex::MutexHasWaitersFlag;
@@ -1718,8 +1725,6 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
            system.PrepareReschedule(thread->GetProcessorID());
        }
    }
-
-    return RESULT_SUCCESS;
 }

 // Wait for an address (via Address Arbiter)
@@ -1773,6 +1778,17 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

+/// No-op handler for the KernelDebug SVC (table entry 0x3C); all arguments are ignored.
+static void KernelDebug(Core::System& /*system*/, u32 /*kernel_debug_type*/, u64 /*param1*/,
+                        u64 /*param2*/, u64 /*param3*/) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
+/// No-op handler for the ChangeKernelTraceState SVC (table entry 0x3D).
+static void ChangeKernelTraceState(Core::System& /*system*/, u32 /*trace_state*/) {
+    // Intentionally do nothing, as this does nothing in released kernel binaries.
+}
+
 /// This returns the total CPU ticks elapsed since the CPU was powered-on
 static u64 GetSystemTick(Core::System& system) {
    LOG_TRACE(Kernel_SVC, "called");
@@ -2284,12 +2300,13 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
        return ERR_INVALID_ADDRESS_STATE;
    }

+    auto& memory = system.Memory();
    const auto& process_list = kernel.GetProcessList();
    const auto num_processes = process_list.size();
    const auto copy_amount = std::min(std::size_t{out_process_ids_size}, num_processes);

    for (std::size_t i = 0; i < copy_amount; ++i) {
-        Memory::Write64(out_process_ids, process_list[i]->GetProcessID());
+        memory.Write64(out_process_ids, process_list[i]->GetProcessID());
        out_process_ids += sizeof(u64);
    }

@@ -2323,13 +2340,14 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
        return ERR_INVALID_ADDRESS_STATE;
    }

+    auto& memory = system.Memory();
    const auto& thread_list = current_process->GetThreadList();
    const auto num_threads = thread_list.size();
    const auto copy_amount = std::min(std::size_t{out_thread_ids_size}, num_threads);

    auto list_iter = thread_list.cbegin();
    for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) {
-        Memory::Write64(out_thread_ids, (*list_iter)->GetThreadID());
+        memory.Write64(out_thread_ids, (*list_iter)->GetThreadID());
        out_thread_ids += sizeof(u64);
    }

@@ -2408,8 +2426,8 @@ static const FunctionDef SVC_Table[] = {
    {0x39, nullptr, "Unknown"},
    {0x3A, nullptr, "Unknown"},
    {0x3B, nullptr, "Unknown"},
-    {0x3C, nullptr, "DumpInfo"},
-    {0x3D, nullptr, "DumpInfoNew"},
+    {0x3C, SvcWrap<KernelDebug>, "KernelDebug"},
+    {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"},
    {0x3E, nullptr, "Unknown"},
    {0x3F, nullptr, "Unknown"},
    {0x40, nullptr, "CreateSession"},
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -112,11 +112,6 @@ void SvcWrap(Core::System& system) {
    FuncReturn(system, retval);
 }

-template <ResultCode func(Core::System&, u64, s32)>
-void SvcWrap(Core::System& system) {
-    FuncReturn(system, func(system, Param(system, 0), static_cast<s32>(Param(system, 1))).raw);
-}
-
 template <ResultCode func(Core::System&, u64, u32)>
 void SvcWrap(Core::System& system) {
    FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
@@ -311,11 +306,27 @@ void SvcWrap(Core::System& system) {
    func(system);
 }

+template <void func(Core::System&, u32)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)));
+}
+
+template <void func(Core::System&, u32, u64, u64, u64)>
+void SvcWrap(Core::System& system) {
+    func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
+         Param(system, 3));
+}
+
 template <void func(Core::System&, s64)>
 void SvcWrap(Core::System& system) {
    func(system, static_cast<s64>(Param(system, 0)));
 }

+template <void func(Core::System&, u64, s32)>
+void SvcWrap(Core::System& system) {
+    func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
+}
+
 template <void func(Core::System&, u64, u64)>
 void SvcWrap(Core::System& system) {
    func(system, Param(system, 0), Param(system, 1));
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -162,13 +162,13 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
        return ERR_INVALID_PROCESSOR_ID;
    }

-    if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
+    auto& system = Core::System::GetInstance();
+    if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) {
        LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
        // TODO (bunnei): Find the correct error code to use here
        return RESULT_UNKNOWN;
    }

-    auto& system = Core::System::GetInstance();
    std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);

    thread->thread_id = kernel.CreateNewThreadID();
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -5,7 +5,6 @@
 #pragma once

 #include <memory>
-#include <vector>

 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/physical_memory.h"
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -16,7 +16,6 @@
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_setup.h"

 namespace Kernel {
 namespace {
@@ -786,19 +785,21 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
 }

 void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
+    auto& memory = system.Memory();
+
    switch (vma.type) {
    case VMAType::Free:
-        Memory::UnmapRegion(page_table, vma.base, vma.size);
+        memory.UnmapRegion(page_table, vma.base, vma.size);
        break;
    case VMAType::AllocatedMemoryBlock:
-        Memory::MapMemoryRegion(page_table, vma.base, vma.size,
-                                vma.backing_block->data() + vma.offset);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size,
+                               vma.backing_block->data() + vma.offset);
        break;
    case VMAType::BackingMemory:
-        Memory::MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
+        memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
        break;
    case VMAType::MMIO:
-        Memory::MapIoRegion(page_table, vma.base, vma.size, vma.mmio_handler);
+        memory.MapIoRegion(page_table, vma.base, vma.size, vma.mmio_handler);
        break;
    }
 }
--- a/src/core/hle/kernel/wait_object.h
+++ b/src/core/hle/kernel/wait_object.h
@@ -4,8 +4,9 @@

 #pragma once

+#include <memory>
 #include <vector>
-#include <boost/smart_ptr/intrusive_ptr.hpp>
+
 #include "core/hle/kernel/object.h"

 namespace Kernel {
--- a/src/core/hle/kernel/writable_event.h
+++ b/src/core/hle/kernel/writable_event.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <memory>
+
 #include "core/hle/kernel/object.h"

 namespace Kernel {
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -43,7 +43,8 @@ public:
    IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core,
              std::string&& device_name, std::string&& unique_name)
        : ServiceFramework("IAudioOut"), audio_core(audio_core),
-          device_name(std::move(device_name)), audio_params(audio_params) {
+          device_name(std::move(device_name)),
+          audio_params(audio_params), main_memory{system.Memory()} {
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, &IAudioOut::GetAudioOutState, "GetAudioOutState"},
@@ -137,7 +138,7 @@ private:
        const u64 tag{rp.Pop<u64>()};

        std::vector<s16> samples(audio_buffer.buffer_size / sizeof(s16));
-        Memory::ReadBlock(audio_buffer.buffer, samples.data(), audio_buffer.buffer_size);
+        main_memory.ReadBlock(audio_buffer.buffer, samples.data(), audio_buffer.buffer_size);

        if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
            IPC::ResponseBuilder rb{ctx, 2};
@@ -209,6 +210,7 @@ private:

    /// This is the event handle used to check if the audio buffer was released
    Kernel::EventPair buffer_event;
+    Memory::Memory& main_memory;
 };

 AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} {
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -49,8 +49,9 @@ public:

        system_event =
            Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent");
-        renderer = std::make_unique<AudioCore::AudioRenderer>(
-            system.CoreTiming(), audren_params, system_event.writable, instance_number);
+        renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(),
+                                                              audren_params, system_event.writable,
+                                                              instance_number);
    }

 private:
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -256,8 +256,8 @@ public:

        // TODO(DarkLordZach): Verify that this is the correct behavior.
        // Build entry index now to save time later.
-        BuildEntryIndex(entries, backend->GetFiles(), FileSys::File);
-        BuildEntryIndex(entries, backend->GetSubdirectories(), FileSys::Directory);
+        BuildEntryIndex(entries, backend->GetFiles(), FileSys::EntryType::File);
+        BuildEntryIndex(entries, backend->GetSubdirectories(), FileSys::EntryType::Directory);
    }

 private:
@@ -391,13 +391,10 @@ public:
    }

    void RenameFile(Kernel::HLERequestContext& ctx) {
-        std::vector<u8> buffer;
-        buffer.resize(ctx.BufferDescriptorX()[0].Size());
-        Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
+        std::vector<u8> buffer = ctx.ReadBuffer(0);
        const std::string src_name = Common::StringFromBuffer(buffer);

-        buffer.resize(ctx.BufferDescriptorX()[1].Size());
-        Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
+        buffer = ctx.ReadBuffer(1);
        const std::string dst_name = Common::StringFromBuffer(buffer);

        LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name);
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -77,15 +77,14 @@ IAppletResource::IAppletResource(Core::System& system)
    GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);

    // Register update callbacks
-    auto& core_timing = system.CoreTiming();
    pad_update_event =
-        core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
+        Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
            UpdateControllers(userdata, cycles_late);
        });

    // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)

-    core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
+    system.CoreTiming().ScheduleEvent(pad_update_ticks, pad_update_event);

    ReloadInputDevices();
 }
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -69,7 +69,7 @@ private:

    std::shared_ptr<Kernel::SharedMemory> shared_mem;

-    Core::Timing::EventType* pad_update_event;
+    std::shared_ptr<Core::Timing::EventType> pad_update_event;
    Core::System& system;

    std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -140,9 +140,10 @@ public:
            rb.Push(ERROR_INVALID_SIZE);
            return;
        }
+
        // Read NRR data from memory
        std::vector<u8> nrr_data(nrr_size);
-        Memory::ReadBlock(nrr_address, nrr_data.data(), nrr_size);
+        system.Memory().ReadBlock(nrr_address, nrr_data.data(), nrr_size);
        NRRHeader header;
        std::memcpy(&header, nrr_data.data(), sizeof(NRRHeader));

@@ -291,7 +292,7 @@ public:

        // Read NRO data from memory
        std::vector<u8> nro_data(nro_size);
-        Memory::ReadBlock(nro_address, nro_data.data(), nro_size);
+        system.Memory().ReadBlock(nro_address, nro_data.data(), nro_size);

        SHA256Hash hash{};
        mbedtls_sha256_ret(nro_data.data(), nro_data.size(), hash.data(), 0);
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -17,7 +17,8 @@ namespace Service::LM {

 class ILogger final : public ServiceFramework<ILogger> {
 public:
-    ILogger(Manager& manager) : ServiceFramework("ILogger"), manager(manager) {
+    explicit ILogger(Manager& manager_, Memory::Memory& memory_)
+        : ServiceFramework("ILogger"), manager{manager_}, memory{memory_} {
        static const FunctionInfo functions[] = {
            {0, &ILogger::Log, "Log"},
            {1, &ILogger::SetDestination, "SetDestination"},
@@ -35,15 +36,15 @@ private:
        MessageHeader header{};
        VAddr addr{ctx.BufferDescriptorX()[0].Address()};
        const VAddr end_addr{addr + ctx.BufferDescriptorX()[0].size};
-        Memory::ReadBlock(addr, &header, sizeof(MessageHeader));
+        memory.ReadBlock(addr, &header, sizeof(MessageHeader));
        addr += sizeof(MessageHeader);

        FieldMap fields;
        while (addr < end_addr) {
-            const auto field = static_cast<Field>(Memory::Read8(addr++));
-            const auto length = Memory::Read8(addr++);
+            const auto field = static_cast<Field>(memory.Read8(addr++));
+            const auto length = memory.Read8(addr++);

-            if (static_cast<Field>(Memory::Read8(addr)) == Field::Skip) {
+            if (static_cast<Field>(memory.Read8(addr)) == Field::Skip) {
                ++addr;
            }

@@ -54,7 +55,7 @@ private:
            }

            std::vector<u8> data(length);
-            Memory::ReadBlock(addr, data.data(), length);
+            memory.ReadBlock(addr, data.data(), length);
            fields.emplace(field, std::move(data));
        }

@@ -74,11 +75,13 @@ private:
    }

    Manager& manager;
+    Memory::Memory& memory;
 };

 class LM final : public ServiceFramework<LM> {
 public:
-    explicit LM(Manager& manager) : ServiceFramework{"lm"}, manager(manager) {
+    explicit LM(Manager& manager_, Memory::Memory& memory_)
+        : ServiceFramework{"lm"}, manager{manager_}, memory{memory_} {
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, &LM::OpenLogger, "OpenLogger"},
@@ -94,14 +97,16 @@ private:

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ILogger>(manager);
+        rb.PushIpcInterface<ILogger>(manager, memory);
    }

    Manager& manager;
+    Memory::Memory& memory;
 };

 void InstallInterfaces(Core::System& system) {
-    std::make_shared<LM>(system.GetLogManager())->InstallAsService(system.ServiceManager());
+    std::make_shared<LM>(system.GetLogManager(), system.Memory())
+        ->InstallAsService(system.ServiceManager());
 }

 } // namespace Service::LM
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -189,7 +189,7 @@ private:
        LOG_DEBUG(Service_NFP, "called");

        auto nfc_event = nfp_interface.GetNFCEvent();
-        if (!nfc_event->ShouldWait(Kernel::GetCurrentThread()) && !has_attached_handle) {
+        if (!nfc_event->ShouldWait(&ctx.GetThread()) && !has_attached_handle) {
            device_state = DeviceState::TagFound;
            nfc_event->Clear();
        }
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -191,8 +191,8 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
        std::memcpy(entries.data(), input2.data(),
                    params.num_entries * sizeof(Tegra::CommandListHeader));
    } else {
-        Memory::ReadBlock(params.address, entries.data(),
-                          params.num_entries * sizeof(Tegra::CommandListHeader));
+        system.Memory().ReadBlock(params.address, entries.data(),
+                                  params.num_entries * sizeof(Tegra::CommandListHeader));
    }
    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -37,8 +37,8 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
    displays.emplace_back(4, "Null", system);

    // Schedule the screen composition events
-    composition_event = system.CoreTiming().RegisterEvent(
-        "ScreenComposition", [this](u64 userdata, s64 cycles_late) {
+    composition_event =
+        Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
            Compose();
            const auto ticks =
                Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -103,7 +103,7 @@ private:
    u32 swap_interval = 1;

    /// Event that handles screen composition.
-    Core::Timing::EventType* composition_event;
+    std::shared_ptr<Core::Timing::EventType> composition_event;

    Core::System& system;
 };
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -186,7 +186,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
        UNIMPLEMENTED_MSG("command_type={}", static_cast<int>(context.GetCommandType()));
    }

-    context.WriteToOutgoingCommandBuffer(*Kernel::GetCurrentThread());
+    context.WriteToOutgoingCommandBuffer(context.GetThread());

    return RESULT_SUCCESS;
 }
@@ -201,7 +201,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
    auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system);
    system.GetFileSystemController().CreateFactories(*system.GetFilesystem(), false);

-    SM::ServiceManager::InstallInterfaces(sm);
+    SM::ServiceManager::InstallInterfaces(sm, system.Kernel());

    Account::InstallInterfaces(system);
    AM::InstallInterfaces(*sm, nv_flinger, system);
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,10 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {

    IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
    rb.Push(RESULT_SUCCESS);
-    std::shared_ptr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
-    rb.PushMoveObjects(session);
-
-    LOG_DEBUG(Service, "session={}", session->GetObjectId());
+    rb.PushMoveObjects(ctx.Session()->GetParent()->Client());
 }

 void Controller::DuplicateSessionEx(Kernel::HLERequestContext& ctx) {
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -36,10 +36,11 @@ static ResultCode ValidateServiceName(const std::string& name) {
    return RESULT_SUCCESS;
 }

-void ServiceManager::InstallInterfaces(std::shared_ptr<ServiceManager> self) {
+void ServiceManager::InstallInterfaces(std::shared_ptr<ServiceManager> self,
+                                       Kernel::KernelCore& kernel) {
    ASSERT(self->sm_interface.expired());

-    auto sm = std::make_shared<SM>(self);
+    auto sm = std::make_shared<SM>(self, kernel);
    sm->InstallAsNamedPort();
    self->sm_interface = sm;
    self->controller_interface = std::make_unique<Controller>();
@@ -114,8 +115,6 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {

    std::string name(name_buf.begin(), end);

-    // TODO(yuriks): Permission checks go here
-
    auto client_port = service_manager->GetServicePort(name);
    if (client_port.Failed()) {
        IPC::ResponseBuilder rb{ctx, 2};
@@ -127,14 +126,22 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
        return;
    }

-    auto session = client_port.Unwrap()->Connect();
-    ASSERT(session.Succeeded());
-    if (session.Succeeded()) {
-        LOG_DEBUG(Service_SM, "called service={} -> session={}", name, (*session)->GetObjectId());
-        IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
-        rb.Push(session.Code());
-        rb.PushMoveObjects(std::move(session).Unwrap());
+    auto [client, server] = Kernel::Session::Create(kernel, name);
+
+    const auto& server_port = client_port.Unwrap()->GetServerPort();
+    if (server_port->GetHLEHandler()) {
+        server_port->GetHLEHandler()->ClientConnected(server);
+    } else {
+        server_port->AppendPendingSession(server);
    }
+
+    // Wake the threads waiting on the ServerPort
+    server_port->WakeupAllWaitingThreads();
+
+    LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushMoveObjects(std::move(client));
 }

 void SM::RegisterService(Kernel::HLERequestContext& ctx) {
@@ -178,8 +185,8 @@ void SM::UnregisterService(Kernel::HLERequestContext& ctx) {
    rb.Push(service_manager->UnregisterService(name));
 }

-SM::SM(std::shared_ptr<ServiceManager> service_manager)
-    : ServiceFramework("sm:", 4), service_manager(std::move(service_manager)) {
+SM::SM(std::shared_ptr<ServiceManager> service_manager, Kernel::KernelCore& kernel)
+    : ServiceFramework{"sm:", 4}, service_manager{std::move(service_manager)}, kernel{kernel} {
    static const FunctionInfo functions[] = {
        {0x00000000, &SM::Initialize, "Initialize"},
        {0x00000001, &SM::GetService, "GetService"},
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -18,6 +18,7 @@
 namespace Kernel {
 class ClientPort;
 class ClientSession;
+class KernelCore;
 class ServerPort;
 class SessionRequestHandler;
 } // namespace Kernel
@@ -29,7 +30,7 @@ class Controller;
 /// Interface to "sm:" service
 class SM final : public ServiceFramework<SM> {
 public:
-    explicit SM(std::shared_ptr<ServiceManager> service_manager);
+    explicit SM(std::shared_ptr<ServiceManager> service_manager, Kernel::KernelCore& kernel);
    ~SM() override;

 private:
@@ -39,11 +40,12 @@ private:
    void UnregisterService(Kernel::HLERequestContext& ctx);

    std::shared_ptr<ServiceManager> service_manager;
+    Kernel::KernelCore& kernel;
 };

 class ServiceManager {
 public:
-    static void InstallInterfaces(std::shared_ptr<ServiceManager> self);
+    static void InstallInterfaces(std::shared_ptr<ServiceManager> self, Kernel::KernelCore& kernel);

    ServiceManager();
    ~ServiceManager();
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,8 +5,18 @@
 #pragma once

 #include <cstddef>
+#include <memory>
 #include <string>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}
+
+namespace Core {
+class System;
+}

 namespace Kernel {
 class Process;
@@ -36,41 +46,369 @@ enum : VAddr {
    KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };

-/// Changes the currently active page table to that of
-/// the given process instance.
-void SetCurrentPageTable(Kernel::Process& process);
+/// Central class that handles all memory operations and state.
+class Memory {
+public:
+    explicit Memory(Core::System& system);
+    ~Memory();
+
+    Memory(const Memory&) = delete;
+    Memory& operator=(const Memory&) = delete;
+
+    Memory(Memory&&) = default;
+    Memory& operator=(Memory&&) = default;
+
+    /**
+     * Changes the currently active page table to that of the given process instance.
+     *
+     * @param process The process to use the page table of.
+     */
+    void SetCurrentPageTable(Kernel::Process& process);
+
+    /**
+     * Maps an allocated buffer onto a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to start mapping at. Must be page-aligned.
+     * @param size       The amount of bytes to map. Must be page-aligned.
+     * @param target     Buffer with the memory backing the mapping. Must be of length at least
+     *                   `size`.
+     */
+    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
+
+    /**
+     * Maps a region of the emulated process address space as an IO region.
+     *
+     * @param page_table   The page table of the emulated process.
+     * @param base         The address to start mapping at. Must be page-aligned.
+     * @param size         The amount of bytes to map. Must be page-aligned.
+     * @param mmio_handler The handler that backs the mapping.
+     */
+    void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer mmio_handler);
+
+    /**
+     * Unmaps a region of the emulated process address space.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The address to begin unmapping at.
+     * @param size       The amount of bytes to unmap.
+     */
+    void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
+
+    /**
+     * Adds a memory hook to intercept reads and writes to given region of memory.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The starting address to apply the hook to.
+     * @param size       The size of the memory region to apply the hook to, in bytes.
+     * @param hook       The hook to apply to the region of memory.
+     */
+    void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                      Common::MemoryHookPointer hook);
+
+    /**
+     * Removes a memory hook from a given range of memory.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base       The starting address to remove the hook from.
+     * @param size       The size of the memory region to remove the hook from, in bytes.
+     * @param hook       The hook to remove from the specified region of memory.
+     */
+    void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                         Common::MemoryHookPointer hook);
+
+    /**
+     * Checks whether or not the supplied address is a valid virtual
+     * address for the given process.
+     *
+     * @param process The emulated process to check the address against.
+     * @param vaddr   The virtual address to check the validity of.
+     *
+     * @returns True if the given virtual address is valid, false otherwise.
+     */
+    bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr) const;
+
+    /**
+     * Checks whether or not the supplied address is a valid virtual
+     * address for the current process.
+     *
+     * @param vaddr The virtual address to check the validity of.
+     *
+     * @returns True if the given virtual address is valid, false otherwise.
+     */
+    bool IsValidVirtualAddress(VAddr vaddr) const;
+
+    /**
+     * Gets a pointer to the given address.
+     *
+     * @param vaddr Virtual address to retrieve a pointer to.
+     *
+     * @returns The pointer to the given address, if the address is valid.
+     *          If the address is not valid, nullptr will be returned.
+     */
+    u8* GetPointer(VAddr vaddr);
+
+    /**
+     * Gets a pointer to the given address.
+     *
+     * @param vaddr Virtual address to retrieve a pointer to.
+     *
+     * @returns The pointer to the given address, if the address is valid.
+     *          If the address is not valid, nullptr will be returned.
+     */
+    const u8* GetPointer(VAddr vaddr) const;
+
+    /**
+     * Reads an 8-bit unsigned value from the current process' address space
+     * at the given virtual address.
+     *
+     * @param addr The virtual address to read the 8-bit value from.
+     *
+     * @returns the read 8-bit unsigned value.
+     */
+    u8 Read8(VAddr addr);
+
+    /**
+     * Reads a 16-bit unsigned value from the current process' address space
+     * at the given virtual address.
+     *
+     * @param addr The virtual address to read the 16-bit value from.
+     *
+     * @returns the read 16-bit unsigned value.
+     */
+    u16 Read16(VAddr addr);
+
+    /**
+     * Reads a 32-bit unsigned value from the current process' address space
+     * at the given virtual address.
+     *
+     * @param addr The virtual address to read the 32-bit value from.
+     *
+     * @returns the read 32-bit unsigned value.
+     */
+    u32 Read32(VAddr addr);
+
+    /**
+     * Reads a 64-bit unsigned value from the current process' address space
+     * at the given virtual address.
+     *
+     * @param addr The virtual address to read the 64-bit value from.
+     *
+     * @returns the read 64-bit unsigned value.
+     */
+    u64 Read64(VAddr addr);
+
+    /**
+     * Writes an 8-bit unsigned integer to the given virtual address in
+     * the current process' address space.
+     *
+     * @param addr The virtual address to write the 8-bit unsigned integer to.
+     * @param data The 8-bit unsigned integer to write to the given virtual address.
+     *
+     * @post The memory at the given virtual address contains the specified data value.
+     */
+    void Write8(VAddr addr, u8 data);
+
+    /**
+     * Writes a 16-bit unsigned integer to the given virtual address in
+     * the current process' address space.
+     *
+     * @param addr The virtual address to write the 16-bit unsigned integer to.
+     * @param data The 16-bit unsigned integer to write to the given virtual address.
+     *
+     * @post The memory range [addr, addr + sizeof(data)) contains the given data value.
+     */
+    void Write16(VAddr addr, u16 data);
+
+    /**
+     * Writes a 32-bit unsigned integer to the given virtual address in
+     * the current process' address space.
+     *
+     * @param addr The virtual address to write the 32-bit unsigned integer to.
+     * @param data The 32-bit unsigned integer to write to the given virtual address.
+     *
+     * @post The memory range [addr, addr + sizeof(data)) contains the given data value.
+     */
+    void Write32(VAddr addr, u32 data);
+
+    /**
+     * Writes a 64-bit unsigned integer to the given virtual address in
+     * the current process' address space.
+     *
+     * @param addr The virtual address to write the 64-bit unsigned integer to.
+     * @param data The 64-bit unsigned integer to write to the given virtual address.
+     *
+     * @post The memory range [addr, addr + sizeof(data)) contains the given data value.
+     */
+    void Write64(VAddr addr, u64 data);
+
+    /**
+     * Reads a null-terminated string from the given virtual address.
+     * This function will continually read characters until either:
+     *
+     * - A null character ('\0') is reached.
+     * - max_length characters have been read.
+     *
+     * @note The final null-terminating character (if found) is not included
+     *       in the returned string.
+     *
+     * @param vaddr      The address to begin reading the string from.
+     * @param max_length The maximum length of the string to read in characters.
+     *
+     * @returns The read string.
+     */
+    std::string ReadCString(VAddr vaddr, std::size_t max_length);
+
+    /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, dest_buffer + size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                   std::size_t size);
+
+    /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, dest_buffer + size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
+
+    /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, dest_addr + size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     * @post If a write is performed into a region of memory that is considered cached
+     *       rasterizer memory, this will cause the currently active rasterizer to be notified
+     *       and will mark that region as invalidated to caches that the active
+     *       graphics backend may be maintaining over the course of execution.
+     */
+    void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                    std::size_t size);
+
+    /**
+     * Writes a range of bytes into the current process' address space at the specified
+     * virtual address.
+     *
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the current process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, dest_addr + size) in the current process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     * @post If a write is performed into a region of memory that is considered cached
+     *       rasterizer memory, this will cause the currently active rasterizer to be notified
+     *       and will mark that region as invalidated to caches that the active
+     *       graphics backend may be maintaining over the course of execution.
+     */
+    void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
+
+    /**
+     * Fills the specified address range within a process' address space with zeroes.
+     *
+     * @param process   The process that will have a portion of its memory zeroed out.
+     * @param dest_addr The starting virtual address of the range to zero out.
+     * @param size      The size of the address range to zero out, in bytes.
+     *
+     * @post The range [dest_addr, dest_addr + size) within the process' address space is
+     *       filled with zeroes.
+     */
+    void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size);
+
+    /**
+     * Fills the specified address range within the current process' address space with zeroes.
+     *
+     * @param dest_addr The starting virtual address of the range to zero out.
+     * @param size      The size of the address range to zero out, in bytes.
+     *
+     * @post The range [dest_addr, dest_addr + size) within the current process' address space is
+     *       filled with zeroes.
+     */
+    void ZeroBlock(VAddr dest_addr, std::size_t size);
+
+    /**
+     * Copies data within a process' address space to another location within the
+     * same address space.
+     *
+     * @param process   The process that will have data copied within its address space.
+     * @param dest_addr The destination virtual address to begin copying the data into.
+     * @param src_addr  The source virtual address to begin copying the data from.
+     * @param size      The size of the data to copy, in bytes.
+     *
+     * @post The range [dest_addr, dest_addr + size) within the process' address space contains the
+     *       same data within the range [src_addr, src_addr + size).
+     */
+    void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
+                   std::size_t size);
+
+    /**
+     * Copies data within the current process' address space to another location within the
+     * same address space.
+     *
+     * @param dest_addr The destination virtual address to begin copying the data into.
+     * @param src_addr  The source virtual address to begin copying the data from.
+     * @param size      The size of the data to copy, in bytes.
+     *
+     * @post The range [dest_addr, dest_addr + size) within the current process' address space
+     *       contains the same data within the range [src_addr, src_addr + size).
+     */
+    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
+
+    /**
+     * Marks each page within the specified address range as cached or uncached.
+     *
+     * @param vaddr  The virtual address indicating the start of the address range.
+     * @param size   The size of the address range in bytes.
+     * @param cached Whether or not any pages within the address range should be
+     *               marked as cached or uncached.
+     */
+    void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> impl;
+};

-/// Determines if the given VAddr is valid for the specified process.
-bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
-bool IsValidVirtualAddress(VAddr vaddr);
 /// Determines if the given VAddr is a kernel address
 bool IsKernelVirtualAddress(VAddr vaddr);

-u8 Read8(VAddr addr);
-u16 Read16(VAddr addr);
-u32 Read32(VAddr addr);
-u64 Read64(VAddr addr);
-
-void Write8(VAddr addr, u8 data);
-void Write16(VAddr addr, u16 data);
-void Write32(VAddr addr, u32 data);
-void Write64(VAddr addr, u64 data);
-
-void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer, std::size_t size);
-void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
-void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
-                std::size_t size);
-void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
-void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size);
-void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
-
-u8* GetPointer(VAddr vaddr);
-
-std::string ReadCString(VAddr vaddr, std::size_t max_length);
-
-/**
- * Mark each page touching the region as cached.
- */
-void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
-
 } // namespace Memory
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -20,18 +20,17 @@ namespace Memory {
 constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;

-StandardVmCallbacks::StandardVmCallbacks(const Core::System& system,
-                                         const CheatProcessMetadata& metadata)
+StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
    : metadata(metadata), system(system) {}

 StandardVmCallbacks::~StandardVmCallbacks() = default;

 void StandardVmCallbacks::MemoryRead(VAddr address, void* data, u64 size) {
-    ReadBlock(SanitizeAddress(address), data, size);
+    system.Memory().ReadBlock(SanitizeAddress(address), data, size);
 }

 void StandardVmCallbacks::MemoryWrite(VAddr address, const void* data, u64 size) {
-    WriteBlock(SanitizeAddress(address), data, size);
+    system.Memory().WriteBlock(SanitizeAddress(address), data, size);
 }

 u64 StandardVmCallbacks::HidKeysDown() {
@@ -186,7 +185,7 @@ CheatEngine::~CheatEngine() {
 }

 void CheatEngine::Initialize() {
-    event = core_timing.RegisterEvent(
+    event = Core::Timing::CreateEvent(
        "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id),
        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
--- a/src/core/memory/cheat_engine.h
+++ b/src/core/memory/cheat_engine.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <atomic>
+#include <memory>
 #include <vector>
 #include "common/common_types.h"
 #include "core/memory/dmnt_cheat_types.h"
@@ -23,7 +24,7 @@ namespace Memory {

 class StandardVmCallbacks : public DmntCheatVm::Callbacks {
 public:
-    StandardVmCallbacks(const Core::System& system, const CheatProcessMetadata& metadata);
+    StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata);
    ~StandardVmCallbacks() override;

    void MemoryRead(VAddr address, void* data, u64 size) override;
@@ -36,7 +37,7 @@ private:
    VAddr SanitizeAddress(VAddr address) const;

    const CheatProcessMetadata& metadata;
-    const Core::System& system;
+    Core::System& system;
 };

 // Intermediary class that parses a text file or other disk format for storing cheats into a
@@ -78,7 +79,7 @@ private:
    std::vector<CheatEntry> cheats;
    std::atomic_bool is_pending_reload{false};

-    Core::Timing::EventType* event{};
+    std::shared_ptr<Core::Timing::EventType> event;
    Core::Timing::CoreTiming& core_timing;
    Core::System& system;
 };
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -1,43 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-#include "common/memory_hook.h"
-
-namespace Common {
-struct PageTable;
-}
-
-namespace Memory {
-
-/**
- * Maps an allocated buffer onto a region of the emulated process address space.
- *
- * @param page_table The page table of the emulated process.
- * @param base The address to start mapping at. Must be page-aligned.
- * @param size The amount of bytes to map. Must be page-aligned.
- * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
- */
-void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
-
-/**
- * Maps a region of the emulated process address space as a IO region.
- * @param page_table The page table of the emulated process.
- * @param base The address to start mapping at. Must be page-aligned.
- * @param size The amount of bytes to map. Must be page-aligned.
- * @param mmio_handler The handler that backs the mapping.
- */
-void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
-                 Common::MemoryHookPointer mmio_handler);
-
-void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
-
-void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
-                  Common::MemoryHookPointer hook);
-void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
-                     Common::MemoryHookPointer hook);
-
-} // namespace Memory
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -147,7 +147,7 @@ json GetFullDataAuto(const std::string& timestamp, u64 title_id, Core::System& s
 }

 template <bool read_value, typename DescriptorType>
-json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer) {
+json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer, Memory::Memory& memory) {
    auto buffer_out = json::array();
    for (const auto& desc : buffer) {
        auto entry = json{
@@ -157,7 +157,7 @@ json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer) {

        if constexpr (read_value) {
            std::vector<u8> data(desc.Size());
-            Memory::ReadBlock(desc.Address(), data.data(), desc.Size());
+            memory.ReadBlock(desc.Address(), data.data(), desc.Size());
            entry["data"] = Common::HexToString(data);
        }

@@ -167,7 +167,7 @@ json GetHLEBufferDescriptorData(const std::vector<DescriptorType>& buffer) {
    return buffer_out;
 }

-json GetHLERequestContextData(Kernel::HLERequestContext& ctx) {
+json GetHLERequestContextData(Kernel::HLERequestContext& ctx, Memory::Memory& memory) {
    json out;

    auto cmd_buf = json::array();
@@ -177,10 +177,10 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx) {

    out["command_buffer"] = std::move(cmd_buf);

-    out["buffer_descriptor_a"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorA());
-    out["buffer_descriptor_b"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorB());
-    out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC());
-    out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX());
+    out["buffer_descriptor_a"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorA(), memory);
+    out["buffer_descriptor_b"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorB(), memory);
+    out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC(), memory);
+    out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX(), memory);

    return out;
 }
@@ -259,7 +259,7 @@ void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u
    const auto title_id = system.CurrentProcess()->GetTitleID();
    auto out = GetFullDataAuto(timestamp, title_id, system);

-    auto function_out = GetHLERequestContextData(ctx);
+    auto function_out = GetHLERequestContextData(ctx, system.Memory());
    function_out["command_id"] = command_id;
    function_out["function_name"] = name;
    function_out["service_name"] = service_name;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -165,24 +165,20 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
    Telemetry::AppendOSInfo(field_collection);

    // Log user configuration information
-    AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
-    AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
-             Settings::values.enable_audio_stretching);
-    AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
-             Settings::values.use_multi_core);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
-             Settings::values.resolution_factor);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
-             Settings::values.use_frame_limit);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
-             Settings::values.use_disk_shader_cache);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
+    constexpr auto field_type = Telemetry::FieldType::UserConfig;
+    AddField(field_type, "Audio_SinkId", Settings::values.sink_id);
+    AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
+    AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core);
+    AddField(field_type, "Renderer_Backend", "OpenGL");
+    AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor);
+    AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
+    AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
+    AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
+    AddField(field_type, "Renderer_UseAccurateGpuEmulation",
             Settings::values.use_accurate_gpu_emulation);
-    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
             Settings::values.use_asynchronous_gpu_emulation);
-    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
-             Settings::values.use_docked_mode);
+    AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
 }

 bool TelemetrySession::SubmitTestcase() {
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -11,40 +11,39 @@
 #include "core/tools/freezer.h"

 namespace Tools {
-
 namespace {

 constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);

-u64 MemoryReadWidth(u32 width, VAddr addr) {
+u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) {
    switch (width) {
    case 1:
-        return Memory::Read8(addr);
+        return memory.Read8(addr);
    case 2:
-        return Memory::Read16(addr);
+        return memory.Read16(addr);
    case 4:
-        return Memory::Read32(addr);
+        return memory.Read32(addr);
    case 8:
-        return Memory::Read64(addr);
+        return memory.Read64(addr);
    default:
        UNREACHABLE();
        return 0;
    }
 }

-void MemoryWriteWidth(u32 width, VAddr addr, u64 value) {
+void MemoryWriteWidth(Memory::Memory& memory, u32 width, VAddr addr, u64 value) {
    switch (width) {
    case 1:
-        Memory::Write8(addr, static_cast<u8>(value));
+        memory.Write8(addr, static_cast<u8>(value));
        break;
    case 2:
-        Memory::Write16(addr, static_cast<u16>(value));
+        memory.Write16(addr, static_cast<u16>(value));
        break;
    case 4:
-        Memory::Write32(addr, static_cast<u32>(value));
+        memory.Write32(addr, static_cast<u32>(value));
        break;
    case 8:
-        Memory::Write64(addr, value);
+        memory.Write64(addr, value);
        break;
    default:
        UNREACHABLE();
@@ -53,8 +52,9 @@ void MemoryWriteWidth(u32 width, VAddr addr, u64 value) {

 } // Anonymous namespace

-Freezer::Freezer(Core::Timing::CoreTiming& core_timing) : core_timing(core_timing) {
-    event = core_timing.RegisterEvent(
+Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Memory::Memory& memory_)
+    : core_timing{core_timing_}, memory{memory_} {
+    event = Core::Timing::CreateEvent(
        "MemoryFreezer::FrameCallback",
        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event);
@@ -89,7 +89,7 @@ void Freezer::Clear() {
 u64 Freezer::Freeze(VAddr address, u32 width) {
    std::lock_guard lock{entries_mutex};

-    const auto current_value = MemoryReadWidth(width, address);
+    const auto current_value = MemoryReadWidth(memory, width, address);
    entries.push_back({address, width, current_value});

    LOG_DEBUG(Common_Memory,
@@ -169,7 +169,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
        LOG_DEBUG(Common_Memory,
                  "Enforcing memory freeze at address={:016X}, value={:016X}, width={:02X}",
                  entry.address, entry.value, entry.width);
-        MemoryWriteWidth(entry.width, entry.address, entry.value);
+        MemoryWriteWidth(memory, entry.width, entry.address, entry.value);
    }

    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event);
@@ -181,7 +181,7 @@ void Freezer::FillEntryReads() {
    LOG_DEBUG(Common_Memory, "Updating memory freeze entries to current values.");

    for (auto& entry : entries) {
-        entry.value = MemoryReadWidth(entry.width, entry.address);
+        entry.value = MemoryReadWidth(memory, entry.width, entry.address);
    }
 }

--- a/src/core/tools/freezer.h
+++ b/src/core/tools/freezer.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <atomic>
+#include <memory>
 #include <mutex>
 #include <optional>
 #include <vector>
@@ -15,6 +16,10 @@ class CoreTiming;
 struct EventType;
 } // namespace Core::Timing

+namespace Memory {
+class Memory;
+}
+
 namespace Tools {

 /**
@@ -33,7 +38,7 @@ public:
        u64 value;
    };

-    explicit Freezer(Core::Timing::CoreTiming& core_timing);
+    explicit Freezer(Core::Timing::CoreTiming& core_timing_, Memory::Memory& memory_);
    ~Freezer();

    // Enables or disables the entire memory freezer.
@@ -75,8 +80,9 @@ private:
    mutable std::mutex entries_mutex;
    std::vector<Entry> entries;

-    Core::Timing::EventType* event;
+    std::shared_ptr<Core::Timing::EventType> event;
    Core::Timing::CoreTiming& core_timing;
+    Memory::Memory& memory;
 };

 } // namespace Tools
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -8,7 +8,6 @@
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
-#include "core/memory_setup.h"
 #include "tests/core/arm/arm_test_common.h"

 namespace ArmTests {
@@ -16,8 +15,9 @@ namespace ArmTests {
 TestEnvironment::TestEnvironment(bool mutable_memory_)
    : mutable_memory(mutable_memory_),
      test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
-    auto process = Kernel::Process::Create(Core::System::GetInstance(), "",
-                                           Kernel::Process::ProcessType::Userland);
+    auto& system = Core::System::GetInstance();
+
+    auto process = Kernel::Process::Create(system, "", Kernel::Process::ProcessType::Userland);
    page_table = &process->VMManager().page_table;

    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
@@ -25,15 +25,16 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
    std::fill(page_table->attributes.begin(), page_table->attributes.end(),
              Common::PageType::Unmapped);

-    Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
-    Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
+    system.Memory().MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
+    system.Memory().MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);

    kernel.MakeCurrentProcess(process.get());
 }

 TestEnvironment::~TestEnvironment() {
-    Memory::UnmapRegion(*page_table, 0x80000000, 0x80000000);
-    Memory::UnmapRegion(*page_table, 0x00000000, 0x80000000);
+    auto& system = Core::System::GetInstance();
+    system.Memory().UnmapRegion(*page_table, 0x80000000, 0x80000000);
+    system.Memory().UnmapRegion(*page_table, 0x00000000, 0x80000000);
 }

 void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) {
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -7,7 +7,9 @@
 #include <array>
 #include <bitset>
 #include <cstdlib>
+#include <memory>
 #include <string>
+
 #include "common/file_util.h"
 #include "core/core.h"
 #include "core/core_timing.h"
@@ -65,11 +67,16 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

-    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
-    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
-    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
-    Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
-    Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
+    std::shared_ptr<Core::Timing::EventType> cb_a =
+        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
+    std::shared_ptr<Core::Timing::EventType> cb_b =
+        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
+    std::shared_ptr<Core::Timing::EventType> cb_c =
+        Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>);
+    std::shared_ptr<Core::Timing::EventType> cb_d =
+        Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>);
+    std::shared_ptr<Core::Timing::EventType> cb_e =
+        Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>);

    // Enter slice 0
    core_timing.ResetRun();
@@ -99,8 +106,8 @@ TEST_CASE("CoreTiming[FairSharing]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

-    Core::Timing::EventType* empty_callback =
-        core_timing.RegisterEvent("empty_callback", EmptyCallback);
+    std::shared_ptr<Core::Timing::EventType> empty_callback =
+        Core::Timing::CreateEvent("empty_callback", EmptyCallback);

    callbacks_done = 0;
    u64 MAX_CALLBACKS = 10;
@@ -133,8 +140,10 @@ TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
    ScopeInit guard;
    auto& core_timing = guard.core_timing;

-    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
-    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
+    std::shared_ptr<Core::Timing::EventType> cb_a =
+        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
+    std::shared_ptr<Core::Timing::EventType> cb_b =
+        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);

    // Enter slice 0
    core_timing.ResetRun();
@@ -145,60 +154,3 @@ TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
    AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10)
    AdvanceAndCheck(core_timing, 1, 1, 50, -50);
 }
-
-namespace ChainSchedulingTest {
-static int reschedules = 0;
-
-static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
-                               s64 cycles_late) {
-    --reschedules;
-    REQUIRE(reschedules >= 0);
-    REQUIRE(lateness == cycles_late);
-
-    if (reschedules > 0) {
-        core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
-                                  userdata);
-    }
-}
-} // namespace ChainSchedulingTest
-
-TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
-    using namespace ChainSchedulingTest;
-
-    ScopeInit guard;
-    auto& core_timing = guard.core_timing;
-
-    Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
-    Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
-    Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
-    Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
-        "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
-            RescheduleCallback(core_timing, userdata, cycles_late);
-        });
-
-    // Enter slice 0
-    core_timing.ResetRun();
-
-    core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
-    core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
-    core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
-    core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
-    REQUIRE(800 == core_timing.GetDowncount());
-
-    reschedules = 3;
-    AdvanceAndCheck(core_timing, 0, 0); // cb_a
-    AdvanceAndCheck(core_timing, 1, 1); // cb_b, cb_rs
-    REQUIRE(2 == reschedules);
-
-    core_timing.AddTicks(core_timing.GetDowncount());
-    core_timing.Advance(); // cb_rs
-    core_timing.SwitchContext(3);
-    REQUIRE(1 == reschedules);
-    REQUIRE(200 == core_timing.GetDowncount());
-
-    AdvanceAndCheck(core_timing, 2, 3); // cb_c
-
-    core_timing.AddTicks(core_timing.GetDowncount());
-    core_timing.Advance(); // cb_rs
-    REQUIRE(0 == reschedules);
-}
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -310,6 +310,11 @@ public:
            }
        };

+        enum class DepthMode : u32 {
+            MinusOneToOne = 0,
+            ZeroToOne = 1,
+        };
+
        enum class PrimitiveTopology : u32 {
            Points = 0x0,
            Lines = 0x1,
@@ -491,6 +496,18 @@ public:
            INSERT_UNION_PADDING_WORDS(1);
        };

+        enum class TessellationPrimitive : u32 {
+            Isolines = 0,
+            Triangles = 1,
+            Quads = 2,
+        };
+
+        enum class TessellationSpacing : u32 {
+            Equal = 0,
+            FractionalOdd = 1,
+            FractionalEven = 2,
+        };
+
        struct RenderTargetConfig {
            u32 address_high;
            u32 address_low;
@@ -628,7 +645,19 @@ public:
                    };
                } sync_info;

-                INSERT_UNION_PADDING_WORDS(0x11E);
+                INSERT_UNION_PADDING_WORDS(0x15);
+
+                union {
+                    BitField<0, 2, TessellationPrimitive> prim;
+                    BitField<4, 2, TessellationSpacing> spacing;
+                    BitField<8, 1, u32> cw;
+                    BitField<9, 1, u32> connected;
+                } tess_mode;
+
+                std::array<f32, 4> tess_level_outer;
+                std::array<f32, 2> tess_level_inner;
+
+                INSERT_UNION_PADDING_WORDS(0x102);

                u32 tfb_enabled;

@@ -647,7 +676,7 @@ public:
                    u32 count;
                } vertex_buffer;

-                INSERT_UNION_PADDING_WORDS(1);
+                DepthMode depth_mode;

                float clear_color[4];
                float clear_depth;
@@ -662,7 +691,9 @@ public:
                u32 polygon_offset_line_enable;
                u32 polygon_offset_fill_enable;

-                INSERT_UNION_PADDING_WORDS(0xD);
+                u32 patch_vertices;
+
+                INSERT_UNION_PADDING_WORDS(0xC);

                std::array<ScissorTest, NumViewports> scissor_test;

@@ -1386,17 +1417,22 @@ ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec_upload, 0x6C);
 ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
+ASSERT_REG_POSITION(tess_mode, 0xC8);
+ASSERT_REG_POSITION(tess_level_outer, 0xC9);
+ASSERT_REG_POSITION(tess_level_inner, 0xCD);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
 ASSERT_REG_POSITION(viewports, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
+ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
 ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
 ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
 ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
+ASSERT_REG_POSITION(patch_vertices, 0x373);
 ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,10 +98,11 @@ union Attribute {
        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
+        BitField<31, 1, u64> patch;
        BitField<47, 3, AttributeSize> size;

        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
+            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
        }
    } fmt20;

@@ -383,6 +384,15 @@ enum class IsberdMode : u64 {

 enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };

+enum class MembarType : u64 {
+    CTA = 0,
+    GL = 1,
+    SYS = 2,
+    VC = 3,
+};
+
+enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
+
 enum class HalfType : u64 {
    H0_H1 = 0,
    F32 = 1,
@@ -799,6 +809,12 @@ union Instruction {
        BitField<40, 1, u64> invert;
    } popc;

+    union {
+        BitField<41, 1, u64> sh;
+        BitField<40, 1, u64> invert;
+        BitField<48, 1, u64> is_signed;
+    } flo;
+
    union {
        BitField<39, 3, u64> pred;
        BitField<42, 1, u64> neg_pred;
@@ -1276,6 +1292,7 @@ union Instruction {
        BitField<50, 1, u64> dc_flag;
        BitField<51, 1, u64> aoffi_flag;
        BitField<52, 2, u64> component;
+        BitField<55, 1, u64> fp16_flag;

        bool UsesMiscMode(TextureMiscMode mode) const {
            switch (mode) {
@@ -1439,6 +1456,26 @@ union Instruction {
        }
    } tlds;

+    union {
+        BitField<28, 1, u64> is_array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<35, 1, u64> aoffi_flag;
+        BitField<49, 1, u64> nodep_flag;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::AOFFI:
+                return aoffi_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            default:
+                break;
+            }
+            return false;
+        }
+
+    } txd;
+
    union {
        BitField<24, 2, StoreCacheManagement> cache_management;
        BitField<33, 3, ImageType> image_type;
@@ -1518,6 +1555,11 @@ union Instruction {
        BitField<47, 2, IsberdShift> shift;
    } isberd;

+    union {
+        BitField<8, 2, MembarType> type;
+        BitField<0, 2, MembarUnknown> unknown;
+    } membar;
+
    union {
        BitField<48, 1, u64> signed_a;
        BitField<38, 1, u64> is_byte_chunk_a;
@@ -1632,6 +1674,8 @@ public:
        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
        TMML_B, // Texture Mip Map Level
        TMML,   // Texture Mip Map Level
+        TXD,    // Texture Gradient/Load with Derivatives
+        TXD_B,  // Texture Gradient/Load with Derivatives Bindless
        SUST,   // Surface Store
        SULD,   // Surface Load
        SUATOM, // Surface Atomic Operation
@@ -1640,6 +1684,7 @@ public:
        IPA,
        OUT_R, // Emit vertex/primitive
        ISBERD,
+        MEMBAR,
        VMAD,
        VSETP,
        FFMA_IMM, // Fused Multiply and Add
@@ -1664,6 +1709,9 @@ public:
        ISCADD_C, // Scale and Add
        ISCADD_R,
        ISCADD_IMM,
+        FLO_R,
+        FLO_C,
+        FLO_IMM,
        LEA_R1,
        LEA_R2,
        LEA_RZ,
@@ -1727,6 +1775,10 @@ public:
        SHR_C,
        SHR_R,
        SHR_IMM,
+        SHF_RIGHT_R,
+        SHF_RIGHT_IMM,
+        SHF_LEFT_R,
+        SHF_LEFT_IMM,
        FMNMX_C,
        FMNMX_R,
        FMNMX_IMM,
@@ -1894,7 +1946,7 @@ private:
            INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
            INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
            INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
-            INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
+            INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
            INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
            INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
@@ -1921,9 +1973,11 @@ private:
            INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
-            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
+            INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
+            INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
+            INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
            INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
            INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
@@ -1931,6 +1985,7 @@ private:
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
            INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
            INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -1965,6 +2020,9 @@ private:
            INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
            INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
            INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
+            INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
+            INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
+            INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
@@ -2022,6 +2080,10 @@ private:
            INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
            INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
            INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
+            INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
+            INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
+            INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
+            INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
            INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
            INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
            INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -71,8 +71,7 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
 }

 void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
-                                               : std::optional<const Tegra::FramebufferConfig>{}));
+    PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }

 void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -52,7 +52,7 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
    const u64 aligned_size{Common::AlignUp(size, page_size)};
    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};

-    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
+    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
    ASSERT(system.CurrentProcess()
               ->VMManager()
               .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
@@ -67,7 +67,7 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)

    const u64 aligned_size{Common::AlignUp(size, page_size)};

-    MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
+    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
    ASSERT(system.CurrentProcess()
               ->VMManager()
               .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -5,6 +5,7 @@
 #include <mutex>

 #include <boost/icl/interval_map.hpp>
+#include <boost/range/iterator_range.hpp>

 #include "common/assert.h"
 #include "common/common_types.h"
@@ -22,7 +23,8 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) {

 } // Anonymous namespace

-RasterizerAccelerated::RasterizerAccelerated() = default;
+RasterizerAccelerated::RasterizerAccelerated(Memory::Memory& cpu_memory_)
+    : cpu_memory{cpu_memory_} {}

 RasterizerAccelerated::~RasterizerAccelerated() = default;

@@ -47,9 +49,9 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del
        const u64 interval_size = interval_end_addr - interval_start_addr;

        if (delta > 0 && count == delta) {
-            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+            cpu_memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
        } else if (delta < 0 && count == -delta) {
-            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+            cpu_memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
        } else {
            ASSERT(count >= 0);
        }
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -11,12 +11,16 @@
 #include "common/common_types.h"
 #include "video_core/rasterizer_interface.h"

+namespace Memory {
+class Memory;
+}
+
 namespace VideoCore {

 /// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
 class RasterizerAccelerated : public RasterizerInterface {
 public:
-    explicit RasterizerAccelerated();
+    explicit RasterizerAccelerated(Memory::Memory& cpu_memory_);
    ~RasterizerAccelerated() override;

    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
@@ -24,8 +28,9 @@ public:
 private:
    using CachedPageMap = boost::icl::interval_map<u64, int>;
    CachedPageMap cached_pages;
-
    std::mutex pages_mutex;
+
+    Memory::Memory& cpu_memory;
 };

 } // namespace VideoCore
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstring>
 #include <optional>
 #include <vector>

@@ -134,10 +135,13 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin

 Device::Device() : base_bindings{BuildBaseBindings()} {
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
+    const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
    const std::vector extensions = GetExtensions();

    const bool is_nvidia = vendor == "NVIDIA Corporation";
+    const bool is_amd = vendor == "ATI Technologies Inc.";
    const bool is_intel = vendor == "Intel";
+    const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;

    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -149,9 +153,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
    has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
    has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
    has_variable_aoffi = TestVariableAoffi();
-    has_component_indexing_bug = TestComponentIndexingBug();
+    has_component_indexing_bug = is_amd;
    has_precise_bug = TestPreciseBug();
-    has_broken_compute = is_intel;
+    has_broken_compute = is_intel_proprietary;
    has_fast_buffer_sub_data = is_nvidia;

    LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
@@ -184,52 +188,6 @@ void main() {
 })");
 }

-bool Device::TestComponentIndexingBug() {
-    const GLchar* COMPONENT_TEST = R"(#version 430 core
-layout (std430, binding = 0) buffer OutputBuffer {
-    uint output_value;
-};
-layout (std140, binding = 0) uniform InputBuffer {
-    uvec4 input_value[4096];
-};
-layout (location = 0) uniform uint idx;
-void main() {
-    output_value = input_value[idx >> 2][idx & 3];
-})";
-    const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
-    SCOPE_EXIT({ glDeleteProgram(shader); });
-    glUseProgram(shader);
-
-    OGLVertexArray vao;
-    vao.Create();
-    glBindVertexArray(vao.handle);
-
-    constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
-    OGLBuffer ubo;
-    ubo.Create();
-    glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
-    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
-
-    OGLBuffer ssbo;
-    ssbo.Create();
-    glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
-
-    for (GLuint index = 4; index < 8; ++index) {
-        glInvalidateBufferData(ssbo.handle);
-        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
-
-        glProgramUniform1ui(shader, 0, index);
-        glDrawArrays(GL_POINTS, 0, 1);
-
-        GLuint result;
-        glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
-        if (result != values.at(index)) {
-            return true;
-        }
-    }
-    return false;
-}
-
 bool Device::TestPreciseBug() {
    return !TestProgram(R"(#version 430 core
 in vec3 coords;
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -86,7 +86,6 @@ public:

 private:
    static bool TestVariableAoffi();
-    static bool TestComponentIndexingBug();
    static bool TestPreciseBug();

    std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -3,9 +3,12 @@
 // Refer to the license.txt file included.

 #include <tuple>
+#include <unordered_map>
+#include <utility>

-#include "common/cityhash.h"
-#include "common/scope_exit.h"
+#include <glad/glad.h>
+
+#include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
@@ -13,6 +16,7 @@
 namespace OpenGL {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using VideoCore::Surface::SurfaceType;

 FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;

@@ -35,36 +39,49 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
    local_state.draw.draw_framebuffer = framebuffer.handle;
    local_state.ApplyFramebufferState();

-    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
-        if (key.colors[index]) {
-            key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
-                                      GL_DRAW_FRAMEBUFFER);
-        }
-    }
-    if (key.colors_count) {
-        glDrawBuffers(key.colors_count, key.color_attachments.data());
-    } else {
-        glDrawBuffer(GL_NONE);
+    if (key.zeta) {
+        const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
+        const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+        key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
    }

-    if (key.zeta) {
-        key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
-                         GL_DRAW_FRAMEBUFFER);
+    std::size_t num_buffers = 0;
+    std::array<GLenum, Maxwell::NumRenderTargets> targets;
+
+    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        if (!key.colors[index]) {
+            targets[index] = GL_NONE;
+            continue;
+        }
+        const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
+        key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
+
+        const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
+        targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
+        num_buffers = index + 1;
+    }
+
+    if (num_buffers > 0) {
+        glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
+    } else {
+        glDrawBuffer(GL_NONE);
    }

    return framebuffer;
 }

-std::size_t FramebufferCacheKey::Hash() const {
-    static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
-    return static_cast<std::size_t>(
-        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+std::size_t FramebufferCacheKey::Hash() const noexcept { // Mixes zeta, colors and attachment map
+    std::size_t hash = std::hash<View>{}(zeta);
+    for (const auto& color : colors) { // Order-sensitive mix so swapped/duplicated views differ
+        hash ^= std::hash<View>{}(color) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
+    }
+    hash ^= static_cast<std::size_t>(color_attachments) << 16;
+    return hash;
 }

-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
-    return std::tie(stencil_enable, colors_count, color_attachments, colors, zeta) ==
-           std::tie(rhs.stencil_enable, rhs.colors_count, rhs.color_attachments, rhs.colors,
-                    rhs.zeta);
+bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
+    return std::tie(colors, zeta, color_attachments) ==
+           std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
 }

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -18,21 +18,24 @@

 namespace OpenGL {

-struct alignas(sizeof(u64)) FramebufferCacheKey {
-    bool stencil_enable = false;
-    u16 colors_count = 0;
+constexpr std::size_t BitsPerAttachment = 4;

-    std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
-    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+struct FramebufferCacheKey {
    View zeta;
+    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+    u32 color_attachments = 0;

-    std::size_t Hash() const;
+    std::size_t Hash() const noexcept;

-    bool operator==(const FramebufferCacheKey& rhs) const;
+    bool operator==(const FramebufferCacheKey& rhs) const noexcept;

-    bool operator!=(const FramebufferCacheKey& rhs) const {
+    bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
        return !operator==(rhs);
    }
+
+    void SetAttachment(std::size_t index, u32 attachment) {
+        color_attachments |= attachment << (BitsPerAttachment * index);
+    }
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -19,6 +19,7 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
+#include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -86,12 +87,12 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,

 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                                   ScreenInfo& info)
-    : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
-      system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
+      shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
    state.draw.shader_program = 0;
    state.Apply();
-    clear_framebuffer.Create();

    LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
    CheckExtensions();
@@ -276,6 +277,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            continue;
        }

+        // Currently these stages are not supported in the OpenGL backend.
+        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
+        if (program == Maxwell::ShaderProgram::TesselationControl) {
+            continue;
+        } else if (program == Maxwell::ShaderProgram::TesselationEval) {
+            continue;
+        }
+
        Shader shader{shader_cache.GetStageProgram(program)};

        // Stage indices are 0 - 5
@@ -371,78 +380,58 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);

    // Bind the framebuffer surfaces
-    FramebufferCacheKey fbkey;
-    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+    FramebufferCacheKey key;
+    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
+    for (std::size_t index = 0; index < colors_count; ++index) {
        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
-
-        if (color_surface) {
-            // Assume that a surface will be written to if it is used as a framebuffer, even
-            // if the shader doesn't actually write to it.
-            texture_cache.MarkColorBufferInUse(index);
+        if (!color_surface) {
+            continue;
        }
+        // Assume that a surface will be written to if it is used as a framebuffer, even
+        // if the shader doesn't actually write to it.
+        texture_cache.MarkColorBufferInUse(index);

-        fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
-        fbkey.colors[index] = std::move(color_surface);
+        key.SetAttachment(index, regs.rt_control.GetMap(index));
+        key.colors[index] = std::move(color_surface);
    }
-    fbkey.colors_count = static_cast<u16>(regs.rt_control.count);

    if (depth_surface) {
        // Assume that a surface will be written to if it is used as a framebuffer, even if
        // the shader doesn't actually write to it.
        texture_cache.MarkDepthBufferInUse();
-
-        fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
-        fbkey.zeta = std::move(depth_surface);
+        key.zeta = std::move(depth_surface);
    }

    texture_cache.GuardRenderTargets(false);

-    state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
+    state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
    SyncViewport(state);
 }

 void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
                                                 bool using_depth_fb, bool using_stencil_fb) {
+    using VideoCore::Surface::SurfaceType;
+
    auto& gpu = system.GPU().Maxwell3D();
    const auto& regs = gpu.regs;

    texture_cache.GuardRenderTargets(true);
-    View color_surface{};
+    View color_surface;
    if (using_color_fb) {
        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
    }
-    View depth_surface{};
+    View depth_surface;
    if (using_depth_fb || using_stencil_fb) {
        depth_surface = texture_cache.GetDepthBufferSurface(false);
    }
    texture_cache.GuardRenderTargets(false);

-    current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+    FramebufferCacheKey key;
+    key.colors[0] = color_surface;
+    key.zeta = depth_surface;
+
+    current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
    current_state.ApplyFramebufferState();
-
-    if (color_surface) {
-        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
-    } else {
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-    }
-
-    if (depth_surface) {
-        const auto& params = depth_surface->GetSurfaceParams();
-        switch (params.type) {
-        case VideoCore::Surface::SurfaceType::Depth:
-            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-            break;
-        case VideoCore::Surface::SurfaceType::DepthStencil:
-            depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
-            break;
-        default:
-            UNIMPLEMENTED();
-        }
-    } else {
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-    }
 }

 void RasterizerOpenGL::Clear() {
@@ -837,7 +826,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

    const auto surface{
-        texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
+        texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
    if (!surface) {
        return {};
    }
@@ -1047,6 +1036,10 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        flip_y = !flip_y;
    }
    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+    state.clip_control.depth_mode =
+        regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
+            ? GL_ZERO_TO_ONE
+            : GL_NEGATIVE_ONE_TO_ONE;
 }

 void RasterizerOpenGL::SyncClipEnabled(
--- a/Show More
+++ b/Show More