Revert "video_core: memory_manager: Use GPU interface for cache functions."

Merge pull request #3401 from FernandoS27/synchronization
Set of refactors for Kernel Synchronization and Hardware Constants
2020-02-15 17:47:15 -05:00 · 2020-02-14 14:40:20 -05:00 · 2020-02-14 13:22:53 -05:00 · 2020-02-14 09:11:47 -05:00 · 2020-02-14 05:53:30 -04:00 · 2020-02-13 21:26:13 -05:00
130 changed files with 2426 additions and 1119 deletions
--- a/.ci/scripts/linux/docker.sh
+++ b/.ci/scripts/linux/docker.sh
@@ -5,7 +5,7 @@ cd /yuzu
 ccache -s

 mkdir build || true && cd build
-cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
+cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_VULKAN=No

 ninja

--- a/.ci/scripts/windows/docker.sh
+++ b/.ci/scripts/windows/docker.sh
@@ -13,7 +13,7 @@ echo '' >> /bin/cmd
 chmod +x /bin/cmd

 mkdir build || true && cd build
-cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_VULKAN=No
 ninja

 # Clean up the dirty hacks
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,15 +151,16 @@ if (ENABLE_SDL2)
        set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers")
        set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library")
        set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll")
-    else()
-        find_package(SDL2 REQUIRED)
-    endif()

-    if (SDL2_FOUND)
-        # TODO(yuriks): Make FindSDL2.cmake export an IMPORTED library instead
        add_library(SDL2 INTERFACE)
        target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}")
        target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}")
+    else()
+        find_package(SDL2 REQUIRED)
+        # Attach headers to the SDL2 interface target (as the bundled branch does) instead of the whole directory scope.
+        add_library(SDL2 INTERFACE)
+        target_include_directories(SDL2 INTERFACE ${SDL2_INCLUDE_DIRS})
+        target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
    endif()
 else()
    set(SDL2_FOUND NO)
--- a/dist/qt_themes/colorful/style.qrc
+++ b/dist/qt_themes/colorful/style.qrc
@@ -10,6 +10,6 @@
        <file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
    </qresource>
    <qresource prefix="colorful">
-        <file>style.qss</file>
+        <file alias="style.qss">../default/style.qss</file>
    </qresource>
 </RCC>
--- a/dist/qt_themes/colorful/style.qss
+++ b/dist/qt_themes/colorful/style.qss
@@ -1,4 +0,0 @@
-/*
-    This file is intentionally left blank.
-    We do not want to apply any stylesheet for colorful, only icons.
-*/
--- a/dist/qt_themes/default/default.qrc
+++ b/dist/qt_themes/default/default.qrc
@@ -1,25 +1,18 @@
 <RCC>
    <qresource prefix="icons/default">
        <file alias="index.theme">icons/index.theme</file>
-      
        <file alias="16x16/checked.png">icons/16x16/checked.png</file>
-
        <file alias="16x16/failed.png">icons/16x16/failed.png</file>
-
        <file alias="16x16/lock.png">icons/16x16/lock.png</file>
-
        <file alias="48x48/bad_folder.png">icons/48x48/bad_folder.png</file>
-      
        <file alias="48x48/chip.png">icons/48x48/chip.png</file>
-
        <file alias="48x48/folder.png">icons/48x48/folder.png</file>
-
        <file alias="48x48/plus.png">icons/48x48/plus.png</file>
-      
        <file alias="48x48/sd_card.png">icons/48x48/sd_card.png</file>
-      
        <file alias="256x256/yuzu.png">icons/256x256/yuzu.png</file>
-
        <file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
    </qresource>
+    <qresource prefix="default">
+        <file>style.qss</file>
+    </qresource>
 </RCC>
--- a/dist/qt_themes/default/style.qss
+++ b/dist/qt_themes/default/style.qss
@@ -0,0 +1,35 @@
+QPushButton#TogglableStatusBarButton {
+    color: #959595;
+    border: 1px solid transparent;
+    background-color: transparent;
+    padding: 0px 3px 0px 3px;
+    text-align: center;
+}
+
+QPushButton#TogglableStatusBarButton:checked {
+    color: #000000;
+}
+
+QPushButton#TogglableStatusBarButton:hover {
+    border: 1px solid #76797C;
+}
+
+QPushButton#RendererStatusBarButton {
+    color: #656565;
+    border: 1px solid transparent;
+    background-color: transparent;
+    padding: 0px 3px 0px 3px;
+    text-align: center;
+}
+
+QPushButton#RendererStatusBarButton:hover {
+    border: 1px solid #76797C;
+}
+
+QPushButton#RendererStatusBarButton:checked {
+    color: #e85c00;
+}
+
+QPushButton#RendererStatusBarButton:!checked {
+    color: #0066ff;
+}
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -1236,3 +1236,41 @@ QToolButton:disabled,
 QPlainTextEdit:disabled {
    background-color: #2b2e31;
 }
+
+QPushButton#TogglableStatusBarButton {
+    min-width: 0px;
+    color: #656565;
+    border: 1px solid transparent;
+    background-color: transparent;
+    padding: 0px 3px 0px 3px;
+    text-align: center;
+}
+
+QPushButton#TogglableStatusBarButton:checked {
+    color: #ffffff;
+}
+
+QPushButton#TogglableStatusBarButton:hover {
+    border: 1px solid #76797C;
+}
+
+QPushButton#RendererStatusBarButton {
+    min-width: 0px;
+    color: #656565;
+    border: 1px solid transparent;
+    background-color: transparent;
+    padding: 0px 3px 0px 3px;
+    text-align: center;
+}
+
+QPushButton#RendererStatusBarButton:hover {
+    border: 1px solid #76797C;
+}
+
+QPushButton#RendererStatusBarButton:checked {
+    color: #e85c00;
+}
+
+QPushButton#RendererStatusBarButton:!checked {
+    color: #00ccdd;
+}
--- a/externals/cmake-modules/FindSDL2.cmake
+++ b/externals/cmake-modules/FindSDL2.cmake
@@ -1,239 +0,0 @@
-
-# This module defines
-# SDL2_LIBRARY, the name of the library to link against
-# SDL2_FOUND, if false, do not try to link to SDL2
-# SDL2_INCLUDE_DIR, where to find SDL.h
-# SDL2_DLL_DIR, where to find SDL2.dll if it exists
-#
-# This module responds to the the flag:
-# SDL2_BUILDING_LIBRARY
-# If this is defined, then no SDL2main will be linked in because
-# only applications need main().
-# Otherwise, it is assumed you are building an application and this
-# module will attempt to locate and set the the proper link flags
-# as part of the returned SDL2_LIBRARY variable.
-#
-# Don't forget to include SDLmain.h and SDLmain.m your project for the
-# OS X framework based version. (Other versions link to -lSDL2main which
-# this module will try to find on your behalf.) Also for OS X, this
-# module will automatically add the -framework Cocoa on your behalf.
-#
-#
-# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your configuration
-# and no SDL2_LIBRARY, it means CMake did not find your SDL2 library
-# (SDL2.dll, libsdl2.so, SDL2.framework, etc).
-# Set SDL2_LIBRARY_TEMP to point to your SDL2 library, and configure again.
-# Similarly, if you see an empty SDL2MAIN_LIBRARY, you should set this value
-# as appropriate. These values are used to generate the final SDL2_LIBRARY
-# variable, but when these values are unset, SDL2_LIBRARY does not get created.
-#
-#
-# $SDL2DIR is an environment variable that would
-# correspond to the ./configure --prefix=$SDL2DIR
-# used in building SDL2.
-# l.e.galup  9-20-02
-#
-# Modified by Eric Wing.
-# Added code to assist with automated building by using environmental variables
-# and providing a more controlled/consistent search behavior.
-# Added new modifications to recognize OS X frameworks and
-# additional Unix paths (FreeBSD, etc).
-# Also corrected the header search path to follow "proper" SDL guidelines.
-# Added a search for SDL2main which is needed by some platforms.
-# Added a search for threads which is needed by some platforms.
-# Added needed compile switches for MinGW.
-#
-# On OSX, this will prefer the Framework version (if found) over others.
-# People will have to manually change the cache values of
-# SDL2_LIBRARY to override this selection or set the CMake environment
-# CMAKE_INCLUDE_PATH to modify the search paths.
-#
-# Note that the header path has changed from SDL2/SDL.h to just SDL.h
-# This needed to change because "proper" SDL convention
-# is #include "SDL.h", not <SDL2/SDL.h>. This is done for portability
-# reasons because not all systems place things in SDL2/ (see FreeBSD).
-
-#=============================================================================
-# Copyright 2003-2009 Kitware, Inc.
-#
-# Distributed under the OSI-approved BSD License (the "License").
-#
-# This software is distributed WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the License for more information.
-#=============================================================================
-# CMake - Cross Platform Makefile Generator
-# Copyright 2000-2016 Kitware, Inc.
-# Copyright 2000-2011 Insight Software Consortium
-# All rights reserved.
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# 
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the distribution.
-# 
-# * Neither the names of Kitware, Inc., the Insight Software Consortium,
-#   nor the names of their contributors may be used to endorse or promote
-#   products derived from this software without specific prior written
-#   permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# 
-# ------------------------------------------------------------------------------
-# 
-# The above copyright and license notice applies to distributions of
-# CMake in source and binary form.  Some source files contain additional
-# notices of original copyright by their contributors; see each source
-# for details.  Third-party software packages supplied with CMake under
-# compatible licenses provide their own copyright notices documented in
-# corresponding subdirectories.
-# 
-# ------------------------------------------------------------------------------
-# 
-# CMake was initially developed by Kitware with the following sponsorship:
-# 
-#  * National Library of Medicine at the National Institutes of Health
-#    as part of the Insight Segmentation and Registration Toolkit (ITK).
-# 
-#  * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
-#    Visualization Initiative.
-# 
-#  * National Alliance for Medical Image Computing (NAMIC) is funded by the
-#    National Institutes of Health through the NIH Roadmap for Medical Research,
-#    Grant U54 EB005149.
-# 
-#  * Kitware, Inc.
-#
-
-message("<FindSDL2.cmake>")
-
-SET(SDL2_SEARCH_PATHS
-    ~/Library/Frameworks
-    /Library/Frameworks
-    /usr/local
-    /usr
-    /sw # Fink
-    /opt/local # DarwinPorts
-    /opt/csw # Blastwave
-    /opt
-    ${SDL2_PATH}
-)
-
-if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(VC_LIB_PATH_SUFFIX lib/x64)
-else()
-    set(VC_LIB_PATH_SUFFIX lib/x86)
-endif()
-
-FIND_LIBRARY(SDL2_LIBRARY_TEMP
-    NAMES SDL2
-    HINTS
-    $ENV{SDL2DIR}
-    PATH_SUFFIXES lib64 lib ${VC_LIB_PATH_SUFFIX}
-    PATHS ${SDL2_SEARCH_PATHS}
-)
-
-IF(SDL2_LIBRARY_TEMP)
-    if(MSVC)
-        get_filename_component(SDL2_DLL_DIR_TEMP ${SDL2_LIBRARY_TEMP} DIRECTORY)
-        if(EXISTS ${SDL2_DLL_DIR_TEMP}/SDL2.dll)
-            set(SDL2_DLL_DIR ${SDL2_DLL_DIR_TEMP})
-            unset(SDL2_DLL_DIR_TEMP)
-        endif()
-    endif()
-
-    FIND_PATH(SDL2_INCLUDE_DIR SDL.h
-        HINTS
-        $ENV{SDL2DIR}
-        PATH_SUFFIXES include/SDL2 include
-        PATHS ${SDL2_SEARCH_PATHS}
-    )
-
-    IF(NOT SDL2_BUILDING_LIBRARY)
-        IF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
-            # Non-OS X framework versions expect you to also dynamically link to
-            # SDL2main. This is mainly for Windows and OS X. Other (Unix) platforms
-            # seem to provide SDL2main for compatibility even though they don't
-            # necessarily need it.
-            FIND_LIBRARY(SDL2MAIN_LIBRARY
-                NAMES SDL2main
-                HINTS
-                $ENV{SDL2DIR}
-                PATH_SUFFIXES lib64 lib
-                PATHS ${SDL2_SEARCH_PATHS}
-            )
-        ENDIF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
-    ENDIF(NOT SDL2_BUILDING_LIBRARY)
-
-    # SDL2 may require threads on your system.
-    # The Apple build may not need an explicit flag because one of the
-    # frameworks may already provide it.
-    # But for non-OSX systems, I will use the CMake Threads package.
-    IF(NOT APPLE)
-        FIND_PACKAGE(Threads)
-    ENDIF(NOT APPLE)
-
-    # MinGW needs an additional library, mwindows
-    # It's total link flags should look like -lmingw32 -lSDL2main -lSDL2 -lmwindows
-    # (Actually on second look, I think it only needs one of the m* libraries.)
-    IF(MINGW)
-        SET(MINGW32_LIBRARY mingw32 CACHE STRING "mwindows for MinGW")
-    ENDIF(MINGW)
-
-    # For SDL2main
-    IF(NOT SDL2_BUILDING_LIBRARY)
-        IF(SDL2MAIN_LIBRARY)
-            SET(SDL2_LIBRARY_TEMP ${SDL2MAIN_LIBRARY} ${SDL2_LIBRARY_TEMP})
-        ENDIF(SDL2MAIN_LIBRARY)
-    ENDIF(NOT SDL2_BUILDING_LIBRARY)
-
-    # For OS X, SDL2 uses Cocoa as a backend so it must link to Cocoa.
-    # CMake doesn't display the -framework Cocoa string in the UI even
-    # though it actually is there if I modify a pre-used variable.
-    # I think it has something to do with the CACHE STRING.
-    # So I use a temporary variable until the end so I can set the
-    # "real" variable in one-shot.
-    IF(APPLE)
-        SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} "-framework Cocoa")
-    ENDIF(APPLE)
-
-    # For threads, as mentioned Apple doesn't need this.
-    # In fact, there seems to be a problem if I used the Threads package
-    # and try using this line, so I'm just skipping it entirely for OS X.
-    IF(NOT APPLE)
-        SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} ${CMAKE_THREAD_LIBS_INIT})
-    ENDIF(NOT APPLE)
-
-    # For MinGW library
-    IF(MINGW)
-        SET(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP})
-    ENDIF(MINGW)
-
-    # Set the final string here so the GUI reflects the final state.
-    SET(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL2 Library can be found")
-
-    # Unset the temp variable to INTERNAL so it is not seen in the CMake GUI
-    UNSET(SDL2_LIBRARY_TEMP)
-ENDIF(SDL2_LIBRARY_TEMP)
-
-message("</FindSDL2.cmake>")
-
-INCLUDE(FindPackageHandleStandardArgs)
-
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(SDL2 REQUIRED_VARS SDL2_LIBRARY SDL2_INCLUDE_DIR)
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -181,14 +181,16 @@ add_library(core STATIC
    hle/kernel/svc.cpp
    hle/kernel/svc.h
    hle/kernel/svc_wrap.h
+    hle/kernel/synchronization_object.cpp
+    hle/kernel/synchronization_object.h
+    hle/kernel/synchronization.cpp
+    hle/kernel/synchronization.h
    hle/kernel/thread.cpp
    hle/kernel/thread.h
    hle/kernel/transfer_memory.cpp
    hle/kernel/transfer_memory.h
    hle/kernel/vm_manager.cpp
    hle/kernel/vm_manager.h
-    hle/kernel/wait_object.cpp
-    hle/kernel/wait_object.h
    hle/kernel/writable_event.cpp
    hle/kernel/writable_event.h
    hle/lock.cpp
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -14,6 +14,7 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/svc.h"
@@ -153,7 +154,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
    config.tpidr_el0 = &cb->tpidr_el0;
    config.dczid_el0 = 4;
    config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = Timing::CNTFREQ;
+    config.cntfrq_el0 = Hardware::CNTFREQ;

    // Unpredictable instructions
    config.define_unpredictable_behaviour = true;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -268,7 +268,9 @@ struct System::Impl {
        is_powered_on = false;
        exit_lock = false;

-        gpu_core->WaitIdle();
+        if (gpu_core) {
+            gpu_core->WaitIdle();
+        }

        // Shutdown emulation session
        renderer.reset();
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -12,6 +12,7 @@
 #include "common/assert.h"
 #include "common/thread.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"

 namespace Core::Timing {

@@ -215,7 +216,7 @@ void CoreTiming::Idle() {
 }

 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
+    return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
 }

 s64 CoreTiming::GetDowncount() const {
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -11,7 +11,7 @@

 namespace Core::Timing {

-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
+constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;

 s64 msToCycles(std::chrono::milliseconds ms) {
    if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
@@ -20,9 +20,9 @@ s64 msToCycles(std::chrono::milliseconds ms) {
    }
    if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (ms.count() / 1000);
+        return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
    }
-    return (BASE_CLOCK_RATE * ms.count()) / 1000;
+    return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
 }

 s64 usToCycles(std::chrono::microseconds us) {
@@ -32,9 +32,9 @@ s64 usToCycles(std::chrono::microseconds us) {
    }
    if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (us.count() / 1000000);
+        return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
    }
-    return (BASE_CLOCK_RATE * us.count()) / 1000000;
+    return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
 }

 s64 nsToCycles(std::chrono::nanoseconds ns) {
@@ -44,14 +44,14 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
    }
    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (ns.count() / 1000000000);
+        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
    }
-    return (BASE_CLOCK_RATE * ns.count()) / 1000000000;
+    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }

 u64 CpuCyclesToClockCycles(u64 ticks) {
-    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
-    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+    const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
 }

 } // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,28 +6,24 @@

 #include <chrono>
 #include "common/common_types.h"
+#include "core/hardware_properties.h"

 namespace Core::Timing {

-// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
-// The exact value used is of course unverified.
-constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
-constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
-
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);

 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    return std::chrono::milliseconds(cycles * 1000 / BASE_CLOCK_RATE);
+    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
 }

 inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    return std::chrono::nanoseconds(cycles * 1000000000 / BASE_CLOCK_RATE);
+    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
 }

 inline std::chrono::microseconds CyclesToUs(s64 cycles) {
-    return std::chrono::microseconds(cycles * 1000000 / BASE_CLOCK_RATE);
+    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
 }

 u64 CpuCyclesToClockCycles(u64 ticks);
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <memory>
+#include "core/hardware_properties.h"

 namespace Core {

@@ -39,9 +40,7 @@ public:
    void RunLoop(bool tight_loop);

 private:
-    static constexpr std::size_t NUM_CPU_CORES = 4;
-
-    std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers;
+    std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers;
    std::size_t active_core{}; ///< Active core, only used in single thread mode

    System& system;
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -75,6 +75,13 @@ public:
        return nullptr;
    }

+    /// Returns if window is shown (not minimized)
+    virtual bool IsShown() const = 0;
+
+    /// Retrieves Vulkan specific handlers from the window
+    virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
+                                        void* surface) const = 0;
+
    /**
     * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
     * @param framebuffer_x Framebuffer x-coordinate that was pressed
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+
+#include "common/common_types.h"
+
+namespace Core {
+
+namespace Hardware {
+
+// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
+// The exact value used is of course unverified.
+constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Switch's hardware clock speed
+constexpr u32 NUM_CPU_CORES = 4;            // Number of CPU Cores
+
+} // namespace Hardware
+
+struct EmuThreadHandle {
+    u32 host_handle;
+    u32 guest_handle;
+
+    u64 GetRaw() const {
+        return (static_cast<u64>(host_handle) << 32) | guest_handle;
+    }
+
+    bool operator==(const EmuThreadHandle& rhs) const {
+        return std::tie(host_handle, guest_handle) == std::tie(rhs.host_handle, rhs.guest_handle);
+    }
+
+    bool operator!=(const EmuThreadHandle& rhs) const {
+        return !operator==(rhs);
+    }
+
+    static constexpr EmuThreadHandle InvalidHandle() {
+        constexpr u32 invalid_handle = 0xFFFFFFFF;
+        return {invalid_handle, invalid_handle};
+    }
+};
+
+} // namespace Core
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
 void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
    const VAddr arb_addr = thread->GetArbiterWaitAddress();
    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread->GetPriority() >= thread->GetPriority()) {
-            thread_list.insert(it, thread);
-            return;
-        }
-        ++it;
+
+    const auto iter =
+        std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
+            return entry->GetPriority() >= thread->GetPriority();
+        });
+
+    if (iter == thread_list.cend()) {
+        thread_list.push_back(std::move(thread));
+    } else {
+        thread_list.insert(iter, std::move(thread));
    }
-    thread_list.push_back(std::move(thread));
 }

 void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
    const VAddr arb_addr = thread->GetArbiterWaitAddress();
    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread.get() == thread.get()) {
-            thread_list.erase(it);
-            return;
-        }
-        ++it;
-    }
-    UNREACHABLE();
+
+    const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
+                                   [&thread](const auto& entry) { return thread == entry; });
+
+    ASSERT(iter != thread_list.cend());
+
+    thread_list.erase(iter);
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
-    std::vector<std::shared_ptr<Thread>> result;
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        std::shared_ptr<Thread> current_thread = *it;
-        result.push_back(std::move(current_thread));
-        ++it;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
+    VAddr address) const {
+    const auto iter = arb_threads.find(address);
+    if (iter == arb_threads.cend()) {
+        return {};
    }
-    return result;
+
+    const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
+    return {thread_list.cbegin(), thread_list.cend()};
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -86,7 +86,7 @@ private:
    void RemoveThread(std::shared_ptr<Thread> thread);

    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;

    /// List of threads waiting for a address arbiter
    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -12,7 +12,7 @@

 namespace Kernel {

-ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {}

 ClientSession::~ClientSession() {
    // This destructor will be called automatically when the last ClientSession handle is closed by
@@ -31,6 +31,11 @@ void ClientSession::Acquire(Thread* thread) {
    UNIMPLEMENTED();
 }

+bool ClientSession::IsSignaled() const {
+    UNIMPLEMENTED();
+    return true;
+}
+
 ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel,
                                                                std::shared_ptr<Session> parent,
                                                                std::string name) {
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -7,7 +7,7 @@
 #include <memory>
 #include <string>

-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 union ResultCode;
@@ -22,7 +22,7 @@ class KernelCore;
 class Session;
 class Thread;

-class ClientSession final : public WaitObject {
+class ClientSession final : public SynchronizationObject {
 public:
    explicit ClientSession(KernelCore& kernel);
    ~ClientSession() override;
@@ -48,6 +48,8 @@ public:

    void Acquire(Thread* thread) override;

+    bool IsSignaled() const override;
+
 private:
    static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel,
                                                            std::shared_ptr<Session> parent,
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -47,15 +47,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
    const std::string& reason, u64 timeout, WakeupCallback&& callback,
    std::shared_ptr<WritableEvent> writable_event) {
    // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->SetWakeupCallback([context = *this, callback](ThreadWakeupReason reason,
-                                                          std::shared_ptr<Thread> thread,
-                                                          std::shared_ptr<WaitObject> object,
-                                                          std::size_t index) mutable -> bool {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
-        callback(thread, context, reason);
-        context.WriteToOutgoingCommandBuffer(*thread);
-        return true;
-    });
+    thread->SetWakeupCallback(
+        [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                                    std::shared_ptr<SynchronizationObject> object,
+                                    std::size_t index) mutable -> bool {
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
+            callback(thread, context, reason);
+            context.WriteToOutgoingCommandBuffer(*thread);
+            return true;
+        });

    auto& kernel = Core::System::GetInstance().Kernel();
    if (!writable_event) {
@@ -67,7 +67,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
    const auto readable_event{writable_event->GetReadableEvent()};
    writable_event->Clear();
    thread->SetStatus(ThreadStatus::WaitHLEEvent);
-    thread->SetWaitObjects({readable_event});
+    thread->SetSynchronizationObjects({readable_event});
    readable_event->AddWaitingThread(thread);

    if (timeout > 0) {
@@ -284,13 +284,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {

 std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
    std::vector<u8> buffer;
-    const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
+    const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
+                           BufferDescriptorA()[buffer_index].Size()};
    auto& memory = Core::System::GetInstance().Memory();

    if (is_buffer_a) {
+        ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
+                   "BufferDescriptorA invalid buffer_index {}", buffer_index);
        buffer.resize(BufferDescriptorA()[buffer_index].Size());
        memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
    } else {
+        ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
+                   "BufferDescriptorX invalid buffer_index {}", buffer_index);
        buffer.resize(BufferDescriptorX()[buffer_index].Size());
        memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
    }
@@ -305,7 +310,8 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
        return 0;
    }

-    const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
+    const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
+                           BufferDescriptorB()[buffer_index].Size()};
    const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
    if (size > buffer_size) {
        LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
@@ -315,8 +321,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,

    auto& memory = Core::System::GetInstance().Memory();
    if (is_buffer_b) {
+        ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
+                   "BufferDescriptorB invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size,
+                   "BufferDescriptorB buffer_index {} is not large enough", buffer_index);
        memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
    } else {
+        ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
+                   "BufferDescriptorC invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size,
+                   "BufferDescriptorC buffer_index {} is not large enough", buffer_index);
        memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
    }

@@ -324,15 +338,35 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
 }

 std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
-    const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
-    return is_buffer_a ? BufferDescriptorA()[buffer_index].Size()
-                       : BufferDescriptorX()[buffer_index].Size();
+    const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
+                           BufferDescriptorA()[buffer_index].Size()};
+    if (is_buffer_a) {
+        ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
+                   "BufferDescriptorA invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
+                   "BufferDescriptorA buffer_index {} is empty", buffer_index);
+        return BufferDescriptorA()[buffer_index].Size();
+    } else {
+        ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
+                   "BufferDescriptorX invalid buffer_index {}", buffer_index);
+        ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
+                   "BufferDescriptorX buffer_index {} is empty", buffer_index);
+        return BufferDescriptorX()[buffer_index].Size();
+    }
 }

 std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
-    const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
-    return is_buffer_b ? BufferDescriptorB()[buffer_index].Size()
-                       : BufferDescriptorC()[buffer_index].Size();
+    const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
+                           BufferDescriptorB()[buffer_index].Size()};
+    if (is_buffer_b) {
+        ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
+                   "BufferDescriptorB invalid buffer_index {}", buffer_index);
+        return BufferDescriptorB()[buffer_index].Size();
+    } else {
+        ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
+                   "BufferDescriptorC invalid buffer_index {}", buffer_index);
+        return BufferDescriptorC()[buffer_index].Size();
+    }
 }

 std::string HLERequestContext::Description() const {
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -23,6 +23,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -54,10 +55,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
    if (thread->GetStatus() == ThreadStatus::WaitSynch ||
        thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
        // Remove the thread from each of its waiting objects' waitlists
-        for (const auto& object : thread->GetWaitObjects()) {
+        for (const auto& object : thread->GetSynchronizationObjects()) {
            object->RemoveWaitingThread(thread);
        }
-        thread->ClearWaitObjects();
+        thread->ClearSynchronizationObjects();

        // Invoke the wakeup callback before clearing the wait objects
        if (thread->HasWakeupCallback()) {
@@ -96,7 +97,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
 }

 struct KernelCore::Impl {
-    explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {}
+    explicit Impl(Core::System& system)
+        : system{system}, global_scheduler{system}, synchronization{system} {}

    void Initialize(KernelCore& kernel) {
        Shutdown();
@@ -191,6 +193,7 @@ struct KernelCore::Impl {
    std::vector<std::shared_ptr<Process>> process_list;
    Process* current_process = nullptr;
    Kernel::GlobalScheduler global_scheduler;
+    Kernel::Synchronization synchronization;

    std::shared_ptr<ResourceLimit> system_resource_limit;

@@ -270,6 +273,14 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
    return impl->cores[id];
 }

+Kernel::Synchronization& KernelCore::Synchronization() {
+    return impl->synchronization;
+}
+
+const Kernel::Synchronization& KernelCore::Synchronization() const {
+    return impl->synchronization;
+}
+
 Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
    return *impl->exclusive_monitor;
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -29,6 +29,7 @@ class HandleTable;
 class PhysicalCore;
 class Process;
 class ResourceLimit;
+class Synchronization;
 class Thread;

 /// Represents a single instance of the kernel.
@@ -92,6 +93,12 @@ public:
    /// Gets the an instance of the respective physical CPU core.
    const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;

+    /// Gets an instance of the Synchronization Interface.
+    Kernel::Synchronization& Synchronization();
+
+    /// Gets an instance of the Synchronization Interface.
+    const Kernel::Synchronization& Synchronization() const;
+
    /// Stops execution of 'id' core, in order to reschedule a new thread.
    void PrepareReschedule(std::size_t id);

--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -337,7 +337,7 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
 }

 Process::Process(Core::System& system)
-    : WaitObject{system.Kernel()}, vm_manager{system},
+    : SynchronizationObject{system.Kernel()}, vm_manager{system},
      address_arbiter{system}, mutex{system}, system{system} {}

 Process::~Process() = default;
@@ -357,7 +357,7 @@ void Process::ChangeStatus(ProcessStatus new_status) {

    status = new_status;
    is_signaled = true;
-    WakeupAllWaitingThreads();
+    Signal();
 }

 void Process::AllocateMainThreadStack(u64 stack_size) {
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,8 +15,8 @@
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/vm_manager.h"
-#include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"

 namespace Core {
@@ -60,7 +60,7 @@ enum class ProcessStatus {
    DebugBreak,
 };

-class Process final : public WaitObject {
+class Process final : public SynchronizationObject {
 public:
    explicit Process(Core::System& system);
    ~Process() override;
@@ -359,10 +359,6 @@ private:
    /// specified by metadata provided to the process during loading.
    bool is_64bit_process = true;

-    /// Whether or not this process is signaled. This occurs
-    /// upon the process changing to a different state.
-    bool is_signaled = false;
-
    /// Total running time for the process in ticks.
    u64 total_process_running_time_ticks = 0;

--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -11,30 +11,30 @@

 namespace Kernel {

-ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
+ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;

 bool ReadableEvent::ShouldWait(const Thread* thread) const {
-    return !signaled;
+    return !is_signaled;
 }

 void ReadableEvent::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    ASSERT_MSG(IsSignaled(), "object unavailable!");
 }

 void ReadableEvent::Signal() {
-    if (!signaled) {
-        signaled = true;
-        WakeupAllWaitingThreads();
+    if (!is_signaled) {
+        is_signaled = true;
+        SynchronizationObject::Signal();
    };
 }

 void ReadableEvent::Clear() {
-    signaled = false;
+    is_signaled = false;
 }

 ResultCode ReadableEvent::Reset() {
-    if (!signaled) {
+    if (!is_signaled) {
        return ERR_INVALID_STATE;
    }

--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -5,7 +5,7 @@
 #pragma once

 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"

 union ResultCode;

@@ -14,7 +14,7 @@ namespace Kernel {
 class KernelCore;
 class WritableEvent;

-class ReadableEvent final : public WaitObject {
+class ReadableEvent final : public SynchronizationObject {
    friend class WritableEvent;

 public:
@@ -46,13 +46,11 @@ public:
    ///      then ERR_INVALID_STATE will be returned.
    ResultCode Reset();

+    void Signal() override;
+
 private:
    explicit ReadableEvent(KernelCore& kernel);

-    void Signal();
-
-    bool signaled{};
-
    std::string name; ///< Name of event (optional)
 };

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -124,8 +124,8 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
               "Thread yielding without being in front");
    scheduled_queue[core_id].yield(priority);

-    std::array<Thread*, NUM_CPU_CORES> current_threads;
-    for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+    for (std::size_t i = 0; i < current_threads.size(); i++) {
        current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
    }

@@ -177,8 +177,8 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
    // function...
    if (scheduled_queue[core_id].empty()) {
        // Here, "current_threads" is calculated after the ""yield"", unlike yield -1
-        std::array<Thread*, NUM_CPU_CORES> current_threads;
-        for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+        std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+        for (std::size_t i = 0; i < current_threads.size(); i++) {
            current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
        }
        for (auto& thread : suggested_queue[core_id]) {
@@ -208,7 +208,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
 }

 void GlobalScheduler::PreemptThreads() {
-    for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) {
+    for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
        const u32 priority = preemption_priorities[core_id];

        if (scheduled_queue[core_id].size(priority) > 0) {
@@ -349,7 +349,7 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
 }

 void GlobalScheduler::Shutdown() {
-    for (std::size_t core = 0; core < NUM_CPU_CORES; core++) {
+    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        scheduled_queue[core].clear();
        suggested_queue[core].clear();
    }
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -10,6 +10,7 @@

 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/thread.h"

 namespace Core {
@@ -23,8 +24,6 @@ class Process;

 class GlobalScheduler final {
 public:
-    static constexpr u32 NUM_CPU_CORES = 4;
-
    explicit GlobalScheduler(Core::System& system);
    ~GlobalScheduler();

@@ -125,7 +124,7 @@ public:
    void PreemptThreads();

    u32 CpuCoresCount() const {
-        return NUM_CPU_CORES;
+        return Core::Hardware::NUM_CPU_CORES;
    }

    void SetReselectionPending() {
@@ -149,13 +148,15 @@ private:
    bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);

    static constexpr u32 min_regular_priority = 2;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue;
+    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+        scheduled_queue;
+    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+        suggested_queue;
    std::atomic<bool> is_reselection_pending{false};

    // The priority levels at which the global scheduler preempts threads every 10 ms. They are
    // ordered from Core 0 to Core 3.
-    std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
+    std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};

    /// Lists all thread ids that aren't deleted/etc.
    std::vector<std::shared_ptr<Thread>> thread_list;
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -13,7 +13,7 @@

 namespace Kernel {

-ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {}
+ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ServerPort::~ServerPort() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
@@ -39,6 +39,10 @@ void ServerPort::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }

+bool ServerPort::IsSignaled() const {
+    return !pending_sessions.empty();
+}
+
 ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
                                                std::string name) {
    std::shared_ptr<ServerPort> server_port = std::make_shared<ServerPort>(kernel);
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -10,7 +10,7 @@
 #include <vector>
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -20,7 +20,7 @@ class KernelCore;
 class ServerSession;
 class SessionRequestHandler;

-class ServerPort final : public WaitObject {
+class ServerPort final : public SynchronizationObject {
 public:
    explicit ServerPort(KernelCore& kernel);
    ~ServerPort() override;
@@ -82,6 +82,8 @@ public:
    bool ShouldWait(const Thread* thread) const override;
    void Acquire(Thread* thread) override;

+    bool IsSignaled() const override;
+
 private:
    /// ServerSessions waiting to be accepted by the port
    std::vector<std::shared_ptr<ServerSession>> pending_sessions;
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -24,7 +24,7 @@

 namespace Kernel {

-ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {}
+ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ServerSession::~ServerSession() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
@@ -50,6 +50,16 @@ bool ServerSession::ShouldWait(const Thread* thread) const {
    return pending_requesting_threads.empty() || currently_handling != nullptr;
 }

+bool ServerSession::IsSignaled() const {
+    // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
+    if (!parent->Client()) {
+        return true;
+    }
+
+    // Signaled only if we have pending requests and we're not currently handling a request.
+    return !pending_requesting_threads.empty() && currently_handling == nullptr;
+}
+
 void ServerSession::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
    // We are now handling a request, pop it from the stack.
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,7 +10,7 @@
 #include <vector>

 #include "common/threadsafe_queue.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Memory {
@@ -41,7 +41,7 @@ class Thread;
 * After the server replies to the request, the response is marshalled back to the caller's
 * TLS buffer and control is transferred back to it.
 */
-class ServerSession final : public WaitObject {
+class ServerSession final : public SynchronizationObject {
 public:
    explicit ServerSession(KernelCore& kernel);
    ~ServerSession() override;
@@ -73,6 +73,8 @@ public:
        return parent.get();
    }

+    bool IsSignaled() const override;
+
    /**
     * Sets the HLE handler for the session. This handler will be called to service IPC requests
     * instead of the regular IPC machinery. (The regular IPC machinery is currently not
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -9,7 +9,7 @@

 namespace Kernel {

-Session::Session(KernelCore& kernel) : WaitObject{kernel} {}
+Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Session::~Session() = default;

 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
@@ -29,6 +29,11 @@ bool Session::ShouldWait(const Thread* thread) const {
    return {};
 }

+bool Session::IsSignaled() const {
+    UNIMPLEMENTED();
+    return true;
+}
+
 void Session::Acquire(Thread* thread) {
    UNIMPLEMENTED();
 }
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>

-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"

 namespace Kernel {

@@ -19,7 +19,7 @@ class ServerSession;
 * Parent structure to link the client and server endpoints of a session with their associated
 * client port.
 */
-class Session final : public WaitObject {
+class Session final : public SynchronizationObject {
 public:
    explicit Session(KernelCore& kernel);
    ~Session() override;
@@ -39,6 +39,8 @@ public:

    bool ShouldWait(const Thread* thread) const override;

+    bool IsSignaled() const override;
+
    void Acquire(Thread* thread) override;

    std::shared_ptr<ClientSession> Client() {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -32,6 +32,7 @@
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_wrap.h"
+#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
@@ -433,22 +434,6 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
    return ERR_INVALID_HANDLE;
 }

-/// Default thread wakeup callback for WaitSynchronization
-static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                        std::shared_ptr<WaitObject> object, std::size_t index) {
-    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-
-    if (reason == ThreadWakeupReason::Timeout) {
-        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
-        return true;
-    }
-
-    ASSERT(reason == ThreadWakeupReason::Signal);
-    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-    thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
-    return true;
-};
-
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
 static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
                                      u64 handle_count, s64 nano_seconds) {
@@ -472,14 +457,14 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
    }

    auto* const thread = system.CurrentScheduler().GetCurrentThread();
-
-    using ObjectPtr = Thread::ThreadWaitObjects::value_type;
-    Thread::ThreadWaitObjects objects(handle_count);
-    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto& kernel = system.Kernel();
+    using ObjectPtr = Thread::ThreadSynchronizationObjects::value_type;
+    Thread::ThreadSynchronizationObjects objects(handle_count);
+    const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();

    for (u64 i = 0; i < handle_count; ++i) {
        const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
-        const auto object = handle_table.Get<WaitObject>(handle);
+        const auto object = handle_table.Get<SynchronizationObject>(handle);

        if (object == nullptr) {
            LOG_ERROR(Kernel_SVC, "Object is a nullptr");
@@ -488,47 +473,10 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr

        objects[i] = object;
    }
-
-    // Find the first object that is acquirable in the provided list of objects
-    auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) {
-        return !object->ShouldWait(thread);
-    });
-
-    if (itr != objects.end()) {
-        // We found a ready object, acquire it and set the result value
-        WaitObject* object = itr->get();
-        object->Acquire(thread);
-        *index = static_cast<s32>(std::distance(objects.begin(), itr));
-        return RESULT_SUCCESS;
-    }
-
-    // No objects were ready to be acquired, prepare to suspend the thread.
-
-    // If a timeout value of 0 was provided, just return the Timeout error code instead of
-    // suspending the thread.
-    if (nano_seconds == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    if (thread->IsSyncCancelled()) {
-        thread->SetSyncCancelled(false);
-        return ERR_SYNCHRONIZATION_CANCELED;
-    }
-
-    for (auto& object : objects) {
-        object->AddWaitingThread(SharedFrom(thread));
-    }
-
-    thread->SetWaitObjects(std::move(objects));
-    thread->SetStatus(ThreadStatus::WaitSynch);
-
-    // Create an event to wake the thread up after the specified nanosecond delay has passed
-    thread->WakeAfterDelay(nano_seconds);
-    thread->SetWakeupCallback(DefaultThreadWakeupCallback);
-
-    system.PrepareReschedule(thread->GetProcessorID());
-
-    return RESULT_TIMEOUT;
+    auto& synchronization = kernel.Synchronization();
+    const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds);
+    *index = handle_result;
+    return result;
 }

 /// Resumes a thread waiting on WaitSynchronization
@@ -1863,10 +1811,14 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
    }

    auto& kernel = system.Kernel();
-    auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
+    auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
+
+    if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
+        return reserve_result;
+    }

    auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
-    const auto result = handle_table.Create(std::move(transfer_mem_handle));
+    const auto result{handle_table.Create(std::move(transfer_mem_handle))};
    if (result.Failed()) {
        return result.Code();
    }
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -0,0 +1,87 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+/// Default thread wakeup callback for WaitSynchronization
+static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                                        std::shared_ptr<SynchronizationObject> object,
+                                        std::size_t index) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+
+    if (reason == ThreadWakeupReason::Timeout) {
+        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
+        return true;
+    }
+
+    ASSERT(reason == ThreadWakeupReason::Signal);
+    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
+    thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
+    return true;
+}
+
+Synchronization::Synchronization(Core::System& system) : system{system} {}
+
+void Synchronization::SignalObject(SynchronizationObject& obj) const {
+    if (obj.IsSignaled()) {
+        obj.WakeupAllWaitingThreads();
+    }
+}
+
+std::pair<ResultCode, Handle> Synchronization::WaitFor(
+    std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
+    auto* const thread = system.CurrentScheduler().GetCurrentThread();
+    // Find the first object that is acquirable in the provided list of objects
+    const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(),
+                                  [thread](const std::shared_ptr<SynchronizationObject>& object) {
+                                      return object->IsSignaled();
+                                  });
+
+    if (itr != sync_objects.end()) {
+        // We found a ready object, acquire it and set the result value
+        SynchronizationObject* object = itr->get();
+        object->Acquire(thread);
+        const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+        return {RESULT_SUCCESS, index};
+    }
+
+    // No objects were ready to be acquired, prepare to suspend the thread.
+
+    // If a timeout value of 0 was provided, just return the Timeout error code instead of
+    // suspending the thread.
+    if (nano_seconds == 0) {
+        return {RESULT_TIMEOUT, InvalidHandle};
+    }
+
+    if (thread->IsSyncCancelled()) {
+        thread->SetSyncCancelled(false);
+        return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+    }
+
+    for (auto& object : sync_objects) {
+        object->AddWaitingThread(SharedFrom(thread));
+    }
+
+    thread->SetSynchronizationObjects(std::move(sync_objects));
+    thread->SetStatus(ThreadStatus::WaitSynch);
+
+    // Create an event to wake the thread up after the specified nanosecond delay has passed
+    thread->WakeAfterDelay(nano_seconds);
+    thread->SetWakeupCallback(DefaultThreadWakeupCallback);
+
+    system.PrepareReschedule(thread->GetProcessorID());
+
+    return {RESULT_TIMEOUT, InvalidHandle};
+}
+
+} // namespace Kernel
--- a/src/core/hle/kernel/synchronization.h
+++ b/src/core/hle/kernel/synchronization.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
+
+namespace Core {
+class System;
+} // namespace Core
+
+namespace Kernel {
+
+class SynchronizationObject;
+
+/**
+ * The 'Synchronization' class is an interface for handling synchronization methods
+ * used by Synchronization objects and synchronization SVCs. This centralizes processing of
+ * such methods.
+ */
+class Synchronization {
+public:
+    explicit Synchronization(Core::System& system);
+
+    /// Signals a synchronization object, waking up all its waiting threads if it is signaled
+    void SignalObject(SynchronizationObject& obj) const;
+
+    /// Checks whether any of the sync_objects is already signaled. If so, it returns
+    /// Success and the handle index of the signaled sync object. Otherwise, the current
+    /// thread will wait for nano_seconds or for a synchronization object to signal
+    /// (a nano_seconds of 0 returns Timeout immediately instead of waiting).
+    std::pair<ResultCode, Handle> WaitFor(
+        std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds);
+
+private:
+    Core::System& system;
+};
+} // namespace Kernel
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -10,20 +10,26 @@
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/thread.h"

 namespace Kernel {

-WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {}
-WaitObject::~WaitObject() = default;
+SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {}
+SynchronizationObject::~SynchronizationObject() = default;

-void WaitObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::Signal() {
+    kernel.Synchronization().SignalObject(*this);
+}
+
+void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
    if (itr == waiting_threads.end())
        waiting_threads.push_back(std::move(thread));
 }

-void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
    // If a thread passed multiple handles to the same object,
    // the kernel might attempt to remove the thread from the object's
@@ -32,7 +38,7 @@ void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
        waiting_threads.erase(itr);
 }

-std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
+std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
    Thread* candidate = nullptr;
    u32 candidate_priority = THREADPRIO_LOWEST + 1;

@@ -50,23 +56,14 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
        if (ShouldWait(thread.get()))
            continue;

-        // A thread is ready to run if it's either in ThreadStatus::WaitSynch
-        // and the rest of the objects it is waiting on are ready.
-        bool ready_to_run = true;
-        if (thread_status == ThreadStatus::WaitSynch) {
-            ready_to_run = thread->AllWaitObjectsReady();
-        }
-
-        if (ready_to_run) {
-            candidate = thread.get();
-            candidate_priority = thread->GetPriority();
-        }
+        candidate = thread.get();
+        candidate_priority = thread->GetPriority();
    }

    return SharedFrom(candidate);
 }

-void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    ASSERT(!ShouldWait(thread.get()));

    if (!thread) {
@@ -74,7 +71,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    }

    if (thread->IsSleepingOnWait()) {
-        for (const auto& object : thread->GetWaitObjects()) {
+        for (const auto& object : thread->GetSynchronizationObjects()) {
            ASSERT(!object->ShouldWait(thread.get()));
            object->Acquire(thread.get());
        }
@@ -82,9 +79,9 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
        Acquire(thread.get());
    }

-    const std::size_t index = thread->GetWaitObjectIndex(SharedFrom(this));
+    const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));

-    thread->ClearWaitObjects();
+    thread->ClearSynchronizationObjects();

    thread->CancelWakeupTimer();

@@ -99,13 +96,13 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    }
 }

-void WaitObject::WakeupAllWaitingThreads() {
+void SynchronizationObject::WakeupAllWaitingThreads() {
    while (auto thread = GetHighestPriorityReadyThread()) {
        WakeupWaitingThread(thread);
    }
 }

-const std::vector<std::shared_ptr<Thread>>& WaitObject::GetWaitingThreads() const {
+const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
    return waiting_threads;
 }

--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -15,10 +15,10 @@ class KernelCore;
 class Thread;

 /// Class that represents a Kernel object that a thread can be waiting on
-class WaitObject : public Object {
+class SynchronizationObject : public Object {
 public:
-    explicit WaitObject(KernelCore& kernel);
-    ~WaitObject() override;
+    explicit SynchronizationObject(KernelCore& kernel);
+    ~SynchronizationObject() override;

    /**
     * Check if the specified thread should wait until the object is available
@@ -30,6 +30,13 @@ public:
    /// Acquire/lock the object for the specified thread if it is available
    virtual void Acquire(Thread* thread) = 0;

+    /// Signal this object
+    virtual void Signal();
+
+    virtual bool IsSignaled() const {
+        return is_signaled;
+    }
+
    /**
     * Add a thread to wait on this object
     * @param thread Pointer to thread to add
@@ -60,16 +67,20 @@ public:
    /// Get a const reference to the waiting threads list for debug use
    const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;

+protected:
+    bool is_signaled{}; // Tells if this sync object is signaled
+
 private:
    /// Threads waiting for this object to become available
    std::vector<std::shared_ptr<Thread>> waiting_threads;
 };

-// Specialization of DynamicObjectCast for WaitObjects
+// Specialization of DynamicObjectCast for SynchronizationObjects
 template <>
-inline std::shared_ptr<WaitObject> DynamicObjectCast<WaitObject>(std::shared_ptr<Object> object) {
+inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>(
+    std::shared_ptr<Object> object) {
    if (object != nullptr && object->IsWaitable()) {
-        return std::static_pointer_cast<WaitObject>(object);
+        return std::static_pointer_cast<SynchronizationObject>(object);
    }
    return nullptr;
 }
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -15,6 +15,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -31,11 +32,15 @@ bool Thread::ShouldWait(const Thread* thread) const {
    return status != ThreadStatus::Dead;
 }

+bool Thread::IsSignaled() const {
+    return status == ThreadStatus::Dead;
+}
+
 void Thread::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }

-Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {}
+Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Thread::~Thread() = default;

 void Thread::Stop() {
@@ -45,7 +50,7 @@ void Thread::Stop() {
    kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
    callback_handle = 0;
    SetStatus(ThreadStatus::Dead);
-    WakeupAllWaitingThreads();
+    Signal();

    // Clean up any dangling references in objects that this thread was waiting for
    for (auto& wait_object : wait_objects) {
@@ -215,7 +220,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
    context.cpu_registers[1] = output;
 }

-s32 Thread::GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const {
+s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
    ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
    return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
@@ -336,14 +341,16 @@ void Thread::ChangeCore(u32 core, u64 mask) {
    SetCoreAndAffinityMask(core, mask);
 }

-bool Thread::AllWaitObjectsReady() const {
-    return std::none_of(
-        wait_objects.begin(), wait_objects.end(),
-        [this](const std::shared_ptr<WaitObject>& object) { return object->ShouldWait(this); });
+bool Thread::AllSynchronizationObjectsReady() const {
+    return std::none_of(wait_objects.begin(), wait_objects.end(),
+                        [this](const std::shared_ptr<SynchronizationObject>& object) {
+                            return object->ShouldWait(this);
+                        });
 }

 bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                  std::shared_ptr<WaitObject> object, std::size_t index) {
+                                  std::shared_ptr<SynchronizationObject> object,
+                                  std::size_t index) {
    ASSERT(wakeup_callback);
    return wakeup_callback(reason, std::move(thread), std::move(object), index);
 }
@@ -425,7 +432,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
            const s32 old_core = processor_id;
            if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) {
                if (static_cast<s32>(ideal_core) < 0) {
-                    processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES);
+                    processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES);
                } else {
                    processor_id = ideal_core;
                }
@@ -449,7 +456,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
            scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
        }

-        for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
                scheduler.Unsuggest(current_priority, core, this);
            }
@@ -460,7 +467,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
        }

-        for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
                scheduler.Suggest(current_priority, core, this);
            }
@@ -474,12 +481,12 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
        return;
    }
-    auto& scheduler = Core::System::GetInstance().GlobalScheduler();
+    auto& scheduler = kernel.GlobalScheduler();
    if (processor_id >= 0) {
        scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
            scheduler.Unsuggest(old_priority, core, this);
        }
@@ -496,7 +503,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
        }
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
            scheduler.Suggest(current_priority, core, this);
        }
@@ -506,13 +513,13 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
 }

 void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
-    auto& scheduler = Core::System::GetInstance().GlobalScheduler();
+    auto& scheduler = kernel.GlobalScheduler();
    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
        current_priority >= THREADPRIO_COUNT) {
        return;
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (((old_affinity_mask >> core) & 1) != 0) {
            if (core == static_cast<u32>(old_core)) {
                scheduler.Unschedule(current_priority, core, this);
@@ -522,7 +529,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
        }
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (((affinity_mask >> core) & 1) != 0) {
            if (core == static_cast<u32>(processor_id)) {
                scheduler.Schedule(current_priority, core, this);
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -11,7 +11,7 @@
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -95,7 +95,7 @@ enum class ThreadSchedMasks : u32 {
    ForcePauseMask = 0x0070,
 };

-class Thread final : public WaitObject {
+class Thread final : public SynchronizationObject {
 public:
    explicit Thread(KernelCore& kernel);
    ~Thread() override;
@@ -104,11 +104,11 @@ public:

    using ThreadContext = Core::ARM_Interface::ThreadContext;

-    using ThreadWaitObjects = std::vector<std::shared_ptr<WaitObject>>;
+    using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;

    using WakeupCallback =
        std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                           std::shared_ptr<WaitObject> object, std::size_t index)>;
+                           std::shared_ptr<SynchronizationObject> object, std::size_t index)>;

    /**
     * Creates and returns a new thread. The new thread is immediately scheduled
@@ -146,6 +146,7 @@ public:

    bool ShouldWait(const Thread* thread) const override;
    void Acquire(Thread* thread) override;
+    bool IsSignaled() const override;

    /**
     * Gets the thread's current priority
@@ -233,7 +234,7 @@ public:
     *
     * @param object Object to query the index of.
     */
-    s32 GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const;
+    s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const;

    /**
     * Stops a thread, invalidating it from further use
@@ -314,15 +315,15 @@ public:
        return owner_process;
    }

-    const ThreadWaitObjects& GetWaitObjects() const {
+    const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
        return wait_objects;
    }

-    void SetWaitObjects(ThreadWaitObjects objects) {
+    void SetSynchronizationObjects(ThreadSynchronizationObjects objects) {
        wait_objects = std::move(objects);
    }

-    void ClearWaitObjects() {
+    void ClearSynchronizationObjects() {
        for (const auto& waiting_object : wait_objects) {
            waiting_object->RemoveWaitingThread(SharedFrom(this));
        }
@@ -330,7 +331,7 @@ public:
    }

    /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllWaitObjectsReady() const;
+    bool AllSynchronizationObjectsReady() const;

    const MutexWaitingThreads& GetMutexWaitingThreads() const {
        return wait_mutex_threads;
@@ -395,7 +396,7 @@ public:
     *      will cause an assertion to trigger.
     */
    bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                              std::shared_ptr<WaitObject> object, std::size_t index);
+                              std::shared_ptr<SynchronizationObject> object, std::size_t index);

    u32 GetIdealCore() const {
        return ideal_core;
@@ -494,7 +495,7 @@ private:

    /// Objects that the thread is waiting on, in the same order as they were
    /// passed to WaitSynchronization.
-    ThreadWaitObjects wait_objects;
+    ThreadSynchronizationObjects wait_objects;

    /// List of threads that are waiting for a mutex that is held by this thread.
    MutexWaitingThreads wait_mutex_threads;
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -8,15 +8,23 @@
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/result.h"
+#include "core/memory.h"

 namespace Kernel {

-TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
-TransferMemory::~TransferMemory() = default;
+TransferMemory::TransferMemory(KernelCore& kernel, Memory::Memory& memory)
+    : Object{kernel}, memory{memory} {}

-std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
-                                                       u64 size, MemoryPermission permissions) {
-    std::shared_ptr<TransferMemory> transfer_memory{std::make_shared<TransferMemory>(kernel)};
+TransferMemory::~TransferMemory() {
+    // Release memory region when transfer memory is destroyed
+    Reset();
+}
+
+std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Memory::Memory& memory,
+                                                       VAddr base_address, u64 size,
+                                                       MemoryPermission permissions) {
+    std::shared_ptr<TransferMemory> transfer_memory{
+        std::make_shared<TransferMemory>(kernel, memory)};

    transfer_memory->base_address = base_address;
    transfer_memory->memory_size = size;
@@ -27,7 +35,7 @@ std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr
 }

 const u8* TransferMemory::GetPointer() const {
-    return backing_block.get()->data();
+    return memory.GetPointer(base_address);
 }

 u64 TransferMemory::GetSize() const {
@@ -62,6 +70,52 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
    return RESULT_SUCCESS;
 }

+ResultCode TransferMemory::Reserve() {
+    auto& vm_manager{owner_process->VMManager()};
+    const auto check_range_result{vm_manager.CheckRangeState(
+        base_address, memory_size, MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated,
+        MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::All,
+        VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None,
+        MemoryAttribute::IpcAndDeviceMapped)};
+
+    if (check_range_result.Failed()) {
+        return check_range_result.Code();
+    }
+
+    auto [state_, permissions_, attribute] = *check_range_result;
+
+    if (const auto result{vm_manager.ReprotectRange(
+            base_address, memory_size, SharedMemory::ConvertPermissions(owner_permissions))};
+        result.IsError()) {
+        return result;
+    }
+
+    return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
+                                         attribute | MemoryAttribute::Locked);
+}
+
+ResultCode TransferMemory::Reset() {
+    auto& vm_manager{owner_process->VMManager()};
+    if (const auto result{vm_manager.CheckRangeState(
+            base_address, memory_size,
+            MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated,
+            MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::None,
+            VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked,
+            MemoryAttribute::IpcAndDeviceMapped)};
+        result.Failed()) {
+        return result.Code();
+    }
+
+    if (const auto result{
+            vm_manager.ReprotectRange(base_address, memory_size, VMAPermission::ReadWrite)};
+        result.IsError()) {
+        return result;
+    }
+
+    return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
+                                         MemoryAttribute::None);
+}
+
 ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
    if (memory_size != size) {
        return ERR_INVALID_SIZE;
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -11,6 +11,10 @@

 union ResultCode;

+namespace Memory {
+class Memory;
+}
+
 namespace Kernel {

 class KernelCore;
@@ -26,12 +30,13 @@ enum class MemoryPermission : u32;
 ///
 class TransferMemory final : public Object {
 public:
-    explicit TransferMemory(KernelCore& kernel);
+    explicit TransferMemory(KernelCore& kernel, Memory::Memory& memory);
    ~TransferMemory() override;

    static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;

-    static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
+    static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Memory::Memory& memory,
+                                                  VAddr base_address, u64 size,
                                                  MemoryPermission permissions);

    TransferMemory(const TransferMemory&) = delete;
@@ -80,6 +85,14 @@ public:
    ///
    ResultCode UnmapMemory(VAddr address, u64 size);

+    /// Reserves the region to be used for the transfer memory, called after the transfer memory is
+    /// created.
+    ResultCode Reserve();
+
+    /// Resets the region previously used for the transfer memory, called after the transfer memory
+    /// is closed.
+    ResultCode Reset();
+
 private:
    /// Memory block backing this instance.
    std::shared_ptr<PhysicalMemory> backing_block;
@@ -98,6 +111,8 @@ private:

    /// Whether or not this transfer memory instance has mapped memory.
    bool is_mapped = false;
+
+    Memory::Memory& memory;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -544,7 +544,8 @@ MemoryInfo VMManager::QueryMemory(VAddr address) const {

 ResultCode VMManager::SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask,
                                         MemoryAttribute attribute) {
-    constexpr auto ignore_mask = MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped;
+    constexpr auto ignore_mask =
+        MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped | MemoryAttribute::Locked;
    constexpr auto attribute_mask = ~ignore_mask;

    const auto result = CheckRangeState(
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -98,6 +98,8 @@ enum class MemoryAttribute : u32 {
    DeviceMapped = 4,
    /// Uncached memory
    Uncached = 8,
+
+    IpcAndDeviceMapped = LockedForIPC | DeviceMapped,
 };

 constexpr MemoryAttribute operator|(MemoryAttribute lhs, MemoryAttribute rhs) {
@@ -654,6 +656,35 @@ public:
    /// is scheduled.
    Common::PageTable page_table{Memory::PAGE_BITS};

+    using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
+
+    /// Checks if an address range adheres to the specified states provided.
+    ///
+    /// @param address         The starting address of the address range.
+    /// @param size            The size of the address range.
+    /// @param state_mask      The memory state mask.
+    /// @param state           The state to compare the individual VMA states against,
+    ///                        which is done in the form of: (vma.state & state_mask) != state.
+    /// @param permission_mask The memory permissions mask.
+    /// @param permissions     The permission to compare the individual VMA permissions against,
+    ///                        which is done in the form of:
+    ///                        (vma.permission & permission_mask) != permission.
+    /// @param attribute_mask  The memory attribute mask.
+    /// @param attribute       The memory attributes to compare the individual VMA attributes
+    ///                        against, which is done in the form of:
+    ///                        (vma.attributes & attribute_mask) != attribute.
+    /// @param ignore_mask     The memory attributes to ignore during the check.
+    ///
+    /// @returns If successful, returns a tuple containing the memory attributes
+    ///          (with ignored bits specified by ignore_mask unset), memory permissions, and
+    ///          memory state across the memory range.
+    /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
+    ///
+    CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
+                                 VMAPermission permission_mask, VMAPermission permissions,
+                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
+                                 MemoryAttribute ignore_mask) const;
+
 private:
    using VMAIter = VMAMap::iterator;

@@ -707,35 +738,6 @@ private:
    /// Clears out the page table
    void ClearPageTable();

-    using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
-
-    /// Checks if an address range adheres to the specified states provided.
-    ///
-    /// @param address         The starting address of the address range.
-    /// @param size            The size of the address range.
-    /// @param state_mask      The memory state mask.
-    /// @param state           The state to compare the individual VMA states against,
-    ///                        which is done in the form of: (vma.state & state_mask) != state.
-    /// @param permission_mask The memory permissions mask.
-    /// @param permissions     The permission to compare the individual VMA permissions against,
-    ///                        which is done in the form of:
-    ///                        (vma.permission & permission_mask) != permission.
-    /// @param attribute_mask  The memory attribute mask.
-    /// @param attribute       The memory attributes to compare the individual VMA attributes
-    ///                        against, which is done in the form of:
-    ///                        (vma.attributes & attribute_mask) != attribute.
-    /// @param ignore_mask     The memory attributes to ignore during the check.
-    ///
-    /// @returns If successful, returns a tuple containing the memory attributes
-    ///          (with ignored bits specified by ignore_mask unset), memory permissions, and
-    ///          memory state across the memory range.
-    /// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
-    ///
-    CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
-                                 VMAPermission permission_mask, VMAPermission permissions,
-                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
-                                 MemoryAttribute ignore_mask) const;
-
    /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
    ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;

--- a/src/core/hle/kernel/writable_event.cpp
+++ b/src/core/hle/kernel/writable_event.cpp
@@ -22,7 +22,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) {
    writable_event->name = name + ":Writable";
    writable_event->readable = readable_event;
    readable_event->name = name + ":Readable";
-    readable_event->signaled = false;

    return {std::move(readable_event), std::move(writable_event)};
 }
@@ -40,7 +39,7 @@ void WritableEvent::Clear() {
 }

 bool WritableEvent::IsSignaled() const {
-    return readable->signaled;
+    return readable->IsSignaled();
 }

 } // namespace Kernel
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -709,8 +709,34 @@ void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
    apm_sys->SetCpuBoostMode(ctx);
 }

-IStorage::IStorage(std::vector<u8> buffer)
-    : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
+IStorageImpl::~IStorageImpl() = default;
+
+class StorageDataImpl final : public IStorageImpl {
+public:
+    explicit StorageDataImpl(std::vector<u8>&& buffer) : buffer{std::move(buffer)} {}
+
+    std::vector<u8>& GetData() override {
+        return buffer;
+    }
+
+    const std::vector<u8>& GetData() const override {
+        return buffer;
+    }
+
+    std::size_t GetSize() const override {
+        return buffer.size();
+    }
+
+private:
+    std::vector<u8> buffer;
+};
+
+IStorage::IStorage(std::vector<u8>&& buffer)
+    : ServiceFramework("IStorage"), impl{std::make_shared<StorageDataImpl>(std::move(buffer))} {
+    Register();
+}
+
+void IStorage::Register() {
    // clang-format off
        static const FunctionInfo functions[] = {
            {0, &IStorage::Open, "Open"},
@@ -723,8 +749,13 @@ IStorage::IStorage(std::vector<u8> buffer)

 IStorage::~IStorage() = default;

-const std::vector<u8>& IStorage::GetData() const {
-    return buffer;
+void IStorage::Open(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_AM, "called");
+
+    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<IStorageAccessor>(*this);
 }

 void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
@@ -816,7 +847,7 @@ private:
        LOG_DEBUG(Service_AM, "called");

        IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>());

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -825,26 +856,25 @@ private:
    void PopOutData(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_AM, "called");

-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
        const auto storage = applet->GetBroker().PopNormalDataToGame();
        if (storage == nullptr) {
            LOG_ERROR(Service_AM,
                      "storage is a nullptr. There is no data in the current normal channel");
-
+            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NO_DATA_IN_CHANNEL);
            return;
        }

+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<IStorage>(std::move(*storage));
+        rb.PushIpcInterface<IStorage>(std::move(storage));
    }

    void PushInteractiveInData(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_AM, "called");

        IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>());

        ASSERT(applet->IsInitialized());
        applet->ExecuteInteractive();
@@ -857,19 +887,18 @@ private:
    void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_AM, "called");

-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
        const auto storage = applet->GetBroker().PopInteractiveDataToGame();
        if (storage == nullptr) {
            LOG_ERROR(Service_AM,
                      "storage is a nullptr. There is no data in the current interactive channel");
-
+            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NO_DATA_IN_CHANNEL);
            return;
        }

+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<IStorage>(std::move(*storage));
+        rb.PushIpcInterface<IStorage>(std::move(storage));
    }

    void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) {
@@ -891,15 +920,6 @@ private:
    std::shared_ptr<Applets::Applet> applet;
 };

-void IStorage::Open(Kernel::HLERequestContext& ctx) {
-    LOG_DEBUG(Service_AM, "called");
-
-    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IStorageAccessor>(*this);
-}
-
 IStorageAccessor::IStorageAccessor(IStorage& storage)
    : ServiceFramework("IStorageAccessor"), backing(storage) {
    // clang-format off
@@ -921,7 +941,7 @@ void IStorageAccessor::GetSize(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 4};

    rb.Push(RESULT_SUCCESS);
-    rb.Push(static_cast<u64>(backing.buffer.size()));
+    rb.Push(static_cast<u64>(backing.GetSize()));
 }

 void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
@@ -932,17 +952,17 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {

    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());

-    if (data.size() > backing.buffer.size() - offset) {
+    if (data.size() > backing.GetSize() - offset) {
        LOG_ERROR(Service_AM,
                  "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
-                  backing.buffer.size(), data.size(), offset);
+                  backing.GetSize(), data.size(), offset);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
        return;
    }

-    std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
+    std::memcpy(backing.GetData().data() + offset, data.data(), data.size());

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);
@@ -956,16 +976,16 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {

    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);

-    if (size > backing.buffer.size() - offset) {
+    if (size > backing.GetSize() - offset) {
        LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
-                  backing.buffer.size(), size, offset);
+                  backing.GetSize(), size, offset);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
        return;
    }

-    ctx.WriteBuffer(backing.buffer.data() + offset, size);
+    ctx.WriteBuffer(backing.GetData().data() + offset, size);

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);
@@ -1031,7 +1051,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
    rp.SetCurrentOffset(3);
    const auto handle{rp.Pop<Kernel::Handle>()};

-    const auto transfer_mem =
+    auto transfer_mem =
        system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle);

    if (transfer_mem == nullptr) {
@@ -1047,7 +1067,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory)));
+    rb.PushIpcInterface<IStorage>(std::move(memory));
 }

 IApplicationFunctions::IApplicationFunctions(Core::System& system_)
@@ -1189,13 +1209,11 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
        u64 build_id{};
        std::memcpy(&build_id, build_id_full.data(), sizeof(u64));

-        const auto data =
-            backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
-
+        auto data = backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
        if (data.has_value()) {
            IPC::ResponseBuilder rb{ctx, 2, 0, 1};
            rb.Push(RESULT_SUCCESS);
-            rb.PushIpcInterface<AM::IStorage>(*data);
+            rb.PushIpcInterface<IStorage>(std::move(*data));
            launch_popped_application_specific = true;
            return;
        }
@@ -1218,7 +1236,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
        std::vector<u8> buffer(sizeof(LaunchParameterAccountPreselectedUser));
        std::memcpy(buffer.data(), &params, buffer.size());

-        rb.PushIpcInterface<AM::IStorage>(buffer);
+        rb.PushIpcInterface<IStorage>(std::move(buffer));
        launch_popped_account_preselect = true;
        return;
    }
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -12,7 +12,8 @@

 namespace Kernel {
 class KernelCore;
-}
+class TransferMemory;
+} // namespace Kernel

 namespace Service::NVFlinger {
 class NVFlinger;
@@ -188,19 +189,36 @@ private:
    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

+class IStorageImpl {
+public:
+    virtual ~IStorageImpl();
+    virtual std::vector<u8>& GetData() = 0;
+    virtual const std::vector<u8>& GetData() const = 0;
+    virtual std::size_t GetSize() const = 0;
+};
+
 class IStorage final : public ServiceFramework<IStorage> {
 public:
-    explicit IStorage(std::vector<u8> buffer);
+    explicit IStorage(std::vector<u8>&& buffer);
    ~IStorage() override;

-    const std::vector<u8>& GetData() const;
+    std::vector<u8>& GetData() {
+        return impl->GetData();
+    }
+
+    const std::vector<u8>& GetData() const {
+        return impl->GetData();
+    }
+
+    std::size_t GetSize() const {
+        return impl->GetSize();
+    }

 private:
+    void Register();
    void Open(Kernel::HLERequestContext& ctx);

-    std::vector<u8> buffer;
-
-    friend class IStorageAccessor;
+    std::shared_ptr<IStorageImpl> impl;
 };

 class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -50,16 +50,17 @@ AppletDataBroker::RawChannelData AppletDataBroker::PeekDataToAppletForDebug() co
    return {std::move(out_normal), std::move(out_interactive)};
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
+std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
    if (out_channel.empty())
        return nullptr;

    auto out = std::move(out_channel.front());
    out_channel.pop_front();
+    pop_out_data_event.writable->Clear();
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
+std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
    if (in_channel.empty())
        return nullptr;

@@ -68,16 +69,17 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
+std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
    if (out_interactive_channel.empty())
        return nullptr;

    auto out = std::move(out_interactive_channel.front());
    out_interactive_channel.pop_front();
+    pop_interactive_out_data_event.writable->Clear();
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
+std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
    if (in_interactive_channel.empty())
        return nullptr;

@@ -86,21 +88,21 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
    return out;
 }

-void AppletDataBroker::PushNormalDataFromGame(IStorage storage) {
-    in_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage) {
+    in_channel.emplace_back(std::move(storage));
 }

-void AppletDataBroker::PushNormalDataFromApplet(IStorage storage) {
-    out_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage) {
+    out_channel.emplace_back(std::move(storage));
    pop_out_data_event.writable->Signal();
 }

-void AppletDataBroker::PushInteractiveDataFromGame(IStorage storage) {
-    in_interactive_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage) {
+    in_interactive_channel.emplace_back(std::move(storage));
 }

-void AppletDataBroker::PushInteractiveDataFromApplet(IStorage storage) {
-    out_interactive_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage) {
+    out_interactive_channel.emplace_back(std::move(storage));
    pop_interactive_out_data_event.writable->Signal();
 }

--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -72,17 +72,17 @@ public:
    // Retrieves but does not pop the data sent to applet.
    RawChannelData PeekDataToAppletForDebug() const;

-    std::unique_ptr<IStorage> PopNormalDataToGame();
-    std::unique_ptr<IStorage> PopNormalDataToApplet();
+    std::shared_ptr<IStorage> PopNormalDataToGame();
+    std::shared_ptr<IStorage> PopNormalDataToApplet();

-    std::unique_ptr<IStorage> PopInteractiveDataToGame();
-    std::unique_ptr<IStorage> PopInteractiveDataToApplet();
+    std::shared_ptr<IStorage> PopInteractiveDataToGame();
+    std::shared_ptr<IStorage> PopInteractiveDataToApplet();

-    void PushNormalDataFromGame(IStorage storage);
-    void PushNormalDataFromApplet(IStorage storage);
+    void PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage);
+    void PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage);

-    void PushInteractiveDataFromGame(IStorage storage);
-    void PushInteractiveDataFromApplet(IStorage storage);
+    void PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage);
+    void PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage);

    void SignalStateChanged() const;

@@ -94,16 +94,16 @@ private:
    // Queues are named from applet's perspective

    // PopNormalDataToApplet and PushNormalDataFromGame
-    std::deque<std::unique_ptr<IStorage>> in_channel;
+    std::deque<std::shared_ptr<IStorage>> in_channel;

    // PopNormalDataToGame and PushNormalDataFromApplet
-    std::deque<std::unique_ptr<IStorage>> out_channel;
+    std::deque<std::shared_ptr<IStorage>> out_channel;

    // PopInteractiveDataToApplet and PushInteractiveDataFromGame
-    std::deque<std::unique_ptr<IStorage>> in_interactive_channel;
+    std::deque<std::shared_ptr<IStorage>> in_interactive_channel;

    // PopInteractiveDataToGame and PushInteractiveDataFromApplet
-    std::deque<std::unique_ptr<IStorage>> out_interactive_channel;
+    std::deque<std::shared_ptr<IStorage>> out_interactive_channel;

    Kernel::EventPair state_changed_event;

--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -186,7 +186,7 @@ void Error::Execute() {

 void Error::DisplayCompleted() {
    complete = true;
-    broker.PushNormalDataFromApplet(IStorage{{}});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -20,7 +20,7 @@ namespace Service::AM::Applets {
 constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221};

 static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) {
-    std::unique_ptr<IStorage> storage = broker.PopNormalDataToApplet();
+    std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet();
    for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
        const auto data = storage->GetData();
        LOG_INFO(Service_AM,
@@ -148,7 +148,7 @@ void Auth::AuthFinished(bool successful) {
    std::vector<u8> out(sizeof(Return));
    std::memcpy(out.data(), &return_, sizeof(Return));

-    broker.PushNormalDataFromApplet(IStorage{out});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(out)));
    broker.SignalStateChanged();
 }

@@ -198,7 +198,7 @@ void PhotoViewer::Execute() {
 }

 void PhotoViewer::ViewFinished() {
-    broker.PushNormalDataFromApplet(IStorage{{}});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
    broker.SignalStateChanged();
 }

@@ -234,8 +234,8 @@ void StubApplet::ExecuteInteractive() {
    LOG_WARNING(Service_AM, "called (STUBBED)");
    LogCurrentStorage(broker, "ExecuteInteractive");

-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
-    broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
+    broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
    broker.SignalStateChanged();
 }

@@ -243,8 +243,8 @@ void StubApplet::Execute() {
    LOG_WARNING(Service_AM, "called (STUBBED)");
    LogCurrentStorage(broker, "Execute");

-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
-    broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
+    broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ b/src/core/hle/service/am/applets/profile_select.cpp
@@ -50,7 +50,7 @@ void ProfileSelect::ExecuteInteractive() {

 void ProfileSelect::Execute() {
    if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{final_data});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
        return;
    }

@@ -71,7 +71,7 @@ void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {

    final_data = std::vector<u8>(sizeof(UserSelectionOutput));
    std::memcpy(final_data.data(), &output, final_data.size());
-    broker.PushNormalDataFromApplet(IStorage{final_data});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -102,7 +102,8 @@ void SoftwareKeyboard::ExecuteInteractive() {

 void SoftwareKeyboard::Execute() {
    if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{final_data});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
+        broker.SignalStateChanged();
        return;
    }

@@ -119,7 +120,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
        std::vector<u8> output_sub(SWKBD_OUTPUT_BUFFER_SIZE);

        if (config.utf_8) {
-            const u64 size = text->size() + 8;
+            const u64 size = text->size() + sizeof(u64);
            const auto new_text = Common::UTF16ToUTF8(*text);

            std::memcpy(output_sub.data(), &size, sizeof(u64));
@@ -130,7 +131,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
            std::memcpy(output_main.data() + 4, new_text.data(),
                        std::min(new_text.size(), SWKBD_OUTPUT_BUFFER_SIZE - 4));
        } else {
-            const u64 size = text->size() * 2 + 8;
+            const u64 size = text->size() * 2 + sizeof(u64);
            std::memcpy(output_sub.data(), &size, sizeof(u64));
            std::memcpy(output_sub.data() + 8, text->data(),
                        std::min(text->size() * 2, SWKBD_OUTPUT_BUFFER_SIZE - 8));
@@ -144,15 +145,15 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
        final_data = output_main;

        if (complete) {
-            broker.PushNormalDataFromApplet(IStorage{output_main});
+            broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
            broker.SignalStateChanged();
        } else {
-            broker.PushInteractiveDataFromApplet(IStorage{output_sub});
+            broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::move(output_sub)));
        }
    } else {
        output_main[0] = 1;
        complete = true;
-        broker.PushNormalDataFromApplet(IStorage{output_main});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
        broker.SignalStateChanged();
    }
 }
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -284,7 +284,7 @@ void WebBrowser::Finalize() {
    std::vector<u8> data(sizeof(WebCommonReturnValue));
    std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue));

-    broker.PushNormalDataFromApplet(IStorage{data});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(data)));
    broker.SignalStateChanged();

    if (!temporary_dir.empty() && FileUtil::IsDirectory(temporary_dir)) {
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
            {3, nullptr, "SetContextForMultiStream"},
            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
-            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {7, nullptr, "DecodeInterleavedForMultiStream"},
+            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
+            {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
+            {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+            {9, nullptr, "DecodeInterleavedForMultiStream"},
        };
        // clang-format on

--- a/src/core/hle/service/bcat/backend/backend.cpp
+++ b/src/core/hle/service/bcat/backend/backend.cpp
@@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name,
 }

 bool NullBackend::Clear(u64 title_id) {
-    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}");
+    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);

    return true;
 }

 void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) {
-    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id,
+    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
              Common::HexToString(passphrase));
 }

--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -420,7 +420,7 @@ public:
            return;
        }

-        IFile file(result.Unwrap());
+        auto file = std::make_shared<IFile>(result.Unwrap());

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
@@ -445,7 +445,7 @@ public:
            return;
        }

-        IDirectory directory(result.Unwrap());
+        auto directory = std::make_shared<IDirectory>(result.Unwrap());

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
@@ -794,8 +794,8 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
 void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_FS, "called");

-    IFileSystem filesystem(fsc.OpenSDMC().Unwrap(),
-                           SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));
+    auto filesystem = std::make_shared<IFileSystem>(
+        fsc.OpenSDMC().Unwrap(), SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -846,7 +846,8 @@ void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
        id = FileSys::StorageId::NandSystem;
    }

-    IFileSystem filesystem(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));
+    auto filesystem =
+        std::make_shared<IFileSystem>(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -898,7 +899,7 @@ void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
        return;
    }

-    IStorage storage(std::move(romfs.Unwrap()));
+    auto storage = std::make_shared<IStorage>(std::move(romfs.Unwrap()));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -937,7 +938,8 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {

    FileSys::PatchManager pm{title_id};

-    IStorage storage(pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));
+    auto storage = std::make_shared<IStorage>(
+        pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -10,6 +10,7 @@
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/frontend/input.h"
+#include "core/hardware_properties.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -37,11 +38,11 @@ namespace Service::HID {

 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66);
 [[maybe_unused]] constexpr s64 accelerometer_update_ticks =
-    static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
 [[maybe_unused]] constexpr s64 gyroscope_update_ticks =
-    static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;

 IAppletResource::IAppletResource(Core::System& system)
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
@@ -26,8 +27,8 @@

 namespace Service::NVFlinger {

-constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
-constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30);

 NVFlinger::NVFlinger(Core::System& system) : system(system) {
    displays.emplace_back(0, "Default", system);
@@ -222,7 +223,7 @@ void NVFlinger::Compose() {

 s64 NVFlinger::GetNextTicks() const {
    constexpr s64 max_hertz = 120LL;
-    return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
+    return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
 }

 } // namespace Service::NVFlinger
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -50,16 +50,16 @@ private:
        IPC::RequestParser rp{ctx};
        const auto process_id = rp.PopRaw<u64>();

-        const auto data1 = ctx.ReadBuffer(0);
-        const auto data2 = ctx.ReadBuffer(1);
+        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
+        if (Type == Core::Reporter::PlayReportType::New) {
+            data.emplace_back(ctx.ReadBuffer(1));
+        }

-        LOG_DEBUG(Service_PREPO,
-                  "called, type={:02X}, process_id={:016X}, data1_size={:016X}, data2_size={:016X}",
-                  static_cast<u8>(Type), process_id, data1.size(), data2.size());
+        LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}",
+                  static_cast<u8>(Type), process_id, data[0].size());

        const auto& reporter{system.GetReporter()};
-        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
-                                process_id);
+        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -70,19 +70,19 @@ private:
        IPC::RequestParser rp{ctx};
        const auto user_id = rp.PopRaw<u128>();
        const auto process_id = rp.PopRaw<u64>();
-
-        const auto data1 = ctx.ReadBuffer(0);
-        const auto data2 = ctx.ReadBuffer(1);
+        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
+        if (Type == Core::Reporter::PlayReportType::New) {
+            data.emplace_back(ctx.ReadBuffer(1));
+        }

        LOG_DEBUG(
            Service_PREPO,
-            "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}, "
-            "data2_size={:016X}",
-            static_cast<u8>(Type), user_id[1], user_id[0], process_id, data1.size(), data2.size());
+            "called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}",
+            static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size());

        const auto& reporter{system.GetReporter()};
-        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
-                                process_id, user_id);
+        reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id,
+                                user_id);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/standard_steady_clock_core.h"

 namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
 TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};
    TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};

    if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/tick_based_steady_clock_core.h"

 namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
 SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};

    return {ticks_time_span.ToSeconds(), GetClockSourceId()};
 }
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -6,6 +6,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -233,7 +234,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
    if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
        const auto ticks{Clock::TimeSpanType::FromTicks(
            Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-            Core::Timing::CNTFREQ)};
+            Core::Hardware::CNTFREQ)};
        const s64 base_time_point{context.offset + current_time_point.time_point -
                                  ticks.ToSeconds()};
        IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/clock_types.h"
 #include "core/hle/service/time/steady_clock_core.h"
 #include "core/hle/service/time/time_sharedmemory.h"
@@ -31,7 +32,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
                                            Clock::TimeSpanType current_time_point) {
    const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};
    const Clock::SteadyClockContext context{
        static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
        clock_source_id};
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -9,6 +9,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/hid/controllers/npad.h"
 #include "core/hle/service/hid/hid.h"
@@ -17,7 +18,7 @@

 namespace Memory {

-constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12);
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;

 StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -371,6 +371,11 @@ enum class SDMCSize : u64 {
    S1TB = 0x10000000000ULL,
 };

+enum class RendererBackend {
+    OpenGL = 0,
+    Vulkan = 1,
+};
+
 struct Values {
    // System
    bool use_docked_mode;
@@ -419,6 +424,10 @@ struct Values {
    SDMCSize sdmc_size;

    // Renderer
+    RendererBackend renderer_backend;
+    bool renderer_debug;
+    int vulkan_device;
+
    float resolution_factor;
    bool use_frame_limit;
    u16 frame_limit;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -46,6 +46,16 @@ static u64 GenerateTelemetryId() {
    return telemetry_id;
 }

+static const char* TranslateRenderer(Settings::RendererBackend backend) {
+    switch (backend) {
+    case Settings::RendererBackend::OpenGL:
+        return "OpenGL";
+    case Settings::RendererBackend::Vulkan:
+        return "Vulkan";
+    }
+    return "Unknown";
+}
+
 u64 GetTelemetryId() {
    u64 telemetry_id{};
    const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -169,7 +179,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
    AddField(field_type, "Audio_SinkId", Settings::values.sink_id);
    AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
    AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core);
-    AddField(field_type, "Renderer_Backend", "OpenGL");
+    AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend));
    AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor);
    AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
    AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -7,13 +7,14 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/memory.h"
 #include "core/tools/freezer.h"

 namespace Tools {
 namespace {

-constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);

 u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) {
    switch (width) {
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -41,6 +41,7 @@ void Shutdown() {
    Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
    motion_emu.reset();
    sdl.reset();
+    udp.reset();
 }

 Keyboard* GetKeyboard() {
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -14,7 +14,6 @@
 #include "input_common/udp/client.h"
 #include "input_common/udp/protocol.h"

-using boost::asio::ip::address_v4;
 using boost::asio::ip::udp;

 namespace InputCommon::CemuhookUDP {
@@ -31,10 +30,10 @@ public:

    explicit Socket(const std::string& host, u16 port, u8 pad_index, u32 client_id,
                    SocketCallback callback)
-        : client_id(client_id), timer(io_service),
-          send_endpoint(udp::endpoint(address_v4::from_string(host), port)),
-          socket(io_service, udp::endpoint(udp::v4(), 0)), pad_index(pad_index),
-          callback(std::move(callback)) {}
+        : callback(std::move(callback)), timer(io_service),
+          socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id),
+          pad_index(pad_index),
+          send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {}

    void Stop() {
        io_service.stop();
@@ -126,7 +125,7 @@ static void SocketLoop(Socket* socket) {

 Client::Client(std::shared_ptr<DeviceStatus> status, const std::string& host, u16 port,
               u8 pad_index, u32 client_id)
-    : status(status) {
+    : status(std::move(status)) {
    StartCommunication(host, port, pad_index, client_id);
 }

@@ -207,7 +206,7 @@ void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 clie
        Common::Event success_event;
        SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {},
                                [&](Response::PadData data) { success_event.Set(); }};
-        Socket socket{host, port, pad_index, client_id, callback};
+        Socket socket{host, port, pad_index, client_id, std::move(callback)};
        std::thread worker_thread{SocketLoop, &socket};
        bool result = success_event.WaitFor(std::chrono::seconds(8));
        socket.Stop();
@@ -267,7 +266,7 @@ CalibrationConfigurationJob::CalibrationConfigurationJob(
                                        complete_event.Set();
                                    }
                                }};
-        Socket socket{host, port, pad_index, client_id, callback};
+        Socket socket{host, port, pad_index, client_id, std::move(callback)};
        std::thread worker_thread{SocketLoop, &socket};
        complete_event.Wait();
        socket.Stop();
--- a/src/input_common/udp/client.h
+++ b/src/input_common/udp/client.h
@@ -11,7 +11,6 @@
 #include <string>
 #include <thread>
 #include <tuple>
-#include <vector>
 #include "common/common_types.h"
 #include "common/thread.h"
 #include "common/vector_math.h"
--- a/src/input_common/udp/protocol.h
+++ b/src/input_common/udp/protocol.h
@@ -7,7 +7,6 @@
 #include <array>
 #include <optional>
 #include <type_traits>
-#include <vector>
 #include <boost/crc.hpp>
 #include "common/bit_field.h"
 #include "common/swap.h"
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -2,7 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include "common/logging/log.h"
+#include <mutex>
+#include <tuple>
+
 #include "common/param_package.h"
 #include "core/frontend/input.h"
 #include "core/settings.h"
@@ -14,7 +16,7 @@ namespace InputCommon::CemuhookUDP {
 class UDPTouchDevice final : public Input::TouchDevice {
 public:
    explicit UDPTouchDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {}
-    std::tuple<float, float, bool> GetStatus() const {
+    std::tuple<float, float, bool> GetStatus() const override {
        std::lock_guard guard(status->update_mutex);
        return status->touch_status;
    }
@@ -26,7 +28,7 @@ private:
 class UDPMotionDevice final : public Input::MotionDevice {
 public:
    explicit UDPMotionDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {}
-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
        std::lock_guard guard(status->update_mutex);
        return status->motion_status;
    }
--- a/src/input_common/udp/udp.h
+++ b/src/input_common/udp/udp.h
@@ -2,15 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#pragma once
+
 #include <memory>
-#include <unordered_map>
-#include "input_common/main.h"
-#include "input_common/udp/client.h"

 namespace InputCommon::CemuhookUDP {

-class UDPTouchDevice;
-class UDPMotionDevice;
+class Client;

 class State {
 public:
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -156,6 +156,7 @@ if (ENABLE_VULKAN)
        renderer_vulkan/maxwell_to_vk.cpp
        renderer_vulkan/maxwell_to_vk.h
        renderer_vulkan/renderer_vulkan.h
+        renderer_vulkan/renderer_vulkan.cpp
        renderer_vulkan/vk_blit_screen.cpp
        renderer_vulkan/vk_blit_screen.h
        renderer_vulkan/vk_buffer_cache.cpp
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -101,7 +101,10 @@ public:
    void TickFrame() {
        ++epoch;
        while (!pending_destruction.empty()) {
-            if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
+            // Delay at least 4 frames before destruction.
+            // This is due to triple buffering happening on some drivers.
+            static constexpr u64 epochs_to_destroy = 5;
+            if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
                break;
            }
            pending_destruction.pop_front();
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
 #include "core/core_timing.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
    regs.reg_array[0xd00] = 1;
 }

-void Maxwell3D::ProcessQueryGet() {
-    const GPUVAddr sequence_address{regs.query.QueryAddress()};
-    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
-    // VAddr before writing.
-
-    // TODO(Subv): Support the other query units.
-    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
-               "Units other than CROP are unimplemented");
-
-    u64 result = 0;
-
-    // TODO(Subv): Support the other query variables
-    switch (regs.query.query_get.select) {
-    case Regs::QuerySelect::Zero:
-        // This seems to actually write the query sequence to the query address.
-        result = regs.query.query_sequence;
-        break;
-    default:
-        result = 1;
-        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
-                          static_cast<u32>(regs.query.query_get.select.Value()));
-    }
-
-    // TODO(Subv): Research and implement how query sync conditions work.
-
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
    struct LongQueryResult {
        u64_le value;
        u64_le timestamp;
    };
    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
+    if (long_query) {
+        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+        // GPU, this command may actually take a while to complete in real hardware due to GPU
+        // wait queues.
+        LongQueryResult query_result{payload, system.GPU().GetTicks()};
+        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+    } else {
+        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+    }
+}

-    switch (regs.query.query_get.mode) {
-    case Regs::QueryMode::Write:
-    case Regs::QueryMode::Write2: {
-        u32 sequence = regs.query.query_sequence;
-        if (regs.query.query_get.short_query) {
-            // Write the current query sequence to the sequence address.
-            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
-            // query.
-            memory_manager.Write<u32>(sequence_address, sequence);
-        } else {
-            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
-            // GPU, this command may actually take a while to complete in real hardware due to GPU
-            // wait queues.
-            LongQueryResult query_result{};
-            query_result.value = result;
-            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = system.CoreTiming().GetTicks();
-            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
-        }
+void Maxwell3D::ProcessQueryGet() {
+    // TODO(Subv): Support the other query units.
+    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
+               "Units other than CROP are unimplemented");
+
+    switch (regs.query.query_get.operation) {
+    case Regs::QueryOperation::Release: {
+        const u64 result = regs.query.query_sequence;
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        break;
+    }
+    case Regs::QueryOperation::Acquire: {
+        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU
+        // to write a value that matches the current payload.
+        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+        break;
+    }
+    case Regs::QueryOperation::Counter: {
+        u64 result{};
+        switch (regs.query.query_get.select) {
+        case Regs::QuerySelect::Zero:
+            result = 0;
+            break;
+        default:
+            result = 1;
+            UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+                              static_cast<u32>(regs.query.query_get.select.Value()));
+        }
+        StampQueryResult(result, regs.query.query_get.short_query == 0);
+        break;
+    }
+    case Regs::QueryOperation::Trap: {
+        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+        break;
+    }
+    default: {
+        UNIMPLEMENTED_MSG("Unknown query operation");
        break;
    }
-    default:
-        UNIMPLEMENTED_MSG("Query mode {} not implemented",
-                          static_cast<u32>(regs.query.query_get.mode.Value()));
    }
 }

--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
        static constexpr std::size_t MaxConstBuffers = 18;
        static constexpr std::size_t MaxConstBufferSize = 0x10000;

-        enum class QueryMode : u32 {
-            Write = 0,
-            Sync = 1,
-            // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
-            // is.
-            Write2 = 2,
+        enum class QueryOperation : u32 {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2,
+            Trap = 3,
        };

        enum class QueryUnit : u32 {
@@ -704,8 +703,8 @@ public:
                INSERT_UNION_PADDING_WORDS(0x15);

                s32 stencil_back_func_ref;
-                u32 stencil_back_func_mask;
                u32 stencil_back_mask;
+                u32 stencil_back_func_mask;

                INSERT_UNION_PADDING_WORDS(0xC);

@@ -862,7 +861,11 @@ public:

                float point_size;

-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x1);
+
+                u32 point_sprite_enable;
+
+                INSERT_UNION_PADDING_WORDS(0x5);

                u32 zeta_enable;

@@ -1077,7 +1080,7 @@ public:
                    u32 query_sequence;
                    union {
                        u32 raw;
-                        BitField<0, 2, QueryMode> mode;
+                        BitField<0, 2, QueryOperation> operation;
                        BitField<4, 1, u32> fence;
                        BitField<12, 4, QueryUnit> unit;
                        BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1409,6 +1412,9 @@ private:
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

+    // Writes the query result accordingly
+    void StampQueryResult(u64 payload, bool long_query);
+
    // Handles Conditional Rendering
    void ProcessQueryCondition();

@@ -1458,8 +1464,8 @@ ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
 ASSERT_REG_POSITION(patch_vertices, 0x373);
 ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
-ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
-ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
+ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
+ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
@@ -1494,6 +1500,7 @@ ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
 ASSERT_REG_POSITION(point_size, 0x546);
+ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -624,6 +624,19 @@ enum class ShuffleOperation : u64 {
    Bfly = 3, // shuffleXorNV
 };

+enum class ShfType : u64 {
+    Bits32 = 0,
+    U64 = 2,
+    S64 = 3,
+};
+
+enum class ShfXmode : u64 {
+    None = 0,
+    HI = 1,
+    X = 2,
+    XHI = 3,
+};
+
 union Instruction {
    constexpr Instruction& operator=(const Instruction& instr) {
        value = instr.value;
@@ -775,6 +788,13 @@ union Instruction {
        BitField<39, 1, u64> wrap;
    } shr;

+    union {
+        BitField<37, 2, ShfType> type;
+        BitField<48, 2, ShfXmode> xmode;
+        BitField<50, 1, u64> wrap;
+        BitField<20, 6, u64> immediate;
+    } shf;
+
    union {
        BitField<39, 5, u64> shift_amount;
        BitField<48, 1, u64> negate_b;
@@ -1123,6 +1143,11 @@ union Instruction {
        BitField<55, 1, u64> ftz;
    } fset;

+    union {
+        BitField<47, 1, u64> ftz;
+        BitField<48, 4, PredCondition> cond;
+    } fcmp;
+
    union {
        BitField<49, 1, u64> bf;
        BitField<35, 3, PredCondition> cond;
@@ -1652,11 +1677,11 @@ union Instruction {
    } xmad;

    union {
-        BitField<20, 14, u64> offset;
+        BitField<20, 14, u64> shifted_offset;
        BitField<34, 5, u64> index;

        u64 GetOffset() const {
-            return offset * 4;
+            return shifted_offset * 4;
        }
    } cbuf34;

@@ -1703,6 +1728,7 @@ public:
        BFE_C,
        BFE_R,
        BFE_IMM,
+        BFI_RC,
        BFI_IMM_R,
        BRA,
        BRX,
@@ -1800,6 +1826,7 @@ public:
        ICMP_R,
        ICMP_CR,
        ICMP_IMM,
+        FCMP_R,
        MUFU,  // Multi-Function Operator
        RRO_C, // Range Reduction Operator
        RRO_R,
@@ -2104,6 +2131,7 @@ private:
            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+            INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -2128,6 +2156,7 @@ private:
            INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
            INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
            INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
+            INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
            INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
            INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
            INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
    return true;
 }

+u64 GPU::GetTicks() const {
+    // These values were reverse engineered by fincs from NVN
+    // The GPU clock advances 384 ticks every 625 nanoseconds
+    constexpr u64 gpu_ticks_num = 384;
+    constexpr u64 gpu_ticks_den = 625;
+
+    const u64 cpu_ticks = system.CoreTiming().GetTicks();
+    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
 void GPU::FlushCommands() {
    renderer.Rasterizer().FlushCommands();
 }
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
        block.sequence = regs.semaphore_sequence;
        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
        // CoreTiming
-        block.timestamp = system.CoreTiming().GetTicks();
+        block.timestamp = GetTicks();
        memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
                                   sizeof(block));
    } else {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:

    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);

+    u64 GetTicks() const;
+
    std::unique_lock<std::mutex> LockSync() {
        return std::unique_lock{sync_mutex};
    }
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -86,7 +86,7 @@ struct CommandDataContainer {
 struct SynchState final {
    std::atomic_bool is_running{true};

-    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
    CommandQueue queue;
    u64 last_fence{};
    std::atomic<u64> signaled_fence{};
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -248,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
 }

 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
-    if (accelerate_draw != AccelDraw::Indexed) {
-        return 0;
-    }
    MICROPROFILE_SCOPE(OpenGL_Index);
    const auto& regs = system.GPU().Maxwell3D().regs;
    const std::size_t size = CalculateIndexBufferSize();
@@ -546,7 +543,8 @@ void RasterizerOpenGL::Clear() {
    }
 }

-void RasterizerOpenGL::DrawPrelude() {
+void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+    MICROPROFILE_SCOPE(OpenGL_Drawing);
    auto& gpu = system.GPU().Maxwell3D();

    SyncRasterizeEnable(state);
@@ -567,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() {

    buffer_cache.Acquire();

-    // Draw the vertex batch
-    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-
    std::size_t buffer_size = CalculateVertexArraysSize();

    // Add space for index buffer
@@ -596,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() {
    // Upload vertex and index data.
    SetupVertexBuffer(vao);
    SetupVertexInstances(vao);
-    index_buffer_offset = SetupIndexBuffer();
+
+    GLintptr index_buffer_offset;
+    if (is_indexed) {
+        index_buffer_offset = SetupIndexBuffer();
+    }

    // Prepare packed bindings.
    bind_ubo_pushbuffer.Setup();
@@ -630,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() {
        // As all cached buffers are invalidated, we need to recheck their state.
        gpu.dirty.ResetVertexArrays();
    }
+    gpu.dirty.memory_general = false;

    shader_program_manager->ApplyTo(state);
    state.Apply();
@@ -637,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() {
    if (texture_cache.TextureBarrier()) {
        glTextureBarrier();
    }
+
+    const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+    const GLsizei num_instances =
+        static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+    if (is_indexed) {
+        const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+        const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
+        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+        glDrawElementsInstancedBaseVertexBaseInstance(
+            primitive_mode, num_vertices, index_format,
+            reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
+            base_instance);
+    } else {
+        const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
+        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+        glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
+                                          base_instance);
+    }
 }

-struct DrawParams {
-    bool is_indexed{};
-    bool is_instanced{};
-    GLenum primitive_mode{};
-    GLint count{};
-    GLint base_vertex{};
-
-    // Indexed settings
-    GLenum index_format{};
-    GLintptr index_buffer_offset{};
-
-    // Instanced setting
-    GLint num_instances{};
-    GLint base_instance{};
-
-    void DispatchDraw() {
-        if (is_indexed) {
-            const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
-            if (is_instanced) {
-                glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
-                                                              index_buffer_ptr, num_instances,
-                                                              base_vertex, base_instance);
-            } else {
-                glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
-                                         base_vertex);
-            }
-        } else {
-            if (is_instanced) {
-                glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
-                                                  base_instance);
-            } else {
-                glDrawArrays(primitive_mode, base_vertex, count);
-            }
-        }
-    }
-};
-
 bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
-    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
-    MICROPROFILE_SCOPE(OpenGL_Drawing);
-
-    DrawPrelude();
-
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
-    const auto current_instance = maxwell3d.state.current_instance;
-    DrawParams draw_call{};
-    draw_call.is_indexed = is_indexed;
-    draw_call.num_instances = static_cast<GLint>(1);
-    draw_call.base_instance = static_cast<GLint>(current_instance);
-    draw_call.is_instanced = current_instance > 0;
-    draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
-    if (draw_call.is_indexed) {
-        draw_call.count = static_cast<GLint>(regs.index_array.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
-        draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
-        draw_call.index_buffer_offset = index_buffer_offset;
-    } else {
-        draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
-    }
-    draw_call.DispatchDraw();
-
-    maxwell3d.dirty.memory_general = false;
-    accelerate_draw = AccelDraw::Disabled;
+    Draw(is_indexed, false);
    return true;
 }

 bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
-    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
-    MICROPROFILE_SCOPE(OpenGL_Drawing);
-
-    DrawPrelude();
-
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
-    const auto& draw_setup = maxwell3d.mme_draw;
-    DrawParams draw_call{};
-    draw_call.is_indexed = is_indexed;
-    draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
-    draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
-    draw_call.is_instanced = draw_setup.instance_count > 1;
-    draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
-    if (draw_call.is_indexed) {
-        draw_call.count = static_cast<GLint>(regs.index_array.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
-        draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
-        draw_call.index_buffer_offset = index_buffer_offset;
-    } else {
-        draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
-        draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
-    }
-    draw_call.DispatchDraw();
-
-    maxwell3d.dirty.memory_general = false;
-    accelerate_draw = AccelDraw::Disabled;
+    Draw(is_indexed, true);
    return true;
 }

@@ -1293,6 +1220,7 @@ void RasterizerOpenGL::SyncPointState() {
    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
    // in OpenGL).
    state.point.program_control = regs.vp_point_size.enable != 0;
+    state.point.sprite = regs.point_sprite_enable != 0;
    state.point.size = std::max(1.0f, regs.point_size);
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -103,7 +103,7 @@ private:
                           std::size_t size);

    /// Syncs all the state, shaders, render targets and textures setting before a draw call.
-    void DrawPrelude();
+    void Draw(bool is_indexed, bool is_instanced);

    /// Configures the current textures to use for the draw command.
    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -220,12 +220,7 @@ private:

    GLintptr SetupIndexBuffer();

-    GLintptr index_buffer_offset;
-
    void SetupShaders(GLenum primitive_mode);
-
-    enum class AccelDraw { Disabled, Arrays, Indexed };
-    AccelDraw accelerate_draw = AccelDraw::Disabled;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() {

 void OpenGLState::ApplyPointSize() {
    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
+    Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
    if (UpdateValue(cur_state.point.size, point.size)) {
        glPointSize(point.size);
    }
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -132,6 +132,7 @@ public:

    struct {
        bool program_control = false; // GL_PROGRAM_POINT_SIZE
+        bool sprite = false;          // GL_POINT_SPRITE
        GLfloat size = 1.0f;          // GL_POINT_SIZE
    } point;

--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_UNSIGNED_INT_2_10_10_10_REV;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    case Maxwell::VertexAttribute::Type::SignedInt:
@@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_INT_2_10_10_10_REV;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    case Maxwell::VertexAttribute::Type::Float:
@@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
            return GL_FLOAT;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
+        }
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+        switch (attrib.size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return GL_UNSIGNED_BYTE;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    default:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
-        UNREACHABLE();
+        LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
        return {};
    }
 }
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -0,0 +1,265 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/telemetry.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/frontend/emu_window.h"
+#include "core/memory.h"
+#include "core/perf_stats.h"
+#include "core/settings.h"
+#include "core/telemetry_session.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_blit_screen.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_swapchain.h"
+
+namespace Vulkan {
+
+namespace {
+
+/// Debug-utils messenger callback that forwards a Vulkan message to the emulator log.
+/// Error, warning, info and verbose severities are logged at critical, warning, info and debug
+/// level respectively; a severity matching none of them is dropped.
+/// @param severity_ Severity bit reported for the message.
+/// @param type Message type flags (not inspected).
+/// @param data Callback data; only its pMessage string is read.
+/// @param user_data Unused.
+/// @returns Always VK_FALSE.
+VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
+                       VkDebugUtilsMessageTypeFlagsEXT type,
+                       const VkDebugUtilsMessengerCallbackDataEXT* data,
+                       [[maybe_unused]] void* user_data) {
+    using Severity = vk::DebugUtilsMessageSeverityFlagBitsEXT;
+
+    const Severity level{severity_};
+    const char* const text{data->pMessage};
+
+    // The first matching severity, from most to least severe, decides the log level
+    if (level & Severity::eError) {
+        LOG_CRITICAL(Render_Vulkan, "{}", text);
+    } else if (level & Severity::eWarning) {
+        LOG_WARNING(Render_Vulkan, "{}", text);
+    } else if (level & Severity::eInfo) {
+        LOG_INFO(Render_Vulkan, "{}", text);
+    } else if (level & Severity::eVerbose) {
+        LOG_DEBUG(Render_Vulkan, "{}", text);
+    }
+    return VK_FALSE;
+}
+
+/// Formats a packed Vulkan version number as "major.minor.patch".
+/// @param version Version encoded the way the VK_VERSION_* macros expect.
+std::string GetReadableVersion(u32 version) {
+    const u32 major_part = VK_VERSION_MAJOR(version);
+    const u32 minor_part = VK_VERSION_MINOR(version);
+    const u32 patch_part = VK_VERSION_PATCH(version);
+    return fmt::format("{}.{}.{}", major_part, minor_part, patch_part);
+}
+
+/// Returns the driver version of the device as a human readable string.
+/// The NVIDIA proprietary driver and the Intel proprietary Windows driver pack the version in
+/// their own bit layouts; every other driver is decoded with the standard Vulkan layout.
+std::string GetDriverVersion(const VKDevice& device) {
+    // Extracted from
+    // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
+    const u32 packed = device.GetDriverVersion();
+    const auto driver_id = device.GetDriverID();
+
+    if (driver_id == vk::DriverIdKHR::eNvidiaProprietary) {
+        // Layout: 10 bits, 8 bits, 8 bits, 6 bits (most to least significant)
+        const u32 first = (packed >> 22) & 0x3ff;
+        const u32 second = (packed >> 14) & 0x0ff;
+        const u32 third = (packed >> 6) & 0x0ff;
+        const u32 fourth = packed & 0x003f;
+        return fmt::format("{}.{}.{}.{}", first, second, third, fourth);
+    }
+
+    if (driver_id == vk::DriverIdKHR::eIntelProprietaryWindows) {
+        // Layout: the upper bits, then the low 14 bits
+        const u32 upper = packed >> 14;
+        const u32 lower = packed & 0x3fff;
+        return fmt::format("{}.{}", upper, lower);
+    }
+
+    return GetReadableVersion(packed);
+}
+
+/// Sorts the given extension names and joins them with commas (no trailing comma).
+/// @param available_extensions Extension names; taken by value because the list is sorted here.
+/// @returns The joined list, or an empty string when there are no extensions.
+std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
+    std::sort(std::begin(available_extensions), std::end(available_extensions));
+
+    // Rough guess used to size the output buffer up front
+    static constexpr std::size_t AverageExtensionSize = 64;
+    std::string joined;
+    joined.reserve(available_extensions.size() * AverageExtensionSize);
+
+    bool is_first = true;
+    for (const std::string& name : available_extensions) {
+        // A separator goes between entries, never before the first one
+        if (!is_first) {
+            joined += ',';
+        }
+        joined += name;
+        is_first = false;
+    }
+    return joined;
+}
+
+} // Anonymous namespace
+
+/// Stores the window and system references; no Vulkan object is created until Init() is called.
+RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
+    : RendererBase(window), system(system) {}
+
+/// Tears the renderer down by delegating to ShutDown().
+RendererVulkan::~RendererVulkan() {
+    this->ShutDown();
+}
+
+/// Presents a guest framebuffer on the host window and then polls window events.
+/// Nothing is drawn when there is no framebuffer, the layout has a zero dimension or the window
+/// is not shown; window events are polled in every case.
+/// @param framebuffer Guest framebuffer to display, may be null.
+void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+    const auto& layout = render_window.GetFramebufferLayout();
+    const bool can_present =
+        framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown();
+    if (!can_present) {
+        render_window.PollEvents();
+        return;
+    }
+
+    const VAddr display_address = framebuffer->address + framebuffer->offset;
+    const bool accelerated =
+        rasterizer->AccelerateDisplay(*framebuffer, display_address, framebuffer->stride);
+    const bool wants_srgb = accelerated && screen_info.is_srgb;
+
+    // The swapchain is rebuilt when the layout or the sRGB state no longer matches
+    if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != wants_srgb) {
+        swapchain->Create(layout.width, layout.height, wants_srgb);
+        blit_screen->Recreate();
+    }
+
+    scheduler->WaitWorker();
+
+    swapchain->AcquireNextImage();
+    const auto [frame_fence, frame_semaphore] = blit_screen->Draw(*framebuffer, accelerated);
+
+    scheduler->Flush(false, frame_semaphore);
+
+    // A true result from Present() triggers a rebuild of the blit screen
+    if (swapchain->Present(frame_semaphore, frame_fence)) {
+        blit_screen->Recreate();
+    }
+
+    render_window.SwapBuffers();
+    rasterizer->TickFrame();
+
+    render_window.PollEvents();
+}
+
+/// Initializes the Vulkan renderer: retrieves the instance and surface from the window, optionally
+/// installs a debug messenger, picks the device and creates every renderer component.
+/// @returns True on success; false when the debug messenger could not be created or no usable
+/// device could be set up.
+bool RendererVulkan::Init() {
+    PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
+    render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
+    const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
+
+    // The messenger is only requested in debug mode and when the entry point was loaded
+    std::optional<vk::DebugUtilsMessengerEXT> callback;
+    if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
+        callback = CreateDebugCallback(dldi);
+        if (!callback) {
+            return false;
+        }
+    }
+
+    if (!PickDevices(dldi)) {
+        // Nothing owns the raw messenger yet, so it has to be destroyed by hand
+        if (callback) {
+            instance.destroy(*callback, nullptr, dldi);
+        }
+        return false;
+    }
+    if (callback) {
+        // Hand the messenger to its owning handle only when one was created; dereferencing an
+        // empty optional is undefined behavior
+        debug_callback = UniqueDebugUtilsMessengerEXT(
+            *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
+                           instance, nullptr, device->GetDispatchLoader()));
+    }
+
+    Report();
+
+    memory_manager = std::make_unique<VKMemoryManager>(*device);
+
+    resource_manager = std::make_unique<VKResourceManager>(*device);
+
+    const auto& framebuffer = render_window.GetFramebufferLayout();
+    swapchain = std::make_unique<VKSwapchain>(surface, *device);
+    swapchain->Create(framebuffer.width, framebuffer.height, false);
+
+    scheduler = std::make_unique<VKScheduler>(*device, *resource_manager);
+
+    rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
+                                                    *resource_manager, *memory_manager, *scheduler);
+
+    blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
+                                                 *resource_manager, *memory_manager, *swapchain,
+                                                 *scheduler, screen_info);
+
+    return true;
+}
+
+/// Destroys the renderer components and the device. Does nothing when no device exists.
+void RendererVulkan::ShutDown() {
+    if (device == nullptr) {
+        return;
+    }
+
+    // Wait for the logical device to go idle first, presumably so that nothing is destroyed
+    // while the GPU may still be using it
+    const auto logical = device->GetLogical();
+    const auto& dispatch = device->GetDispatchLoader();
+    const bool can_wait = logical && dispatch.vkDeviceWaitIdle;
+    if (can_wait) {
+        logical.waitIdle(dispatch);
+    }
+
+    // Components are released in this fixed order, the device going last
+    rasterizer.reset();
+    blit_screen.reset();
+    scheduler.reset();
+    swapchain.reset();
+    memory_manager.reset();
+    resource_manager.reset();
+    device.reset();
+}
+
+/// Creates a debug-utils messenger that routes every severity and message type to DebugCallback.
+/// @param dldi Dispatch loader used to call into the instance.
+/// @returns The messenger handle, or an empty optional (after logging an error) on failure.
+std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
+    const vk::DispatchLoaderDynamic& dldi) {
+    using Severity = vk::DebugUtilsMessageSeverityFlagBitsEXT;
+    using MessageType = vk::DebugUtilsMessageTypeFlagBitsEXT;
+
+    const auto severities =
+        Severity::eError | Severity::eWarning | Severity::eInfo | Severity::eVerbose;
+    const auto message_types =
+        MessageType::eGeneral | MessageType::eValidation | MessageType::ePerformance;
+    const vk::DebugUtilsMessengerCreateInfoEXT create_info({}, severities, message_types,
+                                                           &DebugCallback, nullptr);
+
+    vk::DebugUtilsMessengerEXT messenger;
+    const vk::Result result =
+        instance.createDebugUtilsMessengerEXT(&create_info, nullptr, &messenger, dldi);
+    if (result != vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
+        return std::nullopt;
+    }
+    return messenger;
+}
+
+/// Selects the physical device configured in the settings and creates the device on top of it.
+/// @param dldi Dispatch loader used to query the instance.
+/// @returns False when the configured index is out of range, the device is not suitable or the
+/// device creation fails; true otherwise.
+bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
+    const auto physical_devices = instance.enumeratePhysicalDevices(dldi);
+
+    // TODO(Rodrigo): Choose device from config file
+    const s32 selected = Settings::values.vulkan_device;
+    const bool in_range =
+        selected >= 0 && selected < static_cast<s32>(physical_devices.size());
+    if (!in_range) {
+        LOG_ERROR(Render_Vulkan, "Invalid device index {}!", selected);
+        return false;
+    }
+
+    const vk::PhysicalDevice chosen = physical_devices[selected];
+    if (!VKDevice::IsSuitable(dldi, chosen, surface)) {
+        return false;
+    }
+
+    device = std::make_unique<VKDevice>(dldi, chosen, surface);
+    return device->Create(dldi, instance);
+}
+
+/// Logs the driver, device and Vulkan version in use and records them, together with the list of
+/// available extensions, as telemetry fields.
+void RendererVulkan::Report() const {
+    const std::string vendor{device->GetVendorName()};
+    const std::string model{device->GetModelName()};
+    const std::string driver = fmt::format("{} {}", vendor, GetDriverVersion(*device));
+    const std::string vulkan_version = GetReadableVersion(device->GetApiVersion());
+    const std::string extension_list =
+        BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
+
+    LOG_INFO(Render_Vulkan, "Driver: {}", driver);
+    LOG_INFO(Render_Vulkan, "Device: {}", model);
+    LOG_INFO(Render_Vulkan, "Vulkan: {}", vulkan_version);
+
+    constexpr auto field_type = Telemetry::FieldType::UserSystem;
+    auto& telemetry = system.TelemetrySession();
+    telemetry.AddField(field_type, "GPU_Vendor", vendor);
+    telemetry.AddField(field_type, "GPU_Model", model);
+    telemetry.AddField(field_type, "GPU_Vulkan_Driver", driver);
+    telemetry.AddField(field_type, "GPU_Vulkan_Version", vulkan_version);
+    telemetry.AddField(field_type, "GPU_Vulkan_Extensions", extension_list);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -400,8 +400,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
             VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
        Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
             false);
-        Test(extension, nv_device_diagnostic_checkpoints,
-             VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+        if (Settings::values.renderer_debug) {
+            Test(extension, nv_device_diagnostic_checkpoints,
+                 VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
+        }
    }

    if (khr_shader_float16_int8) {
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -571,7 +571,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
        }
        if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
-            texceptions.set(rt);
+            texceptions[rt] = true;
        }
    }

@@ -579,7 +579,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
        zeta_attachment = texture_cache.GetDepthBufferSurface(true);
    }
    if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
-        texceptions.set(ZETA_TEXCEPTION_INDEX);
+        texceptions[ZETA_TEXCEPTION_INDEX] = true;
    }

    texture_cache.GuardRenderTargets(false);
@@ -1122,11 +1122,12 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions)

    for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
        const auto& rendertarget = regs.rt[rt];
-        if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
+        if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
            continue;
+        }
        renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
            static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
-            texceptions.test(rt)});
+            texceptions[rt]});
    }

    renderpass_params.has_zeta = regs.zeta_enable;
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {

    Node op_a = GetRegister(instr.gpr8);

-    Node op_b = [&]() -> Node {
+    Node op_b = [&] {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
@@ -141,6 +141,15 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, value);
        break;
    }
+    case OpCode::Id::FCMP_R: {
+        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
+        Node op_c = GetRegister(instr.gpr39);
+        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
+        SetRegister(
+            bb, instr.gpr0,
+            Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
+        break;
+    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::ICMP_CR:
-                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                        GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_R:
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_RC:
                return {GetRegister(instr.gpr39),
-                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
            case OpCode::Id::ICMP_IMM:
                return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
            default:
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

-    const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
+    const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
        switch (opcode->get().GetId()) {
+        case OpCode::Id::BFI_RC:
+            return {GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::BFI_IMM_R:
-            return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
+            return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
        default:
            UNREACHABLE();
            return {Immediate(0), Immediate(0)};
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -69,13 +69,16 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    case OpCode::Id::MOV_SYS: {
        const Node value = [this, instr] {
            switch (instr.sys20) {
+            case SystemVariable::LaneId:
+                LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete");
+                return Immediate(0U);
            case SystemVariable::InvocationId:
                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
                LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
-                return Immediate(0u);
+                return Immediate(0U);
            case SystemVariable::Tid: {
                Node value = Immediate(0);
                value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
@@ -188,7 +191,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(cc));

-        if (disable_flow_stack) {
+        if (decompiled) {
            break;
        }

@@ -200,7 +203,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(cc));
-        if (disable_flow_stack) {
+        if (decompiled) {
            break;
        }

--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -10,8 +10,80 @@

 namespace VideoCommon::Shader {

+using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::ShfType;
+using Tegra::Shader::ShfXmode;
+
+namespace {
+
+/// Builds a boolean node that is true when the shift amount equals 32.
+Node IsFull(Node shift) {
+    Node full_amount = Immediate(32);
+    return Operation(OperationCode::LogicalIEqual, move(shift), move(full_amount));
+}
+
+/// Builds `opcode(value, shift)`, replacing the result with 0 when the shift amount is exactly 32.
+/// @param opcode Shift operation to emit.
+/// @param value Node being shifted.
+/// @param shift Shift amount node.
+/// @returns A select node yielding 0 for a shift of 32 and the shifted value otherwise.
+Node Shift(OperationCode opcode, Node value, Node shift) {
+    // `shift` is copied here because IsFull() consumes it below
+    Node shifted = Operation(opcode, move(value), shift);
+    return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
+}
+
+/// Clamps a shift amount to the range [0, size] using signed max/min operations.
+/// @param shift Shift amount node.
+/// @param size Upper bound of the clamp, 32 by default.
+Node ClampShift(Node shift, s32 size = 32) {
+    Node non_negative = Operation(OperationCode::IMax, move(shift), Immediate(0));
+    return Operation(OperationCode::IMin, move(non_negative), Immediate(size));
+}
+
+/// Wraps a shift amount by masking it with `size - 1`.
+/// @param shift Shift amount node.
+/// @param size Wrap size, 32 by default; the mask only acts as a modulo for powers of two.
+Node WrapShift(Node shift, s32 size = 32) {
+    const s32 mask = size - 1;
+    return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(mask));
+}
+
+/// Builds the node tree of a right funnel shift over the `high:low` register pair.
+/// @param low Low word of the pair.
+/// @param high High word of the pair.
+/// @param shift Shift amount, already clamped or wrapped by the caller.
+/// @param low_shift Complementary amount supplied by the caller (32 - shift in DecodeShift).
+/// @param type Selects the 32-bit behavior or the signed/unsigned 64-bit behavior.
+Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // Value used when the shift amount is below 32: bits from both words are combined
+    Node from_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
+    Node from_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
+    Node below_32 = Operation(OperationCode::IBitwiseOr, move(from_high), move(from_low));
+
+    if (type == ShfType::Bits32) {
+        // A 32-bit shift is either full (exactly 32) or shifts less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(below_32));
+    }
+
+    // Value used when the shift amount is 32 or larger: only the high word contributes
+    const auto high_opcode =
+        SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, type == ShfType::S64);
+    Node excess = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node at_least_32 = Shift(high_opcode, high, move(excess));
+
+    Node is_below_32 = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+    Node selected =
+        Operation(OperationCode::Select, move(is_below_32), move(below_32), move(at_least_32));
+    // NOTE(review): a zero shift selects `high` here, although `below_32` already evaluates to
+    // `low` in that case when low_shift is 32 - shift; confirm this is the intended result
+    return Operation(OperationCode::Select, move(is_zero), move(high), move(selected));
+}
+
+/// Builds the node tree of a left funnel shift over the `high:low` register pair.
+/// @param low Low word of the pair.
+/// @param high High word of the pair.
+/// @param shift Shift amount, already clamped or wrapped by the caller.
+/// @param low_shift Complementary amount supplied by the caller (32 - shift in DecodeShift).
+/// @param type Selects the 32-bit behavior or the 64-bit behavior.
+Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+    // Value used when the shift amount is below 32: bits from both words are combined
+    Node from_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
+    Node from_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
+    Node below_32 = Operation(OperationCode::IBitwiseOr, move(from_low), move(from_high));
+
+    if (type == ShfType::Bits32) {
+        // A 32-bit shift is either full (exactly 32) or shifts less than 32 bits
+        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(below_32));
+    }
+
+    // Value used when the shift amount is 32 or larger: only the low word contributes
+    Node excess = Operation(OperationCode::IAdd, shift, Immediate(-32));
+    Node at_least_32 = Shift(OperationCode::ILogicalShiftLeft, move(low), move(excess));
+
+    Node is_below_32 = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+    Node selected =
+        Operation(OperationCode::Select, move(is_below_32), move(below_32), move(at_least_32));
+    // A zero shift returns the high word untouched
+    return Operation(OperationCode::Select, move(is_zero), move(high), move(selected));
+}
+
+} // Anonymous namespace

 u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
@@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
        }
    }();

-    switch (opcode->get().GetId()) {
+    switch (const auto opid = opcode->get().GetId(); opid) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM: {
-        if (instr.shr.wrap) {
-            op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
-        } else {
-            op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
-            op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
-        }
+        op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));

        Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
-                                     std::move(op_a), std::move(op_b));
+                                     move(op_a), move(op_b));
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, std::move(value));
+        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM: {
-        const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
+        Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
-        SetRegister(bb, instr.gpr0, value);
+        SetRegister(bb, instr.gpr0, move(value));
+        break;
+    }
+    case OpCode::Id::SHF_RIGHT_R:
+    case OpCode::Id::SHF_RIGHT_IMM:
+    case OpCode::Id::SHF_LEFT_R:
+    case OpCode::Id::SHF_LEFT_IMM: {
+        UNIMPLEMENTED_IF(instr.generates_cc);
+        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
+                             static_cast<int>(instr.shf.xmode.Value()));
+
+        if (instr.is_b_imm) {
+            op_b = Immediate(static_cast<u32>(instr.shf.immediate));
+        }
+        const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
+        Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
+
+        Node negated_shift = Operation(OperationCode::INegate, shift);
+        Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
+
+        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
+        Node value = (is_right ? ShiftRight : ShiftLeft)(
+            move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
+
+        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    default:
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,19 +3,32 @@
 // Refer to the license.txt file included.

 #include <memory>
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/gpu_asynch.h"
 #include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#ifdef HAS_VULKAN
+#include "video_core/renderer_vulkan/renderer_vulkan.h"
+#endif
 #include "video_core/video_core.h"

 namespace VideoCore {

 std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
                                             Core::System& system) {
-    return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+    switch (Settings::values.renderer_backend) {
+    case Settings::RendererBackend::OpenGL:
+        return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
+#ifdef HAS_VULKAN
+    case Settings::RendererBackend::Vulkan:
+        return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+#endif
+    default:
+        return nullptr;
+    }
 }

 std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
bunnei	0f70f68fb3	Revert "video_core: memory_manager: Use GPU interface for cache functions."	2020-02-15 17:47:15 -05:00
bunnei	f552d553ba	Merge pull request #3401 from FernandoS27/synchronization Set of refactors for Kernel Synchronization and Hardware Constants	2020-02-14 14:40:20 -05:00
bunnei	63a59b9935	Merge pull request #3379 from ReinUsesLisp/cbuf-offset shader/decode: Fix constant buffer offsets	2020-02-14 13:22:53 -05:00
Zach Hilman	4501bd8ca9	Merge pull request #3398 from brianclinkenbeard/fix-cmake-sdl2 Use config mode for finding SDL2 with CMake	2020-02-14 09:11:47 -05:00
Fernando Sahmkow	829d8c0d6b	Core: Correct compilation in GCC	2020-02-14 05:53:30 -04:00
bunnei	74feed372c	Merge pull request #3400 from makigumo/patch-1 update hwopus DecodeInterleaved for FW 7.0.0+	2020-02-13 21:26:13 -05:00
bunnei	3563af2364	Merge pull request #3395 from FernandoS27/queries GPU: Refactor queries implementation and correct GPU Clock.	2020-02-13 20:18:26 -05:00
Fernando Sahmkow	2bc949628d	Core: Address Feedback	2020-02-13 19:10:33 -04:00
Fernando Sahmkow	d6ed31b9fa	GPU: Address Feedback.	2020-02-13 18:16:07 -04:00
bunnei	8b9a56033a	Merge pull request #3405 from lioncash/thread address_arbiter: Minor cleanup to list querying	2020-02-12 21:46:00 -05:00
Brian Clinkenbeard	0d85b6bfe1	Merge branch 'master' into fix-cmake-sdl2	2020-02-12 16:07:07 -08:00
Lioncash	be269e21a5	address_arbiter: Collapse loops in InsertThread() and RemoveThread() Same behavior, but without the need to explicitly loop through everything manually.	2020-02-12 15:34:07 -05:00
Lioncash	9f2c703137	address_arbiter: Simplify GetThreadsWaitingOnAddress() Simplifies the overall function and also allows for it to become a const-qualified member function.	2020-02-12 15:10:16 -05:00
bunnei	8f8dda2d5b	Merge pull request #3403 from lioncash/debug bcat/backend: Prevent fmt exception in debug log within NullBackend::Clear()	2020-02-12 11:17:43 -05:00
bunnei	2506f7b3a1	Merge pull request #3402 from lioncash/sys-global kernel/thread: Remove trivial usages of the global system accessor	2020-02-12 10:10:00 -05:00
Lioncash	f00a54f508	bcat/backend: Make formatting of passphrase consistent in NullBackend::SetPassphrase() Aligns the '=' to be consistent with the rest of the logs within this source file.	2020-02-12 01:18:29 -05:00
Lioncash	eefd97e80d	bcat/backend: Prevent fmt exception in debug log within NullBackend::Clear() A formatting specifier within Clear wasn't being used, which will cause fmt to throw an exception. This fixes that.	2020-02-12 01:14:47 -05:00
Lioncash	b80c348b09	kernel/thread: Remove trivial usages of the global system accessor We can just use the kernel member variable directly instead of going through the system to obtain the same thing.	2020-02-12 01:00:41 -05:00
Fernando Sahmkow	1e6f8aba04	Core: Set all hardware emulation constants in a single file.	2020-02-11 20:19:11 -04:00
Fernando Sahmkow	d23d504d77	Kernel: Refactor synchronization to better match RE	2020-02-11 18:47:31 -04:00
makigumo	926ea5a16d	update hwopus DecodeInterleaved for FW 7.0.0+ trivial change, see https://switchbrew.org/wiki/Audio_services#IHardwareOpusDecoder	2020-02-11 18:41:04 +01:00
Fernando Sahmkow	c5aefe42aa	Kernel: Change WaitObject to Synchronization object. In order to better reflect RE.	2020-02-11 10:46:25 -04:00
bunnei	37f1cf8cbd	Merge pull request #3376 from ReinUsesLisp/point-sprite gl_rasterizer: Implement GL_POINT_SPRITE	2020-02-11 08:26:07 -05:00
Brian Clinkenbeard	68043dd233	use config mode for finding SDL2 with CMake	2020-02-10 19:56:33 -08:00
Fernando Sahmkow	8e9a4944db	GPU: Implement GPU Clock correctly.	2020-02-10 10:44:54 -04:00
Fernando Sahmkow	0cb3bcfbb7	Maxwell3D: Correct query reporting.	2020-02-10 10:41:43 -04:00
bunnei	84ea9c2b42	Merge pull request #3372 from ReinUsesLisp/fix-back-stencil maxwell_3d: Fix stencil back mask	2020-02-09 22:29:28 -05:00
Zach Hilman	21c3f48279	Merge pull request #3391 from Morph1984/remove-unknown Remove option "Show files with type 'Unknown'"	2020-02-09 12:08:01 -05:00
Morph	fcf3425b1b	Remove option "Show files with type 'Unknown'"	2020-02-09 11:30:02 -05:00
bunnei	a952fbc5b3	Merge pull request #3388 from bunnei/service-shared-ptr hle: services: Use std::shared_ptr instead of copy by value. - This is a prerequisite to adding a mutex to `ServiceFramework`, which cannot be copied. - This will be used for threaded services.	2020-02-08 21:35:30 -05:00
bunnei	e210835dd0	Merge pull request #3387 from bunnei/gpu-mpscqueue gpu_thread: Use MPSCQueue for GPU commands.	2020-02-08 21:15:48 -05:00
bunnei	6536cc9741	Merge pull request #3386 from bunnei/gpu-mem-interface video_core: memory_manager: Use GPU interface for cache functions.	2020-02-08 21:15:27 -05:00
bunnei	7b07e521ca	hle: services: Use std::shared_ptr instead of copy by value.	2020-02-07 23:02:26 -05:00
bunnei	b5c13ee0eb	gpu_thread: Use MPSCQueue for GPU commands. - Necessary for multiple service threads.	2020-02-07 23:01:23 -05:00
bunnei	7cacb08cdf	video_core: memory_manager: Use GPU interface for cache functions.	2020-02-07 22:59:35 -05:00
bunnei	90bda66028	Merge pull request #3378 from ReinUsesLisp/uscaled maxwell_to_gl: Implement R8G8_USCALED	2020-02-07 22:55:52 -05:00
bunnei	90df4b8e2b	Merge pull request #3369 from ReinUsesLisp/shf shader/shift: Implement SHF	2020-02-07 22:06:57 -05:00
bunnei	aa3f9b9606	Merge pull request #3381 from bunnei/ipc-fix hle: services: Fix prepo IPC, and add better error checking.	2020-02-07 16:25:42 -05:00
bunnei	09d766d357	Merge pull request #3362 from ReinUsesLisp/fix-instanced gl_rasterizer: Fix instanced draw arrays	2020-02-06 21:39:59 -05:00
bunnei	1b01c3036d	Merge pull request #3366 from bunnei/swkbd-fixes applets: Fixes for software keyboard and transfer memory.	2020-02-05 23:26:32 -05:00
bunnei	ba53543da6	kernel: transfer_memory: Properly reserve and reset memory region.	2020-02-05 23:06:54 -05:00
Zach Hilman	7a547b9342	wait_object: Make wait behavior only require one object to signal. - This was holdover from citra.	2020-02-05 23:06:53 -05:00
bunnei	3a0c1e79f8	am: Correct IPC object count mismatch.	2020-02-05 23:06:53 -05:00
bunnei	77da74e17a	services: am: Clear events on PopOutData and PopInteractiveOutData.	2020-02-05 23:06:52 -05:00
bunnei	84e895cdd6	am: Refactor IStorage interface.	2020-02-05 23:06:52 -05:00
bunnei	3557fa25d0	applets: software_keyboard: Signal state change on end of interactive session.	2020-02-05 23:06:51 -05:00
bunnei	be5fcffb89	applets: software_keyboard: Minor cleanup.	2020-02-05 23:06:50 -05:00
bunnei	2245c24e21	services: prepo: Fix IPC interface with SaveReport/SaveReportWithUser.	2020-02-05 22:52:35 -05:00
bunnei	9751ccc5e0	hle_ipc: Add error checking to read/write buffer access.	2020-02-05 22:52:35 -05:00
ReinUsesLisp	bf9a822b87	shader/decode: Fix constant buffer offsets Some instances were using cbuf34.offset instead of cbuf34.GetOffset(). This returned an invalid offset. Address those instances and rename offset to "shifted_offset" to avoid future bugs.	2020-02-05 12:19:09 -03:00
ReinUsesLisp	8bb9eef97b	maxwell_to_gl: Implement R8G8_USCALED	2020-02-04 21:32:36 -03:00
ReinUsesLisp	c81c361e82	maxwell_to_gl: Reduce unimplemented formats to LOG_ERROR	2020-02-04 21:32:08 -03:00
bunnei	a0b4be4262	Merge pull request #3377 from ReinUsesLisp/bitset-vk vk_rasterizer: Use noexcept methods of std::bitset	2020-02-04 16:56:57 -05:00
ReinUsesLisp	0eb36c90f4	vk_rasterizer: Use noexcept variants of std::bitset Removes bounds checking from "texceptions" instances.	2020-02-04 18:04:24 -03:00
bunnei	08c508b1c4	Merge pull request #3357 from ReinUsesLisp/bfi-rc shader/bfi: Implement register-constant buffer variant	2020-02-04 15:14:13 -05:00
ReinUsesLisp	7da52673d0	gl_rasterizer: Implement GL_POINT_SPRITE OpenGL core defaults to GL_POINT_SPRITE, meanwhile on OpenGL compatibility we have to explicitly enable it. This fixes gl_PointCoord's behaviour.	2020-02-04 15:19:45 -03:00
bunnei	bf21aacc74	Merge pull request #3356 from ReinUsesLisp/fcmp shader/arithmetic: Implement FCMP	2020-02-04 11:36:59 -05:00
bunnei	5733287822	Merge pull request #3360 from CJBok/statusbar-buttons GUI: Togglable graphics settings buttons in status bar	2020-02-03 16:57:18 -05:00
bunnei	c31ec00d67	Merge pull request #3337 from ReinUsesLisp/vulkan-staged yuzu: Implement Vulkan frontend	2020-02-03 16:56:25 -05:00
bunnei	2cd51fc9fd	Merge pull request #3374 from lioncash/udp input_common/udp: Minor changes	2020-02-03 11:41:04 -05:00
Lioncash	c7678c3044	input_common/udp: Ensure that UDP is shut down within Shutdown() Previously the UDP backend would never actually get shut down.	2020-02-03 09:29:15 -05:00
Lioncash	83f8090273	input_common/udp: Add missing override specifiers Prevents trivial warnings and ensures interfaces are properly maintained between the base class.	2020-02-03 09:26:53 -05:00
Lioncash	5c61e0ba39	input_common/udp: std::move SocketCallback instances where applicable std::function is allowed to heap allocate if the size of the captures associated with each lambda exceed a certain threshold. This prevents potentially unnecessary reallocations from occurring.	2020-02-03 09:24:05 -05:00
Lioncash	fb9c9ddcc9	input_common/udp: std::move shared_ptr within Client constructor Gets rid of a trivially avoidable atomic reference count increment and decrement.	2020-02-03 09:21:46 -05:00
Lioncash	9bb6ab77f4	udp/client: Replace deprecated from_string() call with make_address_v4() Future-proofs code if boost is ever updated.	2020-02-03 09:20:40 -05:00
Lioncash	881408445a	input_common/udp: Silence -Wreorder warning for Socket Amends the constructor initializer list to specify the order of its elements in the same order that initialization would occur.	2020-02-03 09:15:32 -05:00
Lioncash	36524465a6	input_common/udp: Remove unnecessary inclusions	2020-02-03 09:13:40 -05:00
Lioncash	4aa9c9632d	input_common/udp: Add missing header guard	2020-02-03 09:09:06 -05:00
bunnei	157eb375a5	Merge pull request #3370 from ReinUsesLisp/node-shared-ptr shader: Remove curly braces initializers on shared pointers	2020-02-03 00:25:56 -05:00
ReinUsesLisp	4eed744277	maxwell_3d: Fix stencil back mask	2020-02-02 17:50:46 -03:00
ReinUsesLisp	729ca120e3	shader/shift: Implement SHIFT_RIGHT_{IMM,R} Shifts a pair of registers to the right and returns the low register.	2020-02-01 21:20:02 -03:00
ReinUsesLisp	017474c3f8	shader/shift: Implement SHF_LEFT_{IMM,R} Shifts a pair of registers to the left and returns the high register.	2020-02-01 21:19:44 -03:00
ReinUsesLisp	b69321650e	gl_rasterizer: Fix instanced draw arrays glDrawArrays was being used when the draw had a base instance specified. This commit removes the draw parameters abstraction and fixes the mentioned issue.	2020-01-30 02:22:00 -03:00
ReinUsesLisp	d027850f33	ci: Disable Vulkan for Windows MinGW builds	2020-01-29 19:44:00 -03:00
ReinUsesLisp	a7beabb68f	yuzu/bootmanager: Define Vulkan widget only when enabled	2020-01-29 19:20:12 -03:00
ReinUsesLisp	252415a163	ci: Disable Vulkan for Linux builds	2020-01-29 18:06:16 -03:00
ReinUsesLisp	c29584a090	yuzu_cmd: Fix memcpy on Vulkan handlers	2020-01-29 17:53:11 -03:00
ReinUsesLisp	f92cbc5501	yuzu: Implement Vulkan frontend Adds a Qt and SDL2 frontend for Vulkan. It also finishes the missing bits on Vulkan initialization.	2020-01-29 17:53:11 -03:00
ReinUsesLisp	8299f1ceef	web_service/telemetry_json: Report USER_CONFIG	2020-01-29 17:53:11 -03:00
ReinUsesLisp	788d57d723	settings: Add settings for graphics backend	2020-01-29 17:53:11 -03:00
ReinUsesLisp	e651e54b85	core: Only wait for idle on gpu_core when it was initialized This fixes crashes when a Vulkan device fails to initialize.	2020-01-29 17:53:11 -03:00
ReinUsesLisp	9f0162e4b5	shader/other: Fix skips for SYNC and BRK	2020-01-29 17:53:11 -03:00
ReinUsesLisp	270177f38a	shader/other: Stub S2R LaneId	2020-01-29 17:53:11 -03:00
ReinUsesLisp	b35449c85d	buffer_cache: Delay buffer destructions Delay buffer destruction some extra frames to avoid destroying buffers that are still being used from older frames. This happens on Nvidia's driver with mailbox.	2020-01-29 17:53:11 -03:00
CJBok	8d6b4e836c	clang	2020-01-29 05:43:55 +01:00
CJBok	6e87111f91	minor corrections	2020-01-29 00:02:28 +01:00
CJBok	4bc4fdf5ff	GUI: Togglable graphics settings buttons in status bar	2020-01-28 23:59:30 +01:00
ReinUsesLisp	137a8aa55c	shader/bfi: Implement register-constant buffer variant It's the same as the variant that was implemented, but it takes the operands from another source.	2020-01-27 01:20:38 -03:00
ReinUsesLisp	e3fc3459c8	shader/arithmetic: Implement FCMP Compares the third operand with zero, then selects between the first and second.	2020-01-27 01:15:44 -03:00