nit: typo

shader: FMUL: add static to post factor LUT
clang-format
2020-02-23 12:34:06 +07:00 · 2020-02-23 12:33:25 +07:00 · 2020-02-22 09:56:20 +07:00 · 2020-02-22 09:43:07 +07:00 · 2020-02-22 09:41:33 +07:00 · 2020-02-21 23:38:10 +07:00
110 changed files with 3633 additions and 1483 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -151,15 +151,22 @@ if (ENABLE_SDL2)
        set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers")
        set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library")
        set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll")
-    else()
-        find_package(SDL2 REQUIRED)
-    endif()

-    if (SDL2_FOUND)
-        # TODO(yuriks): Make FindSDL2.cmake export an IMPORTED library instead
        add_library(SDL2 INTERFACE)
        target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}")
        target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}")
+    else()
+        find_package(SDL2 REQUIRED)
+
+        # Some installations don't set SDL2_LIBRARIES
+        if("${SDL2_LIBRARIES}" STREQUAL "")
+            message(WARNING "SDL2_LIBRARIES wasn't set, manually setting to SDL2::SDL2")
+            set(SDL2_LIBRARIES "SDL2::SDL2")
+        endif()
+
+        include_directories(${SDL2_INCLUDE_DIRS})
+        add_library(SDL2 INTERFACE)
+        target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}")
    endif()
 else()
    set(SDL2_FOUND NO)
--- a/externals/cmake-modules/FindSDL2.cmake
+++ b/externals/cmake-modules/FindSDL2.cmake
@@ -1,239 +0,0 @@
-
-# This module defines
-# SDL2_LIBRARY, the name of the library to link against
-# SDL2_FOUND, if false, do not try to link to SDL2
-# SDL2_INCLUDE_DIR, where to find SDL.h
-# SDL2_DLL_DIR, where to find SDL2.dll if it exists
-#
-# This module responds to the the flag:
-# SDL2_BUILDING_LIBRARY
-# If this is defined, then no SDL2main will be linked in because
-# only applications need main().
-# Otherwise, it is assumed you are building an application and this
-# module will attempt to locate and set the the proper link flags
-# as part of the returned SDL2_LIBRARY variable.
-#
-# Don't forget to include SDLmain.h and SDLmain.m your project for the
-# OS X framework based version. (Other versions link to -lSDL2main which
-# this module will try to find on your behalf.) Also for OS X, this
-# module will automatically add the -framework Cocoa on your behalf.
-#
-#
-# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your configuration
-# and no SDL2_LIBRARY, it means CMake did not find your SDL2 library
-# (SDL2.dll, libsdl2.so, SDL2.framework, etc).
-# Set SDL2_LIBRARY_TEMP to point to your SDL2 library, and configure again.
-# Similarly, if you see an empty SDL2MAIN_LIBRARY, you should set this value
-# as appropriate. These values are used to generate the final SDL2_LIBRARY
-# variable, but when these values are unset, SDL2_LIBRARY does not get created.
-#
-#
-# $SDL2DIR is an environment variable that would
-# correspond to the ./configure --prefix=$SDL2DIR
-# used in building SDL2.
-# l.e.galup  9-20-02
-#
-# Modified by Eric Wing.
-# Added code to assist with automated building by using environmental variables
-# and providing a more controlled/consistent search behavior.
-# Added new modifications to recognize OS X frameworks and
-# additional Unix paths (FreeBSD, etc).
-# Also corrected the header search path to follow "proper" SDL guidelines.
-# Added a search for SDL2main which is needed by some platforms.
-# Added a search for threads which is needed by some platforms.
-# Added needed compile switches for MinGW.
-#
-# On OSX, this will prefer the Framework version (if found) over others.
-# People will have to manually change the cache values of
-# SDL2_LIBRARY to override this selection or set the CMake environment
-# CMAKE_INCLUDE_PATH to modify the search paths.
-#
-# Note that the header path has changed from SDL2/SDL.h to just SDL.h
-# This needed to change because "proper" SDL convention
-# is #include "SDL.h", not <SDL2/SDL.h>. This is done for portability
-# reasons because not all systems place things in SDL2/ (see FreeBSD).
-
-#=============================================================================
-# Copyright 2003-2009 Kitware, Inc.
-#
-# Distributed under the OSI-approved BSD License (the "License").
-#
-# This software is distributed WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the License for more information.
-#=============================================================================
-# CMake - Cross Platform Makefile Generator
-# Copyright 2000-2016 Kitware, Inc.
-# Copyright 2000-2011 Insight Software Consortium
-# All rights reserved.
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# 
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the distribution.
-# 
-# * Neither the names of Kitware, Inc., the Insight Software Consortium,
-#   nor the names of their contributors may be used to endorse or promote
-#   products derived from this software without specific prior written
-#   permission.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# 
-# ------------------------------------------------------------------------------
-# 
-# The above copyright and license notice applies to distributions of
-# CMake in source and binary form.  Some source files contain additional
-# notices of original copyright by their contributors; see each source
-# for details.  Third-party software packages supplied with CMake under
-# compatible licenses provide their own copyright notices documented in
-# corresponding subdirectories.
-# 
-# ------------------------------------------------------------------------------
-# 
-# CMake was initially developed by Kitware with the following sponsorship:
-# 
-#  * National Library of Medicine at the National Institutes of Health
-#    as part of the Insight Segmentation and Registration Toolkit (ITK).
-# 
-#  * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel
-#    Visualization Initiative.
-# 
-#  * National Alliance for Medical Image Computing (NAMIC) is funded by the
-#    National Institutes of Health through the NIH Roadmap for Medical Research,
-#    Grant U54 EB005149.
-# 
-#  * Kitware, Inc.
-#
-
-message("<FindSDL2.cmake>")
-
-SET(SDL2_SEARCH_PATHS
-    ~/Library/Frameworks
-    /Library/Frameworks
-    /usr/local
-    /usr
-    /sw # Fink
-    /opt/local # DarwinPorts
-    /opt/csw # Blastwave
-    /opt
-    ${SDL2_PATH}
-)
-
-if(CMAKE_SIZEOF_VOID_P EQUAL 8)
-    set(VC_LIB_PATH_SUFFIX lib/x64)
-else()
-    set(VC_LIB_PATH_SUFFIX lib/x86)
-endif()
-
-FIND_LIBRARY(SDL2_LIBRARY_TEMP
-    NAMES SDL2
-    HINTS
-    $ENV{SDL2DIR}
-    PATH_SUFFIXES lib64 lib ${VC_LIB_PATH_SUFFIX}
-    PATHS ${SDL2_SEARCH_PATHS}
-)
-
-IF(SDL2_LIBRARY_TEMP)
-    if(MSVC)
-        get_filename_component(SDL2_DLL_DIR_TEMP ${SDL2_LIBRARY_TEMP} DIRECTORY)
-        if(EXISTS ${SDL2_DLL_DIR_TEMP}/SDL2.dll)
-            set(SDL2_DLL_DIR ${SDL2_DLL_DIR_TEMP})
-            unset(SDL2_DLL_DIR_TEMP)
-        endif()
-    endif()
-
-    FIND_PATH(SDL2_INCLUDE_DIR SDL.h
-        HINTS
-        $ENV{SDL2DIR}
-        PATH_SUFFIXES include/SDL2 include
-        PATHS ${SDL2_SEARCH_PATHS}
-    )
-
-    IF(NOT SDL2_BUILDING_LIBRARY)
-        IF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
-            # Non-OS X framework versions expect you to also dynamically link to
-            # SDL2main. This is mainly for Windows and OS X. Other (Unix) platforms
-            # seem to provide SDL2main for compatibility even though they don't
-            # necessarily need it.
-            FIND_LIBRARY(SDL2MAIN_LIBRARY
-                NAMES SDL2main
-                HINTS
-                $ENV{SDL2DIR}
-                PATH_SUFFIXES lib64 lib
-                PATHS ${SDL2_SEARCH_PATHS}
-            )
-        ENDIF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework")
-    ENDIF(NOT SDL2_BUILDING_LIBRARY)
-
-    # SDL2 may require threads on your system.
-    # The Apple build may not need an explicit flag because one of the
-    # frameworks may already provide it.
-    # But for non-OSX systems, I will use the CMake Threads package.
-    IF(NOT APPLE)
-        FIND_PACKAGE(Threads)
-    ENDIF(NOT APPLE)
-
-    # MinGW needs an additional library, mwindows
-    # It's total link flags should look like -lmingw32 -lSDL2main -lSDL2 -lmwindows
-    # (Actually on second look, I think it only needs one of the m* libraries.)
-    IF(MINGW)
-        SET(MINGW32_LIBRARY mingw32 CACHE STRING "mwindows for MinGW")
-    ENDIF(MINGW)
-
-    # For SDL2main
-    IF(NOT SDL2_BUILDING_LIBRARY)
-        IF(SDL2MAIN_LIBRARY)
-            SET(SDL2_LIBRARY_TEMP ${SDL2MAIN_LIBRARY} ${SDL2_LIBRARY_TEMP})
-        ENDIF(SDL2MAIN_LIBRARY)
-    ENDIF(NOT SDL2_BUILDING_LIBRARY)
-
-    # For OS X, SDL2 uses Cocoa as a backend so it must link to Cocoa.
-    # CMake doesn't display the -framework Cocoa string in the UI even
-    # though it actually is there if I modify a pre-used variable.
-    # I think it has something to do with the CACHE STRING.
-    # So I use a temporary variable until the end so I can set the
-    # "real" variable in one-shot.
-    IF(APPLE)
-        SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} "-framework Cocoa")
-    ENDIF(APPLE)
-
-    # For threads, as mentioned Apple doesn't need this.
-    # In fact, there seems to be a problem if I used the Threads package
-    # and try using this line, so I'm just skipping it entirely for OS X.
-    IF(NOT APPLE)
-        SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} ${CMAKE_THREAD_LIBS_INIT})
-    ENDIF(NOT APPLE)
-
-    # For MinGW library
-    IF(MINGW)
-        SET(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP})
-    ENDIF(MINGW)
-
-    # Set the final string here so the GUI reflects the final state.
-    SET(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL2 Library can be found")
-
-    # Unset the temp variable to INTERNAL so it is not seen in the CMake GUI
-    UNSET(SDL2_LIBRARY_TEMP)
-ENDIF(SDL2_LIBRARY_TEMP)
-
-message("</FindSDL2.cmake>")
-
-INCLUDE(FindPackageHandleStandardArgs)
-
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(SDL2 REQUIRED_VARS SDL2_LIBRARY SDL2_INCLUDE_DIR)
--- a/externals/httplib/README.md
+++ b/externals/httplib/README.md
@@ -1,4 +1,4 @@
-From https://github.com/yhirose/cpp-httplib/commit/d9479bc0b12e8a1e8bce2d34da4feeef488581f3
+From https://github.com/yhirose/cpp-httplib/tree/fce8e6fefdab4ad48bc5b25c98e5ebfda4f3cf53

 MIT License

--- a/externals/httplib/httplib.h
+++ b/externals/httplib/httplib.h
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -181,14 +181,16 @@ add_library(core STATIC
    hle/kernel/svc.cpp
    hle/kernel/svc.h
    hle/kernel/svc_wrap.h
+    hle/kernel/synchronization_object.cpp
+    hle/kernel/synchronization_object.h
+    hle/kernel/synchronization.cpp
+    hle/kernel/synchronization.h
    hle/kernel/thread.cpp
    hle/kernel/thread.h
    hle/kernel/transfer_memory.cpp
    hle/kernel/transfer_memory.h
    hle/kernel/vm_manager.cpp
    hle/kernel/vm_manager.h
-    hle/kernel/wait_object.cpp
-    hle/kernel/wait_object.h
    hle/kernel/writable_event.cpp
    hle/kernel/writable_event.h
    hle/lock.cpp
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -14,6 +14,7 @@
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/hle/kernel/svc.h"
@@ -153,7 +154,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
    config.tpidr_el0 = &cb->tpidr_el0;
    config.dczid_el0 = 4;
    config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = Timing::CNTFREQ;
+    config.cntfrq_el0 = Hardware::CNTFREQ;

    // Unpredictable instructions
    config.define_unpredictable_behaviour = true;
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -12,6 +12,7 @@
 #include "common/assert.h"
 #include "common/thread.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"

 namespace Core::Timing {

@@ -215,7 +216,7 @@ void CoreTiming::Idle() {
 }

 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
+    return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
 }

 s64 CoreTiming::GetDowncount() const {
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -11,7 +11,7 @@

 namespace Core::Timing {

-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
+constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;

 s64 msToCycles(std::chrono::milliseconds ms) {
    if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
@@ -20,9 +20,9 @@ s64 msToCycles(std::chrono::milliseconds ms) {
    }
    if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (ms.count() / 1000);
+        return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
    }
-    return (BASE_CLOCK_RATE * ms.count()) / 1000;
+    return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
 }

 s64 usToCycles(std::chrono::microseconds us) {
@@ -32,9 +32,9 @@ s64 usToCycles(std::chrono::microseconds us) {
    }
    if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (us.count() / 1000000);
+        return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
    }
-    return (BASE_CLOCK_RATE * us.count()) / 1000000;
+    return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
 }

 s64 nsToCycles(std::chrono::nanoseconds ns) {
@@ -44,14 +44,14 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
    }
    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return BASE_CLOCK_RATE * (ns.count() / 1000000000);
+        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
    }
-    return (BASE_CLOCK_RATE * ns.count()) / 1000000000;
+    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
 }

 u64 CpuCyclesToClockCycles(u64 ticks) {
-    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
-    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+    const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
 }

 } // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -6,28 +6,24 @@

 #include <chrono>
 #include "common/common_types.h"
+#include "core/hardware_properties.h"

 namespace Core::Timing {

-// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
-// The exact value used is of course unverified.
-constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
-constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
-
 s64 msToCycles(std::chrono::milliseconds ms);
 s64 usToCycles(std::chrono::microseconds us);
 s64 nsToCycles(std::chrono::nanoseconds ns);

 inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    return std::chrono::milliseconds(cycles * 1000 / BASE_CLOCK_RATE);
+    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
 }

 inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    return std::chrono::nanoseconds(cycles * 1000000000 / BASE_CLOCK_RATE);
+    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
 }

 inline std::chrono::microseconds CyclesToUs(s64 cycles) {
-    return std::chrono::microseconds(cycles * 1000000 / BASE_CLOCK_RATE);
+    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
 }

 u64 CpuCyclesToClockCycles(u64 ticks);
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <memory>
+#include "core/hardware_properties.h"

 namespace Core {

@@ -39,9 +40,7 @@ public:
    void RunLoop(bool tight_loop);

 private:
-    static constexpr std::size_t NUM_CPU_CORES = 4;
-
-    std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers;
+    std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers;
    std::size_t active_core{}; ///< Active core, only used in single thread mode

    System& system;
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -27,9 +27,9 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height) {
    // so just calculate them both even if the other isn't showing.
    FramebufferLayout res{width, height};

-    const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
-                                       ScreenUndocked::Width};
-    const auto window_aspect_ratio = static_cast<float>(height) / width;
+    const float window_aspect_ratio = static_cast<float>(height) / width;
+    const float emulation_aspect_ratio = EmulationAspectRatio(
+        static_cast<AspectRatio>(Settings::values.aspect_ratio), window_aspect_ratio);

    const Common::Rectangle<u32> screen_window_area{0, 0, width, height};
    Common::Rectangle<u32> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
@@ -58,4 +58,19 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
    return DefaultFrameLayout(width, height);
 }

+float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio) {
+    switch (aspect) {
+    case AspectRatio::Default:
+        return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
+    case AspectRatio::R4_3:
+        return 3.0f / 4.0f;
+    case AspectRatio::R21_9:
+        return 9.0f / 21.0f;
+    case AspectRatio::StretchToWindow:
+        return window_aspect_ratio;
+    default:
+        return static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width;
+    }
+}
+
 } // namespace Layout
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -18,6 +18,13 @@ enum ScreenDocked : u32 {
    HeightDocked = 1080,
 };

+enum class AspectRatio {
+    Default,
+    R4_3,
+    R21_9,
+    StretchToWindow,
+};
+
 /// Describes the layout of the window framebuffer
 struct FramebufferLayout {
    u32 width{ScreenUndocked::Width};
@@ -48,4 +55,12 @@ FramebufferLayout DefaultFrameLayout(u32 width, u32 height);
 */
 FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale);

+/**
+ * Convenience method to determine emulation aspect ratio
+ * @param aspect Represents the index of aspect ratio stored in Settings::values.aspect_ratio
+ * @param window_aspect_ratio Current window aspect ratio
+ * @return Emulation render window aspect ratio
+ */
+float EmulationAspectRatio(AspectRatio aspect, float window_aspect_ratio);
+
 } // namespace Layout
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+
+#include "common/common_types.h"
+
+namespace Core {
+
+namespace Hardware {
+
+// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
+// The exact value used is of course unverified.
+constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Switch's hardware clock speed
+constexpr u32 NUM_CPU_CORES = 4;            // Number of CPU Cores
+
+} // namespace Hardware
+
+struct EmuThreadHandle {
+    u32 host_handle;
+    u32 guest_handle;
+
+    u64 GetRaw() const {
+        return (static_cast<u64>(host_handle) << 32) | guest_handle;
+    }
+
+    bool operator==(const EmuThreadHandle& rhs) const {
+        return std::tie(host_handle, guest_handle) == std::tie(rhs.host_handle, rhs.guest_handle);
+    }
+
+    bool operator!=(const EmuThreadHandle& rhs) const {
+        return !operator==(rhs);
+    }
+
+    static constexpr EmuThreadHandle InvalidHandle() {
+        constexpr u32 invalid_handle = 0xFFFFFFFF;
+        return {invalid_handle, invalid_handle};
+    }
+};
+
+} // namespace Core
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
 void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) {
    const VAddr arb_addr = thread->GetArbiterWaitAddress();
    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread->GetPriority() >= thread->GetPriority()) {
-            thread_list.insert(it, thread);
-            return;
-        }
-        ++it;
+
+    const auto iter =
+        std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) {
+            return entry->GetPriority() >= thread->GetPriority();
+        });
+
+    if (iter == thread_list.cend()) {
+        thread_list.push_back(std::move(thread));
+    } else {
+        thread_list.insert(iter, std::move(thread));
    }
-    thread_list.push_back(std::move(thread));
 }

 void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
    const VAddr arb_addr = thread->GetArbiterWaitAddress();
    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        const std::shared_ptr<Thread>& current_thread = *it;
-        if (current_thread.get() == thread.get()) {
-            thread_list.erase(it);
-            return;
-        }
-        ++it;
-    }
-    UNREACHABLE();
+
+    const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
+                                   [&thread](const auto& entry) { return thread == entry; });
+
+    ASSERT(iter != thread_list.cend());
+
+    thread_list.erase(iter);
 }

-std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) {
-    std::vector<std::shared_ptr<Thread>> result;
-    std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address];
-    auto it = thread_list.begin();
-    while (it != thread_list.end()) {
-        std::shared_ptr<Thread> current_thread = *it;
-        result.push_back(std::move(current_thread));
-        ++it;
+std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
+    VAddr address) const {
+    const auto iter = arb_threads.find(address);
+    if (iter == arb_threads.cend()) {
+        return {};
    }
-    return result;
+
+    const std::list<std::shared_ptr<Thread>>& thread_list = iter->second;
+    return {thread_list.cbegin(), thread_list.cend()};
 }
 } // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -86,7 +86,7 @@ private:
    void RemoveThread(std::shared_ptr<Thread> thread);

    // Gets the threads waiting on an address.
-    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address);
+    std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;

    /// List of threads waiting for a address arbiter
    std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads;
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -12,7 +12,7 @@

 namespace Kernel {

-ClientSession::ClientSession(KernelCore& kernel) : WaitObject{kernel} {}
+ClientSession::ClientSession(KernelCore& kernel) : SynchronizationObject{kernel} {}

 ClientSession::~ClientSession() {
    // This destructor will be called automatically when the last ClientSession handle is closed by
@@ -31,6 +31,11 @@ void ClientSession::Acquire(Thread* thread) {
    UNIMPLEMENTED();
 }

+bool ClientSession::IsSignaled() const {
+    UNIMPLEMENTED();
+    return true;
+}
+
 ResultVal<std::shared_ptr<ClientSession>> ClientSession::Create(KernelCore& kernel,
                                                                std::shared_ptr<Session> parent,
                                                                std::string name) {
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -7,7 +7,7 @@
 #include <memory>
 #include <string>

-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 union ResultCode;
@@ -22,7 +22,7 @@ class KernelCore;
 class Session;
 class Thread;

-class ClientSession final : public WaitObject {
+class ClientSession final : public SynchronizationObject {
 public:
    explicit ClientSession(KernelCore& kernel);
    ~ClientSession() override;
@@ -48,6 +48,8 @@ public:

    void Acquire(Thread* thread) override;

+    bool IsSignaled() const override;
+
 private:
    static ResultVal<std::shared_ptr<ClientSession>> Create(KernelCore& kernel,
                                                            std::shared_ptr<Session> parent,
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -47,15 +47,15 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
    const std::string& reason, u64 timeout, WakeupCallback&& callback,
    std::shared_ptr<WritableEvent> writable_event) {
    // Put the client thread to sleep until the wait event is signaled or the timeout expires.
-    thread->SetWakeupCallback([context = *this, callback](ThreadWakeupReason reason,
-                                                          std::shared_ptr<Thread> thread,
-                                                          std::shared_ptr<WaitObject> object,
-                                                          std::size_t index) mutable -> bool {
-        ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
-        callback(thread, context, reason);
-        context.WriteToOutgoingCommandBuffer(*thread);
-        return true;
-    });
+    thread->SetWakeupCallback(
+        [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                                    std::shared_ptr<SynchronizationObject> object,
+                                    std::size_t index) mutable -> bool {
+            ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
+            callback(thread, context, reason);
+            context.WriteToOutgoingCommandBuffer(*thread);
+            return true;
+        });

    auto& kernel = Core::System::GetInstance().Kernel();
    if (!writable_event) {
@@ -67,7 +67,7 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
    const auto readable_event{writable_event->GetReadableEvent()};
    writable_event->Clear();
    thread->SetStatus(ThreadStatus::WaitHLEEvent);
-    thread->SetWaitObjects({readable_event});
+    thread->SetSynchronizationObjects({readable_event});
    readable_event->AddWaitingThread(thread);

    if (timeout > 0) {
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -23,6 +23,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -54,10 +55,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
    if (thread->GetStatus() == ThreadStatus::WaitSynch ||
        thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
        // Remove the thread from each of its waiting objects' waitlists
-        for (const auto& object : thread->GetWaitObjects()) {
+        for (const auto& object : thread->GetSynchronizationObjects()) {
            object->RemoveWaitingThread(thread);
        }
-        thread->ClearWaitObjects();
+        thread->ClearSynchronizationObjects();

        // Invoke the wakeup callback before clearing the wait objects
        if (thread->HasWakeupCallback()) {
@@ -96,7 +97,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
 }

 struct KernelCore::Impl {
-    explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {}
+    explicit Impl(Core::System& system)
+        : system{system}, global_scheduler{system}, synchronization{system} {}

    void Initialize(KernelCore& kernel) {
        Shutdown();
@@ -191,6 +193,7 @@ struct KernelCore::Impl {
    std::vector<std::shared_ptr<Process>> process_list;
    Process* current_process = nullptr;
    Kernel::GlobalScheduler global_scheduler;
+    Kernel::Synchronization synchronization;

    std::shared_ptr<ResourceLimit> system_resource_limit;

@@ -270,6 +273,14 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
    return impl->cores[id];
 }

+Kernel::Synchronization& KernelCore::Synchronization() {
+    return impl->synchronization;
+}
+
+const Kernel::Synchronization& KernelCore::Synchronization() const {
+    return impl->synchronization;
+}
+
 Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
    return *impl->exclusive_monitor;
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -29,6 +29,7 @@ class HandleTable;
 class PhysicalCore;
 class Process;
 class ResourceLimit;
+class Synchronization;
 class Thread;

 /// Represents a single instance of the kernel.
@@ -92,6 +93,12 @@ public:
    /// Gets the an instance of the respective physical CPU core.
    const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;

+    /// Gets the an instance of the Synchronization Interface.
+    Kernel::Synchronization& Synchronization();
+
+    /// Gets the an instance of the Synchronization Interface.
+    const Kernel::Synchronization& Synchronization() const;
+
    /// Stops execution of 'id' core, in order to reschedule a new thread.
    void PrepareReschedule(std::size_t id);

--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -337,7 +337,7 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
 }

 Process::Process(Core::System& system)
-    : WaitObject{system.Kernel()}, vm_manager{system},
+    : SynchronizationObject{system.Kernel()}, vm_manager{system},
      address_arbiter{system}, mutex{system}, system{system} {}

 Process::~Process() = default;
@@ -357,7 +357,7 @@ void Process::ChangeStatus(ProcessStatus new_status) {

    status = new_status;
    is_signaled = true;
-    WakeupAllWaitingThreads();
+    Signal();
 }

 void Process::AllocateMainThreadStack(u64 stack_size) {
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,8 +15,8 @@
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/mutex.h"
 #include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/vm_manager.h"
-#include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"

 namespace Core {
@@ -60,7 +60,7 @@ enum class ProcessStatus {
    DebugBreak,
 };

-class Process final : public WaitObject {
+class Process final : public SynchronizationObject {
 public:
    explicit Process(Core::System& system);
    ~Process() override;
@@ -359,10 +359,6 @@ private:
    /// specified by metadata provided to the process during loading.
    bool is_64bit_process = true;

-    /// Whether or not this process is signaled. This occurs
-    /// upon the process changing to a different state.
-    bool is_signaled = false;
-
    /// Total running time for the process in ticks.
    u64 total_process_running_time_ticks = 0;

--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -11,30 +11,30 @@

 namespace Kernel {

-ReadableEvent::ReadableEvent(KernelCore& kernel) : WaitObject{kernel} {}
+ReadableEvent::ReadableEvent(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ReadableEvent::~ReadableEvent() = default;

 bool ReadableEvent::ShouldWait(const Thread* thread) const {
-    return !signaled;
+    return !is_signaled;
 }

 void ReadableEvent::Acquire(Thread* thread) {
-    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
+    ASSERT_MSG(IsSignaled(), "object unavailable!");
 }

 void ReadableEvent::Signal() {
-    if (!signaled) {
-        signaled = true;
-        WakeupAllWaitingThreads();
+    if (!is_signaled) {
+        is_signaled = true;
+        SynchronizationObject::Signal();
    };
 }

 void ReadableEvent::Clear() {
-    signaled = false;
+    is_signaled = false;
 }

 ResultCode ReadableEvent::Reset() {
-    if (!signaled) {
+    if (!is_signaled) {
        return ERR_INVALID_STATE;
    }

--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -5,7 +5,7 @@
 #pragma once

 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"

 union ResultCode;

@@ -14,7 +14,7 @@ namespace Kernel {
 class KernelCore;
 class WritableEvent;

-class ReadableEvent final : public WaitObject {
+class ReadableEvent final : public SynchronizationObject {
    friend class WritableEvent;

 public:
@@ -46,13 +46,11 @@ public:
    ///      then ERR_INVALID_STATE will be returned.
    ResultCode Reset();

+    void Signal() override;
+
 private:
    explicit ReadableEvent(KernelCore& kernel);

-    void Signal();
-
-    bool signaled{};
-
    std::string name; ///< Name of event (optional)
 };

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -124,8 +124,8 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
               "Thread yielding without being in front");
    scheduled_queue[core_id].yield(priority);

-    std::array<Thread*, NUM_CPU_CORES> current_threads;
-    for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+    for (std::size_t i = 0; i < current_threads.size(); i++) {
        current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
    }

@@ -177,8 +177,8 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
    // function...
    if (scheduled_queue[core_id].empty()) {
        // Here, "current_threads" is calculated after the ""yield"", unlike yield -1
-        std::array<Thread*, NUM_CPU_CORES> current_threads;
-        for (u32 i = 0; i < NUM_CPU_CORES; i++) {
+        std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
+        for (std::size_t i = 0; i < current_threads.size(); i++) {
            current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
        }
        for (auto& thread : suggested_queue[core_id]) {
@@ -208,7 +208,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
 }

 void GlobalScheduler::PreemptThreads() {
-    for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) {
+    for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
        const u32 priority = preemption_priorities[core_id];

        if (scheduled_queue[core_id].size(priority) > 0) {
@@ -349,7 +349,7 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
 }

 void GlobalScheduler::Shutdown() {
-    for (std::size_t core = 0; core < NUM_CPU_CORES; core++) {
+    for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        scheduled_queue[core].clear();
        suggested_queue[core].clear();
    }
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -10,6 +10,7 @@

 #include "common/common_types.h"
 #include "common/multi_level_queue.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/thread.h"

 namespace Core {
@@ -23,8 +24,6 @@ class Process;

 class GlobalScheduler final {
 public:
-    static constexpr u32 NUM_CPU_CORES = 4;
-
    explicit GlobalScheduler(Core::System& system);
    ~GlobalScheduler();

@@ -125,7 +124,7 @@ public:
    void PreemptThreads();

    u32 CpuCoresCount() const {
-        return NUM_CPU_CORES;
+        return Core::Hardware::NUM_CPU_CORES;
    }

    void SetReselectionPending() {
@@ -149,13 +148,15 @@ private:
    bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);

    static constexpr u32 min_regular_priority = 2;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue;
-    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue;
+    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+        scheduled_queue;
+    std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, Core::Hardware::NUM_CPU_CORES>
+        suggested_queue;
    std::atomic<bool> is_reselection_pending{false};

    // The priority levels at which the global scheduler preempts threads every 10 ms. They are
    // ordered from Core 0 to Core 3.
-    std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
+    std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};

    /// Lists all thread ids that aren't deleted/etc.
    std::vector<std::shared_ptr<Thread>> thread_list;
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -13,7 +13,7 @@

 namespace Kernel {

-ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {}
+ServerPort::ServerPort(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ServerPort::~ServerPort() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
@@ -39,6 +39,10 @@ void ServerPort::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }

+bool ServerPort::IsSignaled() const {
+    return !pending_sessions.empty();
+}
+
 ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
                                                std::string name) {
    std::shared_ptr<ServerPort> server_port = std::make_shared<ServerPort>(kernel);
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -10,7 +10,7 @@
 #include <vector>
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -20,7 +20,7 @@ class KernelCore;
 class ServerSession;
 class SessionRequestHandler;

-class ServerPort final : public WaitObject {
+class ServerPort final : public SynchronizationObject {
 public:
    explicit ServerPort(KernelCore& kernel);
    ~ServerPort() override;
@@ -82,6 +82,8 @@ public:
    bool ShouldWait(const Thread* thread) const override;
    void Acquire(Thread* thread) override;

+    bool IsSignaled() const override;
+
 private:
    /// ServerSessions waiting to be accepted by the port
    std::vector<std::shared_ptr<ServerSession>> pending_sessions;
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -24,7 +24,7 @@

 namespace Kernel {

-ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {}
+ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
 ServerSession::~ServerSession() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
@@ -50,6 +50,16 @@ bool ServerSession::ShouldWait(const Thread* thread) const {
    return pending_requesting_threads.empty() || currently_handling != nullptr;
 }

+bool ServerSession::IsSignaled() const {
+    // Closed sessions should never wait, an error will be returned from svcReplyAndReceive.
+    if (!parent->Client()) {
+        return true;
+    }
+
+    // Wait if we have no pending requests, or if we're currently handling a request.
+    return !pending_requesting_threads.empty() && currently_handling == nullptr;
+}
+
 void ServerSession::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
    // We are now handling a request, pop it from the stack.
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,7 +10,7 @@
 #include <vector>

 #include "common/threadsafe_queue.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Memory {
@@ -41,7 +41,7 @@ class Thread;
 * After the server replies to the request, the response is marshalled back to the caller's
 * TLS buffer and control is transferred back to it.
 */
-class ServerSession final : public WaitObject {
+class ServerSession final : public SynchronizationObject {
 public:
    explicit ServerSession(KernelCore& kernel);
    ~ServerSession() override;
@@ -73,6 +73,8 @@ public:
        return parent.get();
    }

+    bool IsSignaled() const override;
+
    /**
     * Sets the HLE handler for the session. This handler will be called to service IPC requests
     * instead of the regular IPC machinery. (The regular IPC machinery is currently not
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -9,7 +9,7 @@

 namespace Kernel {

-Session::Session(KernelCore& kernel) : WaitObject{kernel} {}
+Session::Session(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Session::~Session() = default;

 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
@@ -29,6 +29,11 @@ bool Session::ShouldWait(const Thread* thread) const {
    return {};
 }

+bool Session::IsSignaled() const {
+    UNIMPLEMENTED();
+    return true;
+}
+
 void Session::Acquire(Thread* thread) {
    UNIMPLEMENTED();
 }
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>

-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"

 namespace Kernel {

@@ -19,7 +19,7 @@ class ServerSession;
 * Parent structure to link the client and server endpoints of a session with their associated
 * client port.
 */
-class Session final : public WaitObject {
+class Session final : public SynchronizationObject {
 public:
    explicit Session(KernelCore& kernel);
    ~Session() override;
@@ -39,6 +39,8 @@ public:

    bool ShouldWait(const Thread* thread) const override;

+    bool IsSignaled() const override;
+
    void Acquire(Thread* thread) override;

    std::shared_ptr<ClientSession> Client() {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -32,6 +32,7 @@
 #include "core/hle/kernel/shared_memory.h"
 #include "core/hle/kernel/svc.h"
 #include "core/hle/kernel/svc_wrap.h"
+#include "core/hle/kernel/synchronization.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/transfer_memory.h"
 #include "core/hle/kernel/writable_event.h"
@@ -433,22 +434,6 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
    return ERR_INVALID_HANDLE;
 }

-/// Default thread wakeup callback for WaitSynchronization
-static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                        std::shared_ptr<WaitObject> object, std::size_t index) {
-    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-
-    if (reason == ThreadWakeupReason::Timeout) {
-        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
-        return true;
-    }
-
-    ASSERT(reason == ThreadWakeupReason::Signal);
-    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
-    thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
-    return true;
-};
-
 /// Wait for the given handles to synchronize, timeout after the specified nanoseconds
 static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
                                      u64 handle_count, s64 nano_seconds) {
@@ -472,14 +457,14 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
    }

    auto* const thread = system.CurrentScheduler().GetCurrentThread();
-
-    using ObjectPtr = Thread::ThreadWaitObjects::value_type;
-    Thread::ThreadWaitObjects objects(handle_count);
-    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto& kernel = system.Kernel();
+    using ObjectPtr = Thread::ThreadSynchronizationObjects::value_type;
+    Thread::ThreadSynchronizationObjects objects(handle_count);
+    const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();

    for (u64 i = 0; i < handle_count; ++i) {
        const Handle handle = memory.Read32(handles_address + i * sizeof(Handle));
-        const auto object = handle_table.Get<WaitObject>(handle);
+        const auto object = handle_table.Get<SynchronizationObject>(handle);

        if (object == nullptr) {
            LOG_ERROR(Kernel_SVC, "Object is a nullptr");
@@ -488,47 +473,10 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr

        objects[i] = object;
    }
-
-    // Find the first object that is acquirable in the provided list of objects
-    auto itr = std::find_if(objects.begin(), objects.end(), [thread](const ObjectPtr& object) {
-        return !object->ShouldWait(thread);
-    });
-
-    if (itr != objects.end()) {
-        // We found a ready object, acquire it and set the result value
-        WaitObject* object = itr->get();
-        object->Acquire(thread);
-        *index = static_cast<s32>(std::distance(objects.begin(), itr));
-        return RESULT_SUCCESS;
-    }
-
-    // No objects were ready to be acquired, prepare to suspend the thread.
-
-    // If a timeout value of 0 was provided, just return the Timeout error code instead of
-    // suspending the thread.
-    if (nano_seconds == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    if (thread->IsSyncCancelled()) {
-        thread->SetSyncCancelled(false);
-        return ERR_SYNCHRONIZATION_CANCELED;
-    }
-
-    for (auto& object : objects) {
-        object->AddWaitingThread(SharedFrom(thread));
-    }
-
-    thread->SetWaitObjects(std::move(objects));
-    thread->SetStatus(ThreadStatus::WaitSynch);
-
-    // Create an event to wake the thread up after the specified nanosecond delay has passed
-    thread->WakeAfterDelay(nano_seconds);
-    thread->SetWakeupCallback(DefaultThreadWakeupCallback);
-
-    system.PrepareReschedule(thread->GetProcessorID());
-
-    return RESULT_TIMEOUT;
+    auto& synchronization = kernel.Synchronization();
+    const auto [result, handle_result] = synchronization.WaitFor(objects, nano_seconds);
+    *index = handle_result;
+    return result;
 }

 /// Resumes a thread waiting on WaitSynchronization
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -0,0 +1,87 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+/// Default thread wakeup callback for WaitSynchronization
+static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
+                                        std::shared_ptr<SynchronizationObject> object,
+                                        std::size_t index) {
+    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+
+    if (reason == ThreadWakeupReason::Timeout) {
+        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
+        return true;
+    }
+
+    ASSERT(reason == ThreadWakeupReason::Signal);
+    thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
+    thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
+    return true;
+}
+
+Synchronization::Synchronization(Core::System& system) : system{system} {}
+
+void Synchronization::SignalObject(SynchronizationObject& obj) const {
+    if (obj.IsSignaled()) {
+        obj.WakeupAllWaitingThreads();
+    }
+}
+
+std::pair<ResultCode, Handle> Synchronization::WaitFor(
+    std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
+    auto* const thread = system.CurrentScheduler().GetCurrentThread();
+    // Find the first object that is acquirable in the provided list of objects
+    const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(),
+                                  [thread](const std::shared_ptr<SynchronizationObject>& object) {
+                                      return object->IsSignaled();
+                                  });
+
+    if (itr != sync_objects.end()) {
+        // We found a ready object, acquire it and set the result value
+        SynchronizationObject* object = itr->get();
+        object->Acquire(thread);
+        const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+        return {RESULT_SUCCESS, index};
+    }
+
+    // No objects were ready to be acquired, prepare to suspend the thread.
+
+    // If a timeout value of 0 was provided, just return the Timeout error code instead of
+    // suspending the thread.
+    if (nano_seconds == 0) {
+        return {RESULT_TIMEOUT, InvalidHandle};
+    }
+
+    if (thread->IsSyncCancelled()) {
+        thread->SetSyncCancelled(false);
+        return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+    }
+
+    for (auto& object : sync_objects) {
+        object->AddWaitingThread(SharedFrom(thread));
+    }
+
+    thread->SetSynchronizationObjects(std::move(sync_objects));
+    thread->SetStatus(ThreadStatus::WaitSynch);
+
+    // Create an event to wake the thread up after the specified nanosecond delay has passed
+    thread->WakeAfterDelay(nano_seconds);
+    thread->SetWakeupCallback(DefaultThreadWakeupCallback);
+
+    system.PrepareReschedule(thread->GetProcessorID());
+
+    return {RESULT_TIMEOUT, InvalidHandle};
+}
+
+} // namespace Kernel
--- a/src/core/hle/kernel/synchronization.h
+++ b/src/core/hle/kernel/synchronization.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/hle/kernel/object.h"
+#include "core/hle/result.h"
+
+namespace Core {
+class System;
+} // namespace Core
+
+namespace Kernel {
+
+class SynchronizationObject;
+
+/**
+ * The 'Synchronization' class is an interface for handling synchronization methods
+ * used by Synchronization objects and synchronization SVCs. This centralizes processing of
+ * such
+ */
+class Synchronization {
+public:
+    explicit Synchronization(Core::System& system);
+
+    /// Signals a synchronization object, waking up all its waiting threads
+    void SignalObject(SynchronizationObject& obj) const;
+
+    /// Tries to see if waiting for any of the sync_objects is necessary, if not
+    /// it returns Success and the handle index of the signaled sync object. In
+    /// case not, the current thread will be locked and wait for nano_seconds or
+    /// for a synchronization object to signal.
+    std::pair<ResultCode, Handle> WaitFor(
+        std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds);
+
+private:
+    Core::System& system;
+};
+} // namespace Kernel
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -10,20 +10,26 @@
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/synchronization.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/kernel/thread.h"

 namespace Kernel {

-WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {}
-WaitObject::~WaitObject() = default;
+SynchronizationObject::SynchronizationObject(KernelCore& kernel) : Object{kernel} {}
+SynchronizationObject::~SynchronizationObject() = default;

-void WaitObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::Signal() {
+    kernel.Synchronization().SignalObject(*this);
+}
+
+void SynchronizationObject::AddWaitingThread(std::shared_ptr<Thread> thread) {
    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
    if (itr == waiting_threads.end())
        waiting_threads.push_back(std::move(thread));
 }

-void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
    auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
    // If a thread passed multiple handles to the same object,
    // the kernel might attempt to remove the thread from the object's
@@ -32,7 +38,7 @@ void WaitObject::RemoveWaitingThread(std::shared_ptr<Thread> thread) {
        waiting_threads.erase(itr);
 }

-std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
+std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
    Thread* candidate = nullptr;
    u32 candidate_priority = THREADPRIO_LOWEST + 1;

@@ -57,7 +63,7 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
    return SharedFrom(candidate);
 }

-void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
+void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    ASSERT(!ShouldWait(thread.get()));

    if (!thread) {
@@ -65,7 +71,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    }

    if (thread->IsSleepingOnWait()) {
-        for (const auto& object : thread->GetWaitObjects()) {
+        for (const auto& object : thread->GetSynchronizationObjects()) {
            ASSERT(!object->ShouldWait(thread.get()));
            object->Acquire(thread.get());
        }
@@ -73,9 +79,9 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
        Acquire(thread.get());
    }

-    const std::size_t index = thread->GetWaitObjectIndex(SharedFrom(this));
+    const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));

-    thread->ClearWaitObjects();
+    thread->ClearSynchronizationObjects();

    thread->CancelWakeupTimer();

@@ -90,13 +96,13 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
    }
 }

-void WaitObject::WakeupAllWaitingThreads() {
+void SynchronizationObject::WakeupAllWaitingThreads() {
    while (auto thread = GetHighestPriorityReadyThread()) {
        WakeupWaitingThread(thread);
    }
 }

-const std::vector<std::shared_ptr<Thread>>& WaitObject::GetWaitingThreads() const {
+const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
    return waiting_threads;
 }

--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -15,10 +15,10 @@ class KernelCore;
 class Thread;

 /// Class that represents a Kernel object that a thread can be waiting on
-class WaitObject : public Object {
+class SynchronizationObject : public Object {
 public:
-    explicit WaitObject(KernelCore& kernel);
-    ~WaitObject() override;
+    explicit SynchronizationObject(KernelCore& kernel);
+    ~SynchronizationObject() override;

    /**
     * Check if the specified thread should wait until the object is available
@@ -30,6 +30,13 @@ public:
    /// Acquire/lock the object for the specified thread if it is available
    virtual void Acquire(Thread* thread) = 0;

+    /// Signal this object
+    virtual void Signal();
+
+    virtual bool IsSignaled() const {
+        return is_signaled;
+    }
+
    /**
     * Add a thread to wait on this object
     * @param thread Pointer to thread to add
@@ -60,16 +67,20 @@ public:
    /// Get a const reference to the waiting threads list for debug use
    const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;

+protected:
+    bool is_signaled{}; // Tells if this sync object is signalled;
+
 private:
    /// Threads waiting for this object to become available
    std::vector<std::shared_ptr<Thread>> waiting_threads;
 };

-// Specialization of DynamicObjectCast for WaitObjects
+// Specialization of DynamicObjectCast for SynchronizationObjects
 template <>
-inline std::shared_ptr<WaitObject> DynamicObjectCast<WaitObject>(std::shared_ptr<Object> object) {
+inline std::shared_ptr<SynchronizationObject> DynamicObjectCast<SynchronizationObject>(
+    std::shared_ptr<Object> object) {
    if (object != nullptr && object->IsWaitable()) {
-        return std::static_pointer_cast<WaitObject>(object);
+        return std::static_pointer_cast<SynchronizationObject>(object);
    }
    return nullptr;
 }
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -15,6 +15,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -31,11 +32,15 @@ bool Thread::ShouldWait(const Thread* thread) const {
    return status != ThreadStatus::Dead;
 }

+bool Thread::IsSignaled() const {
+    return status == ThreadStatus::Dead;
+}
+
 void Thread::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
 }

-Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {}
+Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
 Thread::~Thread() = default;

 void Thread::Stop() {
@@ -45,7 +50,7 @@ void Thread::Stop() {
    kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
    callback_handle = 0;
    SetStatus(ThreadStatus::Dead);
-    WakeupAllWaitingThreads();
+    Signal();

    // Clean up any dangling references in objects that this thread was waiting for
    for (auto& wait_object : wait_objects) {
@@ -215,7 +220,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
    context.cpu_registers[1] = output;
 }

-s32 Thread::GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const {
+s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
    ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
    return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
@@ -336,14 +341,16 @@ void Thread::ChangeCore(u32 core, u64 mask) {
    SetCoreAndAffinityMask(core, mask);
 }

-bool Thread::AllWaitObjectsReady() const {
-    return std::none_of(
-        wait_objects.begin(), wait_objects.end(),
-        [this](const std::shared_ptr<WaitObject>& object) { return object->ShouldWait(this); });
+bool Thread::AllSynchronizationObjectsReady() const {
+    return std::none_of(wait_objects.begin(), wait_objects.end(),
+                        [this](const std::shared_ptr<SynchronizationObject>& object) {
+                            return object->ShouldWait(this);
+                        });
 }

 bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                                  std::shared_ptr<WaitObject> object, std::size_t index) {
+                                  std::shared_ptr<SynchronizationObject> object,
+                                  std::size_t index) {
    ASSERT(wakeup_callback);
    return wakeup_callback(reason, std::move(thread), std::move(object), index);
 }
@@ -425,7 +432,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
            const s32 old_core = processor_id;
            if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) {
                if (static_cast<s32>(ideal_core) < 0) {
-                    processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES);
+                    processor_id = HighestSetCore(affinity_mask, Core::Hardware::NUM_CPU_CORES);
                } else {
                    processor_id = ideal_core;
                }
@@ -449,7 +456,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
            scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
        }

-        for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
                scheduler.Unsuggest(current_priority, core, this);
            }
@@ -460,7 +467,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
        }

-        for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
                scheduler.Suggest(current_priority, core, this);
            }
@@ -474,12 +481,12 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
        return;
    }
-    auto& scheduler = Core::System::GetInstance().GlobalScheduler();
+    auto& scheduler = kernel.GlobalScheduler();
    if (processor_id >= 0) {
        scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
            scheduler.Unsuggest(old_priority, core, this);
        }
@@ -496,7 +503,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
        }
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
            scheduler.Suggest(current_priority, core, this);
        }
@@ -506,13 +513,13 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
 }

 void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
-    auto& scheduler = Core::System::GetInstance().GlobalScheduler();
+    auto& scheduler = kernel.GlobalScheduler();
    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
        current_priority >= THREADPRIO_COUNT) {
        return;
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (((old_affinity_mask >> core) & 1) != 0) {
            if (core == static_cast<u32>(old_core)) {
                scheduler.Unschedule(current_priority, core, this);
@@ -522,7 +529,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
        }
    }

-    for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
        if (((affinity_mask >> core) & 1) != 0) {
            if (core == static_cast<u32>(processor_id)) {
                scheduler.Schedule(current_priority, core, this);
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -11,7 +11,7 @@
 #include "common/common_types.h"
 #include "core/arm/arm_interface.h"
 #include "core/hle/kernel/object.h"
-#include "core/hle/kernel/wait_object.h"
+#include "core/hle/kernel/synchronization_object.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -95,7 +95,7 @@ enum class ThreadSchedMasks : u32 {
    ForcePauseMask = 0x0070,
 };

-class Thread final : public WaitObject {
+class Thread final : public SynchronizationObject {
 public:
    explicit Thread(KernelCore& kernel);
    ~Thread() override;
@@ -104,11 +104,11 @@ public:

    using ThreadContext = Core::ARM_Interface::ThreadContext;

-    using ThreadWaitObjects = std::vector<std::shared_ptr<WaitObject>>;
+    using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;

    using WakeupCallback =
        std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                           std::shared_ptr<WaitObject> object, std::size_t index)>;
+                           std::shared_ptr<SynchronizationObject> object, std::size_t index)>;

    /**
     * Creates and returns a new thread. The new thread is immediately scheduled
@@ -146,6 +146,7 @@ public:

    bool ShouldWait(const Thread* thread) const override;
    void Acquire(Thread* thread) override;
+    bool IsSignaled() const override;

    /**
     * Gets the thread's current priority
@@ -233,7 +234,7 @@ public:
     *
     * @param object Object to query the index of.
     */
-    s32 GetWaitObjectIndex(std::shared_ptr<WaitObject> object) const;
+    s32 GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const;

    /**
     * Stops a thread, invalidating it from further use
@@ -314,15 +315,15 @@ public:
        return owner_process;
    }

-    const ThreadWaitObjects& GetWaitObjects() const {
+    const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
        return wait_objects;
    }

-    void SetWaitObjects(ThreadWaitObjects objects) {
+    void SetSynchronizationObjects(ThreadSynchronizationObjects objects) {
        wait_objects = std::move(objects);
    }

-    void ClearWaitObjects() {
+    void ClearSynchronizationObjects() {
        for (const auto& waiting_object : wait_objects) {
            waiting_object->RemoveWaitingThread(SharedFrom(this));
        }
@@ -330,7 +331,7 @@ public:
    }

    /// Determines whether all the objects this thread is waiting on are ready.
-    bool AllWaitObjectsReady() const;
+    bool AllSynchronizationObjectsReady() const;

    const MutexWaitingThreads& GetMutexWaitingThreads() const {
        return wait_mutex_threads;
@@ -395,7 +396,7 @@ public:
     *      will cause an assertion to trigger.
     */
    bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
-                              std::shared_ptr<WaitObject> object, std::size_t index);
+                              std::shared_ptr<SynchronizationObject> object, std::size_t index);

    u32 GetIdealCore() const {
        return ideal_core;
@@ -494,7 +495,7 @@ private:

    /// Objects that the thread is waiting on, in the same order as they were
    /// passed to WaitSynchronization.
-    ThreadWaitObjects wait_objects;
+    ThreadSynchronizationObjects wait_objects;

    /// List of threads that are waiting for a mutex that is held by this thread.
    MutexWaitingThreads wait_mutex_threads;
--- a/src/core/hle/kernel/writable_event.cpp
+++ b/src/core/hle/kernel/writable_event.cpp
@@ -22,7 +22,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) {
    writable_event->name = name + ":Writable";
    writable_event->readable = readable_event;
    readable_event->name = name + ":Readable";
-    readable_event->signaled = false;

    return {std::move(readable_event), std::move(writable_event)};
 }
@@ -40,7 +39,7 @@ void WritableEvent::Clear() {
 }

 bool WritableEvent::IsSignaled() const {
-    return readable->signaled;
+    return readable->IsSignaled();
 }

 } // namespace Kernel
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -847,7 +847,7 @@ private:
        LOG_DEBUG(Service_AM, "called");

        IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>());

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -867,14 +867,14 @@ private:

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<IStorage>(std::move(*storage));
+        rb.PushIpcInterface<IStorage>(std::move(storage));
    }

    void PushInteractiveInData(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_AM, "called");

        IPC::RequestParser rp{ctx};
-        applet->GetBroker().PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>());
+        applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>());

        ASSERT(applet->IsInitialized());
        applet->ExecuteInteractive();
@@ -898,7 +898,7 @@ private:

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<IStorage>(std::move(*storage));
+        rb.PushIpcInterface<IStorage>(std::move(storage));
    }

    void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) {
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -50,7 +50,7 @@ AppletDataBroker::RawChannelData AppletDataBroker::PeekDataToAppletForDebug() co
    return {std::move(out_normal), std::move(out_interactive)};
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
+std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
    if (out_channel.empty())
        return nullptr;

@@ -60,7 +60,7 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
+std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
    if (in_channel.empty())
        return nullptr;

@@ -69,7 +69,7 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
+std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
    if (out_interactive_channel.empty())
        return nullptr;

@@ -79,7 +79,7 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
    return out;
 }

-std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
+std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
    if (in_interactive_channel.empty())
        return nullptr;

@@ -88,21 +88,21 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
    return out;
 }

-void AppletDataBroker::PushNormalDataFromGame(IStorage storage) {
-    in_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage) {
+    in_channel.emplace_back(std::move(storage));
 }

-void AppletDataBroker::PushNormalDataFromApplet(IStorage storage) {
-    out_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage) {
+    out_channel.emplace_back(std::move(storage));
    pop_out_data_event.writable->Signal();
 }

-void AppletDataBroker::PushInteractiveDataFromGame(IStorage storage) {
-    in_interactive_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage) {
+    in_interactive_channel.emplace_back(std::move(storage));
 }

-void AppletDataBroker::PushInteractiveDataFromApplet(IStorage storage) {
-    out_interactive_channel.push_back(std::make_unique<IStorage>(storage));
+void AppletDataBroker::PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage) {
+    out_interactive_channel.emplace_back(std::move(storage));
    pop_interactive_out_data_event.writable->Signal();
 }

--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -72,17 +72,17 @@ public:
    // Retrieves but does not pop the data sent to applet.
    RawChannelData PeekDataToAppletForDebug() const;

-    std::unique_ptr<IStorage> PopNormalDataToGame();
-    std::unique_ptr<IStorage> PopNormalDataToApplet();
+    std::shared_ptr<IStorage> PopNormalDataToGame();
+    std::shared_ptr<IStorage> PopNormalDataToApplet();

-    std::unique_ptr<IStorage> PopInteractiveDataToGame();
-    std::unique_ptr<IStorage> PopInteractiveDataToApplet();
+    std::shared_ptr<IStorage> PopInteractiveDataToGame();
+    std::shared_ptr<IStorage> PopInteractiveDataToApplet();

-    void PushNormalDataFromGame(IStorage storage);
-    void PushNormalDataFromApplet(IStorage storage);
+    void PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage);
+    void PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage);

-    void PushInteractiveDataFromGame(IStorage storage);
-    void PushInteractiveDataFromApplet(IStorage storage);
+    void PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage);
+    void PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage);

    void SignalStateChanged() const;

@@ -94,16 +94,16 @@ private:
    // Queues are named from applet's perspective

    // PopNormalDataToApplet and PushNormalDataFromGame
-    std::deque<std::unique_ptr<IStorage>> in_channel;
+    std::deque<std::shared_ptr<IStorage>> in_channel;

    // PopNormalDataToGame and PushNormalDataFromApplet
-    std::deque<std::unique_ptr<IStorage>> out_channel;
+    std::deque<std::shared_ptr<IStorage>> out_channel;

    // PopInteractiveDataToApplet and PushInteractiveDataFromGame
-    std::deque<std::unique_ptr<IStorage>> in_interactive_channel;
+    std::deque<std::shared_ptr<IStorage>> in_interactive_channel;

    // PopInteractiveDataToGame and PushInteractiveDataFromApplet
-    std::deque<std::unique_ptr<IStorage>> out_interactive_channel;
+    std::deque<std::shared_ptr<IStorage>> out_interactive_channel;

    Kernel::EventPair state_changed_event;

--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -186,7 +186,7 @@ void Error::Execute() {

 void Error::DisplayCompleted() {
    complete = true;
-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>{}});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -20,7 +20,7 @@ namespace Service::AM::Applets {
 constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221};

 static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) {
-    std::unique_ptr<IStorage> storage = broker.PopNormalDataToApplet();
+    std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet();
    for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
        const auto data = storage->GetData();
        LOG_INFO(Service_AM,
@@ -148,7 +148,7 @@ void Auth::AuthFinished(bool successful) {
    std::vector<u8> out(sizeof(Return));
    std::memcpy(out.data(), &return_, sizeof(Return));

-    broker.PushNormalDataFromApplet(IStorage{std::move(out)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(out)));
    broker.SignalStateChanged();
 }

@@ -198,7 +198,7 @@ void PhotoViewer::Execute() {
 }

 void PhotoViewer::ViewFinished() {
-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>{}});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
    broker.SignalStateChanged();
 }

@@ -234,8 +234,8 @@ void StubApplet::ExecuteInteractive() {
    LOG_WARNING(Service_AM, "called (STUBBED)");
    LogCurrentStorage(broker, "ExecuteInteractive");

-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
-    broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
+    broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
    broker.SignalStateChanged();
 }

@@ -243,8 +243,8 @@ void StubApplet::Execute() {
    LOG_WARNING(Service_AM, "called (STUBBED)");
    LogCurrentStorage(broker, "Execute");

-    broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
-    broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
+    broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ b/src/core/hle/service/am/applets/profile_select.cpp
@@ -50,7 +50,7 @@ void ProfileSelect::ExecuteInteractive() {

 void ProfileSelect::Execute() {
    if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
        return;
    }

@@ -71,7 +71,7 @@ void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {

    final_data = std::vector<u8>(sizeof(UserSelectionOutput));
    std::memcpy(final_data.data(), &output, final_data.size());
-    broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
    broker.SignalStateChanged();
 }

--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -102,7 +102,7 @@ void SoftwareKeyboard::ExecuteInteractive() {

 void SoftwareKeyboard::Execute() {
    if (complete) {
-        broker.PushNormalDataFromApplet(IStorage{std::move(final_data)});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
        broker.SignalStateChanged();
        return;
    }
@@ -145,15 +145,15 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
        final_data = output_main;

        if (complete) {
-            broker.PushNormalDataFromApplet(IStorage{std::move(output_main)});
+            broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
            broker.SignalStateChanged();
        } else {
-            broker.PushInteractiveDataFromApplet(IStorage{std::move(output_sub)});
+            broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::move(output_sub)));
        }
    } else {
        output_main[0] = 1;
        complete = true;
-        broker.PushNormalDataFromApplet(IStorage{std::move(output_main)});
+        broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
        broker.SignalStateChanged();
    }
 }
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -284,7 +284,7 @@ void WebBrowser::Finalize() {
    std::vector<u8> data(sizeof(WebCommonReturnValue));
    std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue));

-    broker.PushNormalDataFromApplet(IStorage{std::move(data)});
+    broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(data)));
    broker.SignalStateChanged();

    if (!temporary_dir.empty() && FileUtil::IsDirectory(temporary_dir)) {
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -170,8 +170,10 @@ public:
            {3, nullptr, "SetContextForMultiStream"},
            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
-            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {7, nullptr, "DecodeInterleavedForMultiStream"},
+            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"},
+            {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"},
+            {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+            {9, nullptr, "DecodeInterleavedForMultiStream"},
        };
        // clang-format on

--- a/src/core/hle/service/bcat/backend/backend.cpp
+++ b/src/core/hle/service/bcat/backend/backend.cpp
@@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name,
 }

 bool NullBackend::Clear(u64 title_id) {
-    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}");
+    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);

    return true;
 }

 void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) {
-    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id,
+    LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
              Common::HexToString(passphrase));
 }

--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -200,7 +200,8 @@ private:
    DownloadResult DownloadInternal(const std::string& resolved_path, u32 timeout_seconds,
                                    const std::string& content_type_name) {
        if (client == nullptr) {
-            client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT, timeout_seconds);
+            client = std::make_unique<httplib::SSLClient>(BOXCAT_HOSTNAME, PORT);
+            client->set_timeout_sec(timeout_seconds);
        }

        httplib::Headers headers{
@@ -448,8 +449,8 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title)

 Boxcat::StatusResult Boxcat::GetStatus(std::optional<std::string>& global,
                                       std::map<std::string, EventStatus>& games) {
-    httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT),
-                              static_cast<int>(TIMEOUT_SECONDS)};
+    httplib::SSLClient client{BOXCAT_HOSTNAME, static_cast<int>(PORT)};
+    client.set_timeout_sec(static_cast<int>(TIMEOUT_SECONDS));

    httplib::Headers headers{
        {std::string("Game-Assets-API-Version"), std::string(BOXCAT_API_VERSION)},
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -420,7 +420,7 @@ public:
            return;
        }

-        IFile file(result.Unwrap());
+        auto file = std::make_shared<IFile>(result.Unwrap());

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
@@ -445,7 +445,7 @@ public:
            return;
        }

-        IDirectory directory(result.Unwrap());
+        auto directory = std::make_shared<IDirectory>(result.Unwrap());

        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
@@ -794,8 +794,8 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
 void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_FS, "called");

-    IFileSystem filesystem(fsc.OpenSDMC().Unwrap(),
-                           SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));
+    auto filesystem = std::make_shared<IFileSystem>(
+        fsc.OpenSDMC().Unwrap(), SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -846,7 +846,8 @@ void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
        id = FileSys::StorageId::NandSystem;
    }

-    IFileSystem filesystem(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));
+    auto filesystem =
+        std::make_shared<IFileSystem>(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -898,7 +899,7 @@ void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
        return;
    }

-    IStorage storage(std::move(romfs.Unwrap()));
+    auto storage = std::make_shared<IStorage>(std::move(romfs.Unwrap()));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
@@ -937,7 +938,8 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {

    FileSys::PatchManager pm{title_id};

-    IStorage storage(pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));
+    auto storage = std::make_shared<IStorage>(
+        pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -10,6 +10,7 @@
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/frontend/input.h"
+#include "core/hardware_properties.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -37,11 +38,11 @@ namespace Service::HID {

 // Updating period for each HID device.
 // TODO(ogniK): Find actual polling rate of hid
-constexpr s64 pad_update_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 66);
+constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66);
 [[maybe_unused]] constexpr s64 accelerometer_update_ticks =
-    static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
 [[maybe_unused]] constexpr s64 gyroscope_update_ticks =
-    static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 100);
+    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
 constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;

 IAppletResource::IAppletResource(Core::System& system)
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -129,12 +129,20 @@ public:
            {304, nullptr, "Disconnect"},
            {400, nullptr, "Initialize"},
            {401, nullptr, "Finalize"},
-            {402, nullptr, "SetOperationMode"},
+            {402, &IUserLocalCommunicationService::Initialize2, "Initialize2"}, // 7.0.0+
        };
        // clang-format on

        RegisterHandlers(functions);
    }
+
+    void Initialize2(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_LDN, "(STUBBED) called");
+        // Result success seem make this services start network and continue.
+        // If we just pass result error then it will stop and maybe try again and again.
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_UNKNOWN);
+    }
 };

 class LDNS final : public ServiceFramework<LDNS> {
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -44,6 +44,8 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
        return GetWaitbase(input, output);
    case IoctlCommand::IocChannelSetTimeoutCommand:
        return ChannelSetTimeout(input, output);
+    case IoctlCommand::IocChannelSetTimeslice:
+        return ChannelSetTimeslice(input, output);
    default:
        break;
    }
@@ -228,4 +230,14 @@ u32 nvhost_gpu::ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>&
    return 0;
 }

+u32 nvhost_gpu::ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetTimeslice params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
+    LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
+
+    channel_timeslice = params.timeslice;
+
+    return 0;
+}
+
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -48,6 +48,7 @@ private:
        IocAllocObjCtxCommand = 0xC0104809,
        IocChannelGetWaitbaseCommand = 0xC0080003,
        IocChannelSetTimeoutCommand = 0x40044803,
+        IocChannelSetTimeslice = 0xC004481D,
    };

    enum class CtxObjects : u32_le {
@@ -101,6 +102,11 @@ private:
    static_assert(sizeof(IoctlChannelSetPriority) == 4,
                  "IoctlChannelSetPriority is incorrect size");

+    struct IoctlSetTimeslice {
+        u32_le timeslice;
+    };
+    static_assert(sizeof(IoctlSetTimeslice) == 4, "IoctlSetTimeslice is incorrect size");
+
    struct IoctlEventIdControl {
        u32_le cmd; // 0=disable, 1=enable, 2=clear
        u32_le id;
@@ -174,6 +180,7 @@ private:
    u64_le user_data{};
    IoctlZCullBind zcull_params{};
    u32_le channel_priority{};
+    u32_le channel_timeslice{};

    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
    u32 SetClientData(const std::vector<u8>& input, std::vector<u8>& output);
@@ -188,6 +195,7 @@ private:
                  const std::vector<u8>& input2, IoctlVersion version);
    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
    u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);

    std::shared_ptr<nvmap> nvmap_dev;
    u32 assigned_syncpoints{};
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
@@ -26,8 +27,8 @@

 namespace Service::NVFlinger {

-constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
-constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);
+constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30);

 NVFlinger::NVFlinger(Core::System& system) : system(system) {
    displays.emplace_back(0, "Default", system);
@@ -222,7 +223,7 @@ void NVFlinger::Compose() {

 s64 NVFlinger::GetNextTicks() const {
    constexpr s64 max_hertz = 120LL;
-    return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
+    return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
 }

 } // namespace Service::NVFlinger
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/standard_steady_clock_core.h"

 namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
 TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};
    TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};

    if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/tick_based_steady_clock_core.h"

 namespace Service::Time::Clock {
@@ -12,7 +13,7 @@ namespace Service::Time::Clock {
 SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};

    return {ticks_time_span.ToSeconds(), GetClockSourceId()};
 }
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -6,6 +6,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
@@ -233,7 +234,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
    if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
        const auto ticks{Clock::TimeSpanType::FromTicks(
            Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-            Core::Timing::CNTFREQ)};
+            Core::Hardware::CNTFREQ)};
        const s64 base_time_point{context.offset + current_time_point.time_point -
                                  ticks.ToSeconds()};
        IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -5,6 +5,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/service/time/clock_types.h"
 #include "core/hle/service/time/steady_clock_core.h"
 #include "core/hle/service/time/time_sharedmemory.h"
@@ -31,7 +32,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
                                            Clock::TimeSpanType current_time_point) {
    const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
-        Core::Timing::CNTFREQ)};
+        Core::Hardware::CNTFREQ)};
    const Clock::SteadyClockContext context{
        static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
        clock_source_id};
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -9,6 +9,7 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/service/hid/controllers/npad.h"
 #include "core/hle/service/hid/hid.h"
@@ -17,7 +18,7 @@

 namespace Memory {

-constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 12);
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12);
 constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;

 StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -429,6 +429,7 @@ struct Values {
    int vulkan_device;

    float resolution_factor;
+    int aspect_ratio;
    bool use_frame_limit;
    u16 frame_limit;
    bool use_disk_shader_cache;
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -7,13 +7,14 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
 #include "core/memory.h"
 #include "core/tools/freezer.h"

 namespace Tools {
 namespace {

-constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
+constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);

 u64 MemoryReadWidth(Memory::Memory& memory, u32 width, VAddr addr) {
    switch (width) {
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
    memory_manager.h
    morton.cpp
    morton.h
+    query_cache.h
    rasterizer_accelerated.cpp
    rasterizer_accelerated.h
    rasterizer_cache.cpp
@@ -74,6 +75,8 @@ add_library(video_core STATIC
    renderer_opengl/gl_stream_buffer.h
    renderer_opengl/gl_texture_cache.cpp
    renderer_opengl/gl_texture_cache.h
+    renderer_opengl/gl_query_cache.cpp
+    renderer_opengl/gl_query_cache.h
    renderer_opengl/maxwell_to_gl.h
    renderer_opengl/renderer_opengl.cpp
    renderer_opengl/renderer_opengl.h
@@ -177,6 +180,8 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_pipeline_cache.cpp
        renderer_vulkan/vk_pipeline_cache.h
+        renderer_vulkan/vk_query_cache.cpp
+        renderer_vulkan/vk_query_cache.h
        renderer_vulkan/vk_rasterizer.cpp
        renderer_vulkan/vk_rasterizer.h
        renderer_vulkan/vk_renderpass_cache.cpp
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,17 +4,21 @@

 #include <cinttypes>
 #include <cstring>
+#include <optional>
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/texture.h"

 namespace Tegra::Engines {

+using VideoCore::QueryType;
+
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;

@@ -399,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessQueryCondition();
        break;
    }
+    case MAXWELL3D_REG_INDEX(counter_reset): {
+        ProcessCounterReset();
+        break;
+    }
    case MAXWELL3D_REG_INDEX(sync_info): {
        ProcessSyncPoint();
        break;
@@ -481,7 +489,7 @@ void Maxwell3D::FlushMMEInlineDraw() {

    const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
    if (ShouldExecute()) {
-        rasterizer.DrawMultiBatch(is_indexed);
+        rasterizer.Draw(is_indexed, true);
    }

    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -519,61 +527,51 @@ void Maxwell3D::ProcessFirmwareCall4() {
    regs.reg_array[0xd00] = 1;
 }

-void Maxwell3D::ProcessQueryGet() {
-    const GPUVAddr sequence_address{regs.query.QueryAddress()};
-    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
-    // VAddr before writing.
-
-    // TODO(Subv): Support the other query units.
-    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
-               "Units other than CROP are unimplemented");
-
-    u64 result = 0;
-
-    // TODO(Subv): Support the other query variables
-    switch (regs.query.query_get.select) {
-    case Regs::QuerySelect::Zero:
-        // This seems to actually write the query sequence to the query address.
-        result = regs.query.query_sequence;
-        break;
-    default:
-        result = 1;
-        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
-                          static_cast<u32>(regs.query.query_get.select.Value()));
-    }
-
-    // TODO(Subv): Research and implement how query sync conditions work.
-
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
    struct LongQueryResult {
        u64_le value;
        u64_le timestamp;
    };
    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
+    if (long_query) {
+        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+        // GPU, this command may actually take a while to complete in real hardware due to GPU
+        // wait queues.
+        LongQueryResult query_result{payload, system.GPU().GetTicks()};
+        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+    } else {
+        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+    }
+}

-    switch (regs.query.query_get.mode) {
-    case Regs::QueryMode::Write:
-    case Regs::QueryMode::Write2: {
-        u32 sequence = regs.query.query_sequence;
-        if (regs.query.query_get.short_query) {
-            // Write the current query sequence to the sequence address.
-            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
-            // query.
-            memory_manager.Write<u32>(sequence_address, sequence);
-        } else {
-            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
-            // GPU, this command may actually take a while to complete in real hardware due to GPU
-            // wait queues.
-            LongQueryResult query_result{};
-            query_result.value = result;
-            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = system.CoreTiming().GetTicks();
-            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+void Maxwell3D::ProcessQueryGet() {
+    // TODO(Subv): Support the other query units.
+    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
+               "Units other than CROP are unimplemented");
+
+    switch (regs.query.query_get.operation) {
+    case Regs::QueryOperation::Release:
+        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+        break;
+    case Regs::QueryOperation::Acquire:
+        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
+        // matches the current payload.
+        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+        break;
+    case Regs::QueryOperation::Counter:
+        if (const std::optional<u64> result = GetQueryResult()) {
+            // If the query returns an empty optional it means it's cached and deferred.
+            // In this case we have a non-empty result, so we stamp it immediately.
+            StampQueryResult(*result, regs.query.query_get.short_query == 0);
        }
        break;
-    }
+    case Regs::QueryOperation::Trap:
+        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+        break;
    default:
-        UNIMPLEMENTED_MSG("Query mode {} not implemented",
-                          static_cast<u32>(regs.query.query_get.mode.Value()));
+        UNIMPLEMENTED_MSG("Unknown query operation");
+        break;
    }
 }

@@ -590,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
    }
    case Regs::ConditionMode::ResNonZero: {
        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
        break;
    }
    case Regs::ConditionMode::Equal: {
        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
        break;
    }
    case Regs::ConditionMode::NotEqual: {
        Regs::QueryCompare cmp;
-        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
        break;
@@ -616,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
    }
 }

+void Maxwell3D::ProcessCounterReset() {
+    switch (regs.counter_reset) {
+    case Regs::CounterReset::SampleCnt:
+        rasterizer.ResetCounter(QueryType::SamplesPassed);
+        break;
+    default:
+        LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
+                    static_cast<int>(regs.counter_reset));
+        break;
+    }
+}
+
 void Maxwell3D::ProcessSyncPoint() {
    const u32 sync_point = regs.sync_info.sync_point.Value();
    const u32 increment = regs.sync_info.increment.Value();
@@ -644,7 +654,7 @@ void Maxwell3D::DrawArrays() {

    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
    if (ShouldExecute()) {
-        rasterizer.DrawBatch(is_indexed);
+        rasterizer.Draw(is_indexed, false);
    }

    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -658,6 +668,22 @@ void Maxwell3D::DrawArrays() {
    }
 }

+std::optional<u64> Maxwell3D::GetQueryResult() {
+    switch (regs.query.query_get.select) {
+    case Regs::QuerySelect::Zero:
+        return 0;
+    case Regs::QuerySelect::SamplesPassed:
+        // Deferred.
+        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+                         system.GPU().GetTicks());
+        return {};
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+                          static_cast<u32>(regs.query.query_get.select.Value()));
+        return 1;
+    }
+}
+
 void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
    auto& shader = state.shader_stages[stage_index];
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <bitset>
+#include <optional>
 #include <type_traits>
 #include <unordered_map>
 #include <vector>
@@ -71,12 +72,11 @@ public:
        static constexpr std::size_t MaxConstBuffers = 18;
        static constexpr std::size_t MaxConstBufferSize = 0x10000;

-        enum class QueryMode : u32 {
-            Write = 0,
-            Sync = 1,
-            // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
-            // is.
-            Write2 = 2,
+        enum class QueryOperation : u32 {
+            Release = 0,
+            Acquire = 1,
+            Counter = 2,
+            Trap = 3,
        };

        enum class QueryUnit : u32 {
@@ -410,6 +410,27 @@ public:
            Linear = 1,
        };

+        enum class CounterReset : u32 {
+            SampleCnt = 0x01,
+            Unk02 = 0x02,
+            Unk03 = 0x03,
+            Unk04 = 0x04,
+            EmittedPrimitives = 0x10, // Not tested
+            Unk11 = 0x11,
+            Unk12 = 0x12,
+            Unk13 = 0x13,
+            Unk15 = 0x15,
+            Unk16 = 0x16,
+            Unk17 = 0x17,
+            Unk18 = 0x18,
+            Unk1A = 0x1A,
+            Unk1B = 0x1B,
+            Unk1C = 0x1C,
+            Unk1D = 0x1D,
+            Unk1E = 0x1E,
+            GeneratedPrimitives = 0x1F,
+        };
+
        struct Cull {
            enum class FrontFace : u32 {
                ClockWise = 0x0900,
@@ -704,8 +725,8 @@ public:
                INSERT_UNION_PADDING_WORDS(0x15);

                s32 stencil_back_func_ref;
-                u32 stencil_back_func_mask;
                u32 stencil_back_mask;
+                u32 stencil_back_func_mask;

                INSERT_UNION_PADDING_WORDS(0xC);

@@ -858,11 +879,19 @@ public:
                    BitField<7, 1, u32> c7;
                } clip_distance_enabled;

-                INSERT_UNION_PADDING_WORDS(0x1);
+                u32 samplecnt_enable;

                float point_size;

-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x1);
+
+                u32 point_sprite_enable;
+
+                INSERT_UNION_PADDING_WORDS(0x3);
+
+                CounterReset counter_reset;
+
+                INSERT_UNION_PADDING_WORDS(0x1);

                u32 zeta_enable;

@@ -1077,7 +1106,7 @@ public:
                    u32 query_sequence;
                    union {
                        u32 raw;
-                        BitField<0, 2, QueryMode> mode;
+                        BitField<0, 2, QueryOperation> operation;
                        BitField<4, 1, u32> fence;
                        BitField<12, 4, QueryUnit> unit;
                        BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1409,9 +1438,15 @@ private:
    /// Handles a write to the QUERY_GET register.
    void ProcessQueryGet();

-    // Handles Conditional Rendering
+    /// Writes the query result accordingly.
+    void StampQueryResult(u64 payload, bool long_query);
+
+    /// Handles conditional rendering.
    void ProcessQueryCondition();

+    /// Handles counter resets.
+    void ProcessCounterReset();
+
    /// Handles writes to syncing register.
    void ProcessSyncPoint();

@@ -1428,6 +1463,9 @@ private:

    // Handles a instance drawcall from MME
    void StepInstance(MMEDrawMode expected_mode, u32 count);
+
+    /// Returns a query's value or an empty object if the value will be deferred through a cache.
+    std::optional<u64> GetQueryResult();
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
@@ -1458,8 +1496,8 @@ ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
 ASSERT_REG_POSITION(patch_vertices, 0x373);
 ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
-ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
-ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
+ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
+ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
@@ -1493,7 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
+ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 ASSERT_REG_POSITION(point_size, 0x546);
+ASSERT_REG_POSITION(point_sprite_enable, 0x548);
+ASSERT_REG_POSITION(counter_reset, 0x54C);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1677,11 +1677,11 @@ union Instruction {
    } xmad;

    union {
-        BitField<20, 14, u64> offset;
+        BitField<20, 14, u64> shifted_offset;
        BitField<34, 5, u64> index;

        u64 GetOffset() const {
-            return offset * 4;
+            return shifted_offset * 4;
        }
    } cbuf34;

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
    return true;
 }

+u64 GPU::GetTicks() const {
+    // This values were reversed engineered by fincs from NVN
+    // The gpu clock is reported in units of 385/625 nanoseconds
+    constexpr u64 gpu_ticks_num = 384;
+    constexpr u64 gpu_ticks_den = 625;
+
+    const u64 cpu_ticks = system.CoreTiming().GetTicks();
+    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
 void GPU::FlushCommands() {
    renderer.Rasterizer().FlushCommands();
 }
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
        block.sequence = regs.semaphore_sequence;
        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
        // CoreTiming
-        block.timestamp = system.CoreTiming().GetTicks();
+        block.timestamp = GetTicks();
        memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
                                   sizeof(block));
    } else {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:

    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);

+    u64 GetTicks() const;
+
    std::unique_lock<std::mutex> LockSync() {
        return std::unique_lock{sync_mutex};
    }
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -86,7 +86,7 @@ struct CommandDataContainer {
 struct SynchState final {
    std::atomic_bool is_running{true};

-    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
    CommandQueue queue;
    u64 last_fence{};
    std::atomic<u64> signaled_fence{};
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,6 +9,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

@@ -84,7 +85,9 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
    const auto cpu_addr = GpuToCpuAddress(gpu_addr);
    ASSERT(cpu_addr);

-    rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
+    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
+
    UnmapRange(gpu_addr, aligned_size);
    ASSERT(system.CurrentProcess()
               ->VMManager()
@@ -242,6 +245,8 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
        switch (page_table.attributes[page_index]) {
        case Common::PageType::Memory: {
            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            // Flush must happen on the rasterizer interface, such that memory is always synchronous
+            // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
            std::memcpy(dest_buffer, src_ptr, copy_amount);
            break;
@@ -292,6 +297,8 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
        switch (page_table.attributes[page_index]) {
        case Common::PageType::Memory: {
            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            // Invalidate must happen on the rasterizer interface, such that memory is always
+            // synchronous when it is written (even when in asynchronous GPU mode).
            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
            std::memcpy(dest_ptr, src_buffer, copy_amount);
            break;
@@ -339,6 +346,8 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::

        switch (page_table.attributes[page_index]) {
        case Common::PageType::Memory: {
+            // Flush must happen on the rasterizer interface, such that memory is always synchronous
+            // when it is copied (even when in asynchronous GPU mode).
            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
            WriteBlock(dest_addr, src_ptr, copy_amount);
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,359 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class QueryCache, class HostCounter>
+class CounterStreamBase {
+public:
+    explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
+        : cache{cache}, type{type} {}
+
+    /// Updates the state of the stream, enabling or disabling as needed.
+    void Update(bool enabled) {
+        if (enabled) {
+            Enable();
+        } else {
+            Disable();
+        }
+    }
+
+    /// Resets the stream to zero. It doesn't disable the query after resetting.
+    void Reset() {
+        if (current) {
+            current->EndQuery();
+
+            // Immediately start a new query to avoid disabling its state.
+            current = cache.Counter(nullptr, type);
+        }
+        last = nullptr;
+    }
+
+    /// Returns the current counter slicing as needed.
+    std::shared_ptr<HostCounter> Current() {
+        if (!current) {
+            return nullptr;
+        }
+        current->EndQuery();
+        last = std::move(current);
+        current = cache.Counter(last, type);
+        return last;
+    }
+
+    /// Returns true when the counter stream is enabled.
+    bool IsEnabled() const {
+        return current != nullptr;
+    }
+
+private:
+    /// Enables the stream.
+    void Enable() {
+        if (current) {
+            return;
+        }
+        current = cache.Counter(last, type);
+    }
+
+    // Disables the stream.
+    void Disable() {
+        if (current) {
+            current->EndQuery();
+        }
+        last = std::exchange(current, nullptr);
+    }
+
+    QueryCache& cache;
+    const VideoCore::QueryType type;
+
+    std::shared_ptr<HostCounter> current;
+    std::shared_ptr<HostCounter> last;
+};
+
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
+          class QueryPool>
+class QueryCacheBase {
+public:
+    explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+        : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
+                                                      static_cast<QueryCache&>(*this),
+                                                      VideoCore::QueryType::SamplesPassed}}} {}
+
+    void InvalidateRegion(CacheAddr addr, std::size_t size) {
+        std::unique_lock lock{mutex};
+        FlushAndRemoveRegion(addr, size);
+    }
+
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::unique_lock lock{mutex};
+        FlushAndRemoveRegion(addr, size);
+    }
+
+    /**
+     * Records a query in GPU mapped memory, potentially marked with a timestamp.
+     * @param gpu_addr  GPU address to flush to when the mapped memory is read.
+     * @param type      Query type, e.g. SamplesPassed.
+     * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
+     */
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
+        std::unique_lock lock{mutex};
+        auto& memory_manager = system.GPU().MemoryManager();
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+
+        CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
+        if (!query) {
+            const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+            ASSERT_OR_EXECUTE(cpu_addr, return;);
+
+            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
+        }
+
+        query->BindCounter(Stream(type).Current(), timestamp);
+    }
+
+    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
+    void UpdateCounters() {
+        std::unique_lock lock{mutex};
+        const auto& regs = system.GPU().Maxwell3D().regs;
+        Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
+    }
+
+    /// Resets a counter to zero. It doesn't disable the query after resetting.
+    void ResetCounter(VideoCore::QueryType type) {
+        std::unique_lock lock{mutex};
+        Stream(type).Reset();
+    }
+
+    /// Disable all active streams. Expected to be called at the end of a command buffer.
+    void DisableStreams() {
+        std::unique_lock lock{mutex};
+        for (auto& stream : streams) {
+            stream.Update(false);
+        }
+    }
+
+    /// Returns a new host counter.
+    std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
+                                         VideoCore::QueryType type) {
+        return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
+                                             type);
+    }
+
+    /// Returns the counter stream of the specified type.
+    CounterStream& Stream(VideoCore::QueryType type) {
+        return streams[static_cast<std::size_t>(type)];
+    }
+
+    /// Returns the counter stream of the specified type.
+    const CounterStream& Stream(VideoCore::QueryType type) const {
+        return streams[static_cast<std::size_t>(type)];
+    }
+
+protected:
+    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
+
+private:
+    /// Flushes a memory range to guest memory and removes it from the cache.
+    void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
+        const u64 addr_begin = static_cast<u64>(addr);
+        const u64 addr_end = addr_begin + static_cast<u64>(size);
+        const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
+            const u64 cache_begin = query.GetCacheAddr();
+            const u64 cache_end = cache_begin + query.SizeInBytes();
+            return cache_begin < addr_end && addr_begin < cache_end;
+        };
+
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+            const auto& it = cached_queries.find(page);
+            if (it == std::end(cached_queries)) {
+                continue;
+            }
+            auto& contents = it->second;
+            for (auto& query : contents) {
+                if (!in_range(query)) {
+                    continue;
+                }
+                rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
+                query.Flush();
+            }
+            contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
+                           std::end(contents));
+        }
+    }
+
+    /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
+    CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
+        rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
+        const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
+        return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
+                                                  host_ptr);
+    }
+
+    /// Tries to a get a cached query. Returns nullptr on failure.
+    CachedQuery* TryGet(CacheAddr addr) {
+        const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+        const auto it = cached_queries.find(page);
+        if (it == std::end(cached_queries)) {
+            return nullptr;
+        }
+        auto& contents = it->second;
+        const auto found =
+            std::find_if(std::begin(contents), std::end(contents),
+                         [addr](auto& query) { return query.GetCacheAddr() == addr; });
+        return found != std::end(contents) ? &*found : nullptr;
+    }
+
+    static constexpr std::uintptr_t PAGE_SIZE = 4096;
+    static constexpr unsigned PAGE_SHIFT = 12;
+
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
+
+    std::recursive_mutex mutex;
+
+    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
+
+    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+};
+
+template <class QueryCache, class HostCounter>
+class HostCounterBase {
+public:
+    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
+        : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
+        // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
+        constexpr u64 depth_threshold = 96;
+        if (depth > depth_threshold) {
+            depth = 0;
+            base_result = dependency->Query();
+            dependency = nullptr;
+        }
+    }
+    virtual ~HostCounterBase() = default;
+
+    /// Returns the current value of the query.
+    u64 Query() {
+        if (result) {
+            return *result;
+        }
+
+        u64 value = BlockingQuery() + base_result;
+        if (dependency) {
+            value += dependency->Query();
+            dependency = nullptr;
+        }
+
+        result = value;
+        return *result;
+    }
+
+    /// Returns true when flushing this query will potentially wait.
+    bool WaitPending() const noexcept {
+        return result.has_value();
+    }
+
+    u64 Depth() const noexcept {
+        return depth;
+    }
+
+protected:
+    /// Returns the value of query from the backend API blocking as needed.
+    virtual u64 BlockingQuery() const = 0;
+
+private:
+    std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
+    std::optional<u64> result;               ///< Filled with the already returned value.
+    u64 depth;                               ///< Number of nested dependencies.
+    u64 base_result = 0;                     ///< Equivalent to nested dependencies value.
+};
+
+template <class HostCounter>
+class CachedQueryBase {
+public:
+    explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
+        : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
+    virtual ~CachedQueryBase() = default;
+
+    CachedQueryBase(CachedQueryBase&&) noexcept = default;
+    CachedQueryBase(const CachedQueryBase&) = delete;
+
+    CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
+    CachedQueryBase& operator=(const CachedQueryBase&) = delete;
+
+    /// Flushes the query to guest memory.
+    virtual void Flush() {
+        // When counter is nullptr it means that it's just been reseted. We are supposed to write a
+        // zero in these cases.
+        const u64 value = counter ? counter->Query() : 0;
+        std::memcpy(host_ptr, &value, sizeof(u64));
+
+        if (timestamp) {
+            std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
+        }
+    }
+
+    /// Binds a counter to this query.
+    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+        if (counter) {
+            // If there's an old counter set it means the query is being rewritten by the game.
+            // To avoid losing the data forever, flush here.
+            Flush();
+        }
+        counter = std::move(counter_);
+        timestamp = timestamp_;
+    }
+
+    VAddr CpuAddr() const noexcept {
+        return cpu_addr;
+    }
+
+    CacheAddr GetCacheAddr() const noexcept {
+        return ToCacheAddr(host_ptr);
+    }
+
+    u64 SizeInBytes() const noexcept {
+        return SizeInBytes(timestamp.has_value());
+    }
+
+    static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
+        return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
+    }
+
+protected:
+    /// Returns true when querying the counter may potentially block.
+    bool WaitPending() const noexcept {
+        return counter && counter->WaitPending();
+    }
+
+private:
+    static constexpr std::size_t SMALL_QUERY_SIZE = 8;   // Query size without timestamp.
+    static constexpr std::size_t LARGE_QUERY_SIZE = 16;  // Query size with timestamp.
+    static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
+
+    VAddr cpu_addr;                       ///< Guest CPU address.
+    u8* host_ptr;                         ///< Writable host pointer.
+    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
+    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+};
+
+} // namespace VideoCommon
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@

 #include <atomic>
 #include <functional>
+#include <optional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -17,6 +18,11 @@ class MemoryManager;

 namespace VideoCore {

+enum class QueryType {
+    SamplesPassed,
+};
+constexpr std::size_t NumQueryTypes = 1;
+
 enum class LoadCallbackStage {
    Prepare,
    Decompile,
@@ -29,11 +35,8 @@ class RasterizerInterface {
 public:
    virtual ~RasterizerInterface() {}

-    /// Draw the current batch of vertex arrays
-    virtual bool DrawBatch(bool is_indexed) = 0;
-
-    /// Draw the current batch of multiple instances of vertex arrays
-    virtual bool DrawMultiBatch(bool is_indexed) = 0;
+    /// Dispatches a draw invocation
+    virtual void Draw(bool is_indexed, bool is_instanced) = 0;

    /// Clear the current framebuffer
    virtual void Clear() = 0;
@@ -41,6 +44,12 @@ public:
    /// Dispatches a compute shader invocation
    virtual void DispatchCompute(GPUVAddr code_addr) = 0;

+    /// Resets the counter of a query
+    virtual void ResetCounter(QueryType type) = 0;
+
+    /// Records a GPU query and caches it
+    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
+
    /// Notify rasterizer that all caches should be flushed to Switch memory
    virtual void FlushAll() = 0;

--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -0,0 +1,120 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+
+namespace OpenGL {
+
+namespace {
+
+constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
+
+constexpr GLenum GetTarget(VideoCore::QueryType type) {
+    return QueryTargets[static_cast<std::size_t>(type)];
+}
+
+} // Anonymous namespace
+
+QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
+    : VideoCommon::QueryCacheBase<
+          QueryCache, CachedQuery, CounterStream, HostCounter,
+          std::vector<OGLQuery>>{system,
+                                 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
+      gl_rasterizer{gl_rasterizer} {}
+
+QueryCache::~QueryCache() = default;
+
+OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
+    auto& reserve = query_pools[static_cast<std::size_t>(type)];
+    OGLQuery query;
+    if (reserve.empty()) {
+        query.Create(GetTarget(type));
+        return query;
+    }
+
+    query = std::move(reserve.back());
+    reserve.pop_back();
+    return query;
+}
+
+void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
+    query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
+}
+
+bool QueryCache::AnyCommandQueued() const noexcept {
+    return gl_rasterizer.AnyCommandQueued();
+}
+
+HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type)
+    : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+      type{type}, query{cache.AllocateQuery(type)} {
+    glBeginQuery(GetTarget(type), query.handle);
+}
+
+HostCounter::~HostCounter() {
+    cache.Reserve(type, std::move(query));
+}
+
+void HostCounter::EndQuery() {
+    if (!cache.AnyCommandQueued()) {
+        // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
+        // having any of these causes a lock. glFlush is considered a command, so we can safely wait
+        // for this. Insert to the OpenGL command stream a flush.
+        glFlush();
+    }
+    glEndQuery(GetTarget(type));
+}
+
+u64 HostCounter::BlockingQuery() const {
+    GLint64 value;
+    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
+    return static_cast<u64>(value);
+}
+
+CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
+    : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+
+CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
+    : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
+
+CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
+    VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
+    cache = rhs.cache;
+    type = rhs.type;
+    return *this;
+}
+
+void CachedQuery::Flush() {
+    // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
+    // To avoid this disable and re-enable keeping the dependency stream.
+    // But we only have to do this if we have pending waits to be done.
+    auto& stream = cache->Stream(type);
+    const bool slice_counter = WaitPending() && stream.IsEnabled();
+    if (slice_counter) {
+        stream.Update(false);
+    }
+
+    VideoCommon::CachedQueryBase<HostCounter>::Flush();
+
+    if (slice_counter) {
+        stream.Update(true);
+    }
+}
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -0,0 +1,78 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/query_cache.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace Core {
+class System;
+}
+
+namespace OpenGL {
+
+class CachedQuery;
+class HostCounter;
+class QueryCache;
+class RasterizerOpenGL;
+
+using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
+
+class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
+                                                            HostCounter, std::vector<OGLQuery>> {
+public:
+    explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+    ~QueryCache();
+
+    OGLQuery AllocateQuery(VideoCore::QueryType type);
+
+    void Reserve(VideoCore::QueryType type, OGLQuery&& query);
+
+    bool AnyCommandQueued() const noexcept;
+
+private:
+    RasterizerOpenGL& gl_rasterizer;
+};
+
+class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
+public:
+    explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type);
+    ~HostCounter();
+
+    void EndQuery();
+
+private:
+    u64 BlockingQuery() const override;
+
+    QueryCache& cache;
+    const VideoCore::QueryType type;
+    OGLQuery query;
+};
+
+class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
+public:
+    explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
+                         u8* host_ptr);
+    CachedQuery(CachedQuery&& rhs) noexcept;
+    CachedQuery(const CachedQuery&) = delete;
+
+    CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+    CachedQuery& operator=(const CachedQuery&) = delete;
+
+    void Flush() override;
+
+private:
+    QueryCache* cache;
+    VideoCore::QueryType type;
+};
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,6 +25,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                                   ScreenInfo& info)
    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
-      shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
+      screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
    state.draw.shader_program = 0;
    state.Apply();
@@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() {
    } else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
    }
+
+    ++num_queued_commands;
 }

 void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    MICROPROFILE_SCOPE(OpenGL_Drawing);
    auto& gpu = system.GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+
+    query_cache.UpdateCounters();

    SyncRasterizeEnable(state);
    SyncColorMask();
@@ -611,7 +617,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {

    // Setup shaders and their used resources.
    texture_cache.GuardSamplers(true);
-    const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
    SetupShaders(primitive_mode);
    texture_cache.GuardSamplers(false);

@@ -638,35 +644,47 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
        glTextureBarrier();
    }

+    ++num_queued_commands;
+
    const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
    const GLsizei num_instances =
        static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
    if (is_indexed) {
-        const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
        const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
-        glDrawElementsInstancedBaseVertexBaseInstance(
-            primitive_mode, num_vertices, index_format,
-            reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
-            base_instance);
+        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
+        const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
+            glDrawElements(primitive_mode, num_vertices, format, offset);
+        } else if (num_instances == 1 && base_instance == 0) {
+            glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
+        } else if (base_vertex == 0 && base_instance == 0) {
+            glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
+        } else if (base_vertex == 0) {
+            glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
+                                                num_instances, base_instance);
+        } else if (base_instance == 0) {
+            glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
+                                              num_instances, base_vertex);
+        } else {
+            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
+                                                          offset, num_instances, base_vertex,
+                                                          base_instance);
+        }
    } else {
        const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
        const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
-        glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
-                                          base_instance);
+        if (num_instances == 1 && base_instance == 0) {
+            glDrawArrays(primitive_mode, base_vertex, num_vertices);
+        } else if (base_instance == 0) {
+            glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
+        } else {
+            glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
+                                              num_instances, base_instance);
+        }
    }
 }

-bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
-    Draw(is_indexed, false);
-    return true;
-}
-
-bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
-    Draw(is_indexed, true);
-    return true;
-}
-
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    if (device.HasBrokenCompute()) {
        return;
@@ -707,6 +725,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    state.ApplyProgramPipeline();

    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
+    ++num_queued_commands;
+}
+
+void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
+    query_cache.ResetCounter(type);
+}
+
+void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+                             std::optional<u64> timestamp) {
+    query_cache.Query(gpu_addr, type, timestamp);
 }

 void RasterizerOpenGL::FlushAll() {}
@@ -718,6 +746,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
    }
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
+    query_cache.FlushRegion(addr, size);
 }

 void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -728,6 +757,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
    texture_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }

 void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -738,10 +768,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 }

 void RasterizerOpenGL::FlushCommands() {
+    // Only flush when we have commands queued to OpenGL.
+    if (num_queued_commands == 0) {
+        return;
+    }
+    num_queued_commands = 0;
    glFlush();
 }

 void RasterizerOpenGL::TickFrame() {
+    // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
+    num_queued_commands = 0;
+
    buffer_cache.TickFrame();
 }

@@ -1220,6 +1258,7 @@ void RasterizerOpenGL::SyncPointState() {
    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
    // in OpenGL).
    state.point.program_control = regs.vp_point_size.enable != 0;
+    state.point.sprite = regs.point_sprite_enable != 0;
    state.point.size = std::max(1.0f, regs.point_size);
 }

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,6 +24,7 @@
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_sampler_cache.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -57,10 +58,11 @@ public:
                              ScreenInfo& info);
    ~RasterizerOpenGL() override;

-    bool DrawBatch(bool is_indexed) override;
-    bool DrawMultiBatch(bool is_indexed) override;
+    void Draw(bool is_indexed, bool is_instanced) override;
    void Clear() override;
    void DispatchCompute(GPUVAddr code_addr) override;
+    void ResetCounter(VideoCore::QueryType type) override;
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void FlushAll() override;
    void FlushRegion(CacheAddr addr, u64 size) override;
    void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -75,6 +77,11 @@ public:
    void LoadDiskResources(const std::atomic_bool& stop_loading,
                           const VideoCore::DiskResourceLoadCallback& callback) override;

+    /// Returns true when there are commands queued to the OpenGL server.
+    bool AnyCommandQueued() const {
+        return num_queued_commands > 0;
+    }
+
 private:
    /// Configures the color and depth framebuffer states.
    void ConfigureFramebuffers();
@@ -102,9 +109,6 @@ private:
    void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                           std::size_t size);

-    /// Syncs all the state, shaders, render targets and textures setting before a draw call.
-    void Draw(bool is_indexed, bool is_instanced);
-
    /// Configures the current textures to use for the draw command.
    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);

@@ -180,10 +184,23 @@ private:
    /// Syncs the alpha test state to match the guest state
    void SyncAlphaTest();

-    /// Check for extension that are not strictly required
-    /// but are needed for correct emulation
+    /// Check for extension that are not strictly required but are needed for correct emulation
    void CheckExtensions();

+    std::size_t CalculateVertexArraysSize() const;
+
+    std::size_t CalculateIndexBufferSize() const;
+
+    /// Updates and returns a vertex array object representing current vertex format
+    GLuint SetupVertexFormat();
+
+    void SetupVertexBuffer(GLuint vao);
+    void SetupVertexInstances(GLuint vao);
+
+    GLintptr SetupIndexBuffer();
+
+    void SetupShaders(GLenum primitive_mode);
+
    const Device device;
    OpenGLState state;

@@ -191,6 +208,7 @@ private:
    ShaderCacheOpenGL shader_cache;
    SamplerCacheOpenGL sampler_cache;
    FramebufferCacheOpenGL framebuffer_cache;
+    QueryCache query_cache;

    Core::System& system;
    ScreenInfo& screen_info;
@@ -208,19 +226,8 @@ private:
    BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
    BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};

-    std::size_t CalculateVertexArraysSize() const;
-
-    std::size_t CalculateIndexBufferSize() const;
-
-    /// Updates and returns a vertex array object representing current vertex format
-    GLuint SetupVertexFormat();
-
-    void SetupVertexBuffer(GLuint vao);
-    void SetupVertexInstances(GLuint vao);
-
-    GLintptr SetupIndexBuffer();
-
-    void SetupShaders(GLenum primitive_mode);
+    /// Number of commands queued to the OpenGL driver. Reseted on flush.
+    std::size_t num_queued_commands = 0;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -207,4 +207,21 @@ void OGLFramebuffer::Release() {
    handle = 0;
 }

+void OGLQuery::Create(GLenum target) {
+    if (handle != 0)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+    glCreateQueries(target, 1, &handle);
+}
+
+void OGLQuery::Release() {
+    if (handle == 0)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+    glDeleteQueries(1, &handle);
+    handle = 0;
+}
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -266,4 +266,29 @@ public:
    GLuint handle = 0;
 };

+class OGLQuery : private NonCopyable {
+public:
+    OGLQuery() = default;
+
+    OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+    ~OGLQuery() {
+        Release();
+    }
+
+    OGLQuery& operator=(OGLQuery&& o) noexcept {
+        Release();
+        handle = std::exchange(o.handle, 0);
+        return *this;
+    }
+
+    /// Creates a new internal OpenGL resource and stores the handle
+    void Create(GLenum target);
+
+    /// Deletes the internal OpenGL resource
+    void Release();
+
+    GLuint handle = 0;
+};
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() {

 void OpenGLState::ApplyPointSize() {
    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
+    Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
    if (UpdateValue(cur_state.point.size, point.size)) {
        glPointSize(point.size);
    }
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -132,6 +132,7 @@ public:

    struct {
        bool program_control = false; // GL_PROGRAM_POINT_SIZE
+        bool sprite = false;          // GL_POINT_SPRITE
        GLfloat size = 1.0f;          // GL_POINT_SIZE
    } point;

--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_UNSIGNED_INT_2_10_10_10_REV;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    case Maxwell::VertexAttribute::Type::SignedInt:
@@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_INT_2_10_10_10_REV;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    case Maxwell::VertexAttribute::Type::Float:
@@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
            return GL_FLOAT;
        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-            UNREACHABLE();
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            return {};
+        }
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+        switch (attrib.size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8:
+            return GL_UNSIGNED_BYTE;
+        default:
+            LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
            return {};
        }
    default:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
-        UNREACHABLE();
+        LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
        return {};
    }
 }
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
    features.depthBiasClamp = true;
    features.geometryShader = true;
    features.tessellationShader = true;
+    features.occlusionQueryPrecise = true;
    features.fragmentStoresAndAtomics = true;
    features.shaderImageGatherExtended = true;
    features.shaderStorageImageWriteWithoutFormat = true;
@@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
    bit8_storage.uniformAndStorageBuffer8BitAccess = true;
    SetNext(next, bit8_storage);

+    vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
+    host_query_reset.hostQueryReset = true;
+    SetNext(next, host_query_reset);
+
    vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
    if (is_float16_supported) {
        float16_int8.shaderFloat16 = true;
@@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
        VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
        VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
        VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
+        VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
    };
    std::bitset<required_extensions.size()> available_extensions{};

@@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
        std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
        std::make_pair(features.geometryShader, "geometryShader"),
        std::make_pair(features.tessellationShader, "tessellationShader"),
+        std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
        std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
        std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
        std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
        }
    };

-    extensions.reserve(13);
+    extensions.reserve(14);
    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
    extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
    extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
    extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
    extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
+    extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);

    [[maybe_unused]] const bool nsight =
        std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -0,0 +1,122 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+namespace {
+
+constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
+
+constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
+    return QUERY_TARGETS[static_cast<std::size_t>(type)];
+}
+
+} // Anonymous namespace
+
+QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+
+QueryPool::~QueryPool() = default;
+
+void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
+    device = &device_;
+    type = type_;
+}
+
+std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
+    std::size_t index;
+    do {
+        index = CommitResource(fence);
+    } while (usage[index]);
+    usage[index] = true;
+
+    return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
+}
+
+void QueryPool::Allocate(std::size_t begin, std::size_t end) {
+    usage.resize(end);
+
+    const auto dev = device->GetLogical();
+    const u32 size = static_cast<u32>(end - begin);
+    const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
+    pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
+}
+
+void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
+    const auto it =
+        std::find_if(std::begin(pools), std::end(pools),
+                     [query_pool = query.first](auto& pool) { return query_pool == *pool; });
+    ASSERT(it != std::end(pools));
+
+    const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
+    usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
+}
+
+VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                           const VKDevice& device, VKScheduler& scheduler)
+    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
+                                  QueryPool>{system, rasterizer},
+      device{device}, scheduler{scheduler} {
+    for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
+        query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+    }
+}
+
+VKQueryCache::~VKQueryCache() = default;
+
+std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
+    return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+}
+
+void VKQueryCache::Reserve(VideoCore::QueryType type,
+                           std::pair<vk::QueryPool, std::uint32_t> query) {
+    query_pools[static_cast<std::size_t>(type)].Reserve(query);
+}
+
+HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type)
+    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
+      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
+    const auto dev = cache.Device().GetLogical();
+    cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) {
+        dev.resetQueryPoolEXT(query.first, query.second, 1, dld);
+        cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld);
+    });
+}
+
+HostCounter::~HostCounter() {
+    cache.Reserve(type, query);
+}
+
+void HostCounter::EndQuery() {
+    cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
+        cmdbuf.endQuery(query.first, query.second, dld);
+    });
+}
+
+u64 HostCounter::BlockingQuery() const {
+    if (ticks >= cache.Scheduler().Ticks()) {
+        cache.Scheduler().Flush();
+    }
+
+    const auto dev = cache.Device().GetLogical();
+    const auto& dld = cache.Device().GetDispatchLoader();
+    u64 value;
+    dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
+                            vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
+    return value;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -0,0 +1,104 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/query_cache.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class CachedQuery;
+class HostCounter;
+class VKDevice;
+class VKQueryCache;
+class VKScheduler;
+
+using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
+
+class QueryPool final : public VKFencedPool {
+public:
+    explicit QueryPool();
+    ~QueryPool() override;
+
+    void Initialize(const VKDevice& device, VideoCore::QueryType type);
+
+    std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
+
+    void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
+
+protected:
+    void Allocate(std::size_t begin, std::size_t end) override;
+
+private:
+    static constexpr std::size_t GROW_STEP = 512;
+
+    const VKDevice* device = nullptr;
+    VideoCore::QueryType type = {};
+
+    std::vector<UniqueQueryPool> pools;
+    std::vector<bool> usage;
+};
+
+class VKQueryCache final
+    : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
+                                         QueryPool> {
+public:
+    explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                          const VKDevice& device, VKScheduler& scheduler);
+    ~VKQueryCache();
+
+    std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
+
+    void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
+
+    const VKDevice& Device() const noexcept {
+        return device;
+    }
+
+    VKScheduler& Scheduler() const noexcept {
+        return scheduler;
+    }
+
+private:
+    const VKDevice& device;
+    VKScheduler& scheduler;
+};
+
+class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
+public:
+    explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
+                         VideoCore::QueryType type);
+    ~HostCounter();
+
+    void EndQuery();
+
+private:
+    u64 BlockingQuery() const override;
+
+    VKQueryCache& cache;
+    const VideoCore::QueryType type;
+    const std::pair<vk::QueryPool, std::uint32_t> query;
+    const u64 ticks;
+};
+
+class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
+public:
+    explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
+        : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -289,25 +289,19 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
                    staging_pool),
      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
-      sampler_cache(device) {}
+      sampler_cache(device), query_cache(system, *this, device, scheduler) {
+    scheduler.SetQueryCache(query_cache);
+}

 RasterizerVulkan::~RasterizerVulkan() = default;

-bool RasterizerVulkan::DrawBatch(bool is_indexed) {
-    Draw(is_indexed, false);
-    return true;
-}
-
-bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) {
-    Draw(is_indexed, true);
-    return true;
-}
-
 void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    MICROPROFILE_SCOPE(Vulkan_Drawing);

    FlushWork();

+    query_cache.UpdateCounters();
+
    const auto& gpu = system.GPU().Maxwell3D();
    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};

@@ -362,6 +356,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 void RasterizerVulkan::Clear() {
    MICROPROFILE_SCOPE(Vulkan_Clearing);

+    query_cache.UpdateCounters();
+
    const auto& gpu = system.GPU().Maxwell3D();
    if (!system.GPU().Maxwell3D().ShouldExecute()) {
        return;
@@ -429,6 +425,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    sampled_views.clear();
    image_views.clear();

+    query_cache.UpdateCounters();
+
    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
    const ComputePipelineCacheKey key{
        code_addr,
@@ -471,17 +469,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    });
 }

+void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
+    query_cache.ResetCounter(type);
+}
+
+void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
+                             std::optional<u64> timestamp) {
+    query_cache.Query(gpu_addr, type, timestamp);
+}
+
 void RasterizerVulkan::FlushAll() {}

 void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
+    query_cache.FlushRegion(addr, size);
 }

 void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
    texture_cache.InvalidateRegion(addr, size);
    pipeline_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
+    query_cache.InvalidateRegion(addr, size);
 }

 void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -24,6 +24,7 @@
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -96,7 +97,7 @@ struct ImageView {
    vk::ImageLayout* layout = nullptr;
 };

-class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
 public:
    explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
                              VKScreenInfo& screen_info, const VKDevice& device,
@@ -104,10 +105,11 @@ public:
                              VKScheduler& scheduler);
    ~RasterizerVulkan() override;

-    bool DrawBatch(bool is_indexed) override;
-    bool DrawMultiBatch(bool is_indexed) override;
+    void Draw(bool is_indexed, bool is_instanced) override;
    void Clear() override;
    void DispatchCompute(GPUVAddr code_addr) override;
+    void ResetCounter(VideoCore::QueryType type) override;
+    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void FlushAll() override;
    void FlushRegion(CacheAddr addr, u64 size) override;
    void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -140,8 +142,6 @@ private:

    static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;

-    void Draw(bool is_indexed, bool is_instanced);
-
    void FlushWork();

    Texceptions UpdateAttachments();
@@ -247,6 +247,7 @@ private:
    VKPipelineCache pipeline_cache;
    VKBufferCache buffer_cache;
    VKSamplerCache sampler_cache;
+    VKQueryCache query_cache;

    std::array<View, Maxwell::NumRenderTargets> color_attachments;
    View zeta_attachment;
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"

@@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
 }

 void VKScheduler::AllocateNewContext() {
+    ++ticks;
+
    std::unique_lock lock{mutex};
    current_fence = next_fence;
    next_fence = &resource_manager.CommitFence();
@@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
                         device.GetDispatchLoader());
+    // Enable counters once again. These are disabled when a command buffer is finished.
+    if (query_cache) {
+        query_cache->UpdateCounters();
+    }
 }

 void VKScheduler::InvalidateState() {
@@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
 }

 void VKScheduler::EndPendingOperations() {
+    query_cache->DisableStreams();
    EndRenderPass();
 }

--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <atomic>
 #include <condition_variable>
 #include <memory>
 #include <optional>
@@ -18,6 +19,7 @@ namespace Vulkan {

 class VKDevice;
 class VKFence;
+class VKQueryCache;
 class VKResourceManager;

 class VKFenceView {
@@ -67,6 +69,11 @@ public:
    /// Binds a pipeline to the current execution context.
    void BindGraphicsPipeline(vk::Pipeline pipeline);

+    /// Assigns the query cache.
+    void SetQueryCache(VKQueryCache& query_cache_) {
+        query_cache = &query_cache_;
+    }
+
    /// Returns true when viewports have been set in the current command buffer.
    bool TouchViewports() {
        return std::exchange(state.viewports, true);
@@ -112,6 +119,11 @@ public:
        return current_fence;
    }

+    /// Returns the current command buffer tick.
+    u64 Ticks() const {
+        return ticks;
+    }
+
 private:
    class Command {
    public:
@@ -205,6 +217,8 @@ private:

    const VKDevice& device;
    VKResourceManager& resource_manager;
+    VKQueryCache* query_cache = nullptr;
+
    vk::CommandBuffer current_cmdbuf;
    VKFence* current_fence = nullptr;
    VKFence* next_fence = nullptr;
@@ -227,6 +241,7 @@ private:
    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
    std::mutex mutex;
    std::condition_variable cv;
+    std::atomic<u64> ticks = 0;
    bool quit = false;
 };

--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -275,12 +275,14 @@ public:
        AddCapability(spv::Capability::ImageGatherExtended);
        AddCapability(spv::Capability::SampledBuffer);
        AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
+        AddCapability(spv::Capability::DrawParameters);
        AddCapability(spv::Capability::SubgroupBallotKHR);
        AddCapability(spv::Capability::SubgroupVoteKHR);
        AddExtension("SPV_KHR_shader_ballot");
        AddExtension("SPV_KHR_subgroup_vote");
        AddExtension("SPV_KHR_storage_buffer_storage_class");
        AddExtension("SPV_KHR_variable_pointers");
+        AddExtension("SPV_KHR_shader_draw_parameters");

        if (ir.UsesViewportIndex()) {
            AddCapability(spv::Capability::MultiViewport);
@@ -492,9 +494,11 @@ private:
        interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));

        // Declare input attributes
-        vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_uint, "vertex_index");
+        vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
        instance_index =
-            DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_uint, "instance_index");
+            DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
+        base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
+        base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
    }

    void DeclareTessControl() {
@@ -1068,9 +1072,12 @@ private:
                    return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
                            Type::Float};
                case 2:
-                    return {OpLoad(t_uint, instance_index), Type::Uint};
+                    return {
+                        OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
+                        Type::Int};
                case 3:
-                    return {OpLoad(t_uint, vertex_index), Type::Uint};
+                    return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
+                            Type::Int};
                }
                UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
                return {Constant(t_uint, 0U), Type::Uint};
@@ -2542,6 +2549,8 @@ private:

    Id instance_index{};
    Id vertex_index{};
+    Id base_instance{};
+    Id base_vertex{};
    std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
    Id frag_depth{};
    Id frag_coord{};
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {

        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);

-        // TODO(Rodrigo): Should precise be used when there's a postfactor?
-        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+        static constexpr std::array FmulPostFactor = {
+            1.000f, // None
+            0.500f, // Divide 2
+            0.250f, // Divide 4
+            0.125f, // Divide 8
+            8.000f, // Mul 8
+            4.000f, // Mul 4
+            2.000f, // Mul 2
+        };

        if (instr.fmul.postfactor != 0) {
-            auto postfactor = static_cast<s32>(instr.fmul.postfactor);
-
-            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
-            // logic.
-            if (postfactor >= 4) {
-                postfactor = 7 - postfactor;
-            } else {
-                postfactor = 0 - postfactor;
-            }
-
-            if (postfactor > 0) {
-                value = Operation(OperationCode::FMul, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << postfactor)));
-            } else {
-                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << -postfactor)));
-            }
+            op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
+                             Immediate(FmulPostFactor[instr.fmul.postfactor]));
        }

+        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::ICMP_CR:
-                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                        GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_R:
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_RC:
                return {GetRegister(instr.gpr39),
-                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
            case OpCode::Id::ICMP_IMM:
                return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
            default:
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
        switch (opcode->get().GetId()) {
        case OpCode::Id::BFI_RC:
            return {GetRegister(instr.gpr39),
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::BFI_IMM_R:
            return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
        default:
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -83,14 +83,14 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {

        const bool input_signed = instr.conversion.is_input_signed;

-        if (instr.conversion.src_size == Register::Size::Byte) {
-            const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
-            if (offset > 0) {
-                value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
-                                        std::move(value), Immediate(offset));
+        if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
+            ASSERT(instr.conversion.src_size == Register::Size::Byte ||
+                   instr.conversion.src_size == Register::Size::Short);
+            if (instr.conversion.src_size == Register::Size::Short) {
+                ASSERT(offset == 0 || offset == 2);
            }
-        } else {
-            UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+            value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+                                    std::move(value), Immediate(offset * 8));
        }

        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -522,68 +522,53 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               Node array, Node depth_compare, u32 bias_offset,
                               std::vector<Node> aoffi,
                               std::optional<Tegra::Shader::Register> bindless_reg) {
-    const auto is_array = static_cast<bool>(array);
-    const auto is_shadow = static_cast<bool>(depth_compare);
+    const bool is_array = array != nullptr;
+    const bool is_shadow = depth_compare != nullptr;
    const bool is_bindless = bindless_reg.has_value();

-    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
-                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
-                         "This method is not supported.");
+    UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
+    ASSERT_MSG(texture_type != TextureType::Texture3D || is_array || is_shadow,
+               "Illegal texture type");

    const SamplerInfo info{texture_type, is_array, is_shadow, false};
-    Node index_var{};
+    Node index_var;
    const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
                                         : GetSampler(instr.sampler, info);
-    Node4 values;
-    if (sampler == nullptr) {
-        for (u32 element = 0; element < values.size(); ++element) {
-            values[element] = Immediate(0);
-        }
-        return values;
+    if (!sampler) {
+        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
    }

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;
-
-    // LOD selection (either via bias or explicit textureLod) not supported in GL for
-    // sampler2DArrayShadow and samplerCubeArrayShadow.
-    const bool gl_lod_supported =
-        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
-          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
-
-    const OperationCode read_method =
-        (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
-
-    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
+    const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;

    Node bias;
    Node lod;
-    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
-        switch (process_mode) {
-        case TextureProcessMode::LZ:
-            lod = Immediate(0.0f);
-            break;
-        case TextureProcessMode::LB:
-            // If present, lod or bias are always stored in the register
-            // indexed by the gpr20 field with an offset depending on the
-            // usage of the other registers
-            bias = GetRegister(instr.gpr20.Value() + bias_offset);
-            break;
-        case TextureProcessMode::LL:
-            lod = GetRegister(instr.gpr20.Value() + bias_offset);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
-            break;
-        }
+    switch (process_mode) {
+    case TextureProcessMode::None:
+        break;
+    case TextureProcessMode::LZ:
+        lod = Immediate(0.0f);
+        break;
+    case TextureProcessMode::LB:
+        // If present, lod or bias are always stored in the register indexed by the gpr20 field with
+        // an offset depending on the usage of the other registers.
+        bias = GetRegister(instr.gpr20.Value() + bias_offset);
+        break;
+    case TextureProcessMode::LL:
+        lod = GetRegister(instr.gpr20.Value() + bias_offset);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
+        break;
    }

+    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto copy_coords = coords;
        MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias,
                         lod,      {},    element,       index_var};
-        values[element] = Operation(read_method, meta, std::move(copy_coords));
+        values[element] = Operation(opcode, meta, coords);
    }

    return values;
--- a/src/web_service/web_backend.cpp
+++ b/src/web_service/web_backend.cpp
@@ -73,14 +73,12 @@ struct Client::Impl {
                if (!parsedUrl.GetPort(&port)) {
                    port = HTTP_PORT;
                }
-                cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port,
-                                                        TIMEOUT_SECONDS);
+                cli = std::make_unique<httplib::Client>(parsedUrl.m_Host.c_str(), port);
            } else if (parsedUrl.m_Scheme == "https") {
                if (!parsedUrl.GetPort(&port)) {
                    port = HTTPS_PORT;
                }
-                cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port,
-                                                           TIMEOUT_SECONDS);
+                cli = std::make_unique<httplib::SSLClient>(parsedUrl.m_Host.c_str(), port);
            } else {
                LOG_ERROR(WebService, "Bad URL scheme {}", parsedUrl.m_Scheme);
                return Common::WebResult{Common::WebResult::Code::InvalidURL, "Bad URL scheme"};
@@ -90,6 +88,7 @@ struct Client::Impl {
            LOG_ERROR(WebService, "Invalid URL {}", host + path);
            return Common::WebResult{Common::WebResult::Code::InvalidURL, "Invalid URL"};
        }
+        cli->set_timeout_sec(TIMEOUT_SECONDS);

        httplib::Headers params;
        if (!jwt.empty()) {
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -630,6 +630,7 @@ void Config::ReadRendererValues() {
    Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
    Settings::values.resolution_factor =
        ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
+    Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
    Settings::values.use_frame_limit =
        ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
    Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
@@ -742,7 +743,6 @@ void Config::ReadUIValues() {
 void Config::ReadUIGamelistValues() {
    qt_config->beginGroup(QStringLiteral("UIGameList"));

-    UISettings::values.show_unknown = ReadSetting(QStringLiteral("show_unknown"), true).toBool();
    UISettings::values.show_add_ons = ReadSetting(QStringLiteral("show_add_ons"), true).toBool();
    UISettings::values.icon_size = ReadSetting(QStringLiteral("icon_size"), 64).toUInt();
    UISettings::values.row_1_text_id = ReadSetting(QStringLiteral("row_1_text_id"), 3).toUInt();
@@ -1065,6 +1065,7 @@ void Config::SaveRendererValues() {
    WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
    WriteSetting(QStringLiteral("resolution_factor"),
                 static_cast<double>(Settings::values.resolution_factor), 1.0);
+    WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
    WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
    WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
    WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
@@ -1159,7 +1160,6 @@ void Config::SaveUIValues() {
 void Config::SaveUIGamelistValues() {
    qt_config->beginGroup(QStringLiteral("UIGameList"));

-    WriteSetting(QStringLiteral("show_unknown"), UISettings::values.show_unknown, true);
    WriteSetting(QStringLiteral("show_add_ons"), UISettings::values.show_add_ons, true);
    WriteSetting(QStringLiteral("icon_size"), UISettings::values.icon_size, 64);
    WriteSetting(QStringLiteral("row_1_text_id"), UISettings::values.row_1_text_id, 3);
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -97,6 +97,7 @@ void ConfigureGraphics::SetConfiguration() {
    ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
    ui->resolution_factor_combobox->setCurrentIndex(
        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
+    ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
    ui->use_disk_shader_cache->setEnabled(runtime_lock);
    ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
@@ -114,6 +115,7 @@ void ConfigureGraphics::ApplyConfiguration() {
    Settings::values.vulkan_device = vulkan_device;
    Settings::values.resolution_factor =
        ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
+    Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
    Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
    Settings::values.use_asynchronous_gpu_emulation =
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -138,6 +138,41 @@
          </item>
         </layout>
        </item>
+        <item>
+         <layout class="QHBoxLayout" name="horizontalLayout_6">
+          <item>
+           <widget class="QLabel" name="ar_label">
+            <property name="text">
+             <string>Aspect Ratio:</string>
+            </property>
+           </widget>
+          </item>
+          <item>
+           <widget class="QComboBox" name="aspect_ratio_combobox">
+            <item>
+             <property name="text">
+              <string>Default (16:9)</string>
+             </property>
+            </item>
+            <item>
+             <property name="text">
+              <string>Force 4:3</string>
+             </property>
+            </item>
+            <item>
+             <property name="text">
+              <string>Force 21:9</string>
+             </property>
+            </item>
+            <item>
+             <property name="text">
+              <string>Stretch to Window</string>
+             </property>
+            </item>
+           </widget>
+          </item>
+         </layout>
+        </item>
        <item>
         <layout class="QHBoxLayout" name="horizontalLayout_3">
          <item>
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Nguyen Dac Nam	fd02386bbc	nit: typo	2020-02-23 12:34:06 +07:00
Nguyen Dac Nam	afc4701967	shader: FMUL: add static to post factor LUT	2020-02-23 12:33:25 +07:00
Nguyen Dac Nam	952a9a0429	clang-format	2020-02-22 09:56:20 +07:00
Nguyen Dac Nam	26b928fb67	shader: move post factor LUT to function and fix incorrect order.	2020-02-22 09:43:07 +07:00
Nguyen Dac Nam	33efc9c88b	shader: remove post factor LUT.	2020-02-22 09:41:33 +07:00
Nguyen Dac Nam	fa5d1ffd18	clang-format & add missing import	2020-02-21 23:38:10 +07:00
Nguyen Dac Nam	b8c5b7afba	nit: mistype	2020-02-21 23:23:25 +07:00
Nguyen Dac Nam	f37e333fe8	Update src/video_core/engines/shader_bytecode.h Co-Authored-By: Mat M. <mathew1800@gmail.com>	2020-02-21 23:22:18 +07:00
Nguyen Dac Nam	793c5741fb	shader: FMUL apply LUT	2020-02-21 23:21:56 +07:00
Nguyen Dac Nam	bfb4950bd5	shader: add FmulPostFactor LUT table	2020-02-21 23:19:39 +07:00
bunnei	fe8e5d8ae4	Merge pull request #3438 from bunnei/gpu-mem-manager-fix video_core: memory_manager: Flush/invalidate asynchronously when possible.	2020-02-20 20:04:05 -05:00
bunnei	2342c0d50e	Merge pull request #3432 from brianclinkenbeard/update-httplib Update httplib to 0.5.5	2020-02-19 21:15:06 -05:00
bunnei	bf0c929d4c	Merge pull request #3415 from ReinUsesLisp/texture-code shader/texture: Allow 2D shadow arrays and simplify code	2020-02-19 20:06:14 -05:00
bunnei	d65fa7d65c	video_core: memory_manager: Flush/invalidate asynchronously on Unmap. - Minor perf improvement.	2020-02-19 20:03:52 -05:00
Brian Clinkenbeard	d31156931d	fix issue with windows getnameinfo()	2020-02-19 16:16:49 -08:00
bunnei	b2bc7682b4	Merge pull request #3414 from ReinUsesLisp/maxwell-3d-draw maxwell_3d: Unify draw methods	2020-02-19 16:13:50 -05:00
bunnei	c8261a1a57	Merge pull request #3411 from ReinUsesLisp/specific-funcs gl_rasterizer: Use the least generic OpenGL draw function possible	2020-02-19 15:37:41 -05:00
bunnei	fd4c5463e8	Merge pull request #3437 from namkazt/patch-5 shader_conversion: add conversion I2F for Short	2020-02-19 11:27:28 -05:00
Nguyen Dac Nam	1b6308727c	shader_conversion: I2F : add Assert for case src_size is Short	2020-02-19 11:40:35 +07:00
Nguyen Dac Nam	a2c2c5768f	fix warning	2020-02-19 11:10:26 +07:00
Nguyen Dac Nam	a8508f2bc0	clang-format fix	2020-02-19 11:02:59 +07:00
Nguyen Dac Nam	556f3a6e9a	shader_conversion: add conversion I2F for Short	2020-02-19 10:54:37 +07:00
bunnei	e545c2322c	Merge pull request #3410 from ReinUsesLisp/vk-draw-index vk_shader_decompiler: Fix vertex id and instance id	2020-02-18 22:37:33 -05:00
Brian Clinkenbeard	ad4e5c15fb	httplib compatibility	2020-02-18 18:04:33 -08:00
Brian Clinkenbeard	7f6c686d55	update httplib to latest commit	2020-02-18 17:11:40 -08:00
Fernando Sahmkow	93acfbd3a5	Merge pull request #3409 from ReinUsesLisp/host-queries query_cache: Implement a query cache and query 21 (samples passed)	2020-02-18 11:31:06 -04:00
Brian Clinkenbeard	9e42025e5b	update httplib README	2020-02-17 22:54:09 -08:00
Brian Clinkenbeard	76b55c3624	0.4.2 works too	2020-02-17 22:53:25 -08:00
Brian Clinkenbeard	293d4d553a	update httplib to 0.2.6	2020-02-17 20:13:24 -08:00
bunnei	72d4c6fee0	Merge pull request #3412 from Morph1984/aspect-ratio GUI: Add aspect ratio dropdown	2020-02-17 22:02:18 -05:00
bunnei	7f380f4ffa	Merge pull request #3429 from brianclinkenbeard/fix-cmake-sdl2-arch Fix CMake build errors for certain SDL2 installations	2020-02-17 18:33:20 -05:00
Brian Clinkenbeard	13b02a1414	fix CMake build errors for certain SDL2 installations	2020-02-17 13:01:40 -08:00
bunnei	26006cbd2c	Merge pull request #3420 from namkazt/master2 nvhost_gpu: implement ChannelSetTimeslice	2020-02-17 00:31:11 -05:00
bunnei	af29e9d98e	Merge pull request #3421 from namkazt/patch-1 IUserLocalCommunicationService: stub function Initialize2	2020-02-16 04:01:42 -05:00
Nguyen Dac Nam	5257a83ebe	IUserLocalCommunicationService: add function Initialize2	2020-02-16 13:24:34 +07:00
Nguyen Dac Nam	6c0eb6026b	HLE: correct function name of IUserLocalCommunicationService 402: function name should be Initialize2 (7.0.0+) not SetOperationMode Follow by: https://switchbrew.org/wiki/LDN_services#IUserLocalCommunicationService	2020-02-16 13:14:06 +07:00
namkazy	7fadc9c180	nvhost_gpu: implement ChannelSetTimeslice	2020-02-16 11:53:03 +07:00
bunnei	619f64d7f4	Merge pull request #3419 from yuzu-emu/revert-3386-gpu-mem-interface Revert "video_core: memory_manager: Use GPU interface for cache functions."	2020-02-15 22:37:29 -05:00
bunnei	0f70f68fb3	Revert "video_core: memory_manager: Use GPU interface for cache functions."	2020-02-15 17:47:15 -05:00
ReinUsesLisp	6910ade146	shader/texture: Allow 2D shadow arrays and simplify code Shadow sampler 2D arrays are supported on OpenGL, so there's no reason to forbid these. Enable textureLod usage on these. Minor style changes.	2020-02-15 02:36:28 -03:00
ReinUsesLisp	91aa58e410	maxwell_3d: Unify draw methods Pass instanced state of a draw invocation as an argument instead of having two separate virtual methods.	2020-02-14 18:09:40 -03:00
ReinUsesLisp	6d3a046caa	query_cache: Address feedback	2020-02-14 17:38:27 -03:00
ReinUsesLisp	54a00ee4cf	query_cache: Fix ambiguity in CacheAddr getter	2020-02-14 17:38:27 -03:00
ReinUsesLisp	cc0694559f	query_cache: Add a recursive mutex for concurrent usage	2020-02-14 17:38:27 -03:00
ReinUsesLisp	bcd348f238	vk_query_cache: Implement generic query cache on Vulkan	2020-02-14 17:38:27 -03:00
ReinUsesLisp	c31382ced5	query_cache: Abstract OpenGL implementation Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation.	2020-02-14 17:38:27 -03:00
ReinUsesLisp	73d2d3342d	gl_query_cache: Optimize query cache Use a custom cache instead of relying on a ranged cache.	2020-02-14 17:38:27 -03:00
ReinUsesLisp	aae8c180cb	gl_query_cache: Implement host queries using a deferred cache Instead of waiting immediately for executed commands, defer the query until the guest CPU reads it. This way we get closer to what the guest program is doing. To archive this we have to build a dependency queue, because host APIs (like OpenGL and Vulkan) use ranged queries instead of counters like NVN. Waiting for queries implicitly uses fences and this requires a command being queued, otherwise the driver will lock waiting until a timeout. To fix this when there are no commands queued, we explicitly call glFlush.	2020-02-14 17:33:13 -03:00
ReinUsesLisp	ef9920e164	gl_rasterizer: Sort method declarations	2020-02-14 17:27:17 -03:00
ReinUsesLisp	fe1238be7a	gl_rasterizer: Add queued commands counter Keep track of the queued OpenGL commands that can signal a fence if waited on. As a side effect, we avoid calls to glFlush when no commands are queued.	2020-02-14 17:27:17 -03:00
ReinUsesLisp	2b58652f08	maxwell_3d: Slow implementation of passed samples (query 21) Implements GL_SAMPLES_PASSED by waiting immediately for queries.	2020-02-14 17:27:17 -03:00
bunnei	f552d553ba	Merge pull request #3401 from FernandoS27/synchronization Set of refactors for Kernel Synchronization and Hardware Constants	2020-02-14 14:40:20 -05:00
Morph	c3d0a0d627	Add 4:3 aspect ratio and address feedback	2020-02-14 14:39:04 -05:00
bunnei	63a59b9935	Merge pull request #3379 from ReinUsesLisp/cbuf-offset shader/decode: Fix constant buffer offsets	2020-02-14 13:22:53 -05:00
Zach Hilman	4501bd8ca9	Merge pull request #3398 from brianclinkenbeard/fix-cmake-sdl2 Use config mode for finding SDL2 with CMake	2020-02-14 09:11:47 -05:00
Fernando Sahmkow	829d8c0d6b	Core: Correct compilition in GCC	2020-02-14 05:53:30 -04:00
Morph	20dc2e3622	Address feedback	2020-02-14 00:06:26 -05:00
Morph	22f58cca5e	Use enumeration instead of magic numbers	2020-02-13 23:13:23 -05:00
Morph	27e19f87c6	Add following aspect ratios: 16:9, 21:9, Stretch to Window Available as a drop down within the configure graphics tab.	2020-02-13 22:17:28 -05:00
bunnei	74feed372c	Merge pull request #3400 from makigumo/patch-1 update hwopus DecodeInterleaved for FW 7.0.0+	2020-02-13 21:26:13 -05:00
ReinUsesLisp	3217400dd1	gl_resource_manager: Add managed query class	2020-02-13 22:25:55 -03:00
bunnei	3563af2364	Merge pull request #3395 from FernandoS27/queries GPU: Refactor queries implementation and correct GPU Clock.	2020-02-13 20:18:26 -05:00
ReinUsesLisp	336a4f8e99	gl_rasterizer: Use the least generic OpenGL draw function possible This may help some implementations.	2020-02-13 21:55:21 -03:00
ReinUsesLisp	cbea8c74de	vk_shader_decompiler: Fix vertex id and instance id Vulkan's VertexIndex and InstanceIndex don't match with hardware. This is because Nvidia implements gl_VertexID and gl_InstanceID. The math that relates these is: gl_VertexIndex = gl_BaseVertex + gl_VertexID gl_InstanceIndex = gl_InstanceIndex + gl_InstanceID To emulate it using what Vulkan's SPIR-V offers (the Index variants) this commit substracts gl_Base from gl_*Index to obtain the OpenGL and hardware's equivalent.	2020-02-13 20:25:28 -03:00
Fernando Sahmkow	2bc949628d	Core: Address Feedback	2020-02-13 19:10:33 -04:00
Fernando Sahmkow	d6ed31b9fa	GPU: Address Feedback.	2020-02-13 18:16:07 -04:00
bunnei	8b9a56033a	Merge pull request #3405 from lioncash/thread address_arbiter: Minor cleanup to list querying	2020-02-12 21:46:00 -05:00
Brian Clinkenbeard	0d85b6bfe1	Merge branch 'master' into fix-cmake-sdl2	2020-02-12 16:07:07 -08:00
Lioncash	be269e21a5	address_arbiter: Collapse loops in InsertThread() and RemoveThread() Same behavior, but without the need to explicitly loop through everything manually.	2020-02-12 15:34:07 -05:00
Lioncash	9f2c703137	address_arbiter: Simplify GetThreadsWaitingOnAddress() Simplifies the overall function and also allows for it to become a const-qualified member function.	2020-02-12 15:10:16 -05:00
bunnei	8f8dda2d5b	Merge pull request #3403 from lioncash/debug bcat/backend: Prevent fmt exception in debug log within NullBackend::Clear()	2020-02-12 11:17:43 -05:00
bunnei	2506f7b3a1	Merge pull request #3402 from lioncash/sys-global kernel/thread: Remove trivial usages of the global system accessor	2020-02-12 10:10:00 -05:00
Lioncash	f00a54f508	bcat/backend: Make formatting of passphrase consistent in NullBackend::SetPassphrase() Aligns the '=' to be consistent with the rest of the logs within this source file.	2020-02-12 01:18:29 -05:00
Lioncash	eefd97e80d	bcat/backend: Prevent fmt exception in debug log within NullBackend::Clear() A formatting specifier within Clear wasn't being used, which will cause fmt to throw an exception. This fixes that.	2020-02-12 01:14:47 -05:00
Lioncash	b80c348b09	kernel/thread: Remove trivial usages of the global system accessor We can just use the kernel member variable directly instead of going through the system to obtain the same thing.	2020-02-12 01:00:41 -05:00
Fernando Sahmkow	1e6f8aba04	Core: Set all hardware emulation constants in a single file.	2020-02-11 20:19:11 -04:00
Fernando Sahmkow	d23d504d77	Kernel: Refactor synchronization to better match RE	2020-02-11 18:47:31 -04:00
makigumo	926ea5a16d	update hwopus DecodeInterleaved for FW 7.0.0+ trivial change, see https://switchbrew.org/wiki/Audio_services#IHardwareOpusDecoder	2020-02-11 18:41:04 +01:00
Fernando Sahmkow	c5aefe42aa	Kernel: Change WaitObject to Synchronization object. In order to better reflect RE.	2020-02-11 10:46:25 -04:00
bunnei	37f1cf8cbd	Merge pull request #3376 from ReinUsesLisp/point-sprite gl_rasterizer: Implement GL_POINT_SPRITE	2020-02-11 08:26:07 -05:00
Brian Clinkenbeard	68043dd233	use config mode for finding SDL2 with CMake	2020-02-10 19:56:33 -08:00
Fernando Sahmkow	8e9a4944db	GPU: Implement GPU Clock correctly.	2020-02-10 10:44:54 -04:00
Fernando Sahmkow	0cb3bcfbb7	Maxwell3D: Correct query reporting.	2020-02-10 10:41:43 -04:00
bunnei	84ea9c2b42	Merge pull request #3372 from ReinUsesLisp/fix-back-stencil maxwell_3d: Fix stencil back mask	2020-02-09 22:29:28 -05:00
Zach Hilman	21c3f48279	Merge pull request #3391 from Morph1984/remove-unknown Remove option "Show files with type 'Unknown'"	2020-02-09 12:08:01 -05:00
Morph	fcf3425b1b	Remove option "Show files with type 'Unknown'"	2020-02-09 11:30:02 -05:00
bunnei	a952fbc5b3	Merge pull request #3388 from bunnei/service-shared-ptr hle: services: Use std::shared_ptr instead of copy by value. - This is a prerequisite to adding a mutex to `ServiceFramework`, which cannot be copied. - This will be used for threaded services.	2020-02-08 21:35:30 -05:00
bunnei	e210835dd0	Merge pull request #3387 from bunnei/gpu-mpscqueue gpu_thread: Use MPSCQueue for GPU commands.	2020-02-08 21:15:48 -05:00
bunnei	6536cc9741	Merge pull request #3386 from bunnei/gpu-mem-interface video_core: memory_manager: Use GPU interface for cache functions.	2020-02-08 21:15:27 -05:00
bunnei	7b07e521ca	hle: services: Use std::shared_ptr instead of copy by value.	2020-02-07 23:02:26 -05:00
bunnei	b5c13ee0eb	gpu_thread: Use MPSCQueue for GPU commands. - Necessary for multiple service threads.	2020-02-07 23:01:23 -05:00
bunnei	7cacb08cdf	video_core: memory_manager: Use GPU interface for cache functions.	2020-02-07 22:59:35 -05:00
bunnei	90bda66028	Merge pull request #3378 from ReinUsesLisp/uscaled maxwell_to_gl: Implement R8G8_USCALED	2020-02-07 22:55:52 -05:00
ReinUsesLisp	bf9a822b87	shader/decode: Fix constant buffer offsets Some instances were using cbuf34.offset instead of cbuf34.GetOffset(). This returned the an invalid offset. Address those instances and rename offset to "shifted_offset" to avoid future bugs.	2020-02-05 12:19:09 -03:00
ReinUsesLisp	8bb9eef97b	maxwell_to_gl: Implement R8G8_USCALED	2020-02-04 21:32:36 -03:00
ReinUsesLisp	c81c361e82	maxwell_to_gl: Reduce unimplemented formats to LOG_ERROR	2020-02-04 21:32:08 -03:00
ReinUsesLisp	7da52673d0	gl_rasterizer: Implement GL_POINT_SPRITE OpenGL core defaults to GL_POINT_SPRITE, meanwhile on OpenGL compatibility we have to explicitly enable it. This fixes gl_PointCoord's behaviour.	2020-02-04 15:19:45 -03:00
ReinUsesLisp	4eed744277	maxwell_3d: Fix stencil back mask	2020-02-02 17:50:46 -03:00