General: Fix clang build

Allows building on clang to work again
Merge pull request #4889 from lioncash/setting-global
2020-11-05 10:07:16 -05:00 · 2020-11-04 17:09:19 -08:00 · 2020-11-04 12:10:10 -08:00 · 2020-11-04 04:16:37 -05:00 · 2020-11-04 18:36:55 +11:00 · 2020-11-03 16:34:07 -08:00
124 changed files with 5688 additions and 1536 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "inih"]
    path = externals/inih/inih
-    url = https://github.com/svn2github/inih
+    url = https://github.com/benhoyt/inih.git
 [submodule "cubeb"]
    path = externals/cubeb
    url = https://github.com/kinetiknz/cubeb.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,7 +161,7 @@ macro(yuzu_find_packages)
    #    Cmake Pkg Prefix  Version     Conan Pkg
        "Boost             1.73        boost/1.73.0"
        "Catch2            2.13        catch2/2.13.0"
-        "fmt               7.0         fmt/7.0.3"
+        "fmt               7.1         fmt/7.1.0"
    # can't use until https://github.com/bincrafters/community/issues/1173
        #"libzip            1.5         libzip/1.5.2@bincrafters/stable"
        "lz4               1.8         lz4/1.9.2"
@@ -263,6 +263,7 @@ if (CONAN_REQUIRED_LIBS)
        libzip:with_openssl=False
        libzip:enable_windows_crypto=False
    )
+
    conan_check(VERSION 1.24.0 REQUIRED)
    # Add the bincrafters remote
    conan_add_remote(NAME bincrafters
@@ -354,6 +355,19 @@ if (NOT LIBUSB_FOUND)
    set(LIBUSB_LIBRARIES usb)
 endif()

+# Use system installed ffmpeg.
+if (NOT MSVC)
+    find_package(FFmpeg REQUIRED)
+else()
+    set(FFMPEG_EXT_NAME "ffmpeg-4.2.1")
+    set(FFMPEG_PATH "${CMAKE_BINARY_DIR}/externals/${FFMPEG_EXT_NAME}")
+    download_bundled_external("ffmpeg/" ${FFMPEG_EXT_NAME} "")
+    set(FFMPEG_FOUND YES)
+    set(FFMPEG_INCLUDE_DIR "${FFMPEG_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
+    set(FFMPEG_LIBRARY_DIR "${FFMPEG_PATH}/bin" CACHE PATH "Path to FFmpeg library" FORCE)
+    set(FFMPEG_DLL_DIR "${FFMPEG_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
+endif()
+
 # Prefer the -pthread flag on Linux.
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
--- a/CMakeModules/CopyYuzuFFmpegDeps.cmake
+++ b/CMakeModules/CopyYuzuFFmpegDeps.cmake
@@ -0,0 +1,10 @@
+function(copy_yuzu_FFmpeg_deps target_dir) # Hands the FFmpeg DLL list to windows_copy_files() for the given target_dir
+    include(WindowsCopyFiles) # presumably defines windows_copy_files() - confirm
+    set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/") # per-configuration destination below the build tree
+    windows_copy_files(${target_dir} "${FFMPEG_DLL_DIR}" "${DLL_DEST}" # quoted so an empty/odd path cannot shift the positional arguments
+        avcodec-58.dll
+        avutil-56.dll
+        swresample-3.dll
+        swscale-5.dll
+    )
+endfunction()
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -73,17 +73,20 @@ if (NOT LIBZIP_FOUND)
 endif()

 if (ENABLE_WEB_SERVICE)
-    # LibreSSL
-    set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
-    add_subdirectory(libressl EXCLUDE_FROM_ALL)
-    target_include_directories(ssl INTERFACE ./libressl/include)
-    target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
-    get_directory_property(OPENSSL_LIBRARIES
-        DIRECTORY libressl
-        DEFINITION OPENSSL_LIBS)
-
-    # lurlparser
-    add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
+    find_package(OpenSSL 1.1)
+    if (OPENSSL_FOUND)
+        set(OPENSSL_LIBRARIES OpenSSL::SSL OpenSSL::Crypto)
+    else()
+        # LibreSSL
+        set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
+        set(OPENSSLDIR "/etc/ssl/")
+        add_subdirectory(libressl EXCLUDE_FROM_ALL)
+        target_include_directories(ssl INTERFACE ./libressl/include)
+        target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
+        get_directory_property(OPENSSL_LIBRARIES
+            DIRECTORY libressl
+            DEFINITION OPENSSL_LIBS)
+    endif()

    # httplib
    add_library(httplib INTERFACE)
--- a/externals/find-modules/FindFFmpeg.cmake
+++ b/externals/find-modules/FindFFmpeg.cmake
@@ -0,0 +1,88 @@
+# - Try to find the FFmpeg libraries used by yuzu (libavcodec, libavutil and libswscale)
+# Once done this will define
+#
+# FFMPEG_FOUND - system has ffmpeg or libav
+# FFMPEG_INCLUDE_DIR - the ffmpeg include directory
+# FFMPEG_LIBRARIES - Link these to use ffmpeg
+# FFMPEG_LIBAVCODEC
+# FFMPEG_LIBAVUTIL
+# FFMPEG_LIBSWSCALE
+#
+# Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
+# Modified for other libraries by Lasse Kärkkäinen <tronic>
+# Modified for Hedgewars by Stepik777
+# Modified for FFmpeg-example Tuukka Pasanen 2018
+# Modified for yuzu toastUnlimted 2020
+#
+# Redistribution and use is allowed according to the terms of the New
+# BSD license.
+#
+
+include(FindPackageHandleStandardArgs)
+
+# Skip detection when both results were already provided (e.g. via -D on the command line).
+if(NOT (FFMPEG_LIBRARIES AND FFMPEG_INCLUDE_DIR))
+  # use pkg-config to get the directories and then use these values
+  # in the find_path() and find_library() calls
+  find_package(PkgConfig)
+  if(PKG_CONFIG_FOUND)
+    pkg_check_modules(_FFMPEG_AVCODEC libavcodec)
+    pkg_check_modules(_FFMPEG_AVUTIL libavutil)
+    pkg_check_modules(_FFMPEG_SWSCALE libswscale)
+  endif()
+
+  find_path(FFMPEG_AVCODEC_INCLUDE_DIR
+    NAMES libavcodec/avcodec.h
+    PATHS ${_FFMPEG_AVCODEC_INCLUDE_DIRS}
+      /usr/include
+      /usr/local/include
+      /opt/local/include
+      /sw/include
+    PATH_SUFFIXES ffmpeg libav)
+
+  find_library(FFMPEG_LIBAVCODEC
+    NAMES avcodec
+    PATHS ${_FFMPEG_AVCODEC_LIBRARY_DIRS}
+      /usr/lib
+      /usr/local/lib
+      /opt/local/lib
+      /sw/lib)
+
+  find_library(FFMPEG_LIBAVUTIL
+    NAMES avutil
+    PATHS ${_FFMPEG_AVUTIL_LIBRARY_DIRS}
+      /usr/lib
+      /usr/local/lib
+      /opt/local/lib
+      /sw/lib)
+
+  find_library(FFMPEG_LIBSWSCALE
+    NAMES swscale
+    PATHS ${_FFMPEG_SWSCALE_LIBRARY_DIRS}
+      /usr/lib
+      /usr/local/lib
+      /opt/local/lib
+      /sw/lib)
+
+  # Publish a result only when its inputs were actually located, so that
+  # find_package_handle_standard_args() below can report what is missing.
+  if(FFMPEG_AVCODEC_INCLUDE_DIR)
+    set(FFMPEG_INCLUDE_DIR ${FFMPEG_AVCODEC_INCLUDE_DIR})
+  endif()
+  if(FFMPEG_LIBAVCODEC AND FFMPEG_LIBAVUTIL AND FFMPEG_LIBSWSCALE)
+    set(FFMPEG_LIBRARIES
+      ${FFMPEG_LIBAVCODEC}
+      ${FFMPEG_LIBAVUTIL}
+      ${FFMPEG_LIBSWSCALE})
+  endif()
+endif()
+
+# Must run after detection: it sets FFMPEG_FOUND from the variables above and
+# honours the REQUIRED/QUIET options of find_package(FFmpeg ...). The first
+# argument has to match the package name for those options to be picked up.
+find_package_handle_standard_args(FFmpeg
+  FOUND_VAR FFMPEG_FOUND
+  REQUIRED_VARS
+      FFMPEG_LIBRARIES
+      FFMPEG_INCLUDE_DIR
+)
--- a/externals/httplib/README.md
+++ b/externals/httplib/README.md
@@ -1,4 +1,4 @@
-From https://github.com/yhirose/cpp-httplib/tree/fce8e6fefdab4ad48bc5b25c98e5ebfda4f3cf53
+From https://github.com/yhirose/cpp-httplib/tree/ff5677ad197947177c158fe857caff4f0e242045 with https://github.com/yhirose/cpp-httplib/pull/701

 MIT License

--- a/externals/httplib/httplib.h
+++ b/externals/httplib/httplib.h
--- a/externals/inih/inih
+++ b/externals/inih/inih
--- a/externals/libressl
+++ b/externals/libressl
--- a/externals/lurlparser/CMakeLists.txt
+++ b/externals/lurlparser/CMakeLists.txt
@@ -1,8 +0,0 @@
-add_library(lurlparser
-        LUrlParser.cpp
-        LUrlParser.h
-)
-
-create_target_directory_groups(lurlparser)
-
-target_include_directories(lurlparser INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
--- a/externals/lurlparser/LUrlParser.cpp
+++ b/externals/lurlparser/LUrlParser.cpp
@@ -1,265 +0,0 @@
-/*
- * Lightweight URL & URI parser (RFC 1738, RFC 3986)
- * https://github.com/corporateshark/LUrlParser
- *
- * The MIT License (MIT)
- *
- * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "LUrlParser.h"
-
-#include <algorithm>
-#include <cstring>
-#include <stdlib.h>
-
-// check if the scheme name is valid
-static bool IsSchemeValid( const std::string& SchemeName )
-{
-    for ( auto c : SchemeName  )
-    {
-        if ( !isalpha( c ) && c != '+' && c != '-' && c != '.' ) return false;
-    }
-
-    return true;
-}
-
-bool LUrlParser::clParseURL::GetPort( int* OutPort ) const
-{
-    if ( !IsValid() ) { return false; }
-
-    int Port = atoi( m_Port.c_str() );
-
-    if ( Port <= 0 || Port > 65535 ) { return false; }
-
-    if ( OutPort ) { *OutPort = Port; }
-
-    return true;
-}
-
-// based on RFC 1738 and RFC 3986
-LUrlParser::clParseURL LUrlParser::clParseURL::ParseURL( const std::string& URL )
-{
-    LUrlParser::clParseURL Result;
-
-    const char* CurrentString = URL.c_str();
-
-    /*
-     *	<scheme>:<scheme-specific-part>
-     *	<scheme> := [a-z\+\-\.]+
-     *	For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names
-     */
-
-    // try to read scheme
-    {
-        const char* LocalString = strchr( CurrentString, ':' );
-
-        if ( !LocalString )
-        {
-            return clParseURL( LUrlParserError_NoUrlCharacter );
-        }
-
-        // save the scheme name
-        Result.m_Scheme = std::string( CurrentString, LocalString - CurrentString );
-
-        if ( !IsSchemeValid( Result.m_Scheme ) )
-        {
-            return clParseURL( LUrlParserError_InvalidSchemeName );
-        }
-
-        // scheme should be lowercase
-        std::transform( Result.m_Scheme.begin(), Result.m_Scheme.end(), Result.m_Scheme.begin(), ::tolower );
-
-        // skip ':'
-        CurrentString = LocalString+1;
-    }
-
-    /*
-     *	//<user>:<password>@<host>:<port>/<url-path>
-     *	any ":", "@" and "/" must be normalized
-     */
-
-    // skip "//"
-    if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
-    if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
-
-    // check if the user name and password are specified
-    bool bHasUserName = false;
-
-    const char* LocalString = CurrentString;
-
-    while ( *LocalString )
-    {
-        if ( *LocalString == '@' )
-        {
-            // user name and password are specified
-            bHasUserName = true;
-            break;
-        }
-        else if ( *LocalString == '/' )
-        {
-            // end of <host>:<port> specification
-            bHasUserName = false;
-            break;
-        }
-
-        LocalString++;
-    }
-
-    // user name and password
-    LocalString = CurrentString;
-
-    if ( bHasUserName )
-    {
-        // read user name
-        while ( *LocalString && *LocalString != ':' && *LocalString != '@' ) LocalString++;
-
-        Result.m_UserName = std::string( CurrentString, LocalString - CurrentString );
-
-        // proceed with the current pointer
-        CurrentString = LocalString;
-
-        if ( *CurrentString == ':' )
-        {
-            // skip ':'
-            CurrentString++;
-
-            // read password
-            LocalString = CurrentString;
-
-            while ( *LocalString && *LocalString != '@' ) LocalString++;
-
-            Result.m_Password = std::string( CurrentString, LocalString - CurrentString );
-
-            CurrentString = LocalString;
-        }
-
-        // skip '@'
-        if ( *CurrentString != '@' )
-        {
-            return clParseURL( LUrlParserError_NoAtSign );
-        }
-
-        CurrentString++;
-    }
-
-    bool bHasBracket = ( *CurrentString == '[' );
-
-    // go ahead, read the host name
-    LocalString = CurrentString;
-
-    while ( *LocalString )
-    {
-        if ( bHasBracket && *LocalString == ']' )
-        {
-            // end of IPv6 address
-            LocalString++;
-            break;
-        }
-        else if ( !bHasBracket && ( *LocalString == ':' || *LocalString == '/' ) )
-        {
-            // port number is specified
-            break;
-        }
-
-        LocalString++;
-    }
-
-    Result.m_Host = std::string( CurrentString, LocalString - CurrentString );
-
-    CurrentString = LocalString;
-
-    // is port number specified?
-    if ( *CurrentString == ':' )
-    {
-        CurrentString++;
-
-        // read port number
-        LocalString = CurrentString;
-
-        while ( *LocalString && *LocalString != '/' ) LocalString++;
-
-        Result.m_Port = std::string( CurrentString, LocalString - CurrentString );
-
-        CurrentString = LocalString;
-    }
-
-    // end of string
-    if ( !*CurrentString )
-    {
-        Result.m_ErrorCode = LUrlParserError_Ok;
-
-        return Result;
-    }
-
-    // skip '/'
-    if ( *CurrentString != '/' )
-    {
-        return clParseURL( LUrlParserError_NoSlash );
-    }
-
-    CurrentString++;
-
-    // parse the path
-    LocalString = CurrentString;
-
-    while ( *LocalString && *LocalString != '#' && *LocalString != '?' ) LocalString++;
-
-    Result.m_Path = std::string( CurrentString, LocalString - CurrentString );
-
-    CurrentString = LocalString;
-
-    // check for query
-    if ( *CurrentString == '?' )
-    {
-        // skip '?'
-        CurrentString++;
-
-        // read query
-        LocalString = CurrentString;
-
-        while ( *LocalString && *LocalString != '#' ) LocalString++;
-
-        Result.m_Query = std::string( CurrentString, LocalString - CurrentString );
-
-        CurrentString = LocalString;
-    }
-
-    // check for fragment
-    if ( *CurrentString == '#' )
-    {
-        // skip '#'
-        CurrentString++;
-
-        // read fragment
-        LocalString = CurrentString;
-
-        while ( *LocalString ) LocalString++;
-
-        Result.m_Fragment = std::string( CurrentString, LocalString - CurrentString );
-
-        CurrentString = LocalString;
-    }
-
-    Result.m_ErrorCode = LUrlParserError_Ok;
-
-    return Result;
-}
--- a/externals/lurlparser/LUrlParser.h
+++ b/externals/lurlparser/LUrlParser.h
@@ -1,78 +0,0 @@
-/*
- * Lightweight URL & URI parser (RFC 1738, RFC 3986)
- * https://github.com/corporateshark/LUrlParser
- *
- * The MIT License (MIT)
- *
- * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include <string>
-
-namespace LUrlParser
-{
-enum LUrlParserError
-{
-    LUrlParserError_Ok = 0,
-    LUrlParserError_Uninitialized = 1,
-    LUrlParserError_NoUrlCharacter = 2,
-    LUrlParserError_InvalidSchemeName = 3,
-    LUrlParserError_NoDoubleSlash = 4,
-    LUrlParserError_NoAtSign = 5,
-    LUrlParserError_UnexpectedEndOfLine = 6,
-    LUrlParserError_NoSlash = 7,
-};
-
-class clParseURL
-{
-public:
-    LUrlParserError m_ErrorCode;
-    std::string m_Scheme;
-    std::string m_Host;
-    std::string m_Port;
-    std::string m_Path;
-    std::string m_Query;
-    std::string m_Fragment;
-    std::string m_UserName;
-    std::string m_Password;
-
-    clParseURL()
-            : m_ErrorCode( LUrlParserError_Uninitialized )
-    {}
-
-    /// return 'true' if the parsing was successful
-    bool IsValid() const { return m_ErrorCode == LUrlParserError_Ok; }
-
-    /// helper to convert the port number to int, return 'true' if the port is valid (within the 0..65535 range)
-    bool GetPort( int* OutPort ) const;
-
-    /// parse the URL
-    static clParseURL ParseURL( const std::string& URL );
-
-private:
-    explicit clParseURL( LUrlParserError ErrorCode )
-            : m_ErrorCode( ErrorCode )
-    {}
-};
-
-} // namespace LUrlParser
--- a/externals/lurlparser/README.md
+++ b/externals/lurlparser/README.md
@@ -1,19 +0,0 @@
-From https://github.com/corporateshark/LUrlParser/commit/455d5e2d27e3946f11ad0328fee9ee2628e6a8e2
-
-MIT License
-
-===
-
-Lightweight URL & URI parser (RFC 1738, RFC 3986)
-
-(C) Sergey Kosarevsky, 2015
-
-@corporateshark sk@linderdaum.com
-
-http://www.linderdaum.com
-
-http://blog.linderdaum.com
-
-=============================
-
-A tiny and lightweight URL & URI parser (RFC 1738, RFC 3986) written in C++.
--- a/externals/microprofile/microprofile.h
+++ b/externals/microprofile/microprofile.h
@@ -902,8 +902,10 @@ inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
 #include <windows.h>
 #define snprintf _snprintf

+#ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable: 4244)
+#endif
 int64_t MicroProfileTicksPerSecondCpu()
 {
    static int64_t nTicksPerSecond = 0;
@@ -946,7 +948,11 @@ typedef HANDLE MicroProfileThread;
 DWORD _stdcall ThreadTrampoline(void* pFunc)
 {
    MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
-    return (uint32_t)F(0);
+
+    // The return value of F will always return a void*, however, this is for
+    // compatibility with pthreads. The underlying "address" of the pointer
+    // is always a 32-bit value, so this cast is safe to perform.
+    return static_cast<DWORD>(reinterpret_cast<uint64_t>(F(0)));
 }

 inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
@@ -1742,10 +1748,10 @@ void MicroProfileFlip()
                            }
                        }
                    }
-                    for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+                    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
                    {
-                        pLog->nGroupTicks[i] += nGroupTicks[i];
-                        pFrameGroup[i] += nGroupTicks[i];
+                        pLog->nGroupTicks[j] += nGroupTicks[j];
+                        pFrameGroup[j] += nGroupTicks[j];
                    }
                    pLog->nStackPos = nStackPos;
                }
@@ -3328,7 +3334,7 @@ bool MicroProfileIsLocalThread(uint32_t nThreadId)
 #endif
 #else

-bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;}
+bool MicroProfileIsLocalThread([[maybe_unused]] uint32_t nThreadId) { return false; }
 void MicroProfileStopContextSwitchTrace(){}
 void MicroProfileStartContextSwitchTrace(){}

@@ -3576,7 +3582,7 @@ int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu)

 #undef S

-#ifdef _WIN32
+#ifdef _MSC_VER
 #pragma warning(pop)
 #endif

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -32,7 +32,6 @@ if (MSVC)
    # /Zc:inline          - Let codegen omit inline functions in object files
    # /Zc:throwingNew     - Let codegen assume `operator new` (without std::nothrow) will never return null
    add_compile_options(
-        /W3
        /MP
        /Zi
        /Zo
@@ -43,6 +42,13 @@ if (MSVC)
        /Zc:externConstexpr
        /Zc:inline
        /Zc:throwingNew
+
+        # Warnings
+        /W3
+        /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
+        /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
+        /we4555 # Expression has no effect; expected expression with side-effect
+        /we4834 # Discarding return value of function with 'nodiscard' attribute
    )

    # /GS- - No stack buffer overflow checks
@@ -56,6 +62,7 @@ else()
        -Werror=implicit-fallthrough
        -Werror=missing-declarations
        -Werror=reorder
+        -Werror=unused-result
        -Wextra
        -Wmissing-declarations
        -Wno-attributes
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -150,6 +150,8 @@ add_library(common STATIC
    scope_exit.h
    spin_lock.cpp
    spin_lock.h
+    stream.cpp
+    stream.h
    string_util.cpp
    string_util.h
    swap.h
@@ -188,6 +190,22 @@ if(ARCHITECTURE_x86_64)
    )
 endif()

+if (MSVC)
+  target_compile_definitions(common PRIVATE
+    # The standard library doesn't provide any replacement for codecvt yet
+    # so we can disable this deprecation warning for the time being.
+    _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+  )
+  target_compile_options(common PRIVATE
+    /W4
+    /WX
+  )
+else()
+  target_compile_options(common PRIVATE
+    -Werror
+  )
+endif()
+
 create_target_directory_groups(common)
 find_package(Boost 1.71 COMPONENTS context headers REQUIRED)

--- a/src/common/fiber.cpp
+++ b/src/common/fiber.cpp
@@ -79,9 +79,9 @@ void Fiber::Exit() {
    released = true;
 }

-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
    rewind_point = std::move(rewind_func);
-    rewind_parameter = start_parameter;
+    rewind_parameter = rewind_param;
 }

 void Fiber::Rewind() {
@@ -91,7 +91,7 @@ void Fiber::Rewind() {
    SwitchToFiber(impl->rewind_handle);
 }

-void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
    ASSERT_MSG(to != nullptr, "Next fiber is null!");
    to->guard.lock();
@@ -161,9 +161,9 @@ Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_paramete
        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
 }

-void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param) {
    rewind_point = std::move(rewind_func);
-    rewind_parameter = start_parameter;
+    rewind_parameter = rewind_param;
 }

 Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
@@ -199,7 +199,7 @@ void Fiber::Rewind() {
    boost::context::detail::jump_fcontext(impl->rewind_context, this);
 }

-void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+void Fiber::YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to) {
    ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
    ASSERT_MSG(to != nullptr, "Next fiber is null!");
    to->guard.lock();
--- a/src/common/fiber.h
+++ b/src/common/fiber.h
@@ -41,15 +41,15 @@ public:
    Fiber(const Fiber&) = delete;
    Fiber& operator=(const Fiber&) = delete;

-    Fiber(Fiber&&) = default;
-    Fiber& operator=(Fiber&&) = default;
+    Fiber(Fiber&&) = delete;
+    Fiber& operator=(Fiber&&) = delete;

    /// Yields control from Fiber 'from' to Fiber 'to'
    /// Fiber 'from' must be the currently running fiber.
-    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
+    static void YieldTo(std::shared_ptr<Fiber> from, std::shared_ptr<Fiber> to);
    [[nodiscard]] static std::shared_ptr<Fiber> ThreadToFiber();

-    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* rewind_param);

    void Rewind();

--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -472,13 +472,14 @@ u64 ScanDirectoryTree(const std::string& directory, FSTEntry& parent_entry,
 }

 bool DeleteDirRecursively(const std::string& directory, unsigned int recursion) {
-    const auto callback = [recursion](u64* num_entries_out, const std::string& directory,
-                                      const std::string& virtual_name) -> bool {
-        std::string new_path = directory + DIR_SEP_CHR + virtual_name;
+    const auto callback = [recursion](u64*, const std::string& directory,
+                                      const std::string& virtual_name) {
+        const std::string new_path = directory + DIR_SEP_CHR + virtual_name;

        if (IsDirectory(new_path)) {
-            if (recursion == 0)
+            if (recursion == 0) {
                return false;
+            }
            return DeleteDirRecursively(new_path, recursion - 1);
        }
        return Delete(new_path);
@@ -492,7 +493,8 @@ bool DeleteDirRecursively(const std::string& directory, unsigned int recursion)
    return true;
 }

-void CopyDir(const std::string& source_path, const std::string& dest_path) {
+void CopyDir([[maybe_unused]] const std::string& source_path,
+             [[maybe_unused]] const std::string& dest_path) {
 #ifndef _WIN32
    if (source_path == dest_path) {
        return;
@@ -553,7 +555,7 @@ std::optional<std::string> GetCurrentDir() {
    std::string strDir = dir;
 #endif
    free(dir);
-    return std::move(strDir);
+    return strDir;
 }

 bool SetCurrentDir(const std::string& directory) {
@@ -772,21 +774,23 @@ std::size_t ReadFileToString(bool text_file, const std::string& filename, std::s

 void SplitFilename83(const std::string& filename, std::array<char, 9>& short_name,
                     std::array<char, 4>& extension) {
-    const std::string forbidden_characters = ".\"/\\[]:;=, ";
+    static constexpr std::string_view forbidden_characters = ".\"/\\[]:;=, ";

    // On a FAT32 partition, 8.3 names are stored as a 11 bytes array, filled with spaces.
    short_name = {{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'}};
    extension = {{' ', ' ', ' ', '\0'}};

-    std::string::size_type point = filename.rfind('.');
-    if (point == filename.size() - 1)
+    auto point = filename.rfind('.');
+    if (point == filename.size() - 1) {
        point = filename.rfind('.', point);
+    }

    // Get short name.
    int j = 0;
    for (char letter : filename.substr(0, point)) {
-        if (forbidden_characters.find(letter, 0) != std::string::npos)
+        if (forbidden_characters.find(letter, 0) != std::string::npos) {
            continue;
+        }
        if (j == 8) {
            // TODO(Link Mauve): also do that for filenames containing a space.
            // TODO(Link Mauve): handle multiple files having the same short name.
@@ -794,14 +798,15 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
            short_name[7] = '1';
            break;
        }
-        short_name[j++] = toupper(letter);
+        short_name[j++] = static_cast<char>(std::toupper(letter));
    }

    // Get extension.
    if (point != std::string::npos) {
        j = 0;
-        for (char letter : filename.substr(point + 1, 3))
-            extension[j++] = toupper(letter);
+        for (char letter : filename.substr(point + 1, 3)) {
+            extension[j++] = static_cast<char>(std::toupper(letter));
+        }
    }
 }

--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -232,7 +232,7 @@ public:

    void Swap(IOFile& other) noexcept;

-    [[nodiscard]] bool Open(const std::string& filename, const char openmode[], int flags = 0);
+    bool Open(const std::string& filename, const char openmode[], int flags = 0);
    bool Close();

    template <typename T>
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -274,7 +274,6 @@ const char* GetLogClassName(Class log_class) {
    case Class::Count:
        break;
    }
-    UNREACHABLE();
    return "Invalid";
 }

@@ -293,7 +292,6 @@ const char* GetLevelName(Level log_level) {
        break;
    }
 #undef LVL
-    UNREACHABLE();
    return "Invalid";
 }

--- a/src/common/misc.cpp
+++ b/src/common/misc.cpp
@@ -16,16 +16,23 @@
 // Call directly after the command or use the error num.
 // This function might change the error code.
 std::string GetLastErrorMsg() {
-    static const std::size_t buff_size = 255;
+    static constexpr std::size_t buff_size = 255;
    char err_str[buff_size];

 #ifdef _WIN32
    FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
                   MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), err_str, buff_size, nullptr);
+    return std::string(err_str, buff_size);
+#elif defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600))
+    // Thread safe (GNU-specific)
+    const char* str = strerror_r(errno, err_str, buff_size);
+    return std::string(str);
 #else
    // Thread safe (XSI-compliant)
-    strerror_r(errno, err_str, buff_size);
+    const int success = strerror_r(errno, err_str, buff_size);
+    if (success != 0) {
+        return {};
+    }
+    return std::string(err_str);
 #endif
-
-    return std::string(err_str, buff_size);
 }
--- a/src/common/spin_lock.h
+++ b/src/common/spin_lock.h
@@ -15,6 +15,14 @@ namespace Common {
 */
 class SpinLock {
 public:
+    SpinLock() = default;
+
+    SpinLock(const SpinLock&) = delete;
+    SpinLock& operator=(const SpinLock&) = delete;
+
+    SpinLock(SpinLock&&) = delete;
+    SpinLock& operator=(SpinLock&&) = delete;
+
    void lock();
    void unlock();
    [[nodiscard]] bool try_lock();
--- a/src/common/stream.cpp
+++ b/src/common/stream.cpp
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <stdexcept>
+#include "common/common_types.h"
+#include "common/stream.h"
+
+namespace Common {
+
+Stream::Stream() = default;
+Stream::~Stream() = default;
+
+void Stream::Seek(s32 offset, SeekOrigin origin) { // Moves the cursor; the result is clamped to [0, buffer.size()].
+    if (origin == SeekOrigin::SetOrigin) {
+        if (offset < 0) {
+            position = 0; // Seeking before the start lands on the first byte.
+        } else if (static_cast<std::size_t>(offset) >= buffer.size()) { // Compare the requested offset, not the current cursor.
+            position = buffer.size(); // Seeking past the end lands one past the last byte.
+        } else {
+            position = static_cast<std::size_t>(offset); // offset is known to be in [0, size) here.
+        }
+    } else if (origin == SeekOrigin::FromCurrentPos) {
+        Seek(static_cast<s32>(position) + offset, SeekOrigin::SetOrigin); // Relative to the cursor; clamped by the branch above.
+    } else if (origin == SeekOrigin::FromEnd) {
+        Seek(static_cast<s32>(buffer.size()) - offset, SeekOrigin::SetOrigin); // offset is subtracted from the buffer size.
+    }
+}
+
+u8 Stream::ReadByte() {
+    if (position >= buffer.size()) {
+        throw std::out_of_range("Attempting to read a byte not within the buffer range");
+    }
+    // The cursor is inside the buffer: hand back the byte under it, then step forward.
+    return buffer[position++];
+}
+
+void Stream::WriteByte(u8 byte) { // Stores `byte` at the cursor; behaviour differs per branch below.
+    if (position == buffer.size()) {
+        buffer.push_back(byte); // Cursor is at the end: grow the buffer by one byte...
+        position++; // ...and keep the cursor at the (new) end.
+    } else {
+        buffer.insert(buffer.begin() + position, byte); // NOTE(review): inserts (shifting later bytes) and leaves position unchanged - confirm this is intended rather than overwrite-and-advance.
+    }
+}
+
+} // namespace Common
--- a/src/common/stream.h
+++ b/src/common/stream.h
@@ -0,0 +1,56 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+
+namespace Common {
+
+enum class SeekOrigin {
+    SetOrigin,      // offset is an absolute position from the start of the buffer
+    FromCurrentPos, // offset is added to the current position
+    FromEnd,        // offset is subtracted from the buffer size
+};
+
+class Stream {
+public:
+    /// Creates an empty stream: nothing buffered yet and the cursor at position 0.
+    explicit Stream();
+    ~Stream();
+
+    Stream(const Stream&) = delete;
+    Stream& operator=(const Stream&) = delete;
+
+    Stream(Stream&&) = default;
+    Stream& operator=(Stream&&) = default;
+
+    /// Reposition bitstream "cursor" to the specified offset from origin
+    void Seek(s32 offset, SeekOrigin origin);
+
+    /// Reads the byte at the cursor and advances past it; throws std::out_of_range at the end
+    u8 ReadByte();
+
+    /// Appends and advances the cursor at the end; elsewhere inserts and leaves the cursor as is
+    void WriteByte(u8 byte);
+
+    [[nodiscard]] std::size_t GetPosition() const {
+        return position;
+    }
+
+    [[nodiscard]] std::vector<u8>& GetBuffer() {
+        return buffer;
+    }
+
+    [[nodiscard]] const std::vector<u8>& GetBuffer() const {
+        return buffer;
+    }
+
+private:
+    std::vector<u8> buffer;  // bytes held by the stream
+    std::size_t position{0}; // cursor: index of the next byte ReadByte/WriteByte touches
+};
+
+} // namespace Common
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -8,6 +8,7 @@
 #include <cstdlib>
 #include <locale>
 #include <sstream>
+
 #include "common/common_paths.h"
 #include "common/logging/log.h"
 #include "common/string_util.h"
@@ -21,14 +22,14 @@ namespace Common {
 /// Make a string lowercase
 std::string ToLower(std::string str) {
    std::transform(str.begin(), str.end(), str.begin(),
-                   [](unsigned char c) { return std::tolower(c); });
+                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return str;
 }

 /// Make a string uppercase
 std::string ToUpper(std::string str) {
    std::transform(str.begin(), str.end(), str.begin(),
-                   [](unsigned char c) { return std::toupper(c); });
+                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return str;
 }

--- a/src/common/timer.cpp
+++ b/src/common/timer.cpp
@@ -142,20 +142,18 @@ std::string Timer::GetTimeFormatted() {
 // ----------------
 double Timer::GetDoubleTime() {
    // Get continuous timestamp
-    u64 TmpSeconds = static_cast<u64>(Common::Timer::GetTimeSinceJan1970().count());
-    double ms = static_cast<u64>(GetTimeMs().count()) % 1000;
+    auto tmp_seconds = static_cast<u64>(GetTimeSinceJan1970().count());
+    const auto ms = static_cast<double>(static_cast<u64>(GetTimeMs().count()) % 1000);

    // Remove a few years. We only really want enough seconds to make
    // sure that we are detecting actual actions, perhaps 60 seconds is
    // enough really, but I leave a year of seconds anyway, in case the
    // user's clock is incorrect or something like that.
-    TmpSeconds = TmpSeconds - (38 * 365 * 24 * 60 * 60);
+    tmp_seconds = tmp_seconds - (38 * 365 * 24 * 60 * 60);

    // Make a smaller integer that fits in the double
-    u32 Seconds = static_cast<u32>(TmpSeconds);
-    double TmpTime = Seconds + ms;
-
-    return TmpTime;
+    const auto seconds = static_cast<u32>(tmp_seconds);
+    return seconds + ms;
 }

 } // Namespace Common
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -53,7 +53,7 @@ public:
        return Common::Divide128On32(temporary, 1000000000).first;
    }

-    void Pause(bool is_paused) override {
+    void Pause([[maybe_unused]] bool is_paused) override {
        // Do nothing in this clock type.
    }

--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -34,7 +34,7 @@ private:
    /// value used to reduce the native clocks accuracy as some apss rely on
    /// undefined behavior where the level of accuracy in the clock shouldn't
    /// be higher.
-    static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
+    static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);

    SpinLock rtsc_serialize{};
    u64 last_measure{};
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -439,6 +439,8 @@ add_library(core STATIC
    hle/service/nvdrv/devices/nvhost_gpu.h
    hle/service/nvdrv/devices/nvhost_nvdec.cpp
    hle/service/nvdrv/devices/nvhost_nvdec.h
+    hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+    hle/service/nvdrv/devices/nvhost_nvdec_common.h
    hle/service/nvdrv/devices/nvhost_nvjpg.cpp
    hle/service/nvdrv/devices/nvhost_nvjpg.h
    hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -452,6 +454,8 @@ add_library(core STATIC
    hle/service/nvdrv/nvdrv.h
    hle/service/nvdrv/nvmemp.cpp
    hle/service/nvdrv/nvmemp.h
+    hle/service/nvdrv/syncpoint_manager.cpp
+    hle/service/nvdrv/syncpoint_manager.h
    hle/service/nvflinger/buffer_queue.cpp
    hle/service/nvflinger/buffer_queue.h
    hle/service/nvflinger/nvflinger.cpp
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -147,10 +147,18 @@ std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContex
    auto fp = ctx.cpu_registers[29];
    auto lr = ctx.cpu_registers[30];
    while (true) {
-        out.push_back({"", 0, lr, 0});
-        if (!fp) {
+        out.push_back({
+            .module = "",
+            .address = 0,
+            .original_address = lr,
+            .offset = 0,
+            .name = {},
+        });
+
+        if (fp == 0) {
            break;
        }
+
        lr = memory.Read64(fp + 8) - 4;
        fp = memory.Read64(fp);
    }
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
        arp_manager.ResetAll();

        telemetry_session = std::make_unique<Core::TelemetrySession>();
+
+        gpu_core = VideoCore::CreateGPU(emu_window, system);
+        if (!gpu_core) {
+            return ResultStatus::ErrorVideoCore;
+        }
+
        service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);

        Service::Init(service_manager, system);
        GDBStub::DeferStart();

        interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
-        gpu_core = VideoCore::CreateGPU(emu_window, system);
-        if (!gpu_core) {
-            return ResultStatus::ErrorVideoCore;
-        }

        // Initialize time manager, which must happen after kernel is created
        time_manager.Initialize();
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -365,6 +365,8 @@ void CpuManager::RunThread(std::size_t core) {
    data.enter_barrier.reset();
    data.exit_barrier.reset();
    data.initialized = false;
+
+    MicroProfileOnThreadExit();
 }

 } // namespace Core
--- a/src/core/frontend/applets/controller.cpp
+++ b/src/core/frontend/applets/controller.cpp
@@ -19,7 +19,7 @@ DefaultControllerApplet::DefaultControllerApplet(Service::SM::ServiceManager& se
 DefaultControllerApplet::~DefaultControllerApplet() = default;

 void DefaultControllerApplet::ReconfigureControllers(std::function<void()> callback,
-                                                     ControllerParameters parameters) const {
+                                                     const ControllerParameters& parameters) const {
    LOG_INFO(Service_HID, "called, deducing the best configuration based on the given parameters!");

    auto& npad =
--- a/src/core/frontend/applets/controller.h
+++ b/src/core/frontend/applets/controller.h
@@ -38,7 +38,7 @@ public:
    virtual ~ControllerApplet();

    virtual void ReconfigureControllers(std::function<void()> callback,
-                                        ControllerParameters parameters) const = 0;
+                                        const ControllerParameters& parameters) const = 0;
 };

 class DefaultControllerApplet final : public ControllerApplet {
@@ -47,7 +47,7 @@ public:
    ~DefaultControllerApplet() override;

    void ReconfigureControllers(std::function<void()> callback,
-                                ControllerParameters parameters) const override;
+                                const ControllerParameters& parameters) const override;

 private:
    Service::SM::ServiceManager& service_manager;
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -86,8 +86,6 @@ struct KernelCore::Impl {
        }
        cores.clear();

-        registered_core_threads.reset();
-
        process_list.clear();
        current_process = nullptr;

@@ -199,9 +197,7 @@ struct KernelCore::Impl {
        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
        ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
        ASSERT(it == end);
-        ASSERT(!registered_core_threads[core_id]);
        InsertHostThread(static_cast<u32>(core_id));
-        registered_core_threads.set(core_id);
    }

    void RegisterHostThread() {
@@ -332,7 +328,6 @@ struct KernelCore::Impl {

    // 0-3 IDs represent core threads, >3 represent others
    std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
-    std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;

    // Number of host threads is a relatively high number to avoid overflowing
    static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -4,6 +4,7 @@

 #include <algorithm>
 #include <bitset>
+#include <ctime>
 #include <memory>
 #include <random>
 #include "common/alignment.h"
@@ -123,7 +124,7 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,
                                                              : kernel.CreateNewUserProcessID();
    process->capabilities.InitializeForMetadatalessProcess();

-    std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(0));
+    std::mt19937 rng(Settings::values.rng_seed.GetValue().value_or(std::time(nullptr)));
    std::uniform_int_distribution<u64> distribution;
    std::generate(process->random_entropy.begin(), process->random_entropy.end(),
                  [&] { return distribution(rng); });
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1201,6 +1201,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
        {151, nullptr, "TryPopFromNotificationStorageChannel"},
        {160, nullptr, "GetHealthWarningDisappearedSystemEvent"},
        {170, nullptr, "SetHdcpAuthenticationActivated"},
+        {180, nullptr, "GetLaunchRequiredVersion"},
+        {181, nullptr, "UpgradeLaunchRequiredVersion"},
        {500, nullptr, "StartContinuousRecordingFlushForDebug"},
        {1000, nullptr, "CreateMovieMaker"},
        {1001, nullptr, "PrepareForJit"},
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -260,7 +260,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
        {404, nullptr, "HasLeftRightBattery"},
        {405, nullptr, "GetNpadInterfaceType"},
        {406, nullptr, "GetNpadLeftRightInterfaceType"},
-        {407, nullptr, "GetNpadOfHighestBatteryLevelForJoyLeft"},
+        {407, nullptr, "GetNpadOfHighestBatteryLevel"},
        {408, nullptr, "GetNpadOfHighestBatteryLevelForJoyRight"},
        {500, nullptr, "GetPalmaConnectionHandle"},
        {501, nullptr, "InitializePalma"},
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -166,7 +166,7 @@ public:
            {0, &RelocatableObject::LoadNro, "LoadNro"},
            {1, &RelocatableObject::UnloadNro, "UnloadNro"},
            {2, &RelocatableObject::LoadNrr, "LoadNrr"},
-            {3, nullptr, "UnloadNrr"},
+            {3, &RelocatableObject::UnloadNrr, "UnloadNrr"},
            {4, &RelocatableObject::Initialize, "Initialize"},
            {10, nullptr, "LoadNrrEx"},
        };
@@ -272,6 +272,20 @@ public:
        rb.Push(RESULT_SUCCESS);
    }

+    void UnloadNrr(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto pid = rp.Pop<u64>(); // only used for the log message below
+        const auto nrr_address = rp.Pop<VAddr>();
+
+        LOG_DEBUG(Service_LDR, "called with pid={}, nrr_address={:016X}", pid, nrr_address);
+        // NOTE(review): success is pushed unconditionally, whether or not erase() removed anything.
+        nrr.erase(nrr_address);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+
+        rb.Push(RESULT_SUCCESS);
+    }
+
    bool ValidateRegionForMap(Kernel::Memory::PageTable& page_table, VAddr start,
                              std::size_t size) const {
        constexpr std::size_t padding_size{4 * Kernel::Memory::PageSize};
--- a/src/core/hle/service/mii/mii.cpp
+++ b/src/core/hle/service/mii/mii.cpp
@@ -47,6 +47,7 @@ public:
            {23, nullptr, "Convert"},
            {24, nullptr, "ConvertCoreDataToCharInfo"},
            {25, nullptr, "ConvertCharInfoToCoreData"},
+            {26, nullptr, "Append"},
        };
        // clang-format on

--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@

 namespace Service::Nvidia::Devices {

-nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
-    : nvdevice(system), events_interface{events_interface} {}
+nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager)
+    : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
 nvhost_ctrl::~nvhost_ctrl() = default;

 u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -36,8 +37,8 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::v
        return IocCtrlEventRegister(input, output);
    case IoctlCommand::IocCtrlEventUnregisterCommand:
        return IocCtrlEventUnregister(input, output);
-    case IoctlCommand::IocCtrlEventSignalCommand:
-        return IocCtrlEventSignal(input, output);
+    case IoctlCommand::IocCtrlClearEventWaitCommand:
+        return IocCtrlClearEventWait(input, output);
    default:
        UNIMPLEMENTED_MSG("Unimplemented ioctl");
        return 0;
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
        return NvResult::BadParameter;
    }

+    if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
+    if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
+        syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
+        params.value = new_value;
+        std::memcpy(output.data(), &params, sizeof(params));
+        return NvResult::Success;
+    }
+
    auto event = events_interface.events[event_id];
    auto& gpu = system.GPU();
+
    // This is mostly to take into account unimplemented features. As synced
    // gpu is always synced.
    if (!gpu.IsAsync()) {
-        event.writable->Signal();
+        event.event.writable->Signal();
        return NvResult::Success;
    }
    auto lock = gpu.LockSync();
-    const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
+    const u32 current_syncpoint_value = event.fence.value;
    const s32 diff = current_syncpoint_value - params.threshold;
    if (diff >= 0) {
-        event.writable->Signal();
+        event.event.writable->Signal();
        params.value = current_syncpoint_value;
        std::memcpy(output.data(), &params, sizeof(params));
        return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
            params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
        }
        params.value |= event_id;
-        event.writable->Clear();
+        event.event.writable->Clear();
        gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
        if (!is_async && ctrl.fresh_call) {
            ctrl.must_delay = true;
@@ -154,24 +169,22 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
    return NvResult::Success;
 }

-u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
    IocCtrlEventSignalParams params{};
    std::memcpy(&params, input.data(), sizeof(params));
-    // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
-    // It is believed from RE to cancel the GPU Event. However, better research is required
-    u32 event_id = params.user_event_id & 0x00FF;
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
+
+    u32 event_id = params.event_id & 0x00FF;
+    LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
+
    if (event_id >= MaxNvEvents) {
        return NvResult::BadParameter;
    }
    if (events_interface.status[event_id] == EventState::Waiting) {
-        auto& gpu = system.GPU();
-        if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
-                                      events_interface.assigned_value[event_id])) {
-            events_interface.LiberateEvent(event_id);
-            events_interface.events[event_id].writable->Signal();
-        }
+        events_interface.LiberateEvent(event_id);
    }
+
+    syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
+
    return NvResult::Success;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {

 class nvhost_ctrl final : public nvdevice {
 public:
-    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
+    explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
+                         SyncpointManager& syncpoint_manager);
    ~nvhost_ctrl() override;

    u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -31,7 +32,7 @@ private:
        IocSyncptWaitexCommand = 0xC0100019,
        IocSyncptReadMaxCommand = 0xC008001A,
        IocGetConfigCommand = 0xC183001B,
-        IocCtrlEventSignalCommand = 0xC004001C,
+        IocCtrlClearEventWaitCommand = 0xC004001C,
        IocCtrlEventWaitCommand = 0xC010001D,
        IocCtrlEventWaitAsyncCommand = 0xC010001E,
        IocCtrlEventRegisterCommand = 0xC004001F,
@@ -94,7 +95,7 @@ private:
    static_assert(sizeof(IocGetConfigParams) == 387, "IocGetConfigParams is incorrect size");

    struct IocCtrlEventSignalParams {
-        u32_le user_event_id;
+        u32_le event_id;
    };
    static_assert(sizeof(IocCtrlEventSignalParams) == 4,
                  "IocCtrlEventSignalParams is incorrect size");
@@ -142,9 +143,10 @@ private:

    u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);

-    u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);

    EventInterface& events_interface;
+    SyncpointManager& syncpoint_manager;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,20 @@
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"

 namespace Service::Nvidia::Devices {

-nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
-    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                       SyncpointManager& syncpoint_manager)
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
+    channel_fence.id = syncpoint_manager.AllocateSyncpoint();
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+}
+
 nvhost_gpu::~nvhost_gpu() = default;

 u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
                params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                params.unk3);

-    auto& gpu = system.GPU();
-    params.fence_out.id = assigned_syncpoints;
-    params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
-    assigned_syncpoints++;
+    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
+
+    params.fence_out = channel_fence;
+
    std::memcpy(output.data(), &params, output.size());
    return 0;
 }
@@ -145,37 +151,98 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
    return 0;
 }

+static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
+    return {
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {fence.value}, // word following the FenceValue header: the value to wait for
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+    };
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}}; // value-initialized word following the FenceValue header
+    // Append one FenceAction(Increment) header/argument pair per requested increment.
+    for (u32 count = 0; count < add_increment; ++count) {
+        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
+                                                      Tegra::SubmissionMode::Increasing));
+        result.emplace_back(
+            Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
+    }
+
+    return result;
+}
+
+static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
+                                                                          u32 add_increment) {
+    std::vector<Tegra::CommandHeader> result{
+        Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
+                                  Tegra::SubmissionMode::Increasing),
+        {}}; // value-initialized word following the WaitForInterrupt header
+    const std::vector<Tegra::CommandHeader> increment{
+        BuildIncrementCommandList(fence, add_increment)};
+    // The WaitForInterrupt pair goes first, followed by the plain increment list.
+    result.insert(result.end(), increment.begin(), increment.end());
+
+    return result;
+}
+
+u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                                 Tegra::CommandList&& entries) {
+    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
+              params.num_entries, params.flags.raw);
+
+    auto& gpu = system.GPU();
+
+    params.fence_out.id = channel_fence.id;
+    // With add_wait set, queue a wait list first unless IsSyncpointExpired already holds.
+    if (params.flags.add_wait.Value() &&
+        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
+        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+    }
+    // Returned fence value: IncreaseSyncpoint's result, or GetSyncpointMax when no flag is set.
+    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
+        const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
+        params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
+            params.fence_out.id, params.AddIncrementValue() + increment_value);
+    } else {
+        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
+    }
+
+    entries.RefreshIntegrityChecks(gpu);
+    gpu.PushGPUEntries(std::move(entries));
+    // With add_increment set, follow the entries with an increment list (WFI unless suppressed).
+    if (params.flags.add_increment.Value()) {
+        if (params.flags.suppress_wfi) {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+        } else {
+            gpu.PushGPUEntries(Tegra::CommandList{
+                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+        }
+    }
+    // Copy the updated params, including fence_out, into the output buffer.
+    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
+    return 0;
+}
+
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
        UNIMPLEMENTED();
    }
    IoctlSubmitGpfifo params{};
    std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
-    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
-              params.num_entries, params.flags.raw);
-
-    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
-                                   params.num_entries * sizeof(Tegra::CommandListHeader),
-               "Incorrect input size");

    Tegra::CommandList entries(params.num_entries);
-    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
+    std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
                params.num_entries * sizeof(Tegra::CommandListHeader));

-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
-
-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
-    } else {
-        params.fence_out.value = current_syncpoint_value;
-    }
-    gpu.PushGPUEntries(std::move(entries));
-
-    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
-    return 0;
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
 }

 u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
@@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
    }
    IoctlSubmitGpfifo params{};
    std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
-    LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
-              params.num_entries, params.flags.raw);

    Tegra::CommandList entries(params.num_entries);
    if (version == IoctlVersion::Version2) {
-        std::memcpy(entries.data(), input2.data(),
+        std::memcpy(entries.command_lists.data(), input2.data(),
                    params.num_entries * sizeof(Tegra::CommandListHeader));
    } else {
-        system.Memory().ReadBlock(params.address, entries.data(),
+        system.Memory().ReadBlock(params.address, entries.command_lists.data(),
                                  params.num_entries * sizeof(Tegra::CommandListHeader));
    }
-    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
-    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);

-    auto& gpu = system.GPU();
-    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
-    if (params.flags.increment.Value()) {
-        params.fence_out.value += current_syncpoint_value;
-    } else {
-        params.fence_out.value = current_syncpoint_value;
-    }
-    gpu.PushGPUEntries(std::move(entries));
-
-    std::memcpy(output.data(), &params, output.size());
-    return 0;
+    return SubmitGPFIFOImpl(params, output, std::move(entries));
 }

 u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "video_core/dma_pusher.h"
+
+namespace Service::Nvidia {
+class SyncpointManager;
+}

 namespace Service::Nvidia::Devices {

@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);

 class nvhost_gpu final : public nvdevice {
 public:
-    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
+                        SyncpointManager& syncpoint_manager);
    ~nvhost_gpu() override;

    u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
            u32_le raw;
            BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
            BitField<1, 1, u32_le> add_increment; // append an increment to the list
-            BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+            BitField<2, 1, u32_le> new_hw_format; // mostly ignored
+            BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
            BitField<8, 1, u32_le> increment;     // increment the returned fence
        } flags;
        Fence fence_out; // returned new fence object for others to wait on
+
+        u32 AddIncrementValue() const {
+            return flags.add_increment.Value() << 1; // 2 when add_increment is set, otherwise 0
+        }
    };
    static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
                  "IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
    u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
    u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
    u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
+                         Tegra::CommandList&& entries);
    u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
    u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
                  const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,8 @@ private:
    u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);

    std::shared_ptr<nvmap> nvmap_dev;
-    u32 assigned_syncpoints{};
+    SyncpointManager& syncpoint_manager;
+    Fence channel_fence;
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -2,15 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <cstring>
-
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"

 namespace Service::Nvidia::Devices {

-nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {}
+nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
 nvhost_nvdec::~nvhost_nvdec() = default;

 u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -21,7 +23,7 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::

    switch (static_cast<IoctlCommand>(command.raw)) {
    case IoctlCommand::IocSetNVMAPfdCommand:
-        return SetNVMAPfd(input, output);
+        return SetNVMAPfd(input);
    case IoctlCommand::IocSubmit:
        return Submit(input, output);
    case IoctlCommand::IocGetSyncpoint:
@@ -29,79 +31,29 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
    case IoctlCommand::IocGetWaitbase:
        return GetWaitbase(input, output);
    case IoctlCommand::IocMapBuffer:
-        return MapBuffer(input, output);
+    case IoctlCommand::IocMapBuffer2:
+    case IoctlCommand::IocMapBuffer3:
    case IoctlCommand::IocMapBufferEx:
-        return MapBufferEx(input, output);
-    case IoctlCommand::IocUnmapBufferEx:
-        return UnmapBufferEx(input, output);
+        return MapBuffer(input, output);
+    case IoctlCommand::IocUnmapBufferEx: {
+        // This command is sent when the video stream has ended, flush all video contexts
+        // This is usually sent in the following order: vic, nvdec, vic.
+        // Inform the GPU to clear any remaining nvdec buffers when this is detected.
+        LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
+        Tegra::ChCommandHeaderList cmdlist(1);
+        cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
+        system.GPU().PushCommandBuffer(cmdlist);
+        [[fallthrough]]; // fallthrough to unmap buffers
+    };
+    case IoctlCommand::IocUnmapBuffer:
+    case IoctlCommand::IocUnmapBuffer2:
+    case IoctlCommand::IocUnmapBuffer3:
+        return UnmapBuffer(input, output);
+    case IoctlCommand::IocSetSubmitTimeout:
+        return SetSubmitTimeout(input, output);
    }

-    UNIMPLEMENTED_MSG("Unimplemented ioctl");
-    return 0;
-}
-
-u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSetNvmapFD params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
-    LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
-
-    nvmap_fd = params.nvmap_fd;
-    return 0;
-}
-
-u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSubmit params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
-    return 0;
-}
-
-u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetSyncpoint params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
-    return 0;
-}
-
-u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetWaitbase params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
-    return 0;
-}
-
-u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBuffer params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
-    return 0;
-}
-
-u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
-    return 0;
-}
-
-u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlUnmapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-    std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
+    UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
    return 0;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -4,16 +4,14 @@

 #pragma once

-#include <vector>
-#include "common/common_types.h"
-#include "common/swap.h"
-#include "core/hle/service/nvdrv/devices/nvdevice.h"
+#include <memory>
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"

 namespace Service::Nvidia::Devices {

-class nvhost_nvdec final : public nvdevice {
+class nvhost_nvdec final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_nvdec(Core::System& system);
+    explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
    ~nvhost_nvdec() override;

    u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -27,62 +25,15 @@ private:
        IocGetSyncpoint = 0xC0080002,
        IocGetWaitbase = 0xC0080003,
        IocMapBuffer = 0xC01C0009,
+        IocMapBuffer2 = 0xC16C0009,
+        IocMapBuffer3 = 0xC15C0009,
        IocMapBufferEx = 0xC0A40009,
-        IocUnmapBufferEx = 0xC0A4000A,
+        IocUnmapBuffer = 0xC0A4000A,
+        IocUnmapBuffer2 = 0xC16C000A,
+        IocUnmapBufferEx = 0xC01C000A,
+        IocUnmapBuffer3 = 0xC15C000A,
+        IocSetSubmitTimeout = 0x40040007,
    };
-
-    struct IoctlSetNvmapFD {
-        u32_le nvmap_fd;
-    };
-    static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size");
-
-    struct IoctlSubmit {
-        INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size");
-
-    struct IoctlGetSyncpoint {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size");
-
-    struct IoctlGetWaitbase {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size");
-
-    struct IoctlMapBuffer {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
-
-    struct IoctlMapBufferEx {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size");
-
-    struct IoctlUnmapBufferEx {
-        INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size");
-
-    u32_le nvmap_fd{};
-
-    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
 };

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -0,0 +1,234 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
+#include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"
+
+namespace Service::Nvidia::Devices {
+
+namespace {
+// Copies `count` elements of type T out of the raw ioctl `input` bytes, starting at byte
+// `offset`, into `dst`, and returns the byte offset just past the requested range.
+//
+// `dst` must already hold at least `count` elements; the callers in this file size it from
+// the same count. The copy is clamped to the bytes actually present in `input`, so a count
+// larger than the buffer leaves the trailing elements of `dst` untouched instead of reading
+// past the end of `input`. The returned offset always advances by the full
+// `count * sizeof(T)` so that subsequent splices keep their expected positions.
+template <typename T>
+std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
+                          std::size_t offset) {
+    const std::size_t requested = count * sizeof(T);
+    const std::size_t available = offset < input.size() ? input.size() - offset : 0;
+    const std::size_t to_copy = std::min(requested, available);
+    // Skip the call entirely for empty ranges: data() may be null for an empty vector, and
+    // passing a null pointer to memcpy is undefined even when the length is zero.
+    if (to_copy > 0) {
+        std::memcpy(dst.data(), input.data() + offset, to_copy);
+    }
+    return offset + requested;
+}
+
+// Copies every element of `src` into the raw output buffer `dst` at byte `offset` and returns
+// the byte offset just past the requested range.
+//
+// The write is clamped to the space remaining in `dst`, so an output buffer that is too small
+// is filled as far as it goes instead of being written past its end. The returned offset
+// always advances by the full `src.size() * sizeof(T)` so chained writes keep their positions.
+template <typename T>
+std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
+    const std::size_t requested = src.size() * sizeof(T);
+    const std::size_t available = offset < dst.size() ? dst.size() - offset : 0;
+    const std::size_t to_copy = std::min(requested, available);
+    // Skip the call for empty ranges: data() may be null for an empty vector, and passing a
+    // null pointer to memcpy is undefined even when the length is zero.
+    if (to_copy > 0) {
+        std::memcpy(dst.data() + offset, src.data(), to_copy);
+    }
+    return offset + requested;
+}
+} // Anonymous namespace
+
+// Status codes returned by the ioctl handlers in this file.
+// NOTE(review): -12 and -22 look like negated Linux ENOMEM / EINVAL values - confirm against
+// the driver before relying on that.
+namespace NvErrCodes {
+constexpr u32 Success{};
+[[maybe_unused]] constexpr u32 OutOfMemory{static_cast<u32>(-12)};
+constexpr u32 InvalidInput{static_cast<u32>(-22)};
+} // namespace NvErrCodes
+
+// Forwards `system` to the nvdevice base and takes shared ownership of the nvmap device,
+// which the handlers below use to resolve buffer handles via GetObject().
+nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
+    : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
+nvhost_nvdec_common::~nvhost_nvdec_common() = default;
+
+// Reads an IoctlSetNvmapFD from the start of `input` and remembers its descriptor in
+// nvmap_fd. Always reports success.
+u32 nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
+    IoctlSetNvmapFD request{};
+    std::memcpy(&request, input.data(), sizeof(request));
+    LOG_DEBUG(Service_NVDRV, "called, fd={}", request.nvmap_fd);
+
+    nvmap_fd = request.nvmap_fd;
+    return NvErrCodes::Success;
+}
+
+// Handles the submit ioctl: parses the IoctlSubmit header and the arrays that follow it in
+// `input`, pushes every referenced command buffer to the GPU, then echoes the header and
+// arrays (except the fences) back into `output`.
+//
+// Returns NvErrCodes::Success on completion. The ASSERT_OR_EXECUTE below returns
+// NvErrCodes::InvalidInput when a command buffer's memory id has no nvmap object
+// (presumably only when the check fails - confirm the macro's semantics). Returns 0 without
+// writing `output` when no buffer mapping is found for the object's DMA address.
+u32 nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSubmit params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
+    LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
+
+    // Instantiate param buffers
+    // Each vector is sized from the counts in the header; the arrays start right after it.
+    std::size_t offset = sizeof(IoctlSubmit);
+    std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
+    std::vector<Reloc> relocs(params.relocation_count);
+    std::vector<u32> reloc_shifts(params.relocation_count);
+    std::vector<SyncptIncr> syncpt_increments(params.syncpoint_count);
+    std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
+    std::vector<Fence> fences(params.fence_count);
+
+    // Splice input into their respective buffers
+    // The arrays are read back to back, so each call continues at the offset the previous
+    // one returned.
+    offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
+    offset = SpliceVectors(input, relocs, params.relocation_count, offset);
+    offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
+    offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
+    offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
+    offset = SpliceVectors(input, fences, params.fence_count, offset);
+
+    // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
+
+    auto& gpu = system.GPU();
+
+    for (const auto& cmd_buffer : command_buffers) {
+        // Resolve the nvmap object for this buffer, then the mapping recorded for its DMA
+        // address by MapBuffer.
+        auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
+        ASSERT_OR_EXECUTE(object, return NvErrCodes::InvalidInput;);
+        const auto map = FindBufferMap(object->dma_map_addr);
+        if (!map) {
+            LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}",
+                      object->addr, object->dma_map_addr);
+            return 0;
+        }
+        // Read word_count 32-bit words from GPU memory at the mapping start plus the buffer's
+        // offset and hand them to the GPU as one command list.
+        Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
+        gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(),
+                                      cmdlist.size() * sizeof(u32));
+        gpu.PushCommandBuffer(cmdlist);
+    }
+
+    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
+    // Some games expect command_buffers to be written back
+    // The arrays are written in the order they were read; `fences` is not written back.
+    offset = sizeof(IoctlSubmit);
+    offset = WriteVectors(output, command_buffers, offset);
+    offset = WriteVectors(output, relocs, offset);
+    offset = WriteVectors(output, reloc_shifts, offset);
+    offset = WriteVectors(output, syncpt_increments, offset);
+    offset = WriteVectors(output, wait_checks, offset);
+
+    return NvErrCodes::Success;
+}
+
+// Reads an IoctlGetSyncpoint from `input`, forces its `value` field to zero and writes the
+// structure back to `output`. Always reports success.
+u32 nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGetSyncpoint request{};
+    std::memcpy(&request, input.data(), sizeof(request));
+    LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", request.param);
+
+    // We found that implementing this causes deadlocks with async gpu, along with degraded
+    // performance. TODO: RE the nvdec async implementation
+    request.value = 0;
+
+    std::memcpy(output.data(), &request, sizeof(request));
+    return NvErrCodes::Success;
+}
+
+// Reads an IoctlGetWaitbase from `input`, sets its `value` field to zero and writes the
+// structure back to `output`. Always reports success.
+u32 nvhost_nvdec_common::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlGetWaitbase request{};
+    std::memcpy(&request, input.data(), sizeof(request));
+
+    // Seems to be hard coded at 0
+    request.value = 0;
+
+    std::memcpy(output.data(), &request, sizeof(request));
+    return NvErrCodes::Success;
+}
+
+// Handles the map-buffer ioctls: for every MapBufferEntry following the IoctlMapBuffer
+// header in `input`, makes sure the referenced nvmap object has a DMA address, records the
+// mapping and reports the address back in the entry.
+//
+// Writes the header followed by the updated entries to `output` and returns
+// NvErrCodes::Success. If an entry's handle does not resolve to an nvmap object, the header
+// is echoed to `output` and NvErrCodes::InvalidInput is returned; entries processed before
+// the bad one stay mapped.
+u32 nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlMapBuffer params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
+    std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
+
+    SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
+
+    auto& gpu = system.GPU();
+
+    for (auto& cmf_buff : cmd_buffer_handles) {
+        auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
+        if (!object) {
+            LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
+            // Only the header is available to echo back. Copying output.size() bytes here
+            // would read past the end of `params` whenever the output buffer is larger.
+            std::memcpy(output.data(), &params, std::min(output.size(), sizeof(IoctlMapBuffer)));
+            return NvErrCodes::InvalidInput;
+        }
+        if (object->dma_map_addr == 0) {
+            // NVDEC and VIC memory is in the 32-bit address space
+            // MapAllocate32 will attempt to map a lower 32-bit value in the shared gpu memory space
+            const GPUVAddr low_addr = gpu.MemoryManager().MapAllocate32(object->addr, object->size);
+            object->dma_map_addr = static_cast<u32>(low_addr);
+            // Ensure that the dma_map_addr is indeed in the lower 32-bit address space.
+            ASSERT(object->dma_map_addr == low_addr);
+        }
+        // A zero address after the attempt above means the allocation did not produce one.
+        if (!object->dma_map_addr) {
+            LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
+        } else {
+            cmf_buff.map_address = object->dma_map_addr;
+            AddBufferMap(object->dma_map_addr, object->size, object->addr,
+                         object->status == nvmap::Object::Status::Allocated);
+        }
+    }
+    std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
+    std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(),
+                cmd_buffer_handles.size() * sizeof(MapBufferEntry));
+
+    return NvErrCodes::Success;
+}
+
+// Handles the unmap-buffer ioctls: for every MapBufferEntry following the IoctlMapBuffer
+// header in `input`, drops the recorded mapping for the referenced nvmap object, unmaps it
+// from GPU memory and clears the object's DMA address.
+//
+// Zero-fills `output` and returns NvErrCodes::Success. If an entry's handle does not resolve
+// to an nvmap object, the header is echoed to `output` and NvErrCodes::InvalidInput is
+// returned; entries processed before the bad one stay unmapped.
+u32 nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlMapBuffer params{};
+    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
+    std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
+    SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
+
+    auto& gpu = system.GPU();
+
+    for (auto& cmf_buff : cmd_buffer_handles) {
+        const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
+        if (!object) {
+            LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
+            // Only the header is available to echo back. Copying output.size() bytes here
+            // would read past the end of `params` whenever the output buffer is larger.
+            std::memcpy(output.data(), &params, std::min(output.size(), sizeof(IoctlMapBuffer)));
+            return NvErrCodes::InvalidInput;
+        }
+        // RemoveBufferMap yields the size to unmap, or nothing if no mapping was recorded.
+        if (const auto size{RemoveBufferMap(object->dma_map_addr)}; size) {
+            gpu.MemoryManager().Unmap(object->dma_map_addr, *size);
+        } else {
+            // This occurs quite frequently, however does not seem to impact functionality
+            LOG_DEBUG(Service_NVDRV, "invalid offset=0x{:X} dma=0x{:X}", object->addr,
+                      object->dma_map_addr);
+        }
+        object->dma_map_addr = 0;
+    }
+    std::memset(output.data(), 0, output.size());
+    return NvErrCodes::Success;
+}
+
+// Stores the timeout value supplied in `input` into submit_timeout. `output` is not written.
+// Always reports success; the value is only recorded (stubbed).
+u32 nvhost_nvdec_common::SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output) {
+    // Never copy more than the destination holds: `input` is sized by the caller, and using
+    // its size directly would overflow the 32-bit member for any larger buffer.
+    const std::size_t copy_size = std::min(input.size(), sizeof(submit_timeout));
+    if (copy_size > 0) {
+        std::memcpy(&submit_timeout, input.data(), copy_size);
+    }
+    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    return NvErrCodes::Success;
+}
+
+// Returns the recorded mapping whose [StartAddr, EndAddr) range contains `gpu_addr`, or
+// std::nullopt when no such mapping exists.
+//
+// Only mappings keyed at or below `gpu_addr` can contain it, so the search stops at
+// upper_bound(gpu_addr). A failed search yields that same bound, which is why the result is
+// compared against it rather than against end(): comparing with end() would hand back an
+// unrelated mapping (or dereference end()) whenever the address is not mapped.
+std::optional<nvhost_nvdec_common::BufferMap> nvhost_nvdec_common::FindBufferMap(
+    GPUVAddr gpu_addr) const {
+    const auto search_end = buffer_mappings.upper_bound(gpu_addr);
+    const auto it =
+        std::find_if(buffer_mappings.begin(), search_end, [&](const auto& entry) {
+            return (gpu_addr >= entry.second.StartAddr() && gpu_addr < entry.second.EndAddr());
+        });
+
+    if (it == search_end) {
+        return std::nullopt;
+    }
+    return it->second;
+}
+
+// Records a mapping of `size` bytes starting at `gpu_addr`, keyed by `gpu_addr`.
+// An existing entry with the same key is overwritten (insert_or_assign).
+void nvhost_nvdec_common::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
+                                       bool is_allocated) {
+    buffer_mappings.insert_or_assign(gpu_addr, BufferMap{gpu_addr, size, cpu_addr, is_allocated});
+}
+
+// Erases the mapping keyed exactly by `gpu_addr`.
+// Returns std::nullopt when no such mapping exists; otherwise returns the mapping's size if
+// it was flagged as allocated, or 0 if it was not.
+std::optional<std::size_t> nvhost_nvdec_common::RemoveBufferMap(GPUVAddr gpu_addr) {
+    const auto mapping = buffer_mappings.find(gpu_addr);
+    if (mapping == buffer_mappings.end()) {
+        return std::nullopt;
+    }
+    // Read the size before erasing; the iterator is invalid afterwards.
+    const std::size_t unmap_size = mapping->second.IsAllocated() ? mapping->second.Size() : 0;
+    buffer_mappings.erase(mapping);
+    return unmap_size;
+}
+
+} // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -0,0 +1,168 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service::Nvidia::Devices {
+class nvmap;
+
+// Base class holding the ioctl handlers and buffer-mapping bookkeeping shared by the device
+// classes that derive from it. Derived classes implement ioctl() and dispatch to the
+// protected handlers below.
+class nvhost_nvdec_common : public nvdevice {
+public:
+    explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    ~nvhost_nvdec_common() override;
+
+    // Device-specific ioctl dispatch; must be provided by the derived device.
+    virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
+                      std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
+                      IoctlVersion version) = 0;
+
+protected:
+    // Describes one mapped buffer: its GPU address range, the CPU address it was created
+    // from and whether the backing nvmap object was in the allocated state.
+    class BufferMap final {
+    public:
+        constexpr BufferMap() = default;
+
+        constexpr BufferMap(GPUVAddr start_addr, std::size_t size)
+            : start_addr{start_addr}, end_addr{start_addr + size} {}
+
+        constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr,
+                            bool is_allocated)
+            : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr},
+              is_allocated{is_allocated} {}
+
+        // First address of the mapped range.
+        constexpr VAddr StartAddr() const {
+            return start_addr;
+        }
+
+        // One past the last address of the mapped range.
+        constexpr VAddr EndAddr() const {
+            return end_addr;
+        }
+
+        // Length of the mapped range in bytes.
+        constexpr std::size_t Size() const {
+            return end_addr - start_addr;
+        }
+
+        constexpr VAddr CpuAddr() const {
+            return cpu_addr;
+        }
+
+        constexpr bool IsAllocated() const {
+            return is_allocated;
+        }
+
+    private:
+        GPUVAddr start_addr{};
+        GPUVAddr end_addr{};
+        VAddr cpu_addr{};
+        bool is_allocated{};
+    };
+
+    // Input of SetNVMAPfd.
+    struct IoctlSetNvmapFD {
+        u32_le nvmap_fd;
+    };
+    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
+
+    struct IoctlSubmitCommandBuffer {
+        u32_le id;
+        u32_le offset;
+        u32_le count;
+    };
+    static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
+                  "IoctlSubmitCommandBuffer is incorrect size");
+
+    // Header of the Submit ioctl; the counts give the lengths of the arrays that follow it.
+    struct IoctlSubmit {
+        u32_le cmd_buffer_count;
+        u32_le relocation_count;
+        u32_le syncpoint_count;
+        u32_le fence_count;
+    };
+    static_assert(sizeof(IoctlSubmit) == 0x10, "IoctlSubmit has incorrect size");
+
+    // One submitted command buffer: the nvmap handle, the offset into the buffer and the
+    // number of 32-bit words to read from it.
+    struct CommandBuffer {
+        s32 memory_id;
+        u32 offset;
+        s32 word_count;
+    };
+    static_assert(sizeof(CommandBuffer) == 0xC, "CommandBuffer has incorrect size");
+
+    struct Reloc {
+        s32 cmdbuffer_memory;
+        s32 cmdbuffer_offset;
+        s32 target;
+        s32 target_offset;
+    };
+    static_assert(sizeof(Reloc) == 0x10, "Reloc has incorrect size");
+
+    struct SyncptIncr {
+        u32 id;
+        u32 increments;
+    };
+    static_assert(sizeof(SyncptIncr) == 0x8, "SyncptIncr has incorrect size");
+
+    struct Fence {
+        u32 id;
+        u32 value;
+    };
+    static_assert(sizeof(Fence) == 0x8, "Fence has incorrect size");
+
+    struct IoctlGetSyncpoint {
+        // Input
+        u32_le param;
+        // Output
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlGetSyncpoint) == 8, "IoctlGetSyncpoint has wrong size");
+
+    struct IoctlGetWaitbase {
+        u32_le unknown; // seems to be ignored? Nintendo added this
+        u32_le value;
+    };
+    static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
+
+    // Header shared by the map and unmap ioctls; followed by num_entries MapBufferEntry items.
+    struct IoctlMapBuffer {
+        u32_le num_entries;
+        u32_le data_address; // Ignored by the driver.
+        u32_le attach_host_ch_das;
+    };
+    static_assert(sizeof(IoctlMapBuffer) == 0x0C, "IoctlMapBuffer is incorrect size");
+
+    struct IocGetIdParams {
+        // Input
+        u32_le param;
+        // Output
+        u32_le value;
+    };
+    static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
+
+    // Used for mapping and unmapping command buffers
+    struct MapBufferEntry {
+        u32_le map_handle;
+        u32_le map_address;
+    };
+    // Check the entry itself: it is copied to and from the raw ioctl buffers by byte size.
+    static_assert(sizeof(MapBufferEntry) == 0x8, "MapBufferEntry is incorrect size");
+
+    /// Ioctl command implementations
+    u32 SetNVMAPfd(const std::vector<u8>& input);
+    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
+
+    /// Bookkeeping for the buffers mapped through MapBuffer
+    std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const;
+    void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
+    std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
+
+    u32_le nvmap_fd{};
+    u32_le submit_timeout{};
+    std::shared_ptr<nvmap> nvmap_dev;
+
+    // This is expected to be ordered, therefore we must use a map, not unordered_map
+    std::map<GPUVAddr, BufferMap> buffer_mappings;
+};
+}; // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -2,15 +2,17 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <cstring>
-
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_vic.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_base.h"

 namespace Service::Nvidia::Devices {
+nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
+    : nvhost_nvdec_common(system, std::move(nvmap_dev)) {}

-nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {}
 nvhost_vic::~nvhost_vic() = default;

 u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -21,7 +23,7 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve

    switch (static_cast<IoctlCommand>(command.raw)) {
    case IoctlCommand::IocSetNVMAPfdCommand:
-        return SetNVMAPfd(input, output);
+        return SetNVMAPfd(input);
    case IoctlCommand::IocSubmit:
        return Submit(input, output);
    case IoctlCommand::IocGetSyncpoint:
@@ -29,83 +31,19 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
    case IoctlCommand::IocGetWaitbase:
        return GetWaitbase(input, output);
    case IoctlCommand::IocMapBuffer:
-        return MapBuffer(input, output);
+    case IoctlCommand::IocMapBuffer2:
+    case IoctlCommand::IocMapBuffer3:
+    case IoctlCommand::IocMapBuffer4:
    case IoctlCommand::IocMapBufferEx:
        return MapBuffer(input, output);
+    case IoctlCommand::IocUnmapBuffer:
+    case IoctlCommand::IocUnmapBuffer2:
+    case IoctlCommand::IocUnmapBuffer3:
    case IoctlCommand::IocUnmapBufferEx:
-        return UnmapBufferEx(input, output);
+        return UnmapBuffer(input, output);
    }

-    UNIMPLEMENTED_MSG("Unimplemented ioctl");
-    return 0;
-}
-
-u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSetNvmapFD params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
-    LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
-
-    nvmap_fd = params.nvmap_fd;
-    return 0;
-}
-
-u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlSubmit params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-
-    // Workaround for Luigi's Mansion 3, as nvhost_vic is not implemented for asynch GPU
-    params.command_buffer = {};
-
-    std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
-    return 0;
-}
-
-u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetSyncpoint params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
-    return 0;
-}
-
-u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlGetWaitbase params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
-    LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
-    params.value = 0; // Seems to be hard coded at 0
-    std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
-    return 0;
-}
-
-u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBuffer params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
-    return 0;
-}
-
-u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlMapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
-                params.address_1);
-    params.address_1 = 0;
-    params.address_2 = 0;
-    std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
-    return 0;
-}
-
-u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
-    IoctlUnmapBufferEx params{};
-    std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called");
-    std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
+    UNIMPLEMENTED_MSG("Unimplemented ioctl 0x{:X}", command.raw);
    return 0;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -4,19 +4,15 @@

 #pragma once

-#include <array>
-#include <vector>
-#include "common/common_types.h"
-#include "common/swap.h"
-#include "core/hle/service/nvdrv/devices/nvdevice.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"

 namespace Service::Nvidia::Devices {
+class nvmap;

-class nvhost_vic final : public nvdevice {
+class nvhost_vic final : public nvhost_nvdec_common {
 public:
-    explicit nvhost_vic(Core::System& system);
-    ~nvhost_vic() override;
-
+    explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
+    ~nvhost_vic();
    u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
              std::vector<u8>& output, std::vector<u8>& output2, IoctlCtrl& ctrl,
              IoctlVersion version) override;
@@ -28,74 +24,14 @@ private:
        IocGetSyncpoint = 0xC0080002,
        IocGetWaitbase = 0xC0080003,
        IocMapBuffer = 0xC01C0009,
+        IocMapBuffer2 = 0xC0340009,
+        IocMapBuffer3 = 0xC0140009,
+        IocMapBuffer4 = 0xC00C0009,
        IocMapBufferEx = 0xC03C0009,
-        IocUnmapBufferEx = 0xC03C000A,
+        IocUnmapBuffer = 0xC03C000A,
+        IocUnmapBuffer2 = 0xC034000A,
+        IocUnmapBuffer3 = 0xC00C000A,
+        IocUnmapBufferEx = 0xC01C000A,
    };
-
-    struct IoctlSetNvmapFD {
-        u32_le nvmap_fd;
-    };
-    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
-
-    struct IoctlSubmitCommandBuffer {
-        u32 id;
-        u32 offset;
-        u32 count;
-    };
-    static_assert(sizeof(IoctlSubmitCommandBuffer) == 0xC,
-                  "IoctlSubmitCommandBuffer is incorrect size");
-
-    struct IoctlSubmit {
-        u32 command_buffer_count;
-        u32 relocations_count;
-        u32 syncpt_count;
-        u32 wait_count;
-        std::array<IoctlSubmitCommandBuffer, 4> command_buffer;
-    };
-    static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size");
-
-    struct IoctlGetSyncpoint {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size");
-
-    struct IoctlGetWaitbase {
-        u32 unknown; // seems to be ignored? Nintendo added this
-        u32 value;
-    };
-    static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
-
-    struct IoctlMapBuffer {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
-
-    struct IoctlMapBufferEx {
-        u32 unknown;
-        u32 address_1;
-        u32 address_2;
-        INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size");
-
-    struct IoctlUnmapBufferEx {
-        INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure
-    };
-    static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size");
-
-    u32_le nvmap_fd{};
-
-    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
-    u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
 };
-
 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -37,6 +37,7 @@ public:
        VAddr addr;
        Status status;
        u32 refcount;
+        u32 dma_map_addr;
    };

    std::shared_ptr<Object> GetObject(u32 handle) const {
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvdrv/nvmemp.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/nvflinger/nvflinger.h"

 namespace Service::Nvidia {
@@ -36,24 +37,26 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
    nvflinger.SetNVDrvInstance(module_);
 }

-Module::Module(Core::System& system) {
+Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
    auto& kernel = system.Kernel();
    for (u32 i = 0; i < MaxNvEvents; i++) {
        std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
-        events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
+        events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
        events_interface.status[i] = EventState::Free;
        events_interface.registered[i] = false;
    }
    auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
    devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
-    devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
+    devices["/dev/nvhost-gpu"] =
+        std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
    devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
    devices["/dev/nvmap"] = nvmap_dev;
    devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
-    devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
-    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system);
+    devices["/dev/nvhost-ctrl"] =
+        std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
+    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
    devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
-    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system);
+    devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
 }

 Module::~Module() = default;
@@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
        if (events_interface.assigned_syncpt[i] == syncpoint_id &&
            events_interface.assigned_value[i] == value) {
            events_interface.LiberateEvent(i);
-            events_interface.events[i].writable->Signal();
+            events_interface.events[i].event.writable->Signal();
        }
    }
 }

 std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
-    return events_interface.events[event_id].readable;
+    return events_interface.events[event_id].event.readable;
 }

 std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
-    return events_interface.events[event_id].writable;
+    return events_interface.events[event_id].event.writable;
 }

 } // namespace Service::Nvidia
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
 #include "core/hle/service/service.h"

 namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;

 namespace Service::Nvidia {

+class SyncpointManager;
+
 namespace Devices {
 class nvdevice;
 }

+/// Represents an Nvidia event
+struct NvEvent {
+    Kernel::EventPair event;
+    Fence fence{};
+};
+
 struct EventInterface {
    // Mask representing currently busy events
    u64 events_mask{};
    // Each kernel event associated to an NV event
-    std::array<Kernel::EventPair, MaxNvEvents> events;
+    std::array<NvEvent, MaxNvEvents> events;
    // The status of the current NVEvent
    std::array<EventState, MaxNvEvents> status{};
    // Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
    std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;

 private:
+    /// Manages syncpoints on the host
+    SyncpointManager syncpoint_manager;
+
    /// Id to use for the next open file descriptor.
    u32 next_fd = 1;

--- a/src/core/hle/service/nvdrv/syncpoint_manager.cpp
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/hle/service/nvdrv/syncpoint_manager.h"
+#include "video_core/gpu.h"
+
+namespace Service::Nvidia {
+
+SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
+
+SyncpointManager::~SyncpointManager() = default;
+
+u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
+    syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
+    return GetSyncpointMin(syncpoint_id);
+}
+
+u32 SyncpointManager::AllocateSyncpoint() {
+    for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
+        if (!syncpoints[syncpoint_id].is_allocated) {
+            syncpoints[syncpoint_id].is_allocated = true;
+            return syncpoint_id;
+        }
+    }
+    UNREACHABLE_MSG("No more available syncpoints!");
+    return {};
+}
+
+u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
+    // Advance the upper bound with one atomic add instead of `value` single-step increments:
+    // the wrapped u32 result is the same, but the cost no longer scales with `value`.
+    syncpoints[syncpoint_id].max.fetch_add(value, std::memory_order_relaxed);
+
+    return GetSyncpointMax(syncpoint_id);
+}
+
+} // namespace Service::Nvidia
--- a/src/core/hle/service/nvdrv/syncpoint_manager.h
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -0,0 +1,85 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Service::Nvidia {
+
+class SyncpointManager final {
+public:
+    explicit SyncpointManager(Tegra::GPU& gpu);
+    ~SyncpointManager();
+
+    /**
+     * Returns true if the specified syncpoint is expired for the given value.
+     * @param syncpoint_id Syncpoint ID to check.
+     * @param value Value to check against the specified syncpoint.
+     * @returns True if the specified syncpoint is expired for the given value, otherwise False.
+     */
+    bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
+        return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
+    }
+
+    /**
+     * Gets the lower bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the lower bound for.
+     * @returns The lower bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMin(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Gets the upper bound for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to get the upper bound for.
+     * @returns The upper bound for the specified syncpoint.
+     */
+    u32 GetSyncpointMax(u32 syncpoint_id) const {
+        return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
+    }
+
+    /**
+     * Refreshes the minimum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be refreshed.
+     * @returns The new syncpoint minimum value.
+     */
+    u32 RefreshSyncpoint(u32 syncpoint_id);
+
+    /**
+     * Allocates a new syncpoint.
+     * @returns The syncpoint ID for the newly allocated syncpoint.
+     */
+    u32 AllocateSyncpoint();
+
+    /**
+     * Increases the maximum value for the specified syncpoint.
+     * @param syncpoint_id Syncpoint ID to be increased.
+     * @param value Value to increase the specified syncpoint by.
+     * @returns The new syncpoint maximum value.
+     */
+    u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
+
+private:
+    struct Syncpoint {
+        std::atomic<u32> min;
+        std::atomic<u32> max;
+        std::atomic<bool> is_allocated;
+    };
+
+    std::array<Syncpoint, MaxSyncPoints> syncpoints{};
+
+    Tegra::GPU& gpu;
+};
+
+} // namespace Service::Nvidia
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -29,6 +29,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
        .slot = slot,
        .status = Buffer::Status::Free,
        .igbp_buffer = igbp_buffer,
+        .transform = {},
+        .crop_rect = {},
+        .swap_interval = 0,
+        .multi_fence = {},
    });

    buffer_wait_event.writable->Signal();
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {

        const auto& igbp_buffer = buffer->get().igbp_buffer;

+        if (!system.IsPoweredOn()) {
+            return; // We are likely shutting down
+        }
+
        auto& gpu = system.GPU();
        const auto& multi_fence = buffer->get().multi_fence;
        guard->unlock();
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -202,6 +202,7 @@ SET::SET() : ServiceFramework("set") {
        {8, &SET::GetQuestFlag, "GetQuestFlag"},
        {9, &SET::GetKeyCodeMap2, "GetKeyCodeMap2"},
        {10, nullptr, "GetFirmwareVersionForDebug"},
+        {11, nullptr, "GetDeviceNickName"},
    };
    // clang-format on

--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -300,6 +300,8 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
        {198, nullptr, "SetButtonConfigRegisteredSettingsEmbedded"},
        {199, nullptr, "GetButtonConfigRegisteredSettings"},
        {200, nullptr, "SetButtonConfigRegisteredSettings"},
+        {201, nullptr, "GetFieldTestingFlag"},
+        {202, nullptr, "SetFieldTestingFlag"},
    };
    // clang-format on

--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -14,7 +14,7 @@
 namespace Settings {

 Values values = {};
-bool configuring_global = true;
+static bool configuring_global = true;

 std::string GetTimeZoneString() {
    static constexpr std::array timezones{
@@ -63,6 +63,7 @@ void LogSettings() {
    log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
    log_setting("Renderer_UseAsynchronousGpuEmulation",
                values.use_asynchronous_gpu_emulation.GetValue());
+    log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
    log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
    log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
    log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
@@ -80,11 +81,12 @@ void LogSettings() {
    log_setting("Services_BCATBoxcatLocal", values.bcat_boxcat_local);
 }

-float Volume() {
-    if (values.audio_muted) {
-        return 0.0f;
-    }
-    return values.volume.GetValue();
+bool IsConfiguringGlobal() {
+    return configuring_global;
+}
+
+void SetConfiguringGlobal(bool is_global) {
+    configuring_global = is_global;
 }

 bool IsGPULevelExtreme() {
@@ -96,6 +98,13 @@ bool IsGPULevelHigh() {
           values.gpu_accuracy.GetValue() == GPUAccuracy::High;
 }

+float Volume() {
+    if (values.audio_muted) {
+        return 0.0f;
+    }
+    return values.volume.GetValue();
+}
+
 void RestoreGlobalState() {
    // If a game is running, DO NOT restore the global settings state
    if (Core::System::GetInstance().IsPoweredOn()) {
@@ -119,6 +128,7 @@ void RestoreGlobalState() {
    values.use_disk_shader_cache.SetGlobal(true);
    values.gpu_accuracy.SetGlobal(true);
    values.use_asynchronous_gpu_emulation.SetGlobal(true);
+    values.use_nvdec_emulation.SetGlobal(true);
    values.use_vsync.SetGlobal(true);
    values.use_assembly_shaders.SetGlobal(true);
    values.use_asynchronous_shaders.SetGlobal(true);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -33,8 +33,6 @@ enum class CPUAccuracy {
    DebugMode = 2,
 };

-extern bool configuring_global;
-
 template <typename Type>
 class Setting final {
 public:
@@ -111,6 +109,7 @@ struct Values {
    Setting<bool> use_disk_shader_cache;
    Setting<GPUAccuracy> gpu_accuracy;
    Setting<bool> use_asynchronous_gpu_emulation;
+    Setting<bool> use_nvdec_emulation;
    Setting<bool> use_vsync;
    Setting<bool> use_assembly_shaders;
    Setting<bool> use_asynchronous_shaders;
@@ -197,13 +196,18 @@ struct Values {

    // Add-Ons
    std::map<u64, std::vector<std::string>> disabled_addons;
-} extern values;
+};

-float Volume();
+extern Values values;
+
+bool IsConfiguringGlobal();
+void SetConfiguringGlobal(bool is_global);

 bool IsGPULevelExtreme();
 bool IsGPULevelHigh();

+float Volume();
+
 std::string GetTimeZoneString();

 void Apply();
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -206,6 +206,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
             Settings::values.use_asynchronous_gpu_emulation.GetValue());
+    AddField(field_type, "Renderer_UseNvdecEmulation",
+             Settings::values.use_nvdec_emulation.GetValue());
    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
    AddField(field_type, "Renderer_UseAssemblyShaders",
             Settings::values.use_assembly_shaders.GetValue());
--- a/src/input_common/gcadapter/gc_adapter.cpp
+++ b/src/input_common/gcadapter/gc_adapter.cpp
@@ -21,26 +21,6 @@

 namespace GCAdapter {

-// Used to loop through and assign button in poller
-constexpr std::array<PadButton, 12> PadButtonArray{
-    PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT, PadButton::PAD_BUTTON_DOWN,
-    PadButton::PAD_BUTTON_UP,   PadButton::PAD_TRIGGER_Z,    PadButton::PAD_TRIGGER_R,
-    PadButton::PAD_TRIGGER_L,   PadButton::PAD_BUTTON_A,     PadButton::PAD_BUTTON_B,
-    PadButton::PAD_BUTTON_X,    PadButton::PAD_BUTTON_Y,     PadButton::PAD_BUTTON_START,
-};
-
-static void PadToState(const GCPadStatus& pad, GCState& out_state) {
-    for (const auto& button : PadButtonArray) {
-        const auto button_key = static_cast<u16>(button);
-        const auto button_value = (pad.button & button_key) != 0;
-        out_state.buttons.insert_or_assign(static_cast<s32>(button_key), button_value);
-    }
-
-    for (std::size_t i = 0; i < pad.axis_values.size(); ++i) {
-        out_state.axes.insert_or_assign(static_cast<u32>(i), pad.axis_values[i]);
-    }
-}
-
 Adapter::Adapter() {
    if (usb_adapter_handle != nullptr) {
        return;
@@ -49,168 +29,263 @@ Adapter::Adapter() {

    const int init_res = libusb_init(&libusb_ctx);
    if (init_res == LIBUSB_SUCCESS) {
-        Setup();
+        adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
    } else {
        LOG_ERROR(Input, "libusb could not be initialized. failed with error = {}", init_res);
    }
 }

-GCPadStatus Adapter::GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload) {
-    GCPadStatus pad = {};
-    const std::size_t offset = 1 + (9 * port);
+Adapter::~Adapter() {
+    Reset();
+}

-    adapter_controllers_status[port] = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+void Adapter::AdapterInputThread() {
+    LOG_DEBUG(Input, "GC Adapter input thread started");
+    s32 payload_size{};
+    AdapterPayload adapter_payload{};
+
+    if (adapter_scan_thread.joinable()) {
+        adapter_scan_thread.join();
+    }
+
+    while (adapter_input_thread_running) {
+        libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
+                                  static_cast<s32>(adapter_payload.size()), &payload_size, 16);
+        if (IsPayloadCorrect(adapter_payload, payload_size)) {
+            UpdateControllers(adapter_payload);
+            UpdateVibrations();
+        }
+        std::this_thread::yield();
+    }
+
+    if (restart_scan_thread) {
+        adapter_scan_thread = std::thread(&Adapter::AdapterScanThread, this);
+        restart_scan_thread = false;
+    }
+}
+
+bool Adapter::IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size) {
+    if (payload_size != static_cast<s32>(adapter_payload.size()) ||
+        adapter_payload[0] != LIBUSB_DT_HID) {
+        LOG_DEBUG(Input, "Error reading payload (size: {}, type: {:02x})", payload_size,
+                  adapter_payload[0]);
+        if (input_error_counter++ > 20) {
+            LOG_ERROR(Input, "GC adapter timeout, Is the adapter connected?");
+            adapter_input_thread_running = false;
+            restart_scan_thread = true;
+        }
+        return false;
+    }
+
+    input_error_counter = 0;
+    return true;
+}
+
+void Adapter::UpdateControllers(const AdapterPayload& adapter_payload) {
+    for (std::size_t port = 0; port < pads.size(); ++port) {
+        const std::size_t offset = 1 + (9 * port);
+        const auto type = static_cast<ControllerTypes>(adapter_payload[offset] >> 4);
+        UpdatePadType(port, type);
+        if (DeviceConnected(port)) {
+            const u8 b1 = adapter_payload[offset + 1];
+            const u8 b2 = adapter_payload[offset + 2];
+            UpdateStateButtons(port, b1, b2);
+            UpdateStateAxes(port, adapter_payload);
+            if (configuring) {
+                UpdateYuzuSettings(port);
+            }
+        }
+    }
+}
+
+void Adapter::UpdatePadType(std::size_t port, ControllerTypes pad_type) {
+    if (pads[port].type == pad_type) {
+        return;
+    }
+    // Device changed: reset the device, then set the new type
+    ResetDevice(port);
+    pads[port].type = pad_type;
+}
+
+void Adapter::UpdateStateButtons(std::size_t port, u8 b1, u8 b2) {
+    if (port >= pads.size()) {
+        return;
+    }

    static constexpr std::array<PadButton, 8> b1_buttons{
-        PadButton::PAD_BUTTON_A,    PadButton::PAD_BUTTON_B,    PadButton::PAD_BUTTON_X,
-        PadButton::PAD_BUTTON_Y,    PadButton::PAD_BUTTON_LEFT, PadButton::PAD_BUTTON_RIGHT,
-        PadButton::PAD_BUTTON_DOWN, PadButton::PAD_BUTTON_UP,
+        PadButton::ButtonA,    PadButton::ButtonB,     PadButton::ButtonX,    PadButton::ButtonY,
+        PadButton::ButtonLeft, PadButton::ButtonRight, PadButton::ButtonDown, PadButton::ButtonUp,
    };

    static constexpr std::array<PadButton, 4> b2_buttons{
-        PadButton::PAD_BUTTON_START,
-        PadButton::PAD_TRIGGER_Z,
-        PadButton::PAD_TRIGGER_R,
-        PadButton::PAD_TRIGGER_L,
+        PadButton::ButtonStart,
+        PadButton::TriggerZ,
+        PadButton::TriggerR,
+        PadButton::TriggerL,
    };
+    pads[port].buttons = 0;
+    for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
+        if ((b1 & (1U << i)) != 0) {
+            pads[port].buttons =
+                static_cast<u16>(pads[port].buttons | static_cast<u16>(b1_buttons[i]));
+            pads[port].last_button = b1_buttons[i];
+        }
+    }

+    for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
+        if ((b2 & (1U << j)) != 0) {
+            pads[port].buttons =
+                static_cast<u16>(pads[port].buttons | static_cast<u16>(b2_buttons[j]));
+            pads[port].last_button = b2_buttons[j];
+        }
+    }
+}
+
+void Adapter::UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload) {
+    if (port >= pads.size()) {
+        return;
+    }
+
+    const std::size_t offset = 1 + (9 * port);
    static constexpr std::array<PadAxes, 6> axes{
        PadAxes::StickX,    PadAxes::StickY,      PadAxes::SubstickX,
        PadAxes::SubstickY, PadAxes::TriggerLeft, PadAxes::TriggerRight,
    };

-    if (adapter_controllers_status[port] == ControllerTypes::None && !get_origin[port]) {
-        // Controller may have been disconnected, recalibrate if reconnected.
-        get_origin[port] = true;
+    for (const PadAxes axis : axes) {
+        const auto index = static_cast<std::size_t>(axis);
+        const u8 axis_value = adapter_payload[offset + 3 + index];
+        if (pads[port].axis_origin[index] == 255) {
+            pads[port].axis_origin[index] = axis_value;
+        }
+        pads[port].axis_values[index] =
+            static_cast<s16>(axis_value - pads[port].axis_origin[index]);
    }
-
-    if (adapter_controllers_status[port] != ControllerTypes::None) {
-        const u8 b1 = adapter_payload[offset + 1];
-        const u8 b2 = adapter_payload[offset + 2];
-
-        for (std::size_t i = 0; i < b1_buttons.size(); ++i) {
-            if ((b1 & (1U << i)) != 0) {
-                pad.button = static_cast<u16>(pad.button | static_cast<u16>(b1_buttons[i]));
-            }
-        }
-
-        for (std::size_t j = 0; j < b2_buttons.size(); ++j) {
-            if ((b2 & (1U << j)) != 0) {
-                pad.button = static_cast<u16>(pad.button | static_cast<u16>(b2_buttons[j]));
-            }
-        }
-        for (PadAxes axis : axes) {
-            const auto index = static_cast<std::size_t>(axis);
-            pad.axis_values[index] = adapter_payload[offset + 3 + index];
-        }
-
-        if (get_origin[port]) {
-            origin_status[port].axis_values = pad.axis_values;
-            get_origin[port] = false;
-        }
-    }
-    return pad;
 }

-void Adapter::Read() {
-    LOG_DEBUG(Input, "GC Adapter Read() thread started");
+void Adapter::UpdateYuzuSettings(std::size_t port) {
+    if (port >= pads.size()) {
+        return;
+    }

-    int payload_size;
-    std::array<u8, 37> adapter_payload;
-    std::array<GCPadStatus, 4> pads;
+    constexpr u8 axis_threshold = 50;
+    GCPadStatus pad_status = {.port = port};

-    while (adapter_thread_running) {
-        libusb_interrupt_transfer(usb_adapter_handle, input_endpoint, adapter_payload.data(),
-                                  sizeof(adapter_payload), &payload_size, 16);
+    if (pads[port].buttons != 0) {
+        pad_status.button = pads[port].last_button;
+        pad_queue.Push(pad_status);
+    }

-        if (payload_size != sizeof(adapter_payload) || adapter_payload[0] != LIBUSB_DT_HID) {
-            LOG_ERROR(Input,
-                      "Error reading payload (size: {}, type: {:02x}) Is the adapter connected?",
-                      payload_size, adapter_payload[0]);
-            adapter_thread_running = false; // error reading from adapter, stop reading.
-            break;
+    // Accounting for a threshold here to ensure an intentional press
+    for (std::size_t i = 0; i < pads[port].axis_values.size(); ++i) {
+        const s16 value = pads[port].axis_values[i];
+
+        if (value > axis_threshold || value < -axis_threshold) {
+            pad_status.axis = static_cast<PadAxes>(i);
+            pad_status.axis_value = value;
+            pad_status.axis_threshold = axis_threshold;
+            pad_queue.Push(pad_status);
        }
-        for (std::size_t port = 0; port < pads.size(); ++port) {
-            pads[port] = GetPadStatus(port, adapter_payload);
-            if (DeviceConnected(port) && configuring) {
-                if (pads[port].button != 0) {
-                    pad_queue[port].Push(pads[port]);
-                }
+    }
+}

-                // Accounting for a threshold here to ensure an intentional press
-                for (size_t i = 0; i < pads[port].axis_values.size(); ++i) {
-                    const u8 value = pads[port].axis_values[i];
-                    const u8 origin = origin_status[port].axis_values[i];
+void Adapter::UpdateVibrations() {
+    // Use 8 states to keep the switching between on/off fast enough for
+    // a human to not notice the difference between switching from on/off
+    // More states = more rumble strengths = slower update time
+    constexpr u8 vibration_states = 8;

-                    if (value > origin + pads[port].THRESHOLD ||
-                        value < origin - pads[port].THRESHOLD) {
-                        pads[port].axis = static_cast<PadAxes>(i);
-                        pads[port].axis_value = pads[port].axis_values[i];
-                        pad_queue[port].Push(pads[port]);
-                    }
-                }
-            }
-            PadToState(pads[port], state[port]);
+    vibration_counter = (vibration_counter + 1) % vibration_states;
+
+    for (GCController& pad : pads) {
+        const bool vibrate = pad.rumble_amplitude > vibration_counter;
+        vibration_changed |= vibrate != pad.enable_vibration;
+        pad.enable_vibration = vibrate;
+    }
+    SendVibrations();
+}
+
+void Adapter::SendVibrations() {
+    if (!rumble_enabled || !vibration_changed) {
+        return;
+    }
+    s32 size{};
+    constexpr u8 rumble_command = 0x11;
+    const u8 p1 = pads[0].enable_vibration;
+    const u8 p2 = pads[1].enable_vibration;
+    const u8 p3 = pads[2].enable_vibration;
+    const u8 p4 = pads[3].enable_vibration;
+    std::array<u8, 5> payload = {rumble_command, p1, p2, p3, p4};
+    const int err = libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, payload.data(),
+                                              static_cast<s32>(payload.size()), &size, 16);
+    if (err) {
+        LOG_DEBUG(Input, "Adapter libusb write failed: {}", libusb_error_name(err));
+        if (output_error_counter++ > 5) {
+            LOG_ERROR(Input, "GC adapter output timeout, Rumble disabled");
+            rumble_enabled = false;
        }
-        std::this_thread::yield();
+        return;
+    }
+    output_error_counter = 0;
+    vibration_changed = false;
+}
+
+bool Adapter::RumblePlay(std::size_t port, f32 amplitude) {
+    if (port >= pads.size()) { // Same port guard as the Update* helpers; avoids an OOB write
+        return false;
+    }
+    pads[port].rumble_amplitude = static_cast<u8>(std::clamp(amplitude, 0.0f, 1.0f) * 0x8);
+    return rumble_enabled;
+}
+
+void Adapter::AdapterScanThread() {
+    adapter_scan_thread_running = true;
+    adapter_input_thread_running = false;
+    if (adapter_input_thread.joinable()) {
+        adapter_input_thread.join();
+    }
+    ClearLibusbHandle();
+    ResetDevices();
+    while (adapter_scan_thread_running && !adapter_input_thread_running) {
+        Setup();
+        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
 }

 void Adapter::Setup() {
-    // Initialize all controllers as unplugged
-    adapter_controllers_status.fill(ControllerTypes::None);
-    // Initialize all ports to store axis origin values
-    get_origin.fill(true);
+    usb_adapter_handle = libusb_open_device_with_vid_pid(libusb_ctx, 0x057e, 0x0337);

-    // pointer to list of connected usb devices
-    libusb_device** devices{};
-
-    // populate the list of devices, get the count
-    const ssize_t device_count = libusb_get_device_list(libusb_ctx, &devices);
-    if (device_count < 0) {
-        LOG_ERROR(Input, "libusb_get_device_list failed with error: {}", device_count);
+    if (usb_adapter_handle == NULL) {
+        return;
+    }
+    if (!CheckDeviceAccess()) {
+        ClearLibusbHandle();
        return;
    }

-    if (devices != nullptr) {
-        for (std::size_t index = 0; index < static_cast<std::size_t>(device_count); ++index) {
-            if (CheckDeviceAccess(devices[index])) {
-                // GC Adapter found and accessible, registering it
-                GetGCEndpoint(devices[index]);
-                break;
-            }
-        }
-        libusb_free_device_list(devices, 1);
+    libusb_device* device = libusb_get_device(usb_adapter_handle);
+
+    LOG_INFO(Input, "GC adapter is now connected");
+    // GC Adapter found and accessible, registering it
+    if (GetGCEndpoint(device)) {
+        adapter_scan_thread_running = false;
+        adapter_input_thread_running = true;
+        rumble_enabled = true;
+        input_error_counter = 0;
+        output_error_counter = 0;
+        adapter_input_thread = std::thread(&Adapter::AdapterInputThread, this);
    }
 }

-bool Adapter::CheckDeviceAccess(libusb_device* device) {
-    libusb_device_descriptor desc;
-    const int get_descriptor_error = libusb_get_device_descriptor(device, &desc);
-    if (get_descriptor_error) {
-        // could not acquire the descriptor, no point in trying to use it.
-        LOG_ERROR(Input, "libusb_get_device_descriptor failed with error: {}",
-                  get_descriptor_error);
-        return false;
+bool Adapter::CheckDeviceAccess() {
+    // This fixes payload problems from offbrand GCAdapters
+    const s32 control_transfer_error =
+        libusb_control_transfer(usb_adapter_handle, 0x21, 11, 0x0001, 0, nullptr, 0, 1000);
+    if (control_transfer_error < 0) {
+        LOG_ERROR(Input, "libusb_control_transfer failed with error= {}", control_transfer_error);
    }

-    if (desc.idVendor != 0x057e || desc.idProduct != 0x0337) {
-        // This isn't the device we are looking for.
-        return false;
-    }
-    const int open_error = libusb_open(device, &usb_adapter_handle);
-
-    if (open_error == LIBUSB_ERROR_ACCESS) {
-        LOG_ERROR(Input, "Yuzu can not gain access to this device: ID {:04X}:{:04X}.",
-                  desc.idVendor, desc.idProduct);
-        return false;
-    }
-    if (open_error) {
-        LOG_ERROR(Input, "libusb_open failed to open device with error = {}", open_error);
-        return false;
-    }
-
-    int kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
+    s32 kernel_driver_error = libusb_kernel_driver_active(usb_adapter_handle, 0);
    if (kernel_driver_error == 1) {
        kernel_driver_error = libusb_detach_kernel_driver(usb_adapter_handle, 0);
        if (kernel_driver_error != 0 && kernel_driver_error != LIBUSB_ERROR_NOT_SUPPORTED) {
@@ -236,13 +311,13 @@ bool Adapter::CheckDeviceAccess(libusb_device* device) {
    return true;
 }

-void Adapter::GetGCEndpoint(libusb_device* device) {
+bool Adapter::GetGCEndpoint(libusb_device* device) {
    libusb_config_descriptor* config = nullptr;
    const int config_descriptor_return = libusb_get_config_descriptor(device, 0, &config);
    if (config_descriptor_return != LIBUSB_SUCCESS) {
        LOG_ERROR(Input, "libusb_get_config_descriptor failed with error = {}",
                  config_descriptor_return);
-        return;
+        return false;
    }

    for (u8 ic = 0; ic < config->bNumInterfaces; ic++) {
@@ -264,31 +339,51 @@ void Adapter::GetGCEndpoint(libusb_device* device) {
    unsigned char clear_payload = 0x13;
    libusb_interrupt_transfer(usb_adapter_handle, output_endpoint, &clear_payload,
                              sizeof(clear_payload), nullptr, 16);
-
-    adapter_thread_running = true;
-    adapter_input_thread = std::thread(&Adapter::Read, this);
+    return true;
 }

-Adapter::~Adapter() {
-    Reset();
-}
+void Adapter::JoinThreads() {
+    restart_scan_thread = false;
+    adapter_input_thread_running = false;
+    adapter_scan_thread_running = false;

-void Adapter::Reset() {
-    if (adapter_thread_running) {
-        adapter_thread_running = false;
+    if (adapter_scan_thread.joinable()) {
+        adapter_scan_thread.join();
    }
+
    if (adapter_input_thread.joinable()) {
        adapter_input_thread.join();
    }
+}

-    adapter_controllers_status.fill(ControllerTypes::None);
-    get_origin.fill(true);
-
+void Adapter::ClearLibusbHandle() {
    if (usb_adapter_handle) {
        libusb_release_interface(usb_adapter_handle, 1);
        libusb_close(usb_adapter_handle);
        usb_adapter_handle = nullptr;
    }
+}
+
+void Adapter::ResetDevices() { // Resets every entry of `pads` via ResetDevice
+    for (std::size_t port = 0; port != pads.size(); ++port) {
+        ResetDevice(port);
+    }
+}
+
+void Adapter::ResetDevice(std::size_t port) { // Restores one pad slot; `port` is not range-checked
+    pads[port].type = ControllerTypes::None; // DeviceConnected(port) reports false for None
+    pads[port].enable_vibration = false;
+    pads[port].rumble_amplitude = 0;
+    pads[port].buttons = 0;
+    pads[port].last_button = PadButton::Undefined;
+    pads[port].axis_values.fill(0);
+    pads[port].axis_origin.fill(255); // NOTE(review): 255 looks like an "origin unset" marker
+}
+
+void Adapter::Reset() {
+    JoinThreads();
+    ClearLibusbHandle();
+    ResetDevices();

    if (libusb_ctx) {
        libusb_exit(libusb_ctx);
@@ -297,11 +392,11 @@ void Adapter::Reset() {

 std::vector<Common::ParamPackage> Adapter::GetInputDevices() const {
    std::vector<Common::ParamPackage> devices;
-    for (std::size_t port = 0; port < state.size(); ++port) {
+    for (std::size_t port = 0; port < pads.size(); ++port) {
        if (!DeviceConnected(port)) {
            continue;
        }
-        std::string name = fmt::format("Gamecube Controller {}", port);
+        std::string name = fmt::format("Gamecube Controller {}", port + 1);
        devices.emplace_back(Common::ParamPackage{
            {"class", "gcpad"},
            {"display", std::move(name)},
@@ -318,18 +413,18 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
    // This list also excludes any button that can't be really mapped
    static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 12>
        switch_to_gcadapter_button = {
-            std::pair{Settings::NativeButton::A, PadButton::PAD_BUTTON_A},
-            {Settings::NativeButton::B, PadButton::PAD_BUTTON_B},
-            {Settings::NativeButton::X, PadButton::PAD_BUTTON_X},
-            {Settings::NativeButton::Y, PadButton::PAD_BUTTON_Y},
-            {Settings::NativeButton::Plus, PadButton::PAD_BUTTON_START},
-            {Settings::NativeButton::DLeft, PadButton::PAD_BUTTON_LEFT},
-            {Settings::NativeButton::DUp, PadButton::PAD_BUTTON_UP},
-            {Settings::NativeButton::DRight, PadButton::PAD_BUTTON_RIGHT},
-            {Settings::NativeButton::DDown, PadButton::PAD_BUTTON_DOWN},
-            {Settings::NativeButton::SL, PadButton::PAD_TRIGGER_L},
-            {Settings::NativeButton::SR, PadButton::PAD_TRIGGER_R},
-            {Settings::NativeButton::R, PadButton::PAD_TRIGGER_Z},
+            std::pair{Settings::NativeButton::A, PadButton::ButtonA},
+            {Settings::NativeButton::B, PadButton::ButtonB},
+            {Settings::NativeButton::X, PadButton::ButtonX},
+            {Settings::NativeButton::Y, PadButton::ButtonY},
+            {Settings::NativeButton::Plus, PadButton::ButtonStart},
+            {Settings::NativeButton::DLeft, PadButton::ButtonLeft},
+            {Settings::NativeButton::DUp, PadButton::ButtonUp},
+            {Settings::NativeButton::DRight, PadButton::ButtonRight},
+            {Settings::NativeButton::DDown, PadButton::ButtonDown},
+            {Settings::NativeButton::SL, PadButton::TriggerL},
+            {Settings::NativeButton::SR, PadButton::TriggerR},
+            {Settings::NativeButton::R, PadButton::TriggerZ},
        };
    if (!params.Has("port")) {
        return {};
@@ -352,8 +447,10 @@ InputCommon::ButtonMapping Adapter::GetButtonMappingForDevice(
    for (const auto& [switch_button, gcadapter_axis] : switch_to_gcadapter_axis) {
        Common::ParamPackage button_params({{"engine", "gcpad"}});
        button_params.Set("port", params.Get("port", 0));
-        button_params.Set("button", static_cast<int>(PadButton::PAD_STICK));
-        button_params.Set("axis", static_cast<int>(gcadapter_axis));
+        button_params.Set("button", static_cast<s32>(PadButton::Stick));
+        button_params.Set("axis", static_cast<s32>(gcadapter_axis));
+        button_params.Set("threshold", 0.5f);
+        button_params.Set("direction", "+");
        mapping.insert_or_assign(switch_button, std::move(button_params));
    }
    return mapping;
@@ -382,46 +479,33 @@ InputCommon::AnalogMapping Adapter::GetAnalogMappingForDevice(
 }

 bool Adapter::DeviceConnected(std::size_t port) const {
-    return adapter_controllers_status[port] != ControllerTypes::None;
-}
-
-void Adapter::ResetDeviceType(std::size_t port) {
-    adapter_controllers_status[port] = ControllerTypes::None;
+    return pads[port].type != ControllerTypes::None;
 }

 void Adapter::BeginConfiguration() {
-    get_origin.fill(true);
-    for (auto& pq : pad_queue) {
-        pq.Clear();
-    }
+    pad_queue.Clear();
    configuring = true;
 }

 void Adapter::EndConfiguration() {
-    for (auto& pq : pad_queue) {
-        pq.Clear();
-    }
+    pad_queue.Clear();
    configuring = false;
 }

-std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() {
+Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() {
    return pad_queue;
 }

-const std::array<Common::SPSCQueue<GCPadStatus>, 4>& Adapter::GetPadQueue() const {
+const Common::SPSCQueue<GCPadStatus>& Adapter::GetPadQueue() const {
    return pad_queue;
 }

-std::array<GCState, 4>& Adapter::GetPadState() {
-    return state;
+GCController& Adapter::GetPadState(std::size_t port) {
+    return pads.at(port);
 }

-const std::array<GCState, 4>& Adapter::GetPadState() const {
-    return state;
-}
-
-int Adapter::GetOriginValue(u32 port, u32 axis) const {
-    return origin_status[port].axis_values[axis];
+const GCController& Adapter::GetPadState(std::size_t port) const {
+    return pads.at(port);
 }

 } // namespace GCAdapter
--- a/src/input_common/gcadapter/gc_adapter.h
+++ b/src/input_common/gcadapter/gc_adapter.h
@@ -19,24 +19,23 @@ struct libusb_device_handle;
 namespace GCAdapter {

 enum class PadButton {
-    PAD_BUTTON_LEFT = 0x0001,
-    PAD_BUTTON_RIGHT = 0x0002,
-    PAD_BUTTON_DOWN = 0x0004,
-    PAD_BUTTON_UP = 0x0008,
-    PAD_TRIGGER_Z = 0x0010,
-    PAD_TRIGGER_R = 0x0020,
-    PAD_TRIGGER_L = 0x0040,
-    PAD_BUTTON_A = 0x0100,
-    PAD_BUTTON_B = 0x0200,
-    PAD_BUTTON_X = 0x0400,
-    PAD_BUTTON_Y = 0x0800,
-    PAD_BUTTON_START = 0x1000,
+    Undefined = 0x0000,
+    ButtonLeft = 0x0001,
+    ButtonRight = 0x0002,
+    ButtonDown = 0x0004,
+    ButtonUp = 0x0008,
+    TriggerZ = 0x0010,
+    TriggerR = 0x0020,
+    TriggerL = 0x0040,
+    ButtonA = 0x0100,
+    ButtonB = 0x0200,
+    ButtonX = 0x0400,
+    ButtonY = 0x0800,
+    ButtonStart = 0x1000,
    // Below is for compatibility with "AxisButton" type
-    PAD_STICK = 0x2000,
+    Stick = 0x2000,
 };

-extern const std::array<PadButton, 12> PadButtonArray;
-
 enum class PadAxes : u8 {
    StickX,
    StickY,
@@ -47,87 +46,122 @@ enum class PadAxes : u8 {
    Undefined,
 };

+enum class ControllerTypes {
+    None,
+    Wired,
+    Wireless,
+};
+
 struct GCPadStatus {
-    u16 button{}; // Or-ed PAD_BUTTON_* and PAD_TRIGGER_* bits
+    std::size_t port{};

-    std::array<u8, 6> axis_values{};    // Triggers and sticks, following indices defined in PadAxes
-    static constexpr u8 THRESHOLD = 50; // Threshold for axis press for polling
+    PadButton button{PadButton::Undefined}; // PadButton::Undefined when this status carries no button

-    u8 port{};
    PadAxes axis{PadAxes::Undefined};
-    u8 axis_value{255};
+    s16 axis_value{};
+    u8 axis_threshold{50};
 };

-struct GCState {
-    std::unordered_map<int, bool> buttons;
-    std::unordered_map<u32, u16> axes;
+struct GCController {
+    ControllerTypes type{};
+    bool enable_vibration{};
+    u8 rumble_amplitude{};
+    u16 buttons{};
+    PadButton last_button{};
+    std::array<s16, 6> axis_values{};
+    std::array<u8, 6> axis_origin{};
 };

-enum class ControllerTypes { None, Wired, Wireless };
-
 class Adapter {
 public:
-    /// Initialize the GC Adapter capture and read sequence
    Adapter();
-
-    /// Close the adapter read thread and release the adapter
    ~Adapter();
+
+    /// Request a vibration for a controller
+    bool RumblePlay(std::size_t port, f32 amplitude);
+
    /// Used for polling
    void BeginConfiguration();
    void EndConfiguration();

-    std::vector<Common::ParamPackage> GetInputDevices() const;
-    InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const;
-    InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const;
+    Common::SPSCQueue<GCPadStatus>& GetPadQueue();
+    const Common::SPSCQueue<GCPadStatus>& GetPadQueue() const;
+
+    GCController& GetPadState(std::size_t port);
+    const GCController& GetPadState(std::size_t port) const;

    /// Returns true if there is a device connected to port
    bool DeviceConnected(std::size_t port) const;

-    std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue();
-    const std::array<Common::SPSCQueue<GCPadStatus>, 4>& GetPadQueue() const;
-
-    std::array<GCState, 4>& GetPadState();
-    const std::array<GCState, 4>& GetPadState() const;
-
-    int GetOriginValue(u32 port, u32 axis) const;
+    /// Used for automapping features
+    std::vector<Common::ParamPackage> GetInputDevices() const;
+    InputCommon::ButtonMapping GetButtonMappingForDevice(const Common::ParamPackage& params) const;
+    InputCommon::AnalogMapping GetAnalogMappingForDevice(const Common::ParamPackage& params) const;

 private:
-    GCPadStatus GetPadStatus(std::size_t port, const std::array<u8, 37>& adapter_payload);
+    using AdapterPayload = std::array<u8, 37>;

-    void Read();
+    void UpdatePadType(std::size_t port, ControllerTypes pad_type);
+    void UpdateControllers(const AdapterPayload& adapter_payload);
+    void UpdateYuzuSettings(std::size_t port);
+    void UpdateStateButtons(std::size_t port, u8 b1, u8 b2);
+    void UpdateStateAxes(std::size_t port, const AdapterPayload& adapter_payload);
+    void UpdateVibrations();

-    /// Resets status of device connected to port
-    void ResetDeviceType(std::size_t port);
+    void AdapterInputThread();

-    /// Returns true if we successfully gain access to GC Adapter
-    bool CheckDeviceAccess(libusb_device* device);
+    void AdapterScanThread();

-    /// Captures GC Adapter endpoint address,
-    void GetGCEndpoint(libusb_device* device);
+    bool IsPayloadCorrect(const AdapterPayload& adapter_payload, s32 payload_size);

-    /// For shutting down, clear all data, join all threads, release usb
-    void Reset();
+    // Updates vibration state of all controllers
+    void SendVibrations();

    /// For use in initialization, querying devices to find the adapter
    void Setup();

+    /// Resets status of all GC controller devices to a disconnected state
+    void ResetDevices();
+
+    /// Resets status of the device on the given port to a disconnected state
+    void ResetDevice(std::size_t port);
+
+    /// Returns true if we successfully gain access to GC Adapter
+    bool CheckDeviceAccess();
+
+    /// Captures GC Adapter endpoint address
+    /// Returns true if the endpoint was set correctly
+    bool GetGCEndpoint(libusb_device* device);
+
+    /// For shutting down, clear all data, join all threads, release usb
+    void Reset();
+
+    // Join all threads
+    void JoinThreads();
+
+    // Release usb handles
+    void ClearLibusbHandle();
+
    libusb_device_handle* usb_adapter_handle = nullptr;
+    std::array<GCController, 4> pads;
+    Common::SPSCQueue<GCPadStatus> pad_queue;

    std::thread adapter_input_thread;
-    bool adapter_thread_running;
+    std::thread adapter_scan_thread;
+    bool adapter_input_thread_running;
+    bool adapter_scan_thread_running;
+    bool restart_scan_thread;

    libusb_context* libusb_ctx;

-    u8 input_endpoint = 0;
-    u8 output_endpoint = 0;
+    u8 input_endpoint{0};
+    u8 output_endpoint{0};
+    u8 input_error_counter{0};
+    u8 output_error_counter{0};
+    int vibration_counter{0};

-    bool configuring = false;
-
-    std::array<GCState, 4> state;
-    std::array<bool, 4> get_origin;
-    std::array<GCPadStatus, 4> origin_status;
-    std::array<Common::SPSCQueue<GCPadStatus>, 4> pad_queue;
-    std::array<ControllerTypes, 4> adapter_controllers_status{};
+    bool configuring{false};
+    bool rumble_enabled{true};
+    bool vibration_changed{true};
 };
-
 } // namespace GCAdapter
--- a/src/input_common/gcadapter/gc_poller.cpp
+++ b/src/input_common/gcadapter/gc_poller.cpp
@@ -15,22 +15,30 @@ namespace InputCommon {

 class GCButton final : public Input::ButtonDevice {
 public:
-    explicit GCButton(u32 port_, int button_, const GCAdapter::Adapter* adapter)
+    explicit GCButton(u32 port_, s32 button_, GCAdapter::Adapter* adapter)
        : port(port_), button(button_), gcadapter(adapter) {}

    ~GCButton() override;

    bool GetStatus() const override {
        if (gcadapter->DeviceConnected(port)) {
-            return gcadapter->GetPadState()[port].buttons.at(button);
+            return (gcadapter->GetPadState(port).buttons & button) != 0;
        }
        return false;
    }

+    bool SetRumblePlay(f32 amp_high, f32 amp_low, f32 freq_high, f32 freq_low) const override {
+        const float amp_sum = amp_high + amp_low; // frequencies are not used here
+        const float amplitude = amp_sum > 2.0f ? 1.0f : amp_sum * 0.5f; // mean, 1.0 if sum > 2
+        const auto new_amp =
+            static_cast<f32>(pow(amplitude, 0.5f) * (3.0f - 2.0f * pow(amplitude, 0.15f)));
+        return gcadapter->RumblePlay(port, new_amp);
+    }
+
 private:
    const u32 port;
-    const int button;
-    const GCAdapter::Adapter* gcadapter;
+    const s32 button;
+    GCAdapter::Adapter* gcadapter;
 };

 class GCAxisButton final : public Input::ButtonDevice {
@@ -38,13 +46,12 @@ public:
    explicit GCAxisButton(u32 port_, u32 axis_, float threshold_, bool trigger_if_greater_,
                          const GCAdapter::Adapter* adapter)
        : port(port_), axis(axis_), threshold(threshold_), trigger_if_greater(trigger_if_greater_),
-          gcadapter(adapter),
-          origin_value(static_cast<float>(adapter->GetOriginValue(port_, axis_))) {}
+          gcadapter(adapter) {}

    bool GetStatus() const override {
        if (gcadapter->DeviceConnected(port)) {
-            const float current_axis_value = gcadapter->GetPadState()[port].axes.at(axis);
-            const float axis_value = (current_axis_value - origin_value) / 128.0f;
+            const float current_axis_value = gcadapter->GetPadState(port).axis_values.at(axis);
+            const float axis_value = current_axis_value / 128.0f;
            if (trigger_if_greater) {
                // TODO: Might be worthwile to set a slider for the trigger threshold. It is
                // currently always set to 0.5 in configure_input_player.cpp ZL/ZR HandleClick
@@ -61,7 +68,6 @@ private:
    float threshold;
    bool trigger_if_greater;
    const GCAdapter::Adapter* gcadapter;
-    const float origin_value;
 };

 GCButtonFactory::GCButtonFactory(std::shared_ptr<GCAdapter::Adapter> adapter_)
@@ -73,7 +79,7 @@ std::unique_ptr<Input::ButtonDevice> GCButtonFactory::Create(const Common::Param
    const auto button_id = params.Get("button", 0);
    const auto port = static_cast<u32>(params.Get("port", 0));

-    constexpr int PAD_STICK_ID = static_cast<u16>(GCAdapter::PadButton::PAD_STICK);
+    constexpr s32 PAD_STICK_ID = static_cast<s32>(GCAdapter::PadButton::Stick);

    // button is not an axis/stick button
    if (button_id != PAD_STICK_ID) {
@@ -106,32 +112,25 @@ Common::ParamPackage GCButtonFactory::GetNextInput() const {
    Common::ParamPackage params;
    GCAdapter::GCPadStatus pad;
    auto& queue = adapter->GetPadQueue();
-    for (std::size_t port = 0; port < queue.size(); ++port) {
-        while (queue[port].Pop(pad)) {
-            // This while loop will break on the earliest detected button
-            params.Set("engine", "gcpad");
-            params.Set("port", static_cast<int>(port));
-            for (const auto& button : GCAdapter::PadButtonArray) {
-                const u16 button_value = static_cast<u16>(button);
-                if (pad.button & button_value) {
-                    params.Set("button", button_value);
-                    break;
-                }
-            }
+    while (queue.Pop(pad)) {
+        // This while loop will break on the earliest detected button
+        params.Set("engine", "gcpad");
+        params.Set("port", static_cast<s32>(pad.port));
+        if (pad.button != GCAdapter::PadButton::Undefined) {
+            params.Set("button", static_cast<u16>(pad.button));
+        }

-            // For Axis button implementation
-            if (pad.axis != GCAdapter::PadAxes::Undefined) {
-                params.Set("axis", static_cast<u8>(pad.axis));
-                params.Set("button", static_cast<u16>(GCAdapter::PadButton::PAD_STICK));
-                if (pad.axis_value > 128) {
-                    params.Set("direction", "+");
-                    params.Set("threshold", "0.25");
-                } else {
-                    params.Set("direction", "-");
-                    params.Set("threshold", "-0.25");
-                }
-                break;
+        // For Axis button implementation
+        if (pad.axis != GCAdapter::PadAxes::Undefined) {
+            params.Set("axis", static_cast<u8>(pad.axis));
+            params.Set("button", static_cast<u16>(GCAdapter::PadButton::Stick));
+            params.Set("threshold", "0.25");
+            if (pad.axis_value > 0) {
+                params.Set("direction", "+");
+            } else {
+                params.Set("direction", "-");
            }
+            break;
        }
    }
    return params;
@@ -152,17 +151,14 @@ public:
    explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_,
                      const GCAdapter::Adapter* adapter, float range_)
        : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter),
-          origin_value_x(static_cast<float>(adapter->GetOriginValue(port_, axis_x_))),
-          origin_value_y(static_cast<float>(adapter->GetOriginValue(port_, axis_y_))),
          range(range_) {}

    float GetAxis(u32 axis) const {
        if (gcadapter->DeviceConnected(port)) {
            std::lock_guard lock{mutex};
-            const auto origin_value = axis % 2 == 0 ? origin_value_x : origin_value_y;
            const auto axis_value =
-                static_cast<float>(gcadapter->GetPadState()[port].axes.at(axis));
-            return (axis_value - origin_value) / (100.0f * range);
+                static_cast<float>(gcadapter->GetPadState(port).axis_values.at(axis));
+            return (axis_value) / (100.0f * range);
        }
        return 0.0f;
    }
@@ -215,8 +211,6 @@ private:
    const u32 axis_y;
    const float deadzone;
    const GCAdapter::Adapter* gcadapter;
-    const float origin_value_x;
-    const float origin_value_y;
    const float range;
    mutable std::mutex mutex;
 };
@@ -254,26 +248,44 @@ void GCAnalogFactory::EndConfiguration() {

 Common::ParamPackage GCAnalogFactory::GetNextInput() {
    GCAdapter::GCPadStatus pad;
+    Common::ParamPackage params;
    auto& queue = adapter->GetPadQueue();
-    for (std::size_t port = 0; port < queue.size(); ++port) {
-        while (queue[port].Pop(pad)) {
-            if (pad.axis == GCAdapter::PadAxes::Undefined ||
-                std::abs((static_cast<float>(pad.axis_value) - 128.0f) / 128.0f) < 0.1f) {
-                continue;
-            }
-            // An analog device needs two axes, so we need to store the axis for later and wait for
-            // a second input event. The axes also must be from the same joystick.
-            const u8 axis = static_cast<u8>(pad.axis);
-            if (analog_x_axis == -1) {
-                analog_x_axis = axis;
-                controller_number = static_cast<int>(port);
-            } else if (analog_y_axis == -1 && analog_x_axis != axis &&
-                       controller_number == static_cast<int>(port)) {
-                analog_y_axis = axis;
-            }
+    while (queue.Pop(pad)) {
+        if (pad.button != GCAdapter::PadButton::Undefined) {
+            params.Set("engine", "gcpad");
+            params.Set("port", static_cast<s32>(pad.port));
+            params.Set("button", static_cast<u16>(pad.button));
+            return params;
+        }
+        if (pad.axis == GCAdapter::PadAxes::Undefined ||
+            std::abs(static_cast<float>(pad.axis_value) / 128.0f) < 0.1f) {
+            continue;
+        }
+        // An analog device needs two axes, so we need to store the axis for later and wait for
+        // a second input event. The axes also must be from the same joystick.
+        const u8 axis = static_cast<u8>(pad.axis);
+        if (axis == 0 || axis == 1) {
+            analog_x_axis = 0;
+            analog_y_axis = 1;
+            controller_number = static_cast<s32>(pad.port);
+            break;
+        }
+        if (axis == 2 || axis == 3) {
+            analog_x_axis = 2;
+            analog_y_axis = 3;
+            controller_number = static_cast<s32>(pad.port);
+            break;
+        }
+
+        if (analog_x_axis == -1) {
+            analog_x_axis = axis;
+            controller_number = static_cast<s32>(pad.port);
+        } else if (analog_y_axis == -1 && analog_x_axis != axis &&
+                   controller_number == static_cast<s32>(pad.port)) {
+            analog_y_axis = axis;
+            break;
        }
    }
-    Common::ParamPackage params;
    if (analog_x_axis != -1 && analog_y_axis != -1) {
        params.Set("engine", "gcpad");
        params.Set("port", controller_number);
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -6,18 +6,40 @@
 #include <cstdlib>
 #include <functional>
 #include <memory>
+#include <mutex>
+#include <stdexcept>
 #include <thread>
 #include <unordered_map>
 #include <vector>

 #include <catch2/catch.hpp>
-#include <math.h>
+
 #include "common/common_types.h"
 #include "common/fiber.h"
-#include "common/spin_lock.h"

 namespace Common {

+class ThreadIds { // Mutex-guarded map from std::thread::id to a caller-supplied u32 id
+public:
+    void Register(u32 id) { // Binds the calling thread to `id`
+        const auto thread_id = std::this_thread::get_id();
+        std::scoped_lock lock{mutex};
+        if (ids.contains(thread_id)) { // Each thread may be registered only once
+            throw std::logic_error{"Registering the same thread twice"};
+        }
+        ids.emplace(thread_id, id);
+    }
+
+    [[nodiscard]] u32 Get() const { // Returns the id the calling thread registered
+        std::scoped_lock lock{mutex};
+        return ids.at(std::this_thread::get_id()); // at() throws if this thread never registered
+    }
+
+private:
+    mutable std::mutex mutex; // mutable so that the const Get() can lock it
+    std::unordered_map<std::thread::id, u32> ids;
+};
+
 class TestControl1 {
 public:
    TestControl1() = default;
@@ -26,7 +48,7 @@ public:

    void ExecuteThread(u32 id);

-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
    std::vector<u32> items;
@@ -39,8 +61,7 @@ static void WorkControl1(void* control) {
 }

 void TestControl1::DoWork() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    u32 value = items[id];
    for (u32 i = 0; i < id; i++) {
        value++;
@@ -50,8 +71,7 @@ void TestControl1::DoWork() {
 }

 void TestControl1::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
    work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
@@ -98,8 +118,7 @@ public:
            value1 += i;
        }
        Fiber::YieldTo(fiber1, fiber3);
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        assert1 = id == 1;
        value2 += 5000;
        Fiber::YieldTo(fiber1, thread_fibers[id]);
@@ -115,8 +134,7 @@ public:
    }

    void DoWork3() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        assert2 = id == 0;
        value1 += 1000;
        Fiber::YieldTo(fiber3, thread_fibers[id]);
@@ -125,14 +143,12 @@ public:
    void ExecuteThread(u32 id);

    void CallFiber1() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber1);
    }

    void CallFiber2() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber2);
    }

@@ -145,7 +161,7 @@ public:
    u32 value2{};
    std::atomic<bool> trap{true};
    std::atomic<bool> trap2{true};
-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::shared_ptr<Common::Fiber> fiber1;
    std::shared_ptr<Common::Fiber> fiber2;
@@ -168,15 +184,13 @@ static void WorkControl2_3(void* control) {
 }

 void TestControl2::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
 }

 void TestControl2::Exit() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    thread_fibers[id]->Exit();
 }

@@ -228,24 +242,21 @@ public:
    void DoWork1() {
        value1 += 1;
        Fiber::YieldTo(fiber1, fiber2);
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        value3 += 1;
        Fiber::YieldTo(fiber1, thread_fibers[id]);
    }

    void DoWork2() {
        value2 += 1;
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(fiber2, thread_fibers[id]);
    }

    void ExecuteThread(u32 id);

    void CallFiber1() {
-        std::thread::id this_id = std::this_thread::get_id();
-        u32 id = ids[this_id];
+        const u32 id = thread_ids.Get();
        Fiber::YieldTo(thread_fibers[id], fiber1);
    }

@@ -254,7 +265,7 @@ public:
    u32 value1{};
    u32 value2{};
    u32 value3{};
-    std::unordered_map<std::thread::id, u32> ids;
+    ThreadIds thread_ids;
    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
    std::shared_ptr<Common::Fiber> fiber1;
    std::shared_ptr<Common::Fiber> fiber2;
@@ -271,15 +282,13 @@ static void WorkControl3_2(void* control) {
 }

 void TestControl3::ExecuteThread(u32 id) {
-    std::thread::id this_id = std::this_thread::get_id();
-    ids[this_id] = id;
+    thread_ids.Register(id);
    auto thread_fiber = Fiber::ThreadToFiber();
    thread_fibers[id] = thread_fiber;
 }

 void TestControl3::Exit() {
-    std::thread::id this_id = std::this_thread::get_id();
-    u32 id = ids[this_id];
+    const u32 id = thread_ids.Get();
    thread_fibers[id]->Exit();
 }

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,24 @@ add_library(video_core STATIC
    buffer_cache/buffer_cache.h
    buffer_cache/map_interval.cpp
    buffer_cache/map_interval.h
+    cdma_pusher.cpp
+    cdma_pusher.h
+    command_classes/codecs/codec.cpp
+    command_classes/codecs/codec.h
+    command_classes/codecs/h264.cpp
+    command_classes/codecs/h264.h
+    command_classes/codecs/vp9.cpp
+    command_classes/codecs/vp9.h
+    command_classes/codecs/vp9_types.h
+    command_classes/host1x.cpp
+    command_classes/host1x.h
+    command_classes/nvdec.cpp
+    command_classes/nvdec.h
+    command_classes/nvdec_common.h
+    command_classes/sync_manager.cpp
+    command_classes/sync_manager.h
+    command_classes/vic.cpp
+    command_classes/vic.h
    compatible_formats.cpp
    compatible_formats.h
    dirty_flags.cpp
@@ -250,6 +268,14 @@ create_target_directory_groups(video_core)
 target_link_libraries(video_core PUBLIC common core)
 target_link_libraries(video_core PRIVATE glad xbyak)

+# FFmpeg headers are added for every toolchain; only the libraries linked differ per branch.
+target_include_directories(video_core PRIVATE ${FFMPEG_INCLUDE_DIR})
+if (MSVC)
+    target_link_libraries(video_core PUBLIC ${FFMPEG_LIBRARY_DIR}/swscale.lib ${FFMPEG_LIBRARY_DIR}/avcodec.lib ${FFMPEG_LIBRARY_DIR}/avutil.lib)
+else()
+    target_link_libraries(video_core PRIVATE ${FFMPEG_LIBRARIES})
+endif()
+
 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

@@ -276,7 +302,10 @@ else()
    target_compile_options(video_core PRIVATE
        -Werror=conversion
        -Wno-error=sign-conversion
+        -Werror=pessimizing-move
+        -Werror=redundant-move
        -Werror=switch
+        -Werror=type-limits
        -Werror=unused-variable

        $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -0,0 +1,171 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include "command_classes/host1x.h"
+#include "command_classes/nvdec.h"
+#include "command_classes/vic.h"
+#include "common/bit_util.h"
+#include "video_core/cdma_pusher.h"
+#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+CDmaPusher::CDmaPusher(GPU& gpu)
+    : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)),
+      vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
+      host1x_processor(std::make_unique<Host1x>(gpu)),
+      nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
+      vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
+
+CDmaPusher::~CDmaPusher() = default;
+
+/// Appends a command list to the back of the queue; the contents of `entries` are moved
+/// into the queue, not copied.
+void CDmaPusher::Push(ChCommandHeaderList&& entries) {
+    cdma_queue.push(std::move(entries));
+}
+
+/// Drains the queue, processing one queued command list per Step() call.
+void CDmaPusher::DispatchCalls() {
+    while (!cdma_queue.empty()) {
+        Step();
+    }
+}
+
+/// Pops the oldest command list from the queue and feeds every word of it through the
+/// channel command decoder.
+///
+/// Each word is interpreted according to the state left behind by earlier words:
+///  - while `mask` has bits set, the word is the argument of method
+///    `offset + <index of lowest set mask bit>`, and that bit is cleared;
+///  - otherwise, while `count` is non-zero, the word is the argument of method `offset`
+///    (`offset` advances after each word only in incrementing mode);
+///  - otherwise the word is a command header whose top four bits select the submission mode.
+/// The decoder state lives in members, so it carries over from one command list to the next.
+/// Does nothing when the queue is empty.
+void CDmaPusher::Step() {
+    // Step() is public; front()/pop() on an empty queue would be undefined behaviour.
+    if (cdma_queue.empty()) {
+        return;
+    }
+
+    // Move the list out of the queue instead of copying it; the slot is popped right away.
+    const ChCommandHeaderList entries = std::move(cdma_queue.front());
+    cdma_queue.pop();
+
+    for (const ChCommandHeader& entry : entries) {
+        // Read the raw word in place; no intermediate u32 copy of the whole list is needed.
+        const u32 value = entry.raw;
+
+        if (mask != 0) {
+            const u32 lbs = Common::CountTrailingZeroes32(mask);
+            mask &= ~(1U << lbs);
+            ExecuteCommand(static_cast<u32>(offset + lbs), value);
+            continue;
+        } else if (count != 0) {
+            --count;
+            ExecuteCommand(static_cast<u32>(offset), value);
+            if (incrementing) {
+                ++offset;
+            }
+            continue;
+        }
+
+        // No arguments pending: this word is a command header.
+        const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
+        switch (mode) {
+        case ChSubmissionMode::SetClass: {
+            mask = value & 0x3f;
+            offset = (value >> 16) & 0xfff;
+            current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
+            break;
+        }
+        case ChSubmissionMode::Incrementing:
+        case ChSubmissionMode::NonIncrementing:
+            count = value & 0xffff;
+            offset = (value >> 16) & 0xfff;
+            incrementing = mode == ChSubmissionMode::Incrementing;
+            break;
+        case ChSubmissionMode::Mask:
+            mask = value & 0xffff;
+            offset = (value >> 16) & 0xfff;
+            break;
+        case ChSubmissionMode::Immediate: {
+            // The argument is carried in the header itself, so execute straight away.
+            const u32 data = value & 0xfff;
+            offset = (value >> 16) & 0xfff;
+            ExecuteCommand(static_cast<u32>(offset), data);
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
+            break;
+        }
+    }
+}
+
+/// Executes one method write for the currently selected class.
+///
+/// For the NVDEC and VIC classes the value is first stored into that class's THI register
+/// block at word `offset`; then two offsets trigger further work:
+///  - IncSyncpt: bits 0-7 of `data` are the syncpoint id and bits 8-15 the condition.
+///    Condition 0 increments immediately, any other value goes through the
+///    IncrementWhenDone/SignalDone pair.
+///  - SetMethod1: forwards `data` to the class processor, using the value previously
+///    written to `method_0` as the method id.
+/// For the Host1x class the offset itself is forwarded as the method id.
+/// Any other class hits UNIMPLEMENTED_MSG.
+///
+/// Note: the `offset` parameter shadows the member of the same name inside this function.
+void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
+    switch (current_class) {
+    case ChClassId::NvDec:
+        ThiStateWrite(nvdec_thi_state, offset, {data});
+        switch (static_cast<ThiMethod>(offset)) {
+        case ThiMethod::IncSyncpt: {
+            LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
+            const auto syncpoint_id = static_cast<u32>(data & 0xFF);
+            const auto cond = static_cast<u32>((data >> 8) & 0xFF);
+            if (cond == 0) {
+                nvdec_sync->Increment(syncpoint_id);
+            } else {
+                // NOTE(review): SignalDone() is given the syncpoint id here; confirm against
+                // SyncptIncrManager whether it expects the value returned by
+                // IncrementWhenDone() instead.
+                nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
+                nvdec_sync->SignalDone(syncpoint_id);
+            }
+            break;
+        }
+        case ThiMethod::SetMethod1:
+            LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
+                      static_cast<u32>(nvdec_thi_state.method_0));
+            nvdec_processor->ProcessMethod(
+                static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data});
+            break;
+        default:
+            // Every other offset only updates the register block above.
+            break;
+        }
+        break;
+    case ChClassId::GraphicsVic:
+        ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data});
+        switch (static_cast<ThiMethod>(offset)) {
+        case ThiMethod::IncSyncpt: {
+            LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
+            const auto syncpoint_id = static_cast<u32>(data & 0xFF);
+            const auto cond = static_cast<u32>((data >> 8) & 0xFF);
+            if (cond == 0) {
+                vic_sync->Increment(syncpoint_id);
+            } else {
+                // NOTE(review): same question as in the NVDEC branch about the argument
+                // passed to SignalDone().
+                vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
+                vic_sync->SignalDone(syncpoint_id);
+            }
+            break;
+        }
+        case ThiMethod::SetMethod1:
+            LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
+                      static_cast<u32>(vic_thi_state.method_0), data);
+            vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0),
+                                         {data});
+            break;
+        default:
+            // Every other offset only updates the register block above.
+            break;
+        }
+        break;
+    case ChClassId::Host1x:
+        // This device is mainly for syncpoint synchronization
+        LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
+        host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data});
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
+        break;
+    }
+}
+
+/// Copies `arguments` into `state`, starting at word index `offset`.
+///
+/// `offset` comes from the command stream (up to 12 bits in Step()), which is far larger
+/// than the register block, so a write that would not fit entirely inside ThiRegisters is
+/// logged and dropped instead of overwriting whatever follows the struct in memory.
+void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) {
+    const auto byte_offset = sizeof(u32) * offset;
+    const auto byte_count = sizeof(u32) * arguments.size();
+    // Written as two comparisons so that the subtraction can never underflow.
+    if (byte_offset > sizeof(ThiRegisters) || byte_count > sizeof(ThiRegisters) - byte_offset) {
+        LOG_ERROR(Service_NVDRV, "Ignoring out-of-range THI register write, offset=0x{:X} words={}",
+                  offset, arguments.size());
+        return;
+    }
+
+    u8* const state_offset = reinterpret_cast<u8*>(&state) + byte_offset;
+    std::memcpy(state_offset, arguments.data(), byte_count);
+}
+
+} // namespace Tegra
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -0,0 +1,138 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <queue>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/command_classes/sync_manager.h"
+
+namespace Tegra {
+
+class GPU;
+class Nvdec;
+class Vic;
+class Host1x;
+
+/// Submission mode carried in the top four bits of a channel command header.
+/// CDmaPusher::Step() handles SetClass through Immediate; Restart and Gather fall through
+/// to its unimplemented-mode path.
+enum class ChSubmissionMode : u32 {
+    SetClass = 0,
+    Incrementing = 1,
+    NonIncrementing = 2,
+    Mask = 3,
+    Immediate = 4,
+    Restart = 5,
+    Gather = 6,
+};
+
+/// Class identifier selected by a SetClass command header.
+/// CDmaPusher::ExecuteCommand() dispatches Host1x, GraphicsVic and NvDec; every other
+/// value ends up in its unimplemented-class path.
+enum class ChClassId : u32 {
+    NoClass = 0x0,
+    Host1x = 0x1,
+    VideoEncodeMpeg = 0x20,
+    VideoEncodeNvEnc = 0x21,
+    VideoStreamingVi = 0x30,
+    VideoStreamingIsp = 0x32,
+    VideoStreamingIspB = 0x34,
+    VideoStreamingViI2c = 0x36,
+    GraphicsVic = 0x5d,
+    Graphics3D = 0x60,
+    GraphicsGpu = 0x61,
+    Tsec = 0xe0,
+    TsecB = 0xe1,
+    NvJpg = 0xc0,
+    NvDec = 0xf0
+};
+
+/// Method identifiers; this is the value type of ChCommandHeader::method_offset.
+enum class ChMethod : u32 {
+    Empty = 0,
+    SetMethod = 0x10,
+    SetData = 0x11,
+};
+
+/// One 32-bit word of a channel command list, viewable either raw or as header fields.
+/// The field positions match the manual decoding done in CDmaPusher::Step().
+union ChCommandHeader {
+    u32 raw;
+    // Bits 0-15.
+    BitField<0, 16, u32> value;
+    // Bits 16-27.
+    BitField<16, 12, ChMethod> method_offset;
+    // Bits 28-31.
+    BitField<28, 4, ChSubmissionMode> submission_mode;
+};
+static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size");
+
+/// A decoded command: target class, method offset and the argument words for it.
+struct ChCommand {
+    ChClassId class_id{};
+    int method_offset{};
+    std::vector<u32> arguments;
+};
+
+// List types for raw command words (as queued by CDmaPusher::Push) and decoded commands.
+using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>;
+using ChCommandList = std::vector<Tegra::ChCommand>;
+
+/// THI register block kept per class (one instance for NVDEC, one for VIC).
+/// CDmaPusher::ThiStateWrite() writes into it by 32-bit word index, and ThiMethod derives
+/// its values from these field offsets, so the order and padding here must not change.
+struct ThiRegisters {
+    u32_le increment_syncpt{};
+    INSERT_PADDING_WORDS(1);
+    u32_le increment_syncpt_error{};
+    u32_le ctx_switch_incremement_syncpt{};
+    INSERT_PADDING_WORDS(4);
+    u32_le ctx_switch{};
+    INSERT_PADDING_WORDS(1);
+    u32_le ctx_syncpt_eof{};
+    INSERT_PADDING_WORDS(5);
+    // Method id latched here is used when a later write to method_1 supplies the argument.
+    u32_le method_0{};
+    u32_le method_1{};
+    INSERT_PADDING_WORDS(12);
+    u32_le int_status{};
+    u32_le int_mask{};
+};
+
+/// Word offsets of the ThiRegisters fields, computed from the struct layout itself.
+/// CDmaPusher::ExecuteCommand() switches on IncSyncpt and SetMethod1.
+enum class ThiMethod : u32 {
+    IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32),
+    SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32),
+    SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32),
+};
+
+/// Queues channel command lists and decodes them into method writes for the NVDEC, VIC
+/// and Host1x command classes.
+class CDmaPusher {
+public:
+    explicit CDmaPusher(GPU& gpu);
+    ~CDmaPusher();
+
+    /// Push NVDEC command buffer entries into queue
+    void Push(ChCommandHeaderList&& entries);
+
+    /// Process queued command buffer entries
+    void DispatchCalls();
+
+    /// Process one queue element
+    void Step();
+
+    /// Invoke command class devices to execute the command based on the current state
+    void ExecuteCommand(u32 offset, u32 data);
+
+private:
+    /// Write arguments value to the ThiRegisters member at the specified offset
+    void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments);
+
+    // Declared first: the constructor passes it to every processor created below.
+    GPU& gpu;
+
+    // Shared because the VIC processor is constructed with it as well.
+    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+    std::unique_ptr<Tegra::Vic> vic_processor;
+    std::unique_ptr<Tegra::Host1x> host1x_processor;
+    std::unique_ptr<SyncptIncrManager> nvdec_sync;
+    std::unique_ptr<SyncptIncrManager> vic_sync;
+    // Class selected by the most recent SetClass header.
+    ChClassId current_class{};
+    ThiRegisters vic_thi_state{};
+    ThiRegisters nvdec_thi_state{};
+
+    // Decoder state that Step() carries from one command word to the next:
+    // argument words still expected, current method offset, and pending method mask bits.
+    s32 count{};
+    s32 offset{};
+    s32 mask{};
+    // True when `offset` advances after each argument word.
+    bool incrementing{};
+
+    // Queue of command lists to be processed
+    std::queue<ChCommandHeaderList> cdma_queue;
+};
+
+} // namespace Tegra
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -0,0 +1,115 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <fstream>
+#include <vector>
+#include "common/assert.h"
+#include "video_core/command_classes/codecs/codec.h"
+#include "video_core/command_classes/codecs/h264.h"
+#include "video_core/command_classes/codecs/vp9.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+extern "C" {
+#include <libavutil/opt.h>
+}
+
+namespace Tegra {
+
+/// Stores the GPU reference and creates the H.264 and VP9 frame-header composers.
+/// No libav objects are created here; Decode() sets them up on its first call.
+Codec::Codec(GPU& gpu_)
+    : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)),
+      vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
+
+/// Releases the libav decoder state, if Decode() ever managed to create it.
+Codec::~Codec() {
+    if (!initialized) {
+        return;
+    }
+    // Drain the decoder before tearing it down; a null packet puts it into draining mode.
+    avcodec_send_packet(av_codec_ctx, nullptr);
+    avcodec_receive_frame(av_codec_ctx, av_frame);
+    avcodec_flush_buffers(av_codec_ctx);
+
+    // av_frame_free() unreferences the frame's buffers, frees the AVFrame and nulls the pointer.
+    av_frame_free(&av_frame);
+    // avcodec_free_context() closes the codec and also frees the context obtained from
+    // avcodec_alloc_context3(); avcodec_close() on its own leaves that allocation behind.
+    avcodec_free_context(&av_codec_ctx);
+}
+
+/// Records which codec subsequent Decode() calls should use and logs the selection.
+/// Only the stored value changes here; an already opened libav decoder is left untouched.
+void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
+    LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
+    current_codec = codec;
+}
+
+/// Stores `arguments` into the NVDEC register state at 64-bit slot index `offset`.
+///
+/// A slot that does not lie completely inside `state` is logged and ignored rather than
+/// written, so an unexpected offset cannot overwrite neighbouring members.
+void Codec::StateWrite(u32 offset, u64 arguments) {
+    // Widen before multiplying so the byte offset cannot wrap in 32 bits.
+    const auto byte_offset = static_cast<u64>(offset) * sizeof(u64);
+    if (byte_offset + sizeof(u64) > sizeof(state)) {
+        LOG_ERROR(Service_NVDRV, "Ignoring out-of-range NVDEC state write, offset=0x{:X}", offset);
+        return;
+    }
+
+    u8* const state_offset = reinterpret_cast<u8*>(&state) + byte_offset;
+    std::memcpy(state_offset, &arguments, sizeof(u64));
+}
+
+/// Decodes the frame described by the current register state.
+///
+/// The first call opens a libavcodec decoder for `current_codec`; if that fails the
+/// function logs the reason, releases whatever it allocated and returns, so a later call
+/// starts over. Every call then asks the matching header composer for the frame bitstream,
+/// sends it to the decoder and, unless VP9 reported a hidden frame, receives the decoded
+/// picture into `av_frame`.
+void Codec::Decode() {
+    bool is_first_frame = false;
+
+    if (!initialized) {
+        if (current_codec == NvdecCommon::VideoCodec::H264) {
+            av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+        } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
+            av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9);
+        } else {
+            LOG_ERROR(Service_NVDRV, "Unknown video codec {}", static_cast<u32>(current_codec));
+            return;
+        }
+        // The linked FFmpeg build may have been configured without this decoder.
+        if (av_codec == nullptr) {
+            LOG_ERROR(Service_NVDRV, "No libavcodec decoder available for video codec {}",
+                      static_cast<u32>(current_codec));
+            return;
+        }
+
+        av_codec_ctx = avcodec_alloc_context3(av_codec);
+        av_frame = av_frame_alloc();
+        if (av_codec_ctx == nullptr || av_frame == nullptr) {
+            LOG_ERROR(Service_NVDRV, "Failed to allocate libav decoder state.");
+            // Both helpers accept a pointer to null and reset the pointer they are given.
+            av_frame_free(&av_frame);
+            avcodec_free_context(&av_codec_ctx);
+            return;
+        }
+        av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+
+        // TODO(ameerj): libavcodec gpu hw acceleration
+
+        const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
+        if (av_error < 0) {
+            LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
+            // Free (not merely close) the context and null both members, so that
+            // GetCurrentFrame() cannot hand out a dangling pointer afterwards.
+            av_frame_free(&av_frame);
+            avcodec_free_context(&av_codec_ctx);
+            return;
+        }
+        initialized = true;
+        is_first_frame = true;
+    }
+    bool vp9_hidden_frame = false;
+
+    AVPacket packet{};
+    av_init_packet(&packet);
+    std::vector<u8> frame_data;
+
+    if (current_codec == NvdecCommon::VideoCodec::H264) {
+        frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
+    } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
+        frame_data = vp9_decoder->ComposeFrameHeader(state);
+        vp9_hidden_frame = vp9_decoder->WasFrameHidden();
+    }
+
+    // The packet only borrows frame_data, which outlives the send call below.
+    packet.data = frame_data.data();
+    packet.size = static_cast<int>(frame_data.size());
+
+    if (const int send_result = avcodec_send_packet(av_codec_ctx, &packet); send_result < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_send_packet() failed with error {}", send_result);
+    }
+
+    if (!vp9_hidden_frame) {
+        // Only receive/store visible frames
+        avcodec_receive_frame(av_codec_ctx, av_frame);
+    }
+}
+
+/// Returns the frame object that Decode() receives into. The pointer is null until
+/// Decode() has allocated it.
+AVFrame* Codec::GetCurrentFrame() {
+    return av_frame;
+}
+
+/// Const overload of GetCurrentFrame().
+const AVFrame* Codec::GetCurrentFrame() const {
+    return av_frame;
+}
+
+/// Returns the codec most recently passed to SetTargetCodec() (None before the first call).
+NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
+    return current_codec;
+}
+
+} // namespace Tegra
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -0,0 +1,66 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include "common/common_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+extern "C" {
+// libavcodec's headers trip -Wconversion, which this target promotes to an error.
+// Save the diagnostic state first so that the `pop` below restores exactly what was
+// active before the include instead of popping without a matching push.
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libavcodec/avcodec.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+namespace Tegra {
+class GPU;
+struct VicRegisters;
+
+namespace Decoder {
+class H264;
+class VP9;
+} // namespace Decoder
+
+/// Owns the libavcodec decoder state and the H.264/VP9 header composers, and decodes one
+/// frame per Decode() call from the NVDEC register state written through StateWrite().
+class Codec {
+public:
+    explicit Codec(GPU& gpu);
+    ~Codec();
+
+    /// Sets NVDEC video stream codec
+    void SetTargetCodec(NvdecCommon::VideoCodec codec);
+
+    /// Populate NvdecRegisters state with argument value at the provided offset
+    void StateWrite(u32 offset, u64 arguments);
+
+    /// Call decoders to construct headers, decode AVFrame with ffmpeg
+    void Decode();
+
+    /// Returns most recently decoded frame
+    [[nodiscard]] AVFrame* GetCurrentFrame();
+    [[nodiscard]] const AVFrame* GetCurrentFrame() const;
+
+    /// Returns the value of current_codec
+    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+
+private:
+    // Set once Decode() has successfully opened the libav decoder.
+    bool initialized{};
+    NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
+
+    // libav objects created lazily by Decode() and released by the destructor.
+    AVCodec* av_codec{nullptr};
+    AVCodecContext* av_codec_ctx{nullptr};
+    AVFrame* av_frame{nullptr};
+
+    // Declared before the composers: the constructor passes it to both of them.
+    GPU& gpu;
+    std::unique_ptr<Decoder::H264> h264_decoder;
+    std::unique_ptr<Decoder::VP9> vp9_decoder;
+
+    // Register state filled in by StateWrite() and read by the header composers.
+    NvdecCommon::NvdecRegisters state{};
+};
+
+} // namespace Tegra
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -0,0 +1,293 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <array>
+#include "common/bit_util.h"
+#include "video_core/command_classes/codecs/h264.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Decoder {
+namespace {
+// ZigZag LUTs from libavcodec.
+// H264BitWriter::WriteScalingList() uses them to pick the order in which scaling-list
+// entries are written: zig_zag_direct for 64-entry lists, zig_zag_scan for 16-entry lists.
+constexpr std::array<u8, 64> zig_zag_direct{
+    0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,  12, 19, 26, 33, 40, 48,
+    41, 34, 27, 20, 13, 6,  7,  14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
+    30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
+};
+
+// Each entry is spelled as x + y * 4, i.e. a position inside a 4x4 block.
+constexpr std::array<u8, 16> zig_zag_scan{
+    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
+};
+} // Anonymous namespace
+
+/// Keeps a reference to the GPU; its memory manager is used to read guest data.
+H264::H264(GPU& gpu_) : gpu(gpu_) {}
+
+H264::~H264() = default;
+
+/// Builds the byte stream handed to FFmpeg for the current frame.
+///
+/// Reads the H264DecoderContext from guest memory at `state.picture_info_offset`. When this
+/// is the first frame, or the frame number stored in the parameter flags is zero, two
+/// parameter-set NAL units are synthesised from that context and placed in front of the
+/// guest bitstream; otherwise the guest bitstream is returned unchanged.
+///
+/// @param state          NVDEC registers providing the guest addresses to read from.
+/// @param is_first_frame Forces the parameter sets to be emitted.
+/// @returns Reference to an internal buffer that stays valid until the next call.
+const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                bool is_first_frame) {
+    H264DecoderContext context{};
+    gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
+
+    const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff);
+    if (!is_first_frame && frame_number != 0) {
+        // No header needed: pass the guest bitstream through as-is.
+        frame.resize(context.frame_data_size);
+
+        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
+    } else {
+        /// Encode header
+        // First NAL unit: 24-bit start code 0x000001 followed by a header with unit type 7
+        // (sequence parameter set in the H.264 NAL unit type table).
+        H264BitWriter writer{};
+        writer.WriteU(1, 24);
+        writer.WriteU(0, 1);
+        writer.WriteU(3, 2);
+        writer.WriteU(7, 5);
+        writer.WriteU(100, 8);
+        writer.WriteU(0, 8);
+        writer.WriteU(31, 8);
+        writer.WriteUe(0);
+        const auto chroma_format_idc =
+            static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3);
+        writer.WriteUe(chroma_format_idc);
+        if (chroma_format_idc == 3) {
+            writer.WriteBit(false);
+        }
+
+        writer.WriteUe(0);
+        writer.WriteUe(0);
+        writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
+        writer.WriteBit(false); // Scaling matrix present flag
+
+        const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3);
+        writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf));
+        writer.WriteUe(order_cnt_type);
+        if (order_cnt_type == 0) {
+            writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt);
+        } else if (order_cnt_type == 1) {
+            writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
+
+            writer.WriteSe(0);
+            writer.WriteSe(0);
+            writer.WriteUe(0);
+        }
+
+        // The height is halved when the stream is not frame-MBs-only.
+        const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units /
+                               (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
+
+        writer.WriteUe(16);
+        writer.WriteBit(false);
+        writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
+        writer.WriteUe(pic_height - 1);
+        writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
+
+        if (!context.h264_parameter_set.frame_mbs_only_flag) {
+            writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0);
+        }
+
+        writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0);
+        writer.WriteBit(false); // Frame cropping flag
+        writer.WriteBit(false); // VUI parameter present flag
+
+        writer.End();
+
+        // H264 PPS
+        writer.WriteU(1, 24);
+        writer.WriteU(0, 1);
+        writer.WriteU(3, 2);
+        writer.WriteU(8, 5);
+
+        writer.WriteUe(0);
+        writer.WriteUe(0);
+
+        writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
+        writer.WriteBit(false);
+        writer.WriteUe(0);
+        writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
+        writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
+        writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0);
+        writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2);
+        // Shift left then right to sign-extend the 6-bit field to a full s32.
+        s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f);
+        pic_init_qp = (pic_init_qp << 26) >> 26;
+        writer.WriteSe(pic_init_qp);
+        writer.WriteSe(0);
+        // Same sign-extension trick for the 5-bit chroma QP offset.
+        s32 chroma_qp_index_offset =
+            static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f);
+        chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27;
+
+        writer.WriteSe(chroma_qp_index_offset);
+        writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0);
+        writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0);
+        writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0);
+        writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
+
+        writer.WriteBit(true);
+
+        // The matrix copies do not depend on the loop index, so build each one only once.
+        // Six 16-entry lists are written from scaling_matrix_4 ...
+        const std::vector<u8> matrix_x4(context.scaling_matrix_4.begin(),
+                                        context.scaling_matrix_4.end());
+        for (s32 index = 0; index < 6; index++) {
+            writer.WriteBit(true);
+            writer.WriteScalingList(matrix_x4, index * 16, 16);
+        }
+
+        // ... and, with the 8x8 transform enabled, two 64-entry lists from scaling_matrix_8.
+        if (context.h264_parameter_set.transform_8x8_mode_flag) {
+            const std::vector<u8> matrix_x8(context.scaling_matrix_8.begin(),
+                                            context.scaling_matrix_8.end());
+            for (s32 index = 0; index < 2; index++) {
+                writer.WriteBit(true);
+                writer.WriteScalingList(matrix_x8, index * 64, 64);
+            }
+        }
+
+        s32 chroma_qp_index_offset2 =
+            static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f);
+        chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27;
+
+        writer.WriteSe(chroma_qp_index_offset2);
+
+        writer.End();
+
+        // Final layout: [generated header bytes][guest bitstream].
+        const auto& encoded_header = writer.GetByteArray();
+        frame.resize(encoded_header.size() + context.frame_data_size);
+        std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
+
+        gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
+                                      frame.data() + encoded_header.size(),
+                                      context.frame_data_size);
+    }
+
+    return frame;
+}
+
+// Construction and destruction need nothing beyond the in-class member initialisers.
+H264BitWriter::H264BitWriter() = default;
+
+H264BitWriter::~H264BitWriter() = default;
+
+/// Writes the low `value_sz` bits of `value`, most significant bit first.
+void H264BitWriter::WriteU(s32 value, s32 value_sz) {
+    WriteBits(value, value_sz);
+}
+
+/// Writes a signed value using the signed Exp-Golomb code.
+void H264BitWriter::WriteSe(s32 value) {
+    WriteExpGolombCodedInt(value);
+}
+
+/// Writes an unsigned value using the unsigned Exp-Golomb code.
+void H264BitWriter::WriteUe(u32 value) {
+    WriteExpGolombCodedUInt(value);
+}
+
+/// Terminates the current unit: appends a single 1 bit, then flushes the partially filled
+/// byte (its unused low bits stay zero) into the byte array.
+void H264BitWriter::End() {
+    WriteBit(true);
+    Flush();
+}
+
+/// Appends one bit: 1 when `state` is true, 0 otherwise.
+void H264BitWriter::WriteBit(bool state) {
+    WriteBits(state ? 1 : 0, 1);
+}
+
+/// Writes `count` scaling-list entries as signed Exp-Golomb deltas.
+///
+/// Entries are read from `list` beginning at `start` and visited in zig-zag order; each
+/// one is written as its difference from the previously written entry, starting from 8.
+///
+/// @param list  Buffer holding one or more consecutive scaling lists.
+/// @param start Index in `list` of the first element of the list to write.
+/// @param count List length; 16 selects the 4x4 scan table, any other value the 8x8 one.
+///              Callers in this file pass 16 or 64.
+void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) {
+    // Index the constant scan table in place rather than copying it into a temporary
+    // vector on every call.
+    const u8* const scan = count == 16 ? zig_zag_scan.data() : zig_zag_direct.data();
+    u8 last_scale = 8;
+
+    for (s32 index = 0; index < count; index++) {
+        const u8 value = list[start + scan[index]];
+        const s32 delta_scale = static_cast<s32>(value - last_scale);
+
+        WriteSe(delta_scale);
+
+        last_scale = value;
+    }
+}
+
+/// Returns the bytes flushed so far. Bits still held in the partial-byte buffer are not
+/// included until Flush() or End() runs.
+std::vector<u8>& H264BitWriter::GetByteArray() {
+    return byte_array;
+}
+
+/// Const overload of GetByteArray().
+const std::vector<u8>& H264BitWriter::GetByteArray() const {
+    return byte_array;
+}
+
+/// Appends the low `bit_count` bits of `value` to the stream, most significant bit first.
+/// Bits are packed into the one-byte buffer from its top bit downwards; the value is
+/// split into chunks whenever it does not fit in the space left in the current byte.
+void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
+    // Number of bits of `value` already written.
+    s32 value_pos = 0;
+
+    s32 remaining = bit_count;
+
+    while (remaining > 0) {
+        s32 copy_size = remaining;
+
+        // Flushes a full byte first, so this is always at least 1.
+        const s32 free_bits = GetFreeBufferBits();
+
+        if (copy_size > free_bits) {
+            copy_size = free_bits;
+        }
+
+        const s32 mask = (1 << copy_size) - 1;
+
+        // src_shift brings the next unwritten chunk of `value` down to bit 0;
+        // dst_shift places it directly below the bits already held in the buffer.
+        const s32 src_shift = (bit_count - value_pos) - copy_size;
+        const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
+
+        buffer |= ((value >> src_shift) & mask) << dst_shift;
+
+        value_pos += copy_size;
+        buffer_pos += copy_size;
+        remaining -= copy_size;
+    }
+}
+
+/// Writes a signed value by mapping it onto the unsigned Exp-Golomb code:
+/// a positive v becomes 2*v - 1, zero or a negative v becomes 2*|v|.
+void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
+    const s32 sign = value <= 0 ? 0 : 1;
+    if (value < 0) {
+        value = -value;
+    }
+    value = (value << 1) - sign;
+    WriteExpGolombCodedUInt(value);
+}
+
+/// Writes an unsigned Exp-Golomb code. With `size` being the bit length of value + 1, the
+/// output is (size - 1) zero bits and a one bit, followed by the low (size - 1) bits of
+/// value + 1.
+void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
+    const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1));
+    // Writing the value 1 in `size` bits yields the zero prefix plus the marker bit.
+    WriteBits(1, size);
+
+    // Equivalent to (value + 1) with its top bit removed.
+    value -= (1U << (size - 1)) - 1;
+    WriteBits(static_cast<s32>(value), size - 1);
+}
+
+/// Returns how many bits can still be written into the current byte. A completely full
+/// byte is flushed first, so the result is never zero.
+s32 H264BitWriter::GetFreeBufferBits() {
+    if (buffer_pos == buffer_size) {
+        Flush();
+    }
+
+    return buffer_size - buffer_pos;
+}
+
+/// Appends the current (possibly partial) byte to the byte array and resets the bit
+/// buffer. Does nothing when no bits are pending.
+void H264BitWriter::Flush() {
+    if (buffer_pos == 0) {
+        return;
+    }
+    byte_array.push_back(static_cast<u8>(buffer));
+
+    buffer = 0;
+    buffer_pos = 0;
+}
+} // namespace Tegra::Decoder
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -0,0 +1,118 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#pragma once
+
+#include <array>
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+namespace Tegra {
+class GPU;
+namespace Decoder {
+
+/// Bit-level writer used to assemble H.264 headers. Bits are collected most significant
+/// bit first in a one-byte buffer and appended to a byte vector whenever the byte fills
+/// up or the stream is ended.
+class H264BitWriter {
+public:
+    H264BitWriter();
+    ~H264BitWriter();
+
+    /// The following Write methods are based on clause 9.1 in the H.264 specification.
+    /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
+    void WriteU(s32 value, s32 value_sz);
+    void WriteSe(s32 value);
+    void WriteUe(u32 value);
+
+    /// Finalize the bitstream
+    void End();
+
+    /// Append a bit to the stream, equivalent value to the state parameter
+    void WriteBit(bool state);
+
+    /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
+    /// Writes the scaling matrices of the stream
+    void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
+
+    /// Return the bitstream as a vector.
+    [[nodiscard]] std::vector<u8>& GetByteArray();
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
+
+private:
+    void WriteBits(s32 value, s32 bit_count);
+    void WriteExpGolombCodedInt(s32 value);
+    void WriteExpGolombCodedUInt(u32 value);
+    [[nodiscard]] s32 GetFreeBufferBits();
+    void Flush();
+
+    // Capacity of `buffer` in bits.
+    s32 buffer_size{8};
+
+    // Partially assembled byte and the number of bits already stored in it.
+    s32 buffer{};
+    s32 buffer_pos{};
+    // Completed bytes, in output order.
+    std::vector<u8> byte_array;
+};
+
+/// Composes the byte stream passed to FFmpeg for H.264 frames, generating the parameter
+/// sets from the decoder context found in guest memory when they are needed.
+class H264 {
+public:
+    explicit H264(GPU& gpu);
+    ~H264();
+
+    /// Compose the H264 header of the frame for FFmpeg decoding
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
+                                                            bool is_first_frame = false);
+
+private:
+    // Part of H264DecoderContext, which is filled straight from guest memory; the field
+    // order and padding are therefore fixed (checked by the static_assert below).
+    struct H264ParameterSet {
+        u32 log2_max_pic_order_cnt{};
+        u32 delta_pic_order_always_zero_flag{};
+        u32 frame_mbs_only_flag{};
+        u32 pic_width_in_mbs{};
+        u32 pic_height_in_map_units{};
+        INSERT_PADDING_WORDS(1);
+        u32 entropy_coding_mode_flag{};
+        u32 bottom_field_pic_order_flag{};
+        u32 num_refidx_l0_default_active{};
+        u32 num_refidx_l1_default_active{};
+        u32 deblocking_filter_control_flag{};
+        u32 redundant_pic_count_flag{};
+        u32 transform_8x8_mode_flag{};
+        INSERT_PADDING_WORDS(9);
+        // Packed bit fields; ComposeFrameHeader() extracts them with shifts and masks.
+        u64 flags{};
+        u32 frame_number{};
+        u32 frame_number2{};
+    };
+    static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size");
+
+    // Block read from guest memory at NvdecRegisters::picture_info_offset.
+    struct H264DecoderContext {
+        INSERT_PADDING_BYTES(0x48);
+        // Size in bytes of the bitstream at NvdecRegisters::frame_bitstream_offset.
+        u32 frame_data_size{};
+        INSERT_PADDING_BYTES(0xc);
+        H264ParameterSet h264_parameter_set{};
+        INSERT_PADDING_BYTES(0x100);
+        // Written out as six 16-entry lists.
+        std::array<u8, 0x60> scaling_matrix_4;
+        // Written out as two 64-entry lists.
+        std::array<u8, 0x80> scaling_matrix_8;
+    };
+    static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size");
+
+    // Output buffer returned by reference from ComposeFrameHeader().
+    std::vector<u8> frame;
+    GPU& gpu;
+};
+
+} // namespace Decoder
+} // namespace Tegra
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -0,0 +1,196 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/stream.h"
+#include "video_core/command_classes/codecs/vp9_types.h"
+#include "video_core/command_classes/nvdec_common.h"
+
+namespace Tegra {
+class GPU;
+/// Distinguishes key frames from inter frames.
+enum class FrameType { KeyFrame = 0, InterFrame = 1 };
+namespace Decoder {
+
+/// The VpxRangeEncoder and VpxBitStreamWriter classes are used to compose the
+/// VP9 header bitstreams.
+
+/// Encoder that writes probability-coded bits into an internal Common::Stream.
+/// Instances are move-only: copying is deleted, moving is defaulted.
+class VpxRangeEncoder {
+public:
+    VpxRangeEncoder();
+    ~VpxRangeEncoder();
+
+    VpxRangeEncoder(const VpxRangeEncoder&) = delete;
+    VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
+
+    VpxRangeEncoder(VpxRangeEncoder&&) = default;
+    VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
+
+    /// Writes the rightmost value_size bits from value into the stream
+    void Write(s32 value, s32 value_size);
+
+    /// Writes a single bit with half probability
+    void Write(bool bit);
+
+    /// Writes a bit to the base_stream encoded with probability
+    void Write(bool bit, s32 probability);
+
+    /// Signal the end of the bitstream
+    void End();
+
+    /// Returns the byte buffer of the underlying stream (mutable access)
+    [[nodiscard]] std::vector<u8>& GetBuffer() {
+        return base_stream.GetBuffer();
+    }
+
+    /// Returns the byte buffer of the underlying stream (read-only access)
+    [[nodiscard]] const std::vector<u8>& GetBuffer() const {
+        return base_stream.GetBuffer();
+    }
+
+private:
+    // Defined in the implementation file; presumably reads a byte from base_stream -- confirm.
+    u8 PeekByte();
+    Common::Stream base_stream{};
+    // Encoder state and its initial values; how they evolve is defined by the Write()/End()
+    // implementations.
+    u32 low_value{};
+    u32 range{0xff};
+    s32 count{-24};
+    // Probability used by Write(bool) according to its comment above ("half probability").
+    s32 half_probability{128};
+};
+
+/// Bit-level writer that accumulates bits in a small buffer and appends them to byte_array.
+/// Instances are move-only: copying is deleted, moving is defaulted.
+class VpxBitStreamWriter {
+public:
+    VpxBitStreamWriter();
+    ~VpxBitStreamWriter();
+
+    VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
+    VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
+
+    VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
+    VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
+
+    /// Write an unsigned integer value
+    void WriteU(u32 value, u32 value_size);
+
+    /// Write a signed integer value
+    void WriteS(s32 value, u32 value_size);
+
+    /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
+    void WriteDeltaQ(u32 value);
+
+    /// Write a single bit.
+    void WriteBit(bool state);
+
+    /// Pushes current buffer into buffer_array, resets buffer
+    void Flush();
+
+    /// Returns byte_array
+    [[nodiscard]] std::vector<u8>& GetByteArray();
+
+    /// Returns const byte_array
+    [[nodiscard]] const std::vector<u8>& GetByteArray() const;
+
+private:
+    /// Write bit_count bits from value into buffer
+    void WriteBits(u32 value, u32 bit_count);
+
+    /// Gets next available position in buffer, invokes Flush() if buffer is full
+    s32 GetFreeBufferBits();
+
+    // Capacity of `buffer` (initialised to 8; presumably counted in bits -- confirm).
+    s32 buffer_size{8};
+
+    // Pending bits and the current write position within them.
+    s32 buffer{};
+    s32 buffer_pos{};
+    // Output bytes; the Flush() comment above says the pending buffer is pushed into it.
+    std::vector<u8> byte_array;
+};
+
+/// Composes VP9 frames (headers plus bitstream) from the NVDEC register state.
+/// The class is not copyable. It can be move-constructed but not move-assigned
+/// (it holds a GPU& reference member, which cannot be reseated).
+class VP9 {
+public:
+    explicit VP9(GPU& gpu);
+    ~VP9();
+
+    VP9(const VP9&) = delete;
+    VP9& operator=(const VP9&) = delete;
+
+    VP9(VP9&&) = default;
+    VP9& operator=(VP9&&) = delete;
+
+    /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
+    /// documentation
+    [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
+
+    /// Returns true if the most recent frame was a hidden frame.
+    [[nodiscard]] bool WasFrameHidden() const {
+        return hidden;
+    }
+
+private:
+    /// Generates compressed header probability updates in the bitstream writer
+    template <typename T, std::size_t N>
+    void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+                                const std::array<T, N>& old_prob);
+
+    /// Generates compressed header probability updates in the bitstream writer
+    /// If probs are not equal, WriteProbabilityDelta is invoked
+    void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+    /// Generates compressed header probability deltas in the bitstream writer
+    void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+    /// Inverse of 6.3.4 Decode term subexp
+    void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
+
+    /// Writes if the value is less than the test value
+    bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
+
+    /// Writes probability updates for the Coef probabilities
+    void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
+                                    const std::array<u8, 2304>& new_prob,
+                                    const std::array<u8, 2304>& old_prob);
+
+    /// Write probabilities for 4-byte aligned structures
+    template <typename T, std::size_t N>
+    void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
+                                        const std::array<T, N>& old_prob);
+
+    /// Write motion vector probability updates. 6.3.17 in the spec
+    void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
+
+    /// Returns VP9 information from NVDEC provided offset and size
+    [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state);
+
+    /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
+    void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
+
+    /// Returns frame to be decoded after buffering
+    [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state);
+
+    /// Use NVDEC provided information to compose the headers for the current frame
+    [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
+    [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
+
+    GPU& gpu;
+    // Storage for the composed frame; presumably what ComposeFrameHeader returns -- confirm.
+    std::vector<u8> frame;
+
+    std::array<s8, 4> loop_filter_ref_deltas{};
+    std::array<s8, 2> loop_filter_mode_deltas{};
+
+    // Value reported by WasFrameHidden().
+    bool hidden = false;
+    s64 current_frame_number = -2; // since we buffer 2 frames
+    s32 grace_period = 6;          // frame offsets need to stabilize
+    std::array<FrameContexts, 4> frame_ctxs{};
+    // The two buffered frames referred to by the current_frame_number comment above.
+    Vp9FrameContainer next_frame{};
+    Vp9FrameContainer next_next_frame{};
+    bool swap_next_golden{};
+
+    Vp9PictureInfo current_frame_info{};
+    Vp9EntropyProbs prev_frame_probs{};
+
+    // Constants used while writing the headers; their use sites are in the implementation file.
+    s32 diff_update_probability = 252;
+    s32 frame_sync_code = 0x498342;
+};
+
+} // namespace Decoder
+} // namespace Tegra
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -0,0 +1,366 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+
+namespace Decoder {
+/// Width, height and luma/chroma pitch of one frame, stored as four s16 values.
+/// Embedded four times in PictureInfo; the static_assert pins the size to 8 bytes.
+struct Vp9FrameDimensions {
+    s16 width{};
+    s16 height{};
+    s16 luma_pitch{};
+    s16 chroma_pitch{};
+};
+static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
+
+/// Single-bit flags tested against PictureInfo::vp9_flags in PictureInfo::Convert().
+/// Unscoped on purpose so the values can be combined with a plain u32 using `&`.
+enum FrameFlags : u32 {
+    IsKeyFrame = 1 << 0,
+    LastFrameIsKeyFrame = 1 << 1,
+    FrameSizeChanged = 1 << 2,
+    ErrorResilientMode = 1 << 3,
+    LastShowFrame = 1 << 4,
+    IntraOnly = 1 << 5,
+};
+
+/// Motion vector joint type: which of the horizontal/vertical components are nonzero.
+enum class MvJointType {
+    MvJointZero = 0,   /* Zero vector */
+    MvJointHnzvz = 1,  /* Vert zero, hor nonzero */
+    MvJointHzvnz = 2,  /* Hor zero, vert nonzero */
+    MvJointHnzvnz = 3, /* Both components nonzero */
+};
+/// Motion vector magnitude class. Each class covers the integer-pel range given in its comment,
+/// with the upper bound doubling from one class to the next.
+enum class MvClassType {
+    MvClass0 = 0,   /* (0, 2]     integer pel */
+    MvClass1 = 1,   /* (2, 4]     integer pel */
+    MvClass2 = 2,   /* (4, 8]     integer pel */
+    MvClass3 = 3,   /* (8, 16]    integer pel */
+    MvClass4 = 4,   /* (16, 32]   integer pel */
+    MvClass5 = 5,   /* (32, 64]   integer pel */
+    MvClass6 = 6,   /* (64, 128]  integer pel */
+    MvClass7 = 7,   /* (128, 256] integer pel */
+    MvClass8 = 8,   /* (256, 512] integer pel */
+    MvClass9 = 9,   /* (512, 1024] integer pel */
+    MvClass10 = 10, /* (1024,2048] integer pel */
+};
+
+/// Block dimensions, ordered from 4x4 up to 64x64.
+/// BlockSizes is the number of real entries; BlockInvalid is an alias for the same value.
+enum class BlockSize {
+    Block4x4 = 0,
+    Block4x8 = 1,
+    Block8x4 = 2,
+    Block8x8 = 3,
+    Block8x16 = 4,
+    Block16x8 = 5,
+    Block16x16 = 6,
+    Block16x32 = 7,
+    Block32x16 = 8,
+    Block32x32 = 9,
+    Block32x64 = 10,
+    Block64x32 = 11,
+    Block64x64 = 12,
+    BlockSizes = 13,
+    BlockInvalid = BlockSizes
+};
+
+/// Prediction modes: values 0-9 are the intra modes described per entry, values 10-13 are the
+/// motion-vector based modes. MbModeCount is the number of entries.
+enum class PredictionMode {
+    DcPred = 0,   // Average of above and left pixels
+    VPred = 1,    // Vertical
+    HPred = 2,    // Horizontal
+    D45Pred = 3,  // Directional 45  deg = round(arctan(1 / 1) * 180 / pi)
+    D135Pred = 4, // Directional 135 deg = 180 - 45
+    D117Pred = 5, // Directional 117 deg = 180 - 63
+    D153Pred = 6, // Directional 153 deg = 180 - 27
+    D207Pred = 7, // Directional 207 deg = 180 + 27
+    D63Pred = 8,  // Directional 63  deg = round(arctan(2 / 1) * 180 / pi)
+    TmPred = 9,   // True-motion
+    NearestMv = 10,
+    NearMv = 11,
+    ZeroMv = 12,
+    NewMv = 13,
+    MbModeCount = 14
+};
+
+/// Transform sizes. TxSizes is the number of entries.
+enum class TxSize {
+    Tx4x4 = 0,   // 4x4 transform
+    Tx8x8 = 1,   // 8x8 transform
+    Tx16x16 = 2, // 16x16 transform
+    Tx32x32 = 3, // 32x32 transform
+    TxSizes = 4
+};
+
+/// Transform mode: the largest transform size allowed, or per-block selection.
+/// TxModes is the number of entries.
+enum class TxMode {
+    Only4X4 = 0,      // Only 4x4 transform used
+    Allow8X8 = 1,     // Allow block transform size up to 8x8
+    Allow16X16 = 2,   // Allow block transform size up to 16x16
+    Allow32X32 = 3,   // Allow block transform size up to 32x32
+    TxModeSelect = 4, // Transform specified for each block
+    TxModes = 5
+};
+
+/// Reference mode selection. ReferenceModes is the number of entries.
+/// NOTE(review): the name is snake_case while every other enum in this header is PascalCase.
+enum class reference_mode {
+    SingleReference = 0,
+    CompoundReference = 1,
+    ReferenceModeSelect = 2,
+    ReferenceModes = 3
+};
+
+/// Segmentation parameters as laid out inside PictureInfo (see PictureInfo::segmentation).
+/// The static_assert pins the size to 0x64 bytes, so the field order must not change.
+struct Segmentation {
+    // The first three fields are turned into booleans by PictureInfo::Convert() (nonzero == true).
+    u8 enabled{};
+    u8 update_map{};
+    u8 temporal_update{};
+    u8 abs_delta{};
+    // Eight entries each; presumably one per segment -- confirm.
+    std::array<u32, 8> feature_mask{};
+    std::array<std::array<s16, 4>, 8> feature_data{};
+};
+static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
+
+/// Loop filter parameters as laid out inside PictureInfo (see PictureInfo::loop_filter).
+/// The static_assert pins the size to 7 bytes.
+struct LoopFilter {
+    // Turned into a boolean by PictureInfo::Convert() (nonzero == true).
+    u8 mode_ref_delta_enabled{};
+    std::array<s8, 4> ref_deltas{};
+    std::array<s8, 2> mode_deltas{};
+};
+static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
+
+/// Entropy probabilities stored as flat byte arrays. Most members are filled by
+/// EntropyProbs::Convert(), which flattens the nested arrays of the NVDEC-side layout.
+/// The static_assert pins the size to 0x9F4 bytes (the sum of all array sizes).
+struct Vp9EntropyProbs {
+    std::array<u8, 36> y_mode_prob{};
+    std::array<u8, 64> partition_prob{};
+    std::array<u8, 2304> coef_probs{};
+    std::array<u8, 8> switchable_interp_prob{};
+    std::array<u8, 28> inter_mode_prob{};
+    std::array<u8, 4> intra_inter_prob{};
+    std::array<u8, 5> comp_inter_prob{};
+    std::array<u8, 10> single_ref_prob{};
+    std::array<u8, 5> comp_ref_prob{};
+    std::array<u8, 6> tx_32x32_prob{};
+    std::array<u8, 4> tx_16x16_prob{};
+    std::array<u8, 2> tx_8x8_prob{};
+    std::array<u8, 3> skip_probs{};
+    std::array<u8, 3> joints{};
+    std::array<u8, 2> sign{};
+    std::array<u8, 20> classes{};
+    std::array<u8, 2> class_0{};
+    // Filled from EntropyProbs::pred_bits by EntropyProbs::Convert() (note the differing name).
+    std::array<u8, 20> prob_bits{};
+    std::array<u8, 12> class_0_fr{};
+    std::array<u8, 6> fr{};
+    std::array<u8, 2> class_0_hp{};
+    std::array<u8, 2> high_precision{};
+};
+static_assert(sizeof(Vp9EntropyProbs) == 0x9F4, "Vp9EntropyProbs is an invalid size");
+
+/// Per-frame picture information in a host-friendly form (bools and s32 instead of packed bytes).
+/// PictureInfo::Convert() produces it from the packed layout. That function does not set
+/// show_frame, entropy, frame_offsets or refresh_frame, so they keep their zero defaults there.
+struct Vp9PictureInfo {
+    bool is_key_frame{};
+    bool intra_only{};
+    bool last_frame_was_key{};
+    bool frame_size_changed{};
+    bool error_resilient_mode{};
+    bool last_frame_shown{};
+    bool show_frame{};
+    std::array<s8, 4> ref_frame_sign_bias{};
+    s32 base_q_index{};
+    s32 y_dc_delta_q{};
+    s32 uv_dc_delta_q{};
+    s32 uv_ac_delta_q{};
+    bool lossless{};
+    s32 transform_mode{};
+    bool allow_high_precision_mv{};
+    s32 interp_filter{};
+    s32 reference_mode{};
+    s8 comp_fixed_ref{};
+    std::array<s8, 2> comp_var_ref{};
+    s32 log2_tile_cols{};
+    s32 log2_tile_rows{};
+    bool segment_enabled{};
+    bool segment_map_update{};
+    bool segment_map_temporal_update{};
+    s32 segment_abs_delta{};
+    std::array<u32, 8> segment_feature_enable{};
+    std::array<std::array<s16, 4>, 8> segment_feature_data{};
+    bool mode_ref_delta_enabled{};
+    bool use_prev_in_find_mv_refs{};
+    std::array<s8, 4> ref_deltas{};
+    std::array<s8, 2> mode_deltas{};
+    Vp9EntropyProbs entropy{};
+    Vp9FrameDimensions frame_size{};
+    u8 first_level{};
+    u8 sharpness_level{};
+    u32 bitstream_size{};
+    std::array<u64, 4> frame_offsets{};
+    std::array<bool, 4> refresh_frame{};
+};
+
+/// Pairs the picture information of a frame with its bitstream bytes.
+struct Vp9FrameContainer {
+    Vp9PictureInfo info{};
+    std::vector<u8> bit_stream;
+};
+
+/// Packed picture information with a fixed 0x100-byte layout (enforced by the static_assert
+/// below). Field order and padding must not change. Use Convert() to obtain the unpacked
+/// Vp9PictureInfo form.
+struct PictureInfo {
+    INSERT_PADDING_WORDS(12);
+    u32 bitstream_size{};
+    INSERT_PADDING_WORDS(5);
+    Vp9FrameDimensions last_frame_size{};
+    Vp9FrameDimensions golden_frame_size{};
+    Vp9FrameDimensions alt_frame_size{};
+    Vp9FrameDimensions current_frame_size{};
+    u32 vp9_flags{}; ///< Bitwise OR of FrameFlags values
+    std::array<s8, 4> ref_frame_sign_bias{};
+    u8 first_level{};
+    u8 sharpness_level{};
+    u8 base_q_index{};
+    u8 y_dc_delta_q{};
+    u8 uv_ac_delta_q{};
+    u8 uv_dc_delta_q{};
+    u8 lossless{};
+    u8 tx_mode{};
+    u8 allow_high_precision_mv{};
+    u8 interp_filter{};
+    u8 reference_mode{};
+    s8 comp_fixed_ref{};
+    std::array<s8, 2> comp_var_ref{};
+    u8 log2_tile_cols{};
+    u8 log2_tile_rows{};
+    Segmentation segmentation{};
+    LoopFilter loop_filter{};
+    INSERT_PADDING_BYTES(5);
+    u32 surface_params{};
+    INSERT_PADDING_WORDS(3);
+
+    /// Unpacks this structure into a Vp9PictureInfo.
+    ///
+    /// Byte-sized flags become bools (nonzero == true), byte-sized values are widened, and the
+    /// individual FrameFlags bits of vp9_flags are split into separate booleans.
+    /// show_frame, entropy, frame_offsets and refresh_frame are not set here and keep their
+    /// zero-initialized defaults.
+    /// @returns The unpacked picture information
+    [[nodiscard]] Vp9PictureInfo Convert() const {
+        return {
+            .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
+            .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
+            .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
+            .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
+            .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
+            .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
+            .ref_frame_sign_bias = ref_frame_sign_bias,
+            .base_q_index = base_q_index,
+            .y_dc_delta_q = y_dc_delta_q,
+            .uv_dc_delta_q = uv_dc_delta_q,
+            .uv_ac_delta_q = uv_ac_delta_q,
+            .lossless = lossless != 0,
+            .transform_mode = tx_mode,
+            .allow_high_precision_mv = allow_high_precision_mv != 0,
+            .interp_filter = interp_filter,
+            .reference_mode = reference_mode,
+            .comp_fixed_ref = comp_fixed_ref,
+            .comp_var_ref = comp_var_ref,
+            .log2_tile_cols = log2_tile_cols,
+            .log2_tile_rows = log2_tile_rows,
+            .segment_enabled = segmentation.enabled != 0,
+            .segment_map_update = segmentation.update_map != 0,
+            .segment_map_temporal_update = segmentation.temporal_update != 0,
+            .segment_abs_delta = segmentation.abs_delta,
+            .segment_feature_enable = segmentation.feature_mask,
+            .segment_feature_data = segmentation.feature_data,
+            .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
+            // Each flag is tested with a bit mask. Comparing the whole word with `==` would only
+            // ever be true when LastShowFrame is the single bit set in vp9_flags.
+            .use_prev_in_find_mv_refs = (vp9_flags & FrameFlags::ErrorResilientMode) == 0 &&
+                                        (vp9_flags & FrameFlags::FrameSizeChanged) == 0 &&
+                                        (vp9_flags & FrameFlags::IntraOnly) == 0 &&
+                                        (vp9_flags & FrameFlags::LastShowFrame) != 0 &&
+                                        (vp9_flags & FrameFlags::LastFrameIsKeyFrame) == 0,
+            .ref_deltas = loop_filter.ref_deltas,
+            .mode_deltas = loop_filter.mode_deltas,
+            .frame_size = current_frame_size,
+            .first_level = first_level,
+            .sharpness_level = sharpness_level,
+            .bitstream_size = bitstream_size,
+        };
+    }
+};
+static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
+
+/// Entropy probabilities in a fixed 0xEA0-byte layout (enforced by the static_assert below).
+/// Field order and padding must not change. Convert() flattens the nested arrays into a
+/// Vp9EntropyProbs.
+struct EntropyProbs {
+    INSERT_PADDING_BYTES(1024);
+    std::array<std::array<u8, 4>, 7> inter_mode_prob{};
+    std::array<u8, 4> intra_inter_prob{};
+    INSERT_PADDING_BYTES(80);
+    std::array<std::array<u8, 1>, 2> tx_8x8_prob{};
+    std::array<std::array<u8, 2>, 2> tx_16x16_prob{};
+    std::array<std::array<u8, 3>, 2> tx_32x32_prob{};
+    std::array<u8, 4> y_mode_prob_e8{};
+    std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{};
+    INSERT_PADDING_BYTES(64);
+    std::array<std::array<u8, 4>, 16> partition_prob{};
+    INSERT_PADDING_BYTES(10);
+    std::array<std::array<u8, 2>, 4> switchable_interp_prob{};
+    std::array<u8, 5> comp_inter_prob{};
+    std::array<u8, 4> skip_probs{};
+    std::array<u8, 3> joints{};
+    std::array<u8, 2> sign{};
+    std::array<std::array<u8, 1>, 2> class_0{};
+    std::array<std::array<u8, 3>, 2> fr{};
+    std::array<u8, 2> class_0_hp{};
+    std::array<u8, 2> high_precision{};
+    std::array<std::array<u8, 10>, 2> classes{};
+    std::array<std::array<std::array<u8, 3>, 2>, 2> class_0_fr{};
+    std::array<std::array<u8, 10>, 2> pred_bits{};
+    std::array<std::array<u8, 2>, 5> single_ref_prob{};
+    std::array<u8, 5> comp_ref_prob{};
+    INSERT_PADDING_BYTES(17);
+    std::array<std::array<std::array<std::array<std::array<std::array<u8, 4>, 6>, 6>, 2>, 2>, 4>
+        coef_probs{};
+
+    /// Copies every probability table of this structure into the flat arrays of fc.
+    ///
+    /// Each copy length is the size of the destination array, so a destination smaller than its
+    /// source (skip_probs: 3 of 4 bytes) receives only the leading bytes. This structure is only
+    /// read, hence the const qualifier.
+    /// @param fc Destination that receives the flattened probabilities
+    void Convert(Vp9EntropyProbs& fc) const {
+        std::memcpy(fc.inter_mode_prob.data(), inter_mode_prob.data(), fc.inter_mode_prob.size());
+
+        std::memcpy(fc.intra_inter_prob.data(), intra_inter_prob.data(),
+                    fc.intra_inter_prob.size());
+
+        std::memcpy(fc.tx_8x8_prob.data(), tx_8x8_prob.data(), fc.tx_8x8_prob.size());
+        std::memcpy(fc.tx_16x16_prob.data(), tx_16x16_prob.data(), fc.tx_16x16_prob.size());
+        std::memcpy(fc.tx_32x32_prob.data(), tx_32x32_prob.data(), fc.tx_32x32_prob.size());
+
+        // y_mode_prob is stored split: entries 0-7 of each row live in y_mode_prob_e0e7 and the
+        // ninth entry lives in y_mode_prob_e8. Interleave them into four rows of nine.
+        for (s32 i = 0; i < 4; i++) {
+            for (s32 j = 0; j < 9; j++) {
+                fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
+            }
+        }
+
+        std::memcpy(fc.partition_prob.data(), partition_prob.data(), fc.partition_prob.size());
+
+        std::memcpy(fc.switchable_interp_prob.data(), switchable_interp_prob.data(),
+                    fc.switchable_interp_prob.size());
+        std::memcpy(fc.comp_inter_prob.data(), comp_inter_prob.data(), fc.comp_inter_prob.size());
+        std::memcpy(fc.skip_probs.data(), skip_probs.data(), fc.skip_probs.size());
+
+        std::memcpy(fc.joints.data(), joints.data(), fc.joints.size());
+
+        std::memcpy(fc.sign.data(), sign.data(), fc.sign.size());
+        std::memcpy(fc.class_0.data(), class_0.data(), fc.class_0.size());
+        std::memcpy(fc.fr.data(), fr.data(), fc.fr.size());
+        std::memcpy(fc.class_0_hp.data(), class_0_hp.data(), fc.class_0_hp.size());
+        std::memcpy(fc.high_precision.data(), high_precision.data(), fc.high_precision.size());
+        std::memcpy(fc.classes.data(), classes.data(), fc.classes.size());
+        std::memcpy(fc.class_0_fr.data(), class_0_fr.data(), fc.class_0_fr.size());
+        // The source member is named pred_bits, the destination prob_bits.
+        std::memcpy(fc.prob_bits.data(), pred_bits.data(), fc.prob_bits.size());
+        std::memcpy(fc.single_ref_prob.data(), single_ref_prob.data(), fc.single_ref_prob.size());
+        std::memcpy(fc.comp_ref_prob.data(), comp_ref_prob.data(), fc.comp_ref_prob.size());
+
+        std::memcpy(fc.coef_probs.data(), coef_probs.data(), fc.coef_probs.size());
+    }
+};
+static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
+
+/// Reference kind stored in RefPoolElement (presumably the VP9 last/golden/alt-ref slots --
+/// confirm).
+enum class Ref { Last, Golden, AltRef };
+
+/// One reference pool entry: a frame number, its reference kind and a refresh flag.
+struct RefPoolElement {
+    s64 frame{};
+    Ref ref{};
+    bool refresh{};
+};
+
+/// A saved set of entropy probabilities plus bookkeeping; the VP9 class keeps four of these.
+struct FrameContexts {
+    // Presumably the number of the frame the probabilities came from -- confirm in vp9.cpp.
+    s64 from{};
+    bool adapted{};
+    Vp9EntropyProbs probs{};
+};
+
+}; // namespace Decoder
+}; // namespace Tegra
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/command_classes/host1x.h"
+#include "video_core/gpu.h"
+
+Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
+
+/// Defaulted out of line, in the source file rather than the header.
+Tegra::Host1x::~Host1x() = default;
+
+/// Stores one 32-bit argument into the register block.
+/// @param offset    Index of the register, counted in 32-bit words from the start of `state`
+/// @param arguments Value written to that register
+///
+/// Offsets that do not address a register inside `state` are ignored, so an unexpected method
+/// index cannot overwrite memory that follows the register block.
+void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) {
+    if (offset >= sizeof(state) / sizeof(u32)) {
+        // ProcessMethod() reports such methods through its default case.
+        return;
+    }
+    u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
+    std::memcpy(state_offset, &arguments, sizeof(u32));
+}
+
+/// Records the first argument in the register block and then dispatches on the method.
+/// @param method    Register index being written
+/// @param arguments Method arguments; only arguments[0] is read, so it must not be empty
+void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+    const u32 argument = arguments[0];
+    StateWrite(static_cast<u32>(method), argument);
+
+    switch (method) {
+    // Both wait variants are handled the same way.
+    case Method::WaitSyncpt:
+    case Method::WaitSyncpt32:
+        Execute(argument);
+        break;
+    case Method::LoadSyncptPayload32:
+        syncpoint_value = argument;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
+        break;
+    }
+}
+
+/// Placeholder for waiting on a syncpoint. The body is currently empty and `data` is unused.
+void Tegra::Host1x::Execute(u32 data) {
+    // This method waits on a valid syncpoint.
+    // TODO: Implement when proper Async is in place
+}
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -0,0 +1,78 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+class Nvdec;
+
+/// Processes Host1x class methods: each write is stored in a register block and a few methods
+/// trigger additional handling (see ProcessMethod).
+class Host1x {
+public:
+    /// Register block of the Host1x class, one u32 per register. The static_assert below pins
+    /// the size to 0x164 bytes, so field order and padding must not change.
+    struct Host1xClassRegisters {
+        u32 incr_syncpt{};
+        u32 incr_syncpt_ctrl{};
+        u32 incr_syncpt_error{};
+        INSERT_PADDING_WORDS(5);
+        u32 wait_syncpt{};
+        u32 wait_syncpt_base{};
+        u32 wait_syncpt_incr{};
+        u32 load_syncpt_base{};
+        u32 incr_syncpt_base{};
+        u32 clear{};
+        u32 wait{};
+        u32 wait_with_interrupt{};
+        u32 delay_use{};
+        u32 tick_count_high{};
+        u32 tick_count_low{};
+        u32 tick_ctrl{};
+        INSERT_PADDING_WORDS(23);
+        u32 ind_ctrl{};
+        u32 ind_off2{};
+        u32 ind_off{};
+        std::array<u32, 31> ind_data{};
+        INSERT_PADDING_WORDS(1);
+        u32 load_syncpoint_payload32{};
+        u32 stall_ctrl{};
+        u32 wait_syncpt32{};
+        u32 wait_syncpt_base32{};
+        u32 load_syncpt_base32{};
+        u32 incr_syncpt_base32{};
+        u32 stall_count_high{};
+        u32 stall_count_low{};
+        u32 xref_ctrl{};
+        u32 channel_xref_high{};
+        u32 channel_xref_low{};
+    };
+    static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
+
+    /// Methods with dedicated handling. Each value is the register's index in 32-bit words
+    /// (byte offset within Host1xClassRegisters divided by 4).
+    enum class Method : u32 {
+        WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4,
+        LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4,
+        WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4,
+    };
+
+    explicit Host1x(GPU& gpu);
+    ~Host1x();
+
+    /// Writes the method into the state, Invoke Execute() if encountered
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+
+private:
+    /// For Host1x, execute is waiting on a syncpoint previously written into the state
+    void Execute(u32 data);
+
+    /// Write argument into the provided offset
+    void StateWrite(u32 offset, u32 arguments);
+
+    // Set by ProcessMethod() when it handles Method::LoadSyncptPayload32.
+    u32 syncpoint_value{};
+    Host1xClassRegisters state{};
+    GPU& gpu;
+};
+
+} // namespace Tegra
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -0,0 +1,52 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/command_classes/nvdec.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+
+Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
+
+/// Defaulted out of line, in the source file rather than the header.
+Nvdec::~Nvdec() = default;
+
+/// Stores the method's argument in the codec state and acts on the two handled methods.
+///
+/// SetVideoCodec stores the argument as is and selects the target codec. Every other method
+/// stores the argument shifted left by 8 bits; Execute additionally starts a decode.
+/// @param method    Method being written
+/// @param arguments Method arguments; only arguments[0] is read, so it must not be empty
+void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+    const u32 state_index = static_cast<u32>(method);
+
+    switch (method) {
+    case Method::SetVideoCodec:
+        codec->StateWrite(state_index, arguments[0]);
+        codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0]));
+        break;
+    case Method::Execute:
+        codec->StateWrite(state_index, static_cast<u64>(arguments[0]) << 8);
+        Execute();
+        break;
+    default:
+        // Any other method only updates the state.
+        codec->StateWrite(state_index, static_cast<u64>(arguments[0]) << 8);
+        break;
+    }
+}
+
+/// Returns the frame currently held by the codec.
+AVFrame* Nvdec::GetFrame() {
+    AVFrame* const current_frame = codec->GetCurrentFrame();
+    return current_frame;
+}
+
+/// Returns the frame currently held by the codec, for read-only access.
+const AVFrame* Nvdec::GetFrame() const {
+    const AVFrame* const current_frame = codec->GetCurrentFrame();
+    return current_frame;
+}
+
+/// Decodes a frame when the selected codec is H264 or Vp9; any other codec is reported as
+/// unimplemented and nothing is decoded.
+void Nvdec::Execute() {
+    const auto selected_codec = codec->GetCurrentCodec();
+    const bool can_decode = selected_codec == NvdecCommon::VideoCodec::H264 ||
+                            selected_codec == NvdecCommon::VideoCodec::Vp9;
+    if (!can_decode) {
+        UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
+        return;
+    }
+    codec->Decode();
+}
+
+} // namespace Tegra
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/command_classes/codecs/codec.h"
+
+namespace Tegra {
+class GPU;
+
+/// Front end for NVDEC methods: forwards state writes to a Codec instance and triggers decoding.
+class Nvdec {
+public:
+    /// Methods with dedicated handling in ProcessMethod().
+    enum class Method : u32 {
+        SetVideoCodec = 0x80,
+        Execute = 0xc0,
+    };
+
+    explicit Nvdec(GPU& gpu);
+    ~Nvdec();
+
+    /// Writes the method into the state, Invoke Execute() if encountered
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+
+    /// Return most recently decoded frame
+    [[nodiscard]] AVFrame* GetFrame();
+    [[nodiscard]] const AVFrame* GetFrame() const;
+
+private:
+    /// Invoke codec to decode a frame
+    void Execute();
+
+    // Declared before `codec`: the constructor builds the codec from this reference.
+    GPU& gpu;
+    std::unique_ptr<Codec> codec;
+};
+} // namespace Tegra
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/command_classes/nvdec_common.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::NvdecCommon {
+
+/// NVDEC register block. Every register is stored as a u64 and the static_assert below pins the
+/// total size to 0xBC0 bytes, so field order and padding must not change.
+struct NvdecRegisters {
+    INSERT_PADDING_WORDS(256);
+    u64 set_codec_id{};
+    INSERT_PADDING_WORDS(254);
+    u64 set_platform_id{};
+    u64 picture_info_offset{};
+    u64 frame_bitstream_offset{};
+    u64 frame_number{};
+    u64 h264_slice_data_offsets{};
+    u64 h264_mv_dump_offset{};
+    INSERT_PADDING_WORDS(6);
+    u64 frame_stats_offset{};
+    u64 h264_last_surface_luma_offset{};
+    u64 h264_last_surface_chroma_offset{};
+    std::array<u64, 17> surface_luma_offset{};
+    std::array<u64, 17> surface_chroma_offset{};
+    INSERT_PADDING_WORDS(132);
+    u64 vp9_entropy_probs_offset{};
+    u64 vp9_backward_updates_offset{};
+    u64 vp9_last_frame_segmap_offset{};
+    u64 vp9_curr_frame_segmap_offset{};
+    INSERT_PADDING_WORDS(2);
+    u64 vp9_last_frame_mvs_offset{};
+    u64 vp9_curr_frame_mvs_offset{};
+    INSERT_PADDING_WORDS(2);
+};
+static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
+
+/// Codec identifiers. Nvdec::Execute() decodes only H264 and Vp9; the other values are reported
+/// as unimplemented there.
+enum class VideoCodec : u32 {
+    None = 0x0,
+    H264 = 0x3,
+    Vp8 = 0x5,
+    H265 = 0x7,
+    Vp9 = 0x9,
+};
+
+} // namespace Tegra::NvdecCommon
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -0,0 +1,60 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include <algorithm>
+#include "sync_manager.h"
+#include "video_core/gpu.h"
+
+namespace Tegra {
+SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
+/// Defaulted out of line, in the source file rather than the header.
+SyncptIncrManager::~SyncptIncrManager() = default;
+
+/// Queues an increment for syncpoint `id` that is already complete, then flushes every
+/// completed increment at the front of the queue.
+void SyncptIncrManager::Increment(u32 id) {
+    // Handle and class id are both zero for these immediate increments.
+    increments.push_back(SyncptIncr(0, 0, id, true));
+    IncrementAllDone();
+}
+
+/// Queues a not-yet-complete increment for syncpoint `id`.
+/// @param class_id Class identifier recorded with the increment
+/// @param id       Syncpoint to increment once the entry is signalled
+/// @returns The handle to pass to SignalDone(); handles are handed out sequentially
+u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
+    const u32 handle = current_id;
+    ++current_id;
+    increments.push_back(SyncptIncr(handle, class_id, id));
+    return handle;
+}
+
+/// Marks the first queued increment whose handle matches as complete, then flushes every
+/// completed increment at the front of the queue. An unknown handle only triggers the flush.
+void SyncptIncrManager::SignalDone(u32 handle) {
+    for (SyncptIncr& pending : increments) {
+        if (pending.id == handle) {
+            pending.complete = true;
+            break;
+        }
+    }
+    IncrementAllDone();
+}
+
+/// Walks the queue from the front, incrementing the GPU syncpoint of each completed entry, and
+/// stops at the first entry that is not complete. The processed entries are then removed, so
+/// increments are always applied in queue order.
+void SyncptIncrManager::IncrementAllDone() {
+    std::size_t completed = 0;
+    while (completed < increments.size() && increments[completed].complete) {
+        gpu.IncrementSyncPoint(increments[completed].syncpt_id);
+        ++completed;
+    }
+    increments.erase(increments.begin(), increments.begin() + completed);
+}
+} // namespace Tegra
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/command_classes/sync_manager.h
@@ -0,0 +1,64 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#pragma once
+
+#include <mutex>
+#include <vector>
+#include "common/common_types.h"
+
+namespace Tegra {
+class GPU;
+/// One queued syncpoint increment.
+struct SyncptIncr {
+    u32 id;        // Handle used by SyncptIncrManager::SignalDone() to find this entry
+    u32 class_id;  // Class identifier supplied by the caller
+    u32 syncpt_id; // Syncpoint passed to GPU::IncrementSyncPoint() when the entry is flushed
+    bool complete; // True once the increment may be applied
+
+    /// @param done Initial value of `complete`; defaults to false
+    SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
+        : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
+};
+
+/// Keeps an ordered queue of syncpoint increments and applies them to the GPU in order once
+/// they are complete.
+class SyncptIncrManager {
+public:
+    explicit SyncptIncrManager(GPU& gpu);
+    ~SyncptIncrManager();
+
+    /// Add syncpoint id and increment all
+    void Increment(u32 id);
+
+    /// Returns a handle to increment later
+    u32 IncrementWhenDone(u32 class_id, u32 id);
+
+    /// Marks the increment identified by handle as complete, then calls IncrementAllDone()
+    void SignalDone(u32 handle);
+
+    /// Increment all sequential pending increments that are already done.
+    void IncrementAllDone();
+
+private:
+    std::vector<SyncptIncr> increments;
+    // NOTE(review): none of the member functions in sync_manager.cpp lock this mutex.
+    std::mutex increment_lock;
+    // Next handle returned by IncrementWhenDone().
+    u32 current_id{};
+
+    GPU& gpu;
+};
+
+} // namespace Tegra
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -0,0 +1,180 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include "common/assert.h"
+#include "video_core/command_classes/nvdec.h"
+#include "video_core/command_classes/vic.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/texture_cache/surface_params.h"
+
+extern "C" {
+#include <libswscale/swscale.h>
+}
+
+namespace Tegra {
+
+// Binds the VIC to its GPU and to the NVDEC processor that supplies decoded frames.
+Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
+    : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {}
+
+Vic::~Vic() {
+    // Execute() lazily allocates the swscale context and only frees it when it is replaced, so
+    // the last context has to be released here. sws_freeContext() does nothing for nullptr.
+    sws_freeContext(scaler_ctx);
+}
+
+// Stores `arguments` into the register file at word index `offset`.
+// The index originates from a guest-submitted method id, so it is validated before being used
+// to address into vic_state; out-of-range writes are logged and dropped.
+void Vic::VicStateWrite(u32 offset, u32 arguments) {
+    constexpr std::size_t num_registers = sizeof(VicRegisters) / sizeof(u32);
+    if (offset >= num_registers) {
+        LOG_ERROR(HW_GPU, "Vic register write out of range, offset=0x{:X}", offset);
+        return;
+    }
+    u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
+    std::memcpy(state_offset, &arguments, sizeof(u32));
+}
+
+// Handles one VIC method call.
+// The first argument is mirrored into the register file, then the methods this implementation
+// acts on are dispatched: Execute runs the conversion, the Set*Offset methods latch an address
+// computed as (argument << 8). All other methods only update the register file.
+void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
+    LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
+    if (arguments.empty()) {
+        // Indexing arguments[0] on an empty vector would be undefined behaviour
+        LOG_ERROR(HW_GPU, "Vic method 0x{:X} called without arguments", static_cast<u32>(method));
+        return;
+    }
+    const u32 argument = arguments[0];
+    VicStateWrite(static_cast<u32>(method), argument);
+    const u64 arg = static_cast<u64>(argument) << 8;
+    switch (method) {
+    case Method::Execute:
+        Execute();
+        break;
+    case Method::SetConfigStructOffset:
+        config_struct_address = arg;
+        break;
+    case Method::SetOutputSurfaceLumaOffset:
+        output_surface_luma_address = arg;
+        break;
+    case Method::SetOutputSurfaceChromaUOffset:
+        output_surface_chroma_u_address = arg;
+        break;
+    case Method::SetOutputSurfaceChromaVOffset:
+        output_surface_chroma_v_address = arg;
+        break;
+    default:
+        break;
+    }
+}
+
+// Writes the current NVDEC frame to the configured output surface.
+// The pixel format is taken from the VicConfig word read at config_struct_address + 0x20:
+//  - RGBA8/BGRA8: the frame is converted with swscale and written to the luma address, either
+//    pitch linear or swizzled to block linear depending on block_linear_kind.
+//  - Yuv420: the luma plane is written to the luma address and the two chroma planes are
+//    interleaved and written to the chroma U address.
+// Returns early, writing nothing, if no output address is set, no frame is available, or an
+// FFmpeg allocation fails.
+void Vic::Execute() {
+    if (output_surface_luma_address == 0) {
+        LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}",
+                  vic_state.output_surface.luma_offset);
+        return;
+    }
+    const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
+    const VideoPixelFormat pixel_format =
+        static_cast<VideoPixelFormat>(config.pixel_format.Value());
+    switch (pixel_format) {
+    case VideoPixelFormat::BGRA8:
+    case VideoPixelFormat::RGBA8: {
+        LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
+        const auto* frame = nvdec_processor->GetFrame();
+
+        if (!frame || frame->width == 0 || frame->height == 0) {
+            return;
+        }
+        // NOTE(review): the cached context is keyed on the frame size only; a switch between
+        // RGBA8 and BGRA8 at an unchanged size keeps the previous target format.
+        if (scaler_ctx == nullptr || frame->width != scaler_width ||
+            frame->height != scaler_height) {
+            const AVPixelFormat target_format =
+                (pixel_format == VideoPixelFormat::RGBA8) ? AV_PIX_FMT_RGBA : AV_PIX_FMT_BGRA;
+
+            sws_freeContext(scaler_ctx);
+            scaler_ctx = nullptr;
+
+            // FFmpeg returns all frames in YUV420, convert it into expected format
+            scaler_ctx =
+                sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
+                               frame->height, target_format, 0, nullptr, nullptr, nullptr);
+            if (scaler_ctx == nullptr) {
+                // Leave the cached size untouched so the next call retries the allocation
+                LOG_ERROR(Service_NVDRV, "Failed to create the swscale context");
+                return;
+            }
+
+            scaler_width = frame->width;
+            scaler_height = frame->height;
+        }
+        // Get Converted frame. Widen before multiplying so the size is not computed in int.
+        const std::size_t linear_size =
+            static_cast<std::size_t>(frame->width) * static_cast<std::size_t>(frame->height) * 4;
+
+        using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
+        AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free};
+        if (!converted_frame_buffer) {
+            LOG_ERROR(Service_NVDRV, "Failed to allocate the converted frame buffer");
+            return;
+        }
+
+        const int converted_stride{frame->width * 4};
+        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
+
+        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
+                  &converted_frame_buf_addr, &converted_stride);
+
+        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
+        if (blk_kind != 0) {
+            // swizzle pitch linear to block linear
+            const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
+            const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
+                                                            block_height, 0);
+            std::vector<u8> swizzled_data(size);
+            Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
+                                             swizzled_data.data(), converted_frame_buffer.get(),
+                                             false, block_height, 0, 1);
+
+            gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
+            gpu.Maxwell3D().OnMemoryWrite();
+        } else {
+            // send pitch linear frame
+            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
+                                           linear_size);
+            gpu.Maxwell3D().OnMemoryWrite();
+        }
+        break;
+    }
+    case VideoPixelFormat::Yuv420: {
+        LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
+
+        const auto* frame = nvdec_processor->GetFrame();
+
+        if (!frame || frame->width == 0 || frame->height == 0) {
+            return;
+        }
+
+        const std::size_t surface_width = config.surface_width_minus1 + 1;
+        const std::size_t surface_height = config.surface_height_minus1 + 1;
+        // One chroma row per two luma rows. Halving surface_height_minus1 instead would drop
+        // the last chroma row whenever the height is even.
+        const std::size_t half_height = surface_height / 2;
+        const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
+
+        // The surface may be larger than the decoded frame: copy only what the frame holds so
+        // the source planes are never read out of bounds. The remainder stays zero-filled.
+        const auto frame_width = static_cast<std::size_t>(frame->width);
+        const auto frame_height = static_cast<std::size_t>(frame->height);
+        const std::size_t copy_width = surface_width < frame_width ? surface_width : frame_width;
+        const std::size_t copy_height =
+            surface_height < frame_height ? surface_height : frame_height;
+        const std::size_t half_copy_width = copy_width / 2;
+        const std::size_t half_copy_height = copy_height / 2;
+
+        const auto* luma_ptr = frame->data[0];
+        const auto* chroma_b_ptr = frame->data[1];
+        const auto* chroma_r_ptr = frame->data[2];
+        const auto stride = frame->linesize[0];
+        const auto half_stride = frame->linesize[1];
+
+        std::vector<u8> luma_buffer(aligned_width * surface_height);
+        std::vector<u8> chroma_buffer(aligned_width * half_height);
+
+        // Populate luma buffer, including the final row
+        for (std::size_t y = 0; y < copy_height; ++y) {
+            const std::size_t src = y * stride;
+            const std::size_t dst = y * aligned_width;
+            std::memcpy(luma_buffer.data() + dst, luma_ptr + src, copy_width);
+        }
+        gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
+                                       luma_buffer.size());
+
+        // Populate chroma buffer from both channels with interleaving.
+        for (std::size_t y = 0; y < half_copy_height; ++y) {
+            const std::size_t src = y * half_stride;
+            const std::size_t dst = y * aligned_width;
+
+            for (std::size_t x = 0; x < half_copy_width; ++x) {
+                chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
+                chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
+            }
+        }
+        gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
+                                       chroma_buffer.size());
+        gpu.Maxwell3D().OnMemoryWrite();
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unknown video pixel format {}", config.pixel_format.Value());
+        break;
+    }
+}
+
+} // namespace Tegra
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -0,0 +1,110 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+struct SwsContext;
+
+namespace Tegra {
+class GPU;
+class Nvdec;
+
+/// Three consecutive 32-bit registers holding the offsets of a surface's luma, chroma U and
+/// chroma V planes. All fields default to zero.
+struct PlaneOffsets {
+    u32 luma_offset{};
+    u32 chroma_u_offset{};
+    u32 chroma_v_offset{};
+};
+
+/// Register file of the VIC, laid out so that a register's position in 32-bit words equals the
+/// method id that writes it (Vic::VicStateWrite indexes this struct by word). The padding
+/// entries keep the named registers at those word indices; the total size is checked by the
+/// static_assert that follows the struct.
+struct VicRegisters {
+    INSERT_PADDING_WORDS(64);
+    u32 nop{};
+    INSERT_PADDING_WORDS(15);
+    u32 pm_trigger{};
+    INSERT_PADDING_WORDS(47);
+    u32 set_application_id{};
+    u32 set_watchdog_timer{};
+    INSERT_PADDING_WORDS(17);
+    u32 context_save_area{};
+    u32 context_switch{};
+    INSERT_PADDING_WORDS(43);
+    // Word 0xC0, matches Vic::Method::Execute
+    u32 execute{};
+    INSERT_PADDING_WORDS(63);
+    // Words 0x100-0x1BF: 8 x 8 plane-offset triples
+    std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
+    u32 picture_index{};
+    // Word 0x1C1, matches Vic::Method::SetControlParams
+    u32 control_params{};
+    // Word 0x1C2, matches Vic::Method::SetConfigStructOffset
+    u32 config_struct_offset{};
+    u32 filter_struct_offset{};
+    u32 palette_offset{};
+    u32 hist_offset{};
+    u32 context_id{};
+    u32 fce_ucode_size{};
+    // Words 0x1C8-0x1CA, match Vic::Method::SetOutputSurface{Luma,ChromaU,ChromaV}Offset
+    PlaneOffsets output_surface{};
+    u32 fce_ucode_offset{};
+    INSERT_PADDING_WORDS(4);
+    std::array<u32, 8> slot_context_id{};
+    INSERT_PADDING_WORDS(16);
+};
+static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
+
+/// Emulated VIC command class. Receives method writes, mirrors them into a register file and,
+/// on Execute, writes the latest NVDEC frame to the configured output surface in the requested
+/// pixel format.
+class Vic {
+public:
+    /// Method ids handled explicitly by ProcessMethod. The values are word indices into
+    /// VicRegisters.
+    enum class Method : u32 {
+        Execute = 0xc0,
+        SetControlParams = 0x1c1,
+        SetConfigStructOffset = 0x1c2,
+        SetOutputSurfaceLumaOffset = 0x1c8,
+        SetOutputSurfaceChromaUOffset = 0x1c9,
+        SetOutputSurfaceChromaVOffset = 0x1ca
+    };
+
+    explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
+    ~Vic();
+
+    /// Write to the device state.
+    /// Only the first element of `arguments` is consumed.
+    void ProcessMethod(Method method, const std::vector<u32>& arguments);
+
+private:
+    /// Converts the current NVDEC frame and writes it to the output surface.
+    void Execute();
+
+    /// Stores `arguments` at word index `offset` of vic_state.
+    void VicStateWrite(u32 offset, u32 arguments);
+    VicRegisters vic_state{};
+
+    /// Output formats Execute knows how to produce; compared against VicConfig::pixel_format.
+    enum class VideoPixelFormat : u64_le {
+        RGBA8 = 0x1f,
+        BGRA8 = 0x20,
+        Yuv420 = 0x44,
+    };
+
+    /// Bit layout of the 64-bit word Execute reads at config_struct_address + 0x20.
+    union VicConfig {
+        u64_le raw{};
+        BitField<0, 7, u64_le> pixel_format;
+        BitField<7, 2, u64_le> chroma_loc_horiz;
+        BitField<9, 2, u64_le> chroma_loc_vert;
+        BitField<11, 4, u64_le> block_linear_kind;
+        BitField<15, 4, u64_le> block_linear_height_log2;
+        BitField<19, 3, u64_le> reserved0;
+        BitField<22, 10, u64_le> reserved1;
+        BitField<32, 14, u64_le> surface_width_minus1;
+        BitField<46, 14, u64_le> surface_height_minus1;
+    };
+
+    GPU& gpu;
+    std::shared_ptr<Tegra::Nvdec> nvdec_processor;
+
+    // Addresses latched by the Set*Offset methods; each is the method argument shifted left by 8
+    GPUVAddr config_struct_address{};
+    GPUVAddr output_surface_luma_address{};
+    GPUVAddr output_surface_chroma_u_address{};
+    GPUVAddr output_surface_chroma_v_address{};
+
+    // swscale context cached by Execute, together with the frame size it was created for
+    SwsContext* scaler_ctx{};
+    s32 scaler_width{};
+    s32 scaler_height{};
+};
+
+} // namespace Tegra
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/cityhash.h"
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -12,6 +13,20 @@

 namespace Tegra {

+// Rebuilds command_list_hashes so that entry i holds the CityHash64 of the guest memory that
+// command_lists[i] currently points at. DmaPusher::Step compares against these hashes when it
+// later reads the same memory.
+void CommandList::RefreshIntegrityChecks(GPU& gpu) {
+    const std::size_t list_count = command_lists.size();
+    command_list_hashes.resize(list_count);
+
+    for (std::size_t i = 0; i < list_count; ++i) {
+        const CommandListHeader header = command_lists[i];
+        const std::size_t byte_count = header.size * sizeof(u32);
+
+        // Snapshot the command words from guest memory and hash the raw bytes
+        std::vector<CommandHeader> words(header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(header.addr, words.data(), byte_count);
+        command_list_hashes[i] =
+            Common::CityHash64(reinterpret_cast<char*>(words.data()), byte_count);
+    }
+}
+
 DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}

 DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
        return false;
    }

-    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
+    CommandList& command_list{dma_pushbuffer.front()};
+
+    ASSERT_OR_EXECUTE(
+        command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
+            // Somehow the command_list is empty, in order to avoid a crash
+            // We ignore it and assume its size is 0.
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+            return true;
+        });
+
+    if (command_list.prefetch_command_list.size()) {
+        // Prefetched command list from nvdrv, used for things like synchronization
+        command_headers = std::move(command_list.prefetch_command_list);
        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
-    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
-    const GPUVAddr dma_get = command_list_header.addr;
+    } else {
+        const CommandListHeader command_list_header{
+            command_list.command_lists[dma_pushbuffer_subindex]};
+        const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
+        const GPUVAddr dma_get = command_list_header.addr;

-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
+        if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
+
+        if (command_list_header.size == 0) {
+            return true;
+        }
+
+        // Push buffer non-empty, read a word
+        command_headers.resize(command_list_header.size);
+        gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
+                                            command_list_header.size * sizeof(u32));
+
+        // Integrity check
+        const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
+                                                command_list_header.size * sizeof(u32));
+        if (new_hash != next_hash) {
+            LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
+            dma_pushbuffer.pop();
+            return true;
+        }
    }
-
-    if (command_list_header.size == 0) {
-        return true;
-    }
-
-    // Push buffer non-empty, read a word
-    command_headers.resize(command_list_header.size);
-    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                        command_list_header.size * sizeof(u32));
-
    for (std::size_t index = 0; index < command_headers.size();) {
        const CommandHeader& command_header = command_headers[index];

--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -18,6 +18,8 @@ class System;

 namespace Tegra {

+class GPU;
+
 enum class SubmissionMode : u32 {
    IncreasingOld = 0,
    Increasing = 1,
@@ -27,6 +29,31 @@ enum class SubmissionMode : u32 {
    IncreaseOnce = 5
 };

+// Method ids understood by the command puller.
+// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
+// their numbers are written down multiplied by 4 in the docs. Here they are not multiplied by 4,
+// so the values you see in the docs may be four times the values listed below.
+enum class BufferMethods : u32 {
+    BindObject = 0x0,
+    Nop = 0x2,
+    SemaphoreAddressHigh = 0x4,
+    SemaphoreAddressLow = 0x5,
+    SemaphoreSequence = 0x6,
+    SemaphoreTrigger = 0x7,
+    NotifyIntr = 0x8,
+    WrcacheFlush = 0x9,
+    Unk28 = 0xA,
+    UnkCacheFlush = 0xB,
+    RefCnt = 0x14,
+    SemaphoreAcquire = 0x1A,
+    SemaphoreRelease = 0x1B,
+    FenceValue = 0x1C,
+    FenceAction = 0x1D,
+    WaitForInterrupt = 0x1E,
+    Unk7c = 0x1F,
+    Yield = 0x20,
+    NonPullerMethods = 0x40,
+};
+
 struct CommandListHeader {
    union {
        u64 raw;
@@ -49,9 +76,26 @@ union CommandHeader {
 static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
 static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");

-class GPU;
+/// Assembles a command header from a puller method id, an argument count and a submission mode.
+/// All other bits of the header are left zero.
+inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, SubmissionMode mode) {
+    CommandHeader header{};
+    header.method.Assign(static_cast<u32>(method));
+    header.arg_count.Assign(arg_count);
+    header.mode.Assign(mode);
+    return header;
+}

-using CommandList = std::vector<Tegra::CommandListHeader>;
+/// One submission for the DMA pusher. It carries either a set of command list headers that
+/// point into guest memory (with one integrity hash per header), or a prefetched list of
+/// command headers that is consumed directly.
+struct CommandList final {
+    CommandList() = default;
+
+    /// Creates `size` default-initialised command list headers.
+    explicit CommandList(std::size_t size) : command_lists(size) {}
+
+    /// Takes ownership of an already fetched list of command headers.
+    explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list_)
+        : prefetch_command_list{std::move(prefetch_command_list_)} {}
+
+    /// Recomputes command_list_hashes from the guest memory referenced by command_lists.
+    void RefreshIntegrityChecks(GPU& gpu);
+
+    std::vector<Tegra::CommandListHeader> command_lists;
+    std::vector<u64> command_list_hashes;
+    std::vector<Tegra::CommandHeader> prefetch_command_list;
+};

 /**
 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +104,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
 * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
 * details on this implementation.
 */
-class DmaPusher {
+class DmaPusher final {
 public:
    explicit DmaPusher(Core::System& system, GPU& gpu);
    ~DmaPusher();
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1893,6 +1893,7 @@ public:
        ICMP_IMM,
        FCMP_RR,
        FCMP_RC,
+        FCMP_IMMR,
        MUFU,  // Multi-Function Operator
        RRO_C, // Range Reduction Operator
        RRO_R,
@@ -2205,6 +2206,7 @@ private:
            INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+            INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,9 +27,10 @@ namespace Tegra {

 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));

-GPU::GPU(Core::System& system_, bool is_async_)
+GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
    : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
      dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+      cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
      maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
      fermi_2d{std::make_unique<Engines::Fermi2D>()},
      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
@@ -77,10 +78,18 @@ DmaPusher& GPU::DmaPusher() {
    return *dma_pusher;
 }

+// Mutable access to the CDMA pusher owned by this GPU.
+// NOTE(review): GPUAsynch/GPUSynch::PushCommandBuffer reset() this pointer when a video stream
+// ends, so the dereference is only valid while a pusher exists — confirm callers guarantee it.
+Tegra::CDmaPusher& GPU::CDmaPusher() {
+    return *cdma_pusher;
+}
+
 const DmaPusher& GPU::DmaPusher() const {
    return *dma_pusher;
 }

+// Read-only access to the CDMA pusher owned by this GPU.
+// NOTE(review): GPUAsynch/GPUSynch::PushCommandBuffer reset() this pointer when a video stream
+// ends, so the dereference is only valid while a pusher exists — confirm callers guarantee it.
+const Tegra::CDmaPusher& GPU::CDmaPusher() const {
+    return *cdma_pusher;
+}
+
 void GPU::WaitFence(u32 syncpoint_id, u32 value) {
    // Synced GPU, is always in sync
    if (!is_async) {
@@ -185,30 +194,6 @@ void GPU::SyncGuestHost() {
 void GPU::OnCommandListEnd() {
    renderer->Rasterizer().ReleaseFences();
 }
-// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
-// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
-// So the values you see in docs might be multiplied by 4.
-enum class BufferMethods {
-    BindObject = 0x0,
-    Nop = 0x2,
-    SemaphoreAddressHigh = 0x4,
-    SemaphoreAddressLow = 0x5,
-    SemaphoreSequence = 0x6,
-    SemaphoreTrigger = 0x7,
-    NotifyIntr = 0x8,
-    WrcacheFlush = 0x9,
-    Unk28 = 0xA,
-    UnkCacheFlush = 0xB,
-    RefCnt = 0x14,
-    SemaphoreAcquire = 0x1A,
-    SemaphoreRelease = 0x1B,
-    FenceValue = 0x1C,
-    FenceAction = 0x1D,
-    Unk78 = 0x1E,
-    Unk7c = 0x1F,
-    Yield = 0x20,
-    NonPullerMethods = 0x40,
-};

 enum class GpuSemaphoreOperation {
    AcquireEqual = 0x1,
@@ -268,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
    case BufferMethods::UnkCacheFlush:
    case BufferMethods::WrcacheFlush:
    case BufferMethods::FenceValue:
+        break;
    case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
        break;
    case BufferMethods::SemaphoreTrigger: {
        ProcessSemaphoreTriggerMethod();
@@ -382,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
    }
 }

+// Carries out the operation currently held in the fence action register: Acquire waits on the
+// syncpoint until it reaches the fence value register, Increment bumps the syncpoint.
+void GPU::ProcessFenceActionMethod() {
+    const auto& action = regs.fence_action;
+    switch (action.op) {
+    case FenceOperation::Increment:
+        IncrementSyncPoint(action.syncpoint_id);
+        break;
+    case FenceOperation::Acquire:
+        WaitFence(action.syncpoint_id, regs.fence_value);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", static_cast<u32>(action.op.Value()));
+        break;
+    }
+}
+
+// Handler for BufferMethods::WaitForInterrupt. Currently a stub: it only logs a warning and
+// performs no wait.
+void GPU::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei) ImplementMe
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
 void GPU::ProcessSemaphoreTriggerMethod() {
    const auto semaphoreOperationMask = 0xF;
    const auto op =
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,6 +13,7 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/cdma_pusher.h"
 #include "video_core/dma_pusher.h"

 using CacheAddr = std::uintptr_t;
@@ -157,7 +158,7 @@ public:
              method_count(method_count) {}
    };

-    explicit GPU(Core::System& system, bool is_async);
+    explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
    virtual ~GPU();

    /// Binds a renderer to the GPU.
@@ -209,6 +210,15 @@ public:
    /// Returns a reference to the GPU DMA pusher.
    Tegra::DmaPusher& DmaPusher();

+    /// Returns a const reference to the GPU DMA pusher.
+    const Tegra::DmaPusher& DmaPusher() const;
+
+    /// Returns a reference to the GPU CDMA pusher.
+    Tegra::CDmaPusher& CDmaPusher();
+
+    /// Returns a const reference to the GPU CDMA pusher.
+    const Tegra::CDmaPusher& CDmaPusher() const;
+
    VideoCore::RendererBase& Renderer() {
        return *renderer;
    }
@@ -249,8 +259,27 @@ public:
        return is_async;
    }

-    /// Returns a const reference to the GPU DMA pusher.
-    const Tegra::DmaPusher& DmaPusher() const;
+    bool UseNvdec() const {
+        return use_nvdec;
+    }
+
+    /// Operation selected by the `op` bit of FenceAction. ProcessFenceActionMethod maps Acquire
+    /// to WaitFence and Increment to IncrementSyncPoint.
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    /// Layout of the fence action register: bit 0 selects the operation and bits 8-31 hold the
+    /// syncpoint id.
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        /// Packs `op` and `syncpoint_id` into a register value and returns it wrapped in a
+        /// CommandHeader, so it can be placed in a command list as a data word.
+        static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };

    struct Regs {
        static constexpr size_t NUM_REGS = 0x40;
@@ -280,10 +309,7 @@ public:
                u32 semaphore_acquire;
                u32 semaphore_release;
                u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                INSERT_UNION_PADDING_WORDS(0xE2);

                // Puller state
@@ -311,6 +337,9 @@ public:
    /// Push GPU command entries to be processed
    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;

+    /// Push GPU command buffer entries to be processed
+    virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
+
    /// Swap buffers (render frame)
    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;

@@ -328,6 +357,8 @@ protected:

 private:
    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
    void ProcessSemaphoreTriggerMethod();
    void ProcessSemaphoreRelease();
    void ProcessSemaphoreAcquire();
@@ -349,7 +380,9 @@ protected:
    Core::System& system;
    std::unique_ptr<Tegra::MemoryManager> memory_manager;
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
    std::unique_ptr<VideoCore::RendererBase> renderer;
+    const bool use_nvdec;

 private:
    /// Mapping of command subchannels to their bound engine ids
@@ -372,6 +405,7 @@ private:
    std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;

    std::mutex sync_mutex;
+    std::mutex device_mutex;

    std::condition_variable sync_cv;

--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,12 +10,13 @@

 namespace VideoCommon {

-GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
+GPUAsynch::GPUAsynch(Core::System& system, bool use_nvdec)
+    : GPU{system, true, use_nvdec}, gpu_thread{system} {}

 GPUAsynch::~GPUAsynch() = default;

 void GPUAsynch::Start() {
-    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+    gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
    cpu_context = renderer->GetRenderWindow().CreateSharedContext();
    cpu_context->MakeCurrent();
 }
@@ -32,6 +33,27 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
    gpu_thread.SubmitList(std::move(entries));
 }

+// Processes a CDMA (NVDEC/VIC) command buffer on the calling thread.
+// Does nothing when NVDEC is disabled or the buffer is empty. A buffer whose first word is
+// 0xDEADB33F marks the end of a video stream and destroys the CDMA pusher; the pusher is
+// recreated on the next regular submission.
+void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+    if (!use_nvdec) {
+        return;
+    }
+    // Nothing to dispatch; this also guards the entries[0] access below
+    if (entries.empty()) {
+        return;
+    }
+    // This condition fires when a video stream ends, clear all intermediary data
+    // NOTE(review): Start() handed a reference to this pusher to the GPU thread; that reference
+    // dangles after the reset. It is harmless only while SubmitCommandBuffer stays unused.
+    if (entries[0].raw == 0xDEADB33F) {
+        cdma_pusher.reset();
+        return;
+    }
+    if (!cdma_pusher) {
+        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
+    }
+
+    // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
+    // TODO(ameerj): RE proper async nvdec operation
+    // gpu_thread.SubmitCommandBuffer(std::move(entries));
+
+    cdma_pusher->Push(std::move(entries));
+    cdma_pusher->DispatchCalls();
+}
+
 void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    gpu_thread.SwapBuffers(framebuffer);
 }
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,13 +20,14 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU asynchronously
 class GPUAsynch final : public Tegra::GPU {
 public:
-    explicit GPUAsynch(Core::System& system);
+    explicit GPUAsynch(Core::System& system, bool use_nvdec);
    ~GPUAsynch() override;

    void Start() override;
    void ObtainContext() override;
    void ReleaseContext() override;
    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,7 +7,7 @@

 namespace VideoCommon {

-GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
+GPUSynch::GPUSynch(Core::System& system, bool use_nvdec) : GPU{system, false, use_nvdec} {}

 GPUSynch::~GPUSynch() = default;

@@ -26,6 +26,22 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
    dma_pusher->DispatchCalls();
 }

+// Processes a CDMA (NVDEC/VIC) command buffer synchronously.
+// Does nothing when NVDEC is disabled or the buffer is empty. A buffer whose first word is
+// 0xDEADB33F marks the end of a video stream and destroys the CDMA pusher; the pusher is
+// recreated on the next regular submission.
+void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+    if (!use_nvdec) {
+        return;
+    }
+    // Nothing to dispatch; this also guards the entries[0] access below
+    if (entries.empty()) {
+        return;
+    }
+    // This condition fires when a video stream ends, clears all intermediary data
+    if (entries[0].raw == 0xDEADB33F) {
+        cdma_pusher.reset();
+        return;
+    }
+    if (!cdma_pusher) {
+        cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
+    }
+    cdma_pusher->Push(std::move(entries));
+    cdma_pusher->DispatchCalls();
+}
+
 void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    renderer->SwapBuffers(framebuffer);
 }
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,13 +19,14 @@ namespace VideoCommon {
 /// Implementation of GPU interface that runs the GPU synchronously
 class GPUSynch final : public Tegra::GPU {
 public:
-    explicit GPUSynch(Core::System& system);
+    explicit GPUSynch(Core::System& system, bool use_nvdec);
    ~GPUSynch() override;

    void Start() override;
    void ObtainContext() override;
    void ReleaseContext() override;
    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
    void FlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -18,7 +18,7 @@ namespace VideoCommon::GPUThread {
 /// Runs the GPU thread
 static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
-                      SynchState& state) {
+                      SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
    std::string name = "yuzu:GPU";
    MicroProfileOnThreadCreate(name.c_str());
    Common::SetCurrentThreadName(name.c_str());
@@ -42,6 +42,10 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
        if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
            dma_pusher.Push(std::move(submit_list->entries));
            dma_pusher.DispatchCalls();
+        } else if (const auto command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
+            // NVDEC
+            cdma_pusher.Push(std::move(command_list->entries));
+            cdma_pusher.DispatchCalls();
        } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
        } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -75,15 +79,19 @@ ThreadManager::~ThreadManager() {

 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
-                                Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread,         std::ref(system),     std::ref(renderer),
-                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
+                                Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
+    thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
+                         std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
 }

 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
    PushCommand(SubmitListCommand(std::move(entries)));
 }

+// Queues a CDMA command buffer for the GPU thread, which pushes it to the CDMA pusher and
+// dispatches it. Takes ownership of `entries`.
+void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) {
+    PushCommand(SubmitChCommandEntries(std::move(entries)));
+}
+
 void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
    PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -37,6 +37,14 @@ struct SubmitListCommand final {
    Tegra::CommandList entries;
 };

+/// Command to signal to the GPU thread that a cdma command list is ready for processing
+struct SubmitChCommandEntries final {
+    explicit SubmitChCommandEntries(Tegra::ChCommandHeaderList&& entries)
+        : entries{std::move(entries)} {}
+
+    Tegra::ChCommandHeaderList entries;
+};
+
 /// Command to signal to the GPU thread that a swap buffers is pending
 struct SwapBuffersCommand final {
    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
@@ -77,9 +85,9 @@ struct OnCommandListEndCommand final {};
 struct GPUTickCommand final {};

 using CommandData =
-    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
-                 GPUTickCommand>;
+    std::variant<EndProcessingCommand, SubmitListCommand, SubmitChCommandEntries,
+                 SwapBuffersCommand, FlushRegionCommand, InvalidateRegionCommand,
+                 FlushAndInvalidateRegionCommand, OnCommandListEndCommand, GPUTickCommand>;

 struct CommandDataContainer {
    CommandDataContainer() = default;
@@ -109,11 +117,14 @@ public:

    /// Creates and starts the GPU thread.
    void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                     Tegra::DmaPusher& dma_pusher);
+                     Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher);

    /// Push GPU command entries to be processed
    void SubmitList(Tegra::CommandList&& entries);

+    /// Push GPU CDMA command buffer entries to be processed
+    void SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries);
+
    /// Swap buffers (render frame)
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -11,6 +11,7 @@
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"

 namespace Tegra {

@@ -44,6 +45,12 @@ GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_
    return Map(cpu_addr, *FindFreeRange(size, align), size);
 }

+GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
+    const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
+    ASSERT(gpu_addr);
+    return Map(cpu_addr, *gpu_addr, size);
+}
+
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
    if (!size) {
        return;
@@ -108,7 +115,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
    page_table[PageEntryIndex(gpu_addr)] = page_entry;
 }

-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const {
+std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
+                                                     bool start_32bit_address) const {
    if (!align) {
        align = page_size;
    } else {
@@ -116,7 +124,7 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
    }

    u64 available_size{};
-    GPUVAddr gpu_addr{address_space_start};
+    GPUVAddr gpu_addr{start_32bit_address ? address_space_start_low : address_space_start};
    while (gpu_addr + available_size < address_space_size) {
        if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) {
            available_size += page_size;
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -116,6 +116,7 @@ public:

    [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+    [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
    void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -124,7 +125,8 @@ private:
    [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
    void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
    GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
-    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
+    [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
+                                                        bool start_32bit_address = false) const;

    void TryLockPage(PageEntry page_entry, std::size_t size);
    void TryUnlockPage(PageEntry page_entry, std::size_t size);
@@ -135,6 +137,7 @@ private:

    static constexpr u64 address_space_size = 1ULL << 40;
    static constexpr u64 address_space_start = 1ULL << 32;
+    static constexpr u64 address_space_start_low = 1ULL << 16;
    static constexpr u64 page_bits{16};
    static constexpr u64 page_size{1 << page_bits};
    static constexpr u64 page_mask{page_size - 1};
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -317,8 +317,7 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
            return std::nullopt;
        }
    }
-
-    return std::move(entries);
+    return entries;
 }

 void ShaderDiskCacheOpenGL::InvalidateTransferable() {
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Lioncash	6f006d051e	General: Fix clang build Allows building on clang to work again	2020-11-05 10:07:16 -05:00
bunnei	d62d28522b	Merge pull request #4889 from lioncash/setting-global core/settings: Move configuring_global behind an API	2020-11-04 17:09:19 -08:00
bunnei	087f52e872	Merge pull request #4858 from lioncash/initializer General: Resolve a few missing initializer warnings	2020-11-04 12:10:10 -08:00
Lioncash	7aae6d6d2b	core/settings: Move configuring_global behind an API Rather than have directly modified global state here, we can make it an implementation detail and have an interface that changes are queried through.	2020-11-04 04:16:37 -05:00
Chloe	6bbbbe8f85	Merge pull request #4869 from bunnei/improve-gpu-sync Improvements to GPU synchronization & various refactoring	2020-11-04 18:36:55 +11:00
bunnei	4bfa411ddc	Merge pull request #4874 from lioncash/nodiscard2 nvdec: Make use of [[nodiscard]] where applicable	2020-11-03 16:34:07 -08:00
bunnei	46fdc94586	Merge pull request #4887 from lioncash/common-build microprofile: Silence warning in headers	2020-11-03 13:41:29 -08:00
Lioncash	ee21b5378b	microprofile: Silence warning in headers Silences a truncation warning by making the truncation explicit and documenting the reason for it.	2020-11-03 15:07:13 -05:00
bunnei	222fe75401	Merge pull request #4873 from lioncash/common-error common: Enable warnings as errors	2020-11-03 11:00:23 -08:00
bunnei	448e4d5c2a	Merge pull request #4878 from bunnei/unload-nrr hle: service: ldr: Implement UnloadNrr.	2020-11-03 08:52:40 -08:00
Lioncash	4a4b685a04	common: Enable warnings as errors Cleans up common so that we can enable warnings as errors.	2020-11-02 15:50:58 -05:00
Lioncash	4f0f481f63	nvdec: Make use of [[nodiscard]] where applicable Prevents bugs from occurring where the results of a function are accidentally discarded	2020-11-02 02:45:15 -05:00
bunnei	1089d76736	Merge pull request #4865 from ameerj/async-threadcount async_shaders: Increase Async worker thread count for >8 thread cpus	2020-11-01 01:54:01 -07:00
bunnei	848bdf8a40	fixup! hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.	2020-11-01 01:52:38 -07:00
bunnei	7d2839d7a3	core: Initialize GPU before services.	2020-11-01 01:52:38 -07:00
bunnei	e67b8678f8	hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements. - Refactor so that SubmitGPFIFO and KickoffPB use shared functionality. - Implement add_wait and add_increment flags.	2020-11-01 01:52:38 -07:00
bunnei	c6e1c46ac7	video_core: dma_pusher: Add support for integrity checks. - Log corrupted command lists, rather than crash.	2020-11-01 01:52:38 -07:00
bunnei	c64545d07a	video_core: dma_pusher: Add support for prefetched command lists.	2020-11-01 01:52:38 -07:00
bunnei	1d4cbb92f2	service: hle: nvflinger: Fix potential shutdown crash when GPU is destroyed.	2020-11-01 01:52:38 -07:00
bunnei	6053b95552	video_core: gpu: Implement WaitFence and IncrementSyncPoint.	2020-11-01 01:52:37 -07:00
bunnei	66edfd61c6	hle service: nvdrv: nvhost_ctrl: Update to use SyncpointManager.	2020-11-01 01:52:37 -07:00
bunnei	4a3fd97e48	hle service: nvdrv: Update to instantiate SyncpointManager.	2020-11-01 01:52:34 -07:00
bunnei	d567b7e841	hle: service: nvdrv: Implement SyncpointManager, to manage syncpoints.	2020-11-01 01:51:54 -07:00
Levi Behunin	bca9591660	Rename to align with switchbrew and remove gpu function (#4714 ) * Rename to align with switchbrew * Rename to align with switchbrew and remove gpu function that checks if clearing should be done.	2020-11-01 01:24:17 -07:00
bunnei	98f68d06f1	Merge pull request #4853 from ReinUsesLisp/fcmp-imm shader/arithmetic: Implement FCMP immediate + register variant	2020-10-31 01:25:02 -07:00
bunnei	a0e5cccb92	hle: service: ldr: Implement UnloadNrr. - Used by Final Fantasy X/X-2 HD Remaster.	2020-10-31 01:22:53 -07:00
LC	6db0c0d8d9	Merge pull request #4872 from jbeich/clang video_core: unbreak -Werror in NVDEC with Clang	2020-10-30 15:11:40 -04:00
Lioncash	14a97d082e	CMakeLists: Resolve MSVC build failures Prevents the compiler tripping up about Windows headers.	2020-10-30 14:57:58 -04:00
Jan Beich	50e52ade85	video_core: unbreak -Werror in NVDEC with Clang src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp:41:15: error: unused variable 'OutOfMemory' [-Werror,-Wunused-const-variable] constexpr u32 OutOfMemory{static_cast<u32>(-12)}; ^	2020-10-30 16:43:10 +00:00
bunnei	8aa9ae5ba5	Merge pull request #4868 from lioncash/discard-error General: Make ignoring a discarded return value an error	2020-10-30 00:35:40 -07:00
bunnei	131a75b65d	Merge pull request #4867 from lioncash/vp9 VP9: Minor interface changes and safety improvements	2020-10-29 21:33:27 -07:00
Lioncash	11d0a6e7b8	General: Catch more expressions with no effect on MSVC MSVC lets us fine-tune catching expressions with no side-effects a little more.	2020-10-30 00:13:26 -04:00
Lioncash	26547d3e3b	General: Make ignoring a discarded return value an error Allows our CI to catch more potential bugs. This also removes the [[nodiscard]] attribute of IOFile's Open member function. There are cases where a file may want to be opened, but have the status of it checked at a later time.	2020-10-30 00:13:21 -04:00
Lioncash	8049b8beb6	common/stream: Be explicit with copy and move operators	2020-10-29 22:57:35 -04:00
Lioncash	12eeffcb7c	vp9: Be explicit with copy and move operators It's deprecated in the language to autogenerate these if the destructor for a type is specified, so we can explicitly specify how we want these to be generated.	2020-10-29 22:57:35 -04:00
Lioncash	0d713cf8eb	vp9: Mark functions with [[nodiscard]] where applicable Prevents values from mistakenly being discarded in cases where it's a bug to do so.	2020-10-29 22:57:32 -04:00
Lioncash	badea3b301	vp9: Provide a default initializer for "hidden" member The API of VP9 exposes a WasFrameHidden() function which accesses this member. Given the constructor previously didn't initialize this member, it's a potential vector for an uninitialized read. Instead, we can initialize this to a deterministic value to prevent that from occurring.	2020-10-29 22:35:55 -04:00
Lioncash	f8543249f0	vp9: Make some member functions internally linked These helper functions don't directly modify any member state and can be hidden from view.	2020-10-29 22:34:46 -04:00
Lioncash	5553bd3ba2	General: Resolve a few missing initializer warnings Resolves a few -Wmissing-initializer warnings.	2020-10-29 19:37:07 -04:00
bunnei	7dcf4c0018	Merge pull request #4831 from lioncash/fmt externals: Update fmt to 7.1.0	2020-10-29 14:44:07 -07:00
bunnei	ef29bf4515	Merge pull request #4837 from lioncash/nvdec-2 nvdec: Minor tidying up	2020-10-29 12:28:07 -07:00
ameerj	3620206136	async_shaders: Increase Async worker thread count for 8+ thread cpus Adds 1 async worker thread for every 2 available threads above 8	2020-10-29 14:16:45 -04:00
bunnei	2dbb144fc6	Merge pull request #4781 from german77/GChotplug Add hotplug, rumble and fix 3rd party adapters for the GC adapter	2020-10-29 10:28:19 -07:00
David	89199ca215	Merge pull request #4859 from Morph1984/missing-ctime-include kernel/process: Add missing <ctime> include	2020-10-29 19:03:19 +11:00
Morph	9cfc5fee2f	kernel/process: Add missing <ctime> include Fixes compilation on MSVC	2020-10-29 03:17:20 -04:00
LC	1a6b1bf1d7	Merge pull request #4857 from liushuyu/master web_service: follow-up fix to #4842	2020-10-29 01:54:45 -04:00
bunnei	c5134cbf3a	Merge pull request #4835 from lat9nq/rng-default-time kernel: Use the current time as the default RNG seed	2020-10-28 22:51:29 -07:00
bunnei	c6d001c94f	Merge pull request #4838 from lioncash/syncmgr sync_manager: Amend parameter order of calls to SyncptIncr constructor	2020-10-28 22:49:22 -07:00
liushuyu	cf63eacc1a	web_service: follow-up fix to #4842 ... * The web_service http request is now fixed on Windows (R) platform. * The issue is due to a complicated race-condition in `httplib`, a detailed explanation is available at https://github.com/yhirose/cpp-httplib/pull/701 * A pending Pull Request on `httplib` has been applied to remedy the said race-condition. * The socket availability check is removed due to a behavioral choice of `httplib` that a socket will not be created before any actual request is sent.	2020-10-28 23:16:06 -06:00
german	5333db91c1	Add hotplug, rumble and fix 3rd party adapters for the GC adapter	2020-10-28 21:12:34 -05:00
LC	c20569ebdf	Merge pull request #4856 from bunnei/webservice-socket-error web_service: web_backend: Handle socket errors with GenericRequest.	2020-10-28 20:46:28 -04:00
bunnei	156556ddd2	web_service: web_backend: Handle socket errors with GenericRequest. - Fixes a shutdown crash when we try to submit telemetry if there is a service issue.	2020-10-28 17:19:12 -07:00
LC	475d46bb64	Merge pull request #4855 from bunnei/cdma-pusher-log-fix video_core: cdma_pusher: Add missing LOG_DEBUG field in ExecuteCommand.	2020-10-28 20:01:29 -04:00
bunnei	94eca09cf6	video_core: cdma_pusher: Add missing LOG_DEBUG field in ExecuteCommand.	2020-10-28 16:47:08 -07:00
bunnei	7af2cb4318	Merge pull request #4846 from lioncash/service-fn service: Update function tables	2020-10-28 13:47:56 -07:00
ReinUsesLisp	44b552be71	shader/arithmetic: Implement FCMP immediate + register variant Trivially add the encoding for this.	2020-10-28 17:05:41 -03:00
bunnei	663e221f99	Merge pull request #4845 from lioncash/inih externals: Track upstream inih	2020-10-28 09:58:58 -07:00
LC	725fcbb368	Merge pull request #4851 from ReinUsesLisp/core-threads-race hle/kernel: Remove unused registered_core_threads to fix data races	2020-10-28 04:54:35 -04:00
LC	a1f176ce52	Merge pull request #4850 from ReinUsesLisp/fiber-ptr-ref common/fiber: Take shared_ptr<Fiber> by copy in YieldTo	2020-10-28 04:54:19 -04:00
LC	1fd22823bc	Merge pull request #4849 from ReinUsesLisp/fix-fiber-test tests: Fix data race in fibers test	2020-10-28 04:26:10 -04:00
LC	978e7897a3	Merge pull request #4848 from ReinUsesLisp/type-limits video_core: Enforce -Werror=type-limits	2020-10-28 03:16:10 -04:00
LC	55ac6f7a2b	Merge pull request #4847 from ReinUsesLisp/warn-move video_core: Enforce -Wredundant-move and -Wpessimizing-move	2020-10-28 03:14:58 -04:00
ReinUsesLisp	79da90cea8	video_core: Enforce -Wredundant-move and -Wpessimizing-move Silence three warnings and make them errors to avoid introducing more in the future.	2020-10-28 02:44:50 -03:00
ReinUsesLisp	4a451e5849	video_core: Enforce -Werror=type-limits Silences one warning and avoids introducing more in the future.	2020-10-28 02:37:47 -03:00
ReinUsesLisp	cdb2480d39	common/fiber: Take shared_ptr<Fiber> by copy in YieldTo YieldTo does not intend to modify the passed shared_ptrs. Pass it by copy to keep a reference count while this function executes.	2020-10-28 02:02:44 -03:00
ReinUsesLisp	3fdb42e0b4	tests: Fix data race in fibers test Previous to this commit, the tests were using operator[] from unordered_map to query elements but this silently inserts empty elements when they don't exist. If all threads were executed without concurrency, this wouldn't be an issue, but the same unordered_map could be written from two threads at the same time. This is a data race and makes some previously inserted elements invisible for a short period of time, causing them to insert and return an empty element. This default constructed element (a zero) was used to index an array of fibers that asserted when one of them was nullptr, shutting the test session off. To address this issue, lock on thread id reads and writes. This could be a shared mutex to allow concurrent reads, but the definition of std::this_thread::get_id is fuzzy when using non-standard techniques like fibers. I opted to use a standard mutex. While we are at it, fix the included headers.	2020-10-28 01:41:24 -03:00
Lioncash	020519def8	service: Update function tables Updates function tables according to info on SwitchBrew.	2020-10-27 21:19:46 -04:00
Lioncash	9a44c1ea27	externals: Update inih to r52	2020-10-27 19:52:48 -04:00
Lioncash	65e697de59	externals: Track mainline inih project	2020-10-27 19:52:48 -04:00
LC	7d27a7a511	Merge pull request #4842 from liushuyu/fix-web-srv web_backend: fix a regression introduced in `39c8d18`	2020-10-27 19:12:27 -04:00
liushuyu	eb84e0f63a	externals: auto detect system OpenSSL	2020-10-27 14:20:20 -06:00
liushuyu	8e673cbb08	web_backend: fix a regression introduced in `39c8d18` * A regression was in `39c8d18` and token verification function was broken. * The reason being `httplib` now requires OpenSSL 1.1+ API while LibreSSL 2.x provided OpenSSL 1.0 compatible API. * The bundled LibreSSL has been updated to 3.2.2 so it now provides an OpenSSL 1.1 compatible API. * Also the path hint has been added so that it will find the correct path to the CA certs on nix systems. An option is provided so that nix system distributions/providers can use their own SSL implementations when compiling Yuzu/Citra to (hopefully) comply with their maintenance guidelines. LURLParse is also removed since `httplib` can handle `scheme:host:port` string itself now.	2020-10-27 02:57:19 -06:00
Lioncash	047e77e2f0	sync_manager: Amend parameter order of calls to SyncptIncr constructor Corrects some cases where the arguments would be incorrectly swapped.	2020-10-27 03:22:57 -04:00
Lioncash	cce14b4cd7	h264: Make WriteUe take a u32 Enforces the type of the desired value in calling code.	2020-10-27 03:21:53 -04:00
Lioncash	6291975731	vp9: std::move buffer within ComposeFrameHeader() We can move the buffer here to avoid a heap reallocation	2020-10-27 02:27:31 -04:00
Lioncash	00decfbb07	vp9: Remove dead code	2020-10-27 02:26:17 -04:00
Lioncash	111802bbbb	vp9: Join declarations with assignments	2020-10-27 02:26:03 -04:00
Lioncash	3b5d5fa86f	vp9: Remove pessimizing moves The move will already occur without std::move.	2020-10-27 02:21:40 -04:00
Lioncash	dcc26c54a5	vp9: Resolve variable shadowing	2020-10-27 02:20:17 -04:00
Lioncash	c04203b786	nvdec: Tidy up header includes Prevents a few unnecessary inclusions.	2020-10-27 02:16:42 -04:00
bunnei	cd92a94965	Merge pull request #4805 from bunnei/update-defaults yuzu: settings: Enable multicore, asynch GPU, and assembly shaders by default.	2020-10-26 23:14:09 -07:00
bunnei	941563f981	yuzu: settings: Enable multicore, asynch GPU, and assembly shaders by default. - In general, this is now the preferred settings for most games. # Conflicts: # src/yuzu/configuration/config.cpp	2020-10-26 23:13:05 -07:00
bunnei	d33399e1f4	Merge pull request #4729 from ameerj/nvdec-prod video_core: NVDEC Implementation	2020-10-26 23:02:42 -07:00
ReinUsesLisp	ce69ff2890	hle/kernel: Remove unused registered_core_threads to fix data races This member was only used on asserts and it triggered data races. Remove it to fix them.	2020-10-27 01:55:39 -03:00
bunnei	c7f32931ee	Merge pull request #4832 from bunnei/cpu-manager-microprofile-fix core: cpu_manager: Add missing call to MicroProfileOnThreadExit().	2020-10-26 21:29:09 -07:00
bunnei	1828f82000	Merge pull request #4833 from bunnei/timezonemanager-explicit hle: services: TimeZoneContentManager: This can be made explicit.	2020-10-26 21:28:45 -07:00
ameerj	eb67a45ca8	video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>	2020-10-26 23:07:36 -04:00
bunnei	9f08cea2c4	Merge pull request #4834 from lioncash/copy-fn controller: Pass ControllerParameters by reference in ReconfigureControllers()	2020-10-26 18:49:26 -07:00
lat9nq	8bd246032a	kernel: Use the current time as the default RNG seed Use the current time, not zero, as the default RNG seed.	2020-10-26 21:42:11 -04:00
Lioncash	6b5f565324	controller: Pass ControllerParameters by reference in ReconfigureControllers() Prevents unnecessary copies and heap reallocations from occurring.	2020-10-26 21:06:15 -04:00
bunnei	54aabb00b0	core: cpu_manager: Add missing call to MicroProfileOnThreadExit(). - Fixes an occasional crash when trying to launch subsequent games.	2020-10-26 16:09:15 -07:00
Lioncash	1dd4132eb1	externals: Update fmt to 7.1.0 Keeps the used version of the library up to date.	2020-10-26 18:34:44 -04:00