fs-srv: GetFreeSpaceSize & GetTotalSpaceSize

Closes #3533 Turns out the functions were already implemented but just never added
Merge pull request #3826 from MerryMage/update-dynarmic
2020-04-30 23:59:57 +10:00 · 2020-04-29 23:30:00 +10:00 · 2020-04-29 14:25:53 +01:00 · 2020-04-28 21:54:06 -04:00 · 2020-04-28 21:41:13 -04:00 · 2020-04-28 21:38:02 -04:00
128 changed files with 2537 additions and 651 deletions
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/externals/microprofile/microprofile.h
+++ b/externals/microprofile/microprofile.h
@@ -910,14 +910,14 @@ typedef void* (*MicroProfileThreadFunc)(void*);

 #ifndef _WIN32
 typedef pthread_t MicroProfileThread;
-void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
 {
    pthread_attr_t Attr;
    int r  = pthread_attr_init(&Attr);
    MP_ASSERT(r == 0);
    pthread_create(pThread, &Attr, Func, 0);
 }
-void MicroProfileThreadJoin(MicroProfileThread* pThread)
+inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
 {
    int r = pthread_join(*pThread, 0);
    MP_ASSERT(r == 0);
@@ -930,11 +930,11 @@ DWORD _stdcall ThreadTrampoline(void* pFunc)
    return (uint32_t)F(0);
 }

-void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
 {
    *pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
 }
-void MicroProfileThreadJoin(MicroProfileThread* pThread)
+inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
 {
    WaitForSingleObject(*pThread, INFINITE);
    CloseHandle(*pThread);
@@ -1131,7 +1131,7 @@ inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
    pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
 }
 #else
-MicroProfileThreadLog* MicroProfileGetThreadLog()
+inline MicroProfileThreadLog* MicroProfileGetThreadLog()
 {
    return g_MicroProfileThreadLog;
 }
@@ -1247,7 +1247,7 @@ MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
    return MICROPROFILE_INVALID_TOKEN;
 }

-uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
+inline uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
 {
    for(uint32_t i = 0; i < S.nGroupCount; ++i)
    {
@@ -1276,7 +1276,7 @@ uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
    return nGroupIndex;
 }

-void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
+inline void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
 {
    int nCategoryIndex = -1;
    for(uint32_t i = 0; i < S.nCategoryCount; ++i)
@@ -1442,7 +1442,7 @@ void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart)
    }
 }

-void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
+inline void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
 {
    if(S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks)
    {
@@ -1894,7 +1894,7 @@ void MicroProfileSetEnableAllGroups(bool bEnableAllGroups)
    S.nAllGroupsWanted = bEnableAllGroups ? 1 : 0;
 }

-void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
+inline void MicroProfileEnableCategory(const char* pCategory, bool bEnabled)
 {
    int nCategoryIndex = -1;
    for(uint32_t i = 0; i < S.nCategoryCount; ++i)
@@ -2004,7 +2004,7 @@ void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Typ
 }


-void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
+inline void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
 {
    for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i)
    {
--- a/externals/microprofile/microprofileui.h
+++ b/externals/microprofile/microprofileui.h
@@ -417,19 +417,19 @@ void MicroProfileToggleDisplayMode()
 }


-void MicroProfileStringArrayClear(MicroProfileStringArray* pArray)
+inline void MicroProfileStringArrayClear(MicroProfileStringArray* pArray)
 {
    pArray->nNumStrings = 0;
    pArray->pBufferPos = &pArray->Buffer[0];
 }

-void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral)
+inline void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral)
 {
    MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
    pArray->ppStrings[pArray->nNumStrings++] = pLiteral;
 }

-void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...)
+inline void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...)
 {
    MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS);
    pArray->ppStrings[pArray->nNumStrings++] = pArray->pBufferPos;
@@ -439,7 +439,7 @@ void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char*
    va_end(args);
    MP_ASSERT(pArray->pBufferPos < pArray->Buffer + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE);
 }
-void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc)
+inline void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc)
 {
    memcpy(&pDest->ppStrings[0], &pSrc->ppStrings[0], sizeof(pDest->ppStrings));
    memcpy(&pDest->Buffer[0], &pSrc->Buffer[0], sizeof(pDest->Buffer));
@@ -456,7 +456,7 @@ void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStr
    pDest->nNumStrings = pSrc->nNumStrings;
 }

-void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0)
+inline void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0)
 {
    uint32_t* nStringLengths = pStringLengths ? pStringLengths : (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
    uint32_t nTextCount = nNumStrings/2;
@@ -474,7 +474,7 @@ void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, u
    nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nTextCount + 2 * MICROPROFILE_BORDER_SIZE;
 }

-void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
+inline void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
 {
    uint32_t nWidth = 0, nHeight = 0;
    uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
@@ -503,7 +503,7 @@ void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppString
        nY += (MICROPROFILE_TEXT_HEIGHT+1);
    }
 }
-void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
+inline void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0)
 {
    uint32_t nWidth = 0, nHeight = 0;
    uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t));
@@ -529,7 +529,7 @@ void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, u



-void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
+inline void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
 {
    MicroProfile& S = *MicroProfileGet();
    if(UI.nRangeBeginIndex != UI.nRangeEndIndex && UI.pRangeLog)
@@ -608,7 +608,7 @@ void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip)
    }
 }

-void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime)
+inline void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -718,7 +718,7 @@ void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uin
 }


-void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
+inline void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -728,7 +728,7 @@ void MicroProfileZoomTo(int64_t nTickStart, int64_t nTickEnd)
    UI.fDetailedRangeTarget = MicroProfileLogTickDifference(nTickStart, nTickEnd) * fToMs;
 }

-void MicroProfileCenter(int64_t nTickCenter)
+inline void MicroProfileCenter(int64_t nTickCenter)
 {
    MicroProfile& S = *MicroProfileGet();
    int64_t nStart = S.Frames[S.nFrameCurrent].nFrameStartCpu;
@@ -739,7 +739,7 @@ void MicroProfileCenter(int64_t nTickCenter)
 #ifdef MICROPROFILE_DEBUG
 uint64_t* g_pMicroProfileDumpStart = 0;
 uint64_t* g_pMicroProfileDumpEnd = 0;
-void MicroProfileDebugDumpRange()
+inline void MicroProfileDebugDumpRange()
 {
    MicroProfile& S = *MicroProfileGet();
    if(g_pMicroProfileDumpStart != g_pMicroProfileDumpEnd)
@@ -777,7 +777,7 @@ void MicroProfileDebugDumpRange()

 #define MICROPROFILE_HOVER_DIST 0.5f

-void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY)
+inline void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY)
 {
    MicroProfile& S = *MicroProfileGet();
    int64_t nTickIn = -1;
@@ -841,7 +841,7 @@ void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId,
    }
 }

-void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame)
+inline void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame)
 {
    MicroProfile& S = *MicroProfileGet();
    MP_DEBUG_DUMP_RANGE();
@@ -1325,7 +1325,7 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
 }


-void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame)
+inline void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -1379,7 +1379,7 @@ void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uin
    }
    MicroProfileDrawBox(fSelectionStart, nBaseY, fSelectionEnd, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat);
 }
-void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
+inline void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -1416,11 +1416,11 @@ void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight)
    MicroProfileDrawDetailedFrameHistory(nWidth, nHeight, nBaseY, nSelectedFrame);
 }

-void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen)
+inline void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen)
 {
    MicroProfileDrawText(nX - nStrLen * (MICROPROFILE_TEXT_WIDTH+1), nY, nColor, pStr, nStrLen);
 }
-void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
+inline void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)
 {
    if(pName)
    {
@@ -1432,7 +1432,7 @@ void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName)

 typedef void (*MicroProfileLoopGroupCallback)(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pData);

-void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData)
+inline void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData)
 {
    MicroProfile& S = *MicroProfileGet();
    nY += MICROPROFILE_TEXT_HEIGHT + 2;
@@ -1465,7 +1465,7 @@ void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName,
 }


-void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize)
+inline void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -1527,7 +1527,7 @@ void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float*

 #define SBUF_MAX 32

-void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+inline void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
 {
    const uint32_t nHeight = MICROPROFILE_TEXT_HEIGHT;
    const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
@@ -1547,7 +1547,7 @@ void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t n
 }


-uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL)
+inline uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL)
 {
    const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH);
    const uint32_t nWidth = MICROPROFILE_BAR_WIDTH;
@@ -1559,7 +1559,7 @@ uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const
    return nWidth + 5 + nTextWidth;

 }
-void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+inline void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
 {
    MicroProfile& S = *MicroProfileGet();
    char sBuffer[SBUF_MAX];
@@ -1567,7 +1567,7 @@ void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64
    MicroProfileDrawText(nX, nY, (uint32_t)-1, sBuffer, nLen);
 }

-uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName)
+inline uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName)
 {
    MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarCallCountCallback, 0);
    const uint32_t nTextWidth = 6 * MICROPROFILE_TEXT_WIDTH;
@@ -1581,7 +1581,7 @@ struct MicroProfileMetaAverageArgs
    float fRcpFrames;
 };

-void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+inline void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
 {
    MicroProfileMetaAverageArgs* pArgs = (MicroProfileMetaAverageArgs*)pExtra;
    uint64_t* pCounters = pArgs->pCounters;
@@ -1591,7 +1591,7 @@ void MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint
    MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen);
 }

-uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
+inline uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
 {
    if(!pName)
        return 0;
@@ -1605,7 +1605,7 @@ uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCount
 }


-void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+inline void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
 {
    uint64_t* pCounters = (uint64_t*)pExtra;
    char sBuffer[SBUF_MAX];
@@ -1613,7 +1613,7 @@ void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64
    MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen);
 }

-uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
+inline uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight)
 {
    if(!pName)
        return 0;
@@ -1625,7 +1625,7 @@ uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounter
    return 5 + nTextWidth;
 }

-void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
+inline void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra)
 {
    MicroProfile& S = *MicroProfileGet();
    if (S.TimerInfo[nTimer].bGraph)
@@ -1640,7 +1640,7 @@ void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t
    }
 }

-uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth)
+inline uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth)
 {
    MicroProfileDrawLineVertical(nX-5, nY, nTotalHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]);
    MicroProfileLoopActiveGroupsDraw(nMaxWidth, nY, 0, MicroProfileDrawBarLegendCallback, 0);
@@ -1807,7 +1807,7 @@ void MicroProfileDumpTimers()
    }
 }

-void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight)
+inline void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -1951,7 +1951,7 @@ typedef const char* (*MicroProfileSubmenuCallback)(int, bool* bSelected);
 typedef void (*MicroProfileClickCallback)(int);


-const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
 {
    MicroProfile& S = *MicroProfileGet();
    switch(nIndex)
@@ -1979,7 +1979,7 @@ const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected)
    }
 }

-const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
 {
    MicroProfile& S = *MicroProfileGet();
    *bSelected = false;
@@ -2012,7 +2012,7 @@ const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected)
    }
 }

-const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)
 {
    MicroProfile& S = *MicroProfileGet();
    if(nIndex < sizeof(g_MicroProfileAggregatePresets)/sizeof(g_MicroProfileAggregatePresets[0]))
@@ -2032,7 +2032,7 @@ const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected)

 }

-const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
 {
    MicroProfile& S = *MicroProfileGet();
    *bSelected = 0 != (S.nBars & (1 << nIndex));
@@ -2054,7 +2054,7 @@ const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected)
    return 0;
 }

-const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
 {
    MicroProfile& S = *MicroProfileGet();
    if(nIndex >= MICROPROFILE_OPTION_SIZE) return 0;
@@ -2094,7 +2094,7 @@ const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected)
    return UI.Options[nIndex].Text;
 }

-const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
 {
    static char buf[128];
    *bSelected = false;
@@ -2118,7 +2118,7 @@ const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected)
    }
 }

-const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
 {
    if((uint32_t)-1 == UI.nCustomActive)
    {
@@ -2145,13 +2145,13 @@ const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected)
    }
 }

-const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected)
+inline const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected)
 {
    return 0;
 }


-void MicroProfileUIClickMode(int nIndex)
+inline void MicroProfileUIClickMode(int nIndex)
 {
    MicroProfile& S = *MicroProfileGet();
    switch(nIndex)
@@ -2176,7 +2176,7 @@ void MicroProfileUIClickMode(int nIndex)
    }
 }

-void MicroProfileUIClickGroups(int nIndex)
+inline void MicroProfileUIClickGroups(int nIndex)
 {
    MicroProfile& S = *MicroProfileGet();
    if(nIndex == 0)
@@ -2208,7 +2208,7 @@ void MicroProfileUIClickGroups(int nIndex)
    }
 }

-void MicroProfileUIClickAggregate(int nIndex)
+inline void MicroProfileUIClickAggregate(int nIndex)
 {
    MicroProfile& S = *MicroProfileGet();
    S.nAggregateFlip = g_MicroProfileAggregatePresets[nIndex];
@@ -2218,13 +2218,13 @@ void MicroProfileUIClickAggregate(int nIndex)
    }
 }

-void MicroProfileUIClickTimers(int nIndex)
+inline void MicroProfileUIClickTimers(int nIndex)
 {
    MicroProfile& S = *MicroProfileGet();
    S.nBars ^= (1 << nIndex);
 }

-void MicroProfileUIClickOptions(int nIndex)
+inline void MicroProfileUIClickOptions(int nIndex)
 {
    MicroProfile& S = *MicroProfileGet();
    switch(UI.Options[nIndex].nSubType)
@@ -2271,7 +2271,7 @@ void MicroProfileUIClickOptions(int nIndex)
    }
 }

-void MicroProfileUIClickPreset(int nIndex)
+inline void MicroProfileUIClickPreset(int nIndex)
 {
    int nNumPresets = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]);
    int nIndexSave = nIndex - nNumPresets - 1;
@@ -2285,7 +2285,7 @@ void MicroProfileUIClickPreset(int nIndex)
    }
 }

-void MicroProfileUIClickCustom(int nIndex)
+inline void MicroProfileUIClickCustom(int nIndex)
 {
    if(nIndex == 0)
    {
@@ -2298,13 +2298,13 @@ void MicroProfileUIClickCustom(int nIndex)

 }

-void MicroProfileUIClickEmpty(int nIndex)
+inline void MicroProfileUIClickEmpty(int nIndex)
 {

 }


-void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
+inline void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
 {
    MicroProfile& S = *MicroProfileGet();

@@ -2489,7 +2489,7 @@ void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight)
 }


-void MicroProfileMoveGraph()
+inline void MicroProfileMoveGraph()
 {

    int nZoom = UI.nMouseWheelDelta;
@@ -2536,7 +2536,7 @@ void MicroProfileMoveGraph()
        UI.nOffsetY = 0;
 }

-void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
+inline void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
 {
    if((uint32_t)-1 != UI.nCustomActive)
    {
@@ -2633,7 +2633,7 @@ void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight)
        }
    }
 }
-void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
+inline void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
 {
    MICROPROFILE_SCOPE(g_MicroProfileDraw);
    MicroProfile& S = *MicroProfileGet();
@@ -3226,7 +3226,7 @@ void MicroProfileLoadPreset(const char* pSuffix)
    }
 }

-uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
+inline uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
 {
    for(uint32_t i = 0; i < UI.nCustomCount; ++i)
    {
@@ -3238,7 +3238,7 @@ uint32_t MicroProfileCustomGroupFind(const char* pCustomName)
    return (uint32_t)-1;
 }

-uint32_t MicroProfileCustomGroup(const char* pCustomName)
+inline uint32_t MicroProfileCustomGroup(const char* pCustomName)
 {
    for(uint32_t i = 0; i < UI.nCustomCount; ++i)
    {
@@ -3271,7 +3271,7 @@ void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint3
    UI.Custom[nIndex].nAggregateFlip = nAggregateFlip;
 }

-void MicroProfileCustomGroupEnable(uint32_t nIndex)
+inline void MicroProfileCustomGroupEnable(uint32_t nIndex)
 {
    if(nIndex < UI.nCustomCount)
    {
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,8 +54,10 @@ else()
    add_compile_options(
        -Wall
        -Werror=implicit-fallthrough
+        -Werror=missing-declarations
        -Werror=reorder
        -Wextra
+        -Wmissing-declarations
        -Wno-attributes
        -Wno-unused-parameter
    )
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -180,7 +180,7 @@ public:
    }

    constexpr void Assign(const T& value) {
-        storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
+        storage = static_cast<StorageType>((storage & ~mask) | FormatValue(value));
    }

    constexpr T Value() const {
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,10 +185,9 @@ void ARM_Dynarmic_64::Step() {

 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
                                 std::size_t core_index)
-    : ARM_Interface{system},
-      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system},
-      core_index{core_index}, exclusive_monitor{
-                                  dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
+    : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
+      inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
+      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

 ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;

--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -62,8 +62,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
    return false;
 }

-ARM_Unicorn::ARM_Unicorn(System& system) : ARM_Interface{system} {
-    CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
+ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} {
+    const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
+    CHECKED(uc_open(arch, UC_MODE_ARM, &uc));

    auto fpv = 3 << 20;
    CHECKED(uc_reg_write(uc, UC_ARM64_REG_CPACR_EL1, &fpv));
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -15,7 +15,12 @@ class System;

 class ARM_Unicorn final : public ARM_Interface {
 public:
-    explicit ARM_Unicorn(System& system);
+    enum class Arch {
+        AArch32, // 32-bit ARM
+        AArch64, // 64-bit ARM
+    };
+
+    explicit ARM_Unicorn(System& system, Arch architecture);
    ~ARM_Unicorn() override;

    void SetPC(u64 pc) override;
--- a/src/core/crypto/partition_data_manager.cpp
+++ b/src/core/crypto/partition_data_manager.cpp
@@ -202,8 +202,8 @@ static std::array<Key128, 0x20> FindEncryptedMasterKeyFromHex(const std::vector<
    return out;
 }

-FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
-                                            const std::string& name) {
+static FileSys::VirtualFile FindFileInDirWithNames(const FileSys::VirtualDir& dir,
+                                                   const std::string& name) {
    const auto upper = Common::ToUpper(name);

    for (const auto& fname : {name, name + ".bin", upper, upper + ".BIN"}) {
@@ -345,8 +345,7 @@ FileSys::VirtualFile PartitionDataManager::GetPackage2Raw(Package2Type type) con
    return package2.at(static_cast<size_t>(type));
 }

-bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
-
+static bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header) {
    const std::vector<u8> iv(header.header_ctr.begin(), header.header_ctr.end());
    Package2Header temp = header;
    AESCipher<Key128> cipher(key, Mode::CTR);
--- a/src/core/hle/kernel/memory/memory_block.h
+++ b/src/core/hle/kernel/memory/memory_block.h
@@ -17,7 +17,7 @@ namespace Kernel::Memory {

 enum class MemoryState : u32 {
    None = 0,
-    Mask = 0xFFFFFFFF, // TODO(bunnei): This should probable be 0xFF
+    Mask = 0xFF,
    All = ~None,

    FlagCanReprotect = (1 << 8),
@@ -253,6 +253,23 @@ public:
        };
    }

+    void ShareToDevice(MemoryPermission /*new_perm*/) {
+        ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared ||
+               device_use_count == 0);
+        attribute |= MemoryAttribute::DeviceShared;
+        const u16 new_use_count{++device_use_count};
+        ASSERT(new_use_count > 0);
+    }
+
+    void UnshareToDevice(MemoryPermission /*new_perm*/) {
+        ASSERT((attribute & MemoryAttribute::DeviceShared) == MemoryAttribute::DeviceShared);
+        const u16 prev_use_count{device_use_count--};
+        ASSERT(prev_use_count > 0);
+        if (prev_use_count == 1) {
+            attribute &= ~MemoryAttribute::DeviceShared;
+        }
+    }
+
 private:
    constexpr bool HasProperties(MemoryState s, MemoryPermission p, MemoryAttribute a) const {
        constexpr MemoryAttribute AttributeIgnoreMask{MemoryAttribute::DontCareMask |
@@ -287,9 +304,9 @@ private:
        state = new_state;
        perm = new_perm;

-        // TODO(bunnei): Is this right?
        attribute = static_cast<MemoryAttribute>(
-            new_attribute /*| (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared))*/);
+            new_attribute |
+            (attribute & (MemoryAttribute::IpcLocked | MemoryAttribute::DeviceShared)));
    }

    constexpr MemoryBlock Split(VAddr split_addr) {
--- a/src/core/hle/kernel/memory/memory_block_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_block_manager.cpp
@@ -143,6 +143,42 @@ void MemoryBlockManager::Update(VAddr addr, std::size_t num_pages, MemoryState s
    }
 }

+void MemoryBlockManager::UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func,
+                                    MemoryPermission perm) {
+    const std::size_t prev_count{memory_block_tree.size()};
+    const VAddr end_addr{addr + num_pages * PageSize};
+    iterator node{memory_block_tree.begin()};
+
+    while (node != memory_block_tree.end()) {
+        MemoryBlock* block{&(*node)};
+        iterator next_node{std::next(node)};
+        const VAddr cur_addr{block->GetAddress()};
+        const VAddr cur_end_addr{block->GetNumPages() * PageSize + cur_addr};
+
+        if (addr < cur_end_addr && cur_addr < end_addr) {
+            iterator new_node{node};
+
+            if (addr > cur_addr) {
+                memory_block_tree.insert(node, block->Split(addr));
+            }
+
+            if (end_addr < cur_end_addr) {
+                new_node = memory_block_tree.insert(node, block->Split(end_addr));
+            }
+
+            lock_func(new_node, perm);
+
+            MergeAdjacent(new_node, next_node);
+        }
+
+        if (cur_end_addr - 1 >= end_addr - 1) {
+            break;
+        }
+
+        node = next_node;
+    }
+}
+
 void MemoryBlockManager::IterateForRange(VAddr start, VAddr end, IterateFunc&& func) {
    const_iterator it{FindIterator(start)};
    MemoryInfo info{};
--- a/src/core/hle/kernel/memory/memory_block_manager.h
+++ b/src/core/hle/kernel/memory/memory_block_manager.h
@@ -45,6 +45,9 @@ public:
                MemoryPermission perm = MemoryPermission::None,
                MemoryAttribute attribute = MemoryAttribute::None);

+    using LockFunc = std::function<void(iterator, MemoryPermission)>;
+    void UpdateLock(VAddr addr, std::size_t num_pages, LockFunc&& lock_func, MemoryPermission perm);
+
    using IterateFunc = std::function<void(const MemoryInfo&)>;
    void IterateForRange(VAddr start, VAddr end, IterateFunc&& func);

--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -840,6 +840,50 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
    return MakeResult<VAddr>(addr);
 }

+ResultCode PageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) {
+    std::lock_guard lock{page_table_lock};
+
+    MemoryPermission perm{};
+    if (const ResultCode result{CheckMemoryState(
+            nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
+            MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
+            MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
+            MemoryAttribute::DeviceSharedAndUncached)};
+        result.IsError()) {
+        return result;
+    }
+
+    block_manager->UpdateLock(addr, size / PageSize,
+                              [](MemoryBlockManager::iterator block, MemoryPermission perm) {
+                                  block->ShareToDevice(perm);
+                              },
+                              perm);
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode PageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) {
+    std::lock_guard lock{page_table_lock};
+
+    MemoryPermission perm{};
+    if (const ResultCode result{CheckMemoryState(
+            nullptr, &perm, nullptr, addr, size, MemoryState::FlagCanChangeAttribute,
+            MemoryState::FlagCanChangeAttribute, MemoryPermission::None, MemoryPermission::None,
+            MemoryAttribute::LockedAndIpcLocked, MemoryAttribute::None,
+            MemoryAttribute::DeviceSharedAndUncached)};
+        result.IsError()) {
+        return result;
+    }
+
+    block_manager->UpdateLock(addr, size / PageSize,
+                              [](MemoryBlockManager::iterator block, MemoryPermission perm) {
+                                  block->UnshareToDevice(perm);
+                              },
+                              perm);
+
+    return RESULT_SUCCESS;
+}
+
 ResultCode PageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
    block_manager = std::make_unique<MemoryBlockManager>(start, end);

--- a/src/core/hle/kernel/memory/page_table.h
+++ b/src/core/hle/kernel/memory/page_table.h
@@ -53,6 +53,8 @@ public:
                                          bool is_map_only, VAddr region_start,
                                          std::size_t region_num_pages, MemoryState state,
                                          MemoryPermission perm, PAddr map_addr = 0);
+    ResultCode LockForDeviceAddressSpace(VAddr addr, std::size_t size);
+    ResultCode UnlockForDeviceAddressSpace(VAddr addr, std::size_t size);

    Common::PageTable& PageTableImpl() {
        return page_table_impl;
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -27,7 +27,9 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
        std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);

 #else
-    arm_interface = std::make_shared<Core::ARM_Unicorn>(system);
+    using Core::ARM_Unicorn;
+    arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32);
+    arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64);
    LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif

--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -36,22 +36,22 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
 }

 ResultCode SharedMemory::Map(Process& target_process, VAddr address, std::size_t size,
-                             Memory::MemoryPermission permission) {
+                             Memory::MemoryPermission permissions) {
    const u64 page_count{(size + Memory::PageSize - 1) / Memory::PageSize};

    if (page_list.GetNumPages() != page_count) {
        UNIMPLEMENTED_MSG("Page count does not match");
    }

-    Memory::MemoryPermission expected =
+    const Memory::MemoryPermission expected =
        &target_process == owner_process ? owner_permission : user_permission;

-    if (permission != expected) {
+    if (permissions != expected) {
        UNIMPLEMENTED_MSG("Permission does not match");
    }

    return target_process.PageTable().MapPages(address, page_list, Memory::MemoryState::Shared,
-                                               permission);
+                                               permissions);
 }

 } // namespace Kernel
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -51,7 +51,7 @@ public:
     * @param permissions Memory block map permissions (specified by SVC field)
     */
    ResultCode Map(Process& target_process, VAddr address, std::size_t size,
-                   Memory::MemoryPermission permission);
+                   Memory::MemoryPermission permissions);

    /**
     * Gets a pointer to the shared memory block
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -55,9 +55,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
    return address + size > address;
 }

-// 8 GiB
-constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
-
 // Helper function that performs the common sanity checks for svcMapMemory
 // and svcUnmapMemory. This is doable, as both functions perform their sanitizing
 // in the same order.
@@ -776,7 +773,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
            break;
        }

-        LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
+        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
        return ERR_INVALID_ENUM_VALUE;
    }

@@ -869,7 +866,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    }

    default:
-        LOG_WARNING(Kernel_SVC, "(STUBBED) Unimplemented svcGetInfo id=0x{:016X}", info_id);
+        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
        return ERR_INVALID_ENUM_VALUE;
    }
 }
@@ -1229,6 +1226,142 @@ static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address,
    return QueryMemory(system, memory_info_address, page_info_address, query_address);
 }

+static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
+                                       u64 src_address, u64 size) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. process_handle=0x{:08X}, dst_address=0x{:016X}, "
+              "src_address=0x{:016X}, size=0x{:016X}",
+              process_handle, dst_address, src_address, size);
+
+    if (!Common::Is4KBAligned(src_address)) {
+        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
+                  src_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(dst_address)) {
+        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
+                  dst_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range overflows the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto process = handle_table.Get<Process>(process_handle);
+    if (!process) {
+        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
+                  process_handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    auto& page_table = process->PageTable();
+    if (!page_table.IsInsideAddressSpace(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range is not within the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!page_table.IsInsideASLRRegion(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return page_table.MapProcessCodeMemory(dst_address, src_address, size);
+}
+
+static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
+                                         u64 dst_address, u64 src_address, u64 size) {
+    LOG_DEBUG(Kernel_SVC,
+              "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
+              "size=0x{:016X}",
+              process_handle, dst_address, src_address, size);
+
+    if (!Common::Is4KBAligned(dst_address)) {
+        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
+                  dst_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(src_address)) {
+        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
+                  src_address);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range overflows the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    auto process = handle_table.Get<Process>(process_handle);
+    if (!process) {
+        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
+                  process_handle);
+        return ERR_INVALID_HANDLE;
+    }
+
+    auto& page_table = process->PageTable();
+    if (!page_table.IsInsideAddressSpace(src_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Source address range is not within the address space (src_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  src_address, size);
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!page_table.IsInsideASLRRegion(dst_address, size)) {
+        LOG_ERROR(Kernel_SVC,
+                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
+                  "size=0x{:016X}).",
+                  dst_address, size);
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
+}
+
 /// Exits the current process
 static void ExitProcess(Core::System& system) {
    auto* current_process = system.Kernel().CurrentProcess();
@@ -2256,8 +2389,8 @@ static const FunctionDef SVC_Table_64[] = {
    {0x74, nullptr, "MapProcessMemory"},
    {0x75, nullptr, "UnmapProcessMemory"},
    {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"},
-    {0x77, nullptr, "MapProcessCodeMemory"},
-    {0x78, nullptr, "UnmapProcessCodeMemory"},
+    {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"},
+    {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"},
    {0x79, nullptr, "CreateProcess"},
    {0x7A, nullptr, "StartProcess"},
    {0x7B, nullptr, "TerminateProcess"},
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -33,8 +33,10 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {111, nullptr, "ClearSaveDataThumbnail"},
        {112, nullptr, "LoadSaveDataThumbnail"},
        {113, nullptr, "GetSaveDataThumbnailExistence"},
+        {120, nullptr, "ListOpenUsersInApplication"},
        {130, nullptr, "ActivateOpenContextRetention"},
        {140, nullptr, "ListQualifiedUsers"},
+        {150, nullptr, "AuthenticateApplicationAsync"},
        {190, nullptr, "GetUserLastOpenedApplication"},
        {191, nullptr, "ActivateOpenContextHolder"},
        {200, nullptr, "BeginUserRegistration"},
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -35,6 +35,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {113, nullptr, "GetSaveDataThumbnailExistence"},
        {130, nullptr, "ActivateOpenContextRetention"},
        {140, nullptr, "ListQualifiedUsers"},
+        {150, nullptr, "AuthenticateApplicationAsync"},
        {190, nullptr, "GetUserLastOpenedApplication"},
        {191, nullptr, "ActivateOpenContextHolder"},
        {997, nullptr, "DebugInvalidateTokenCacheForUser"},
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,9 +43,9 @@

 namespace Service::AM {

-constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 0x2};
-constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 0x3};
-constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 0x1F7};
+constexpr ResultCode ERR_NO_DATA_IN_CHANNEL{ErrorModule::AM, 2};
+constexpr ResultCode ERR_NO_MESSAGES{ErrorModule::AM, 3};
+constexpr ResultCode ERR_SIZE_OUT_OF_BOUNDS{ErrorModule::AM, 503};

 enum class LaunchParameterKind : u32 {
    ApplicationSpecific = 1,
@@ -235,6 +235,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
        {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
        {40, nullptr, "GetAppletResourceUsageInfo"},
        {100, nullptr, "SetCpuBoostModeForApplet"},
+        {101, nullptr, "CancelCpuBoostModeForApplet"},
        {110, nullptr, "PushToAppletBoundChannelForDebug"},
        {111, nullptr, "TryPopFromAppletBoundChannelForDebug"},
        {120, nullptr, "AlarmSettingNotificationEnableAppEventReserve"},
@@ -277,6 +278,8 @@ ISelfController::ISelfController(Core::System& system,
        {41, nullptr, "IsSystemBufferSharingEnabled"},
        {42, nullptr, "GetSystemSharedLayerHandle"},
        {43, nullptr, "GetSystemSharedBufferHandle"},
+        {44, nullptr, "CreateManagedDisplaySeparableLayer"},
+        {45, nullptr, "SetManagedDisplayLayerSeparationMode"},
        {50, &ISelfController::SetHandlesRequestToDisplay, "SetHandlesRequestToDisplay"},
        {51, nullptr, "ApproveToDisplay"},
        {60, nullptr, "OverrideAutoSleepTimeAndDimmingTime"},
@@ -623,11 +626,15 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,
        {64, nullptr, "SetTvPowerStateMatchingMode"},
        {65, nullptr, "GetApplicationIdByContentActionName"},
        {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"},
+        {67, nullptr, "CancelCpuBoostMode"},
        {80, nullptr, "PerformSystemButtonPressingIfInFocus"},
        {90, nullptr, "SetPerformanceConfigurationChangedNotification"},
        {91, nullptr, "GetCurrentPerformanceConfiguration"},
+        {100, nullptr, "SetHandlingHomeButtonShortPressedEnabled"},
        {200, nullptr, "GetOperationModeSystemInfo"},
        {300, nullptr, "GetSettingsPlatformRegion"},
+        {400, nullptr, "ActivateMigrationService"},
+        {401, nullptr, "DeactivateMigrationService"},
    };
    // clang-format on

@@ -835,6 +842,7 @@ public:
            {25, nullptr, "Terminate"},
            {30, &ILibraryAppletAccessor::GetResult, "GetResult"},
            {50, nullptr, "SetOutOfFocusApplicationSuspendingEnabled"},
+            {60, nullptr, "PresetLibraryAppletGpuTimeSliceZero"},
            {100, &ILibraryAppletAccessor::PushInData, "PushInData"},
            {101, &ILibraryAppletAccessor::PopOutData, "PopOutData"},
            {102, nullptr, "PushExtraStorage"},
@@ -1139,6 +1147,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
        {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"},
        {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"},
        {33, &IApplicationFunctions::EndBlockingHomeButton, "EndBlockingHomeButton"},
+        {34, nullptr, "SelectApplicationLicense"},
        {40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"},
        {50, &IApplicationFunctions::GetPseudoDeviceId, "GetPseudoDeviceId"},
        {60, nullptr, "SetMediaPlaybackStateForApplication"},
@@ -1148,6 +1157,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
        {68, nullptr, "RequestFlushGamePlayingMovieForDebug"},
        {70, nullptr, "RequestToShutdown"},
        {71, nullptr, "RequestToReboot"},
+        {72, nullptr, "RequestToSleep"},
        {80, nullptr, "ExitAndRequestToShowThanksMessage"},
        {90, &IApplicationFunctions::EnableApplicationCrashReport, "EnableApplicationCrashReport"},
        {100, &IApplicationFunctions::InitializeApplicationCopyrightFrameBuffer, "InitializeApplicationCopyrightFrameBuffer"},
--- a/src/core/hle/service/audio/audctl.cpp
+++ b/src/core/hle/service/audio/audctl.cpp
@@ -39,6 +39,8 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} {
        {25, nullptr, "GetAudioVolumeDataForPlayReport"},
        {26, nullptr, "UpdateHeadphoneSettings"},
        {27, nullptr, "SetVolumeMappingTableForDev"},
+        {28, nullptr, "GetAudioOutputChannelCountForPlayReport"},
+        {29, nullptr, "BindAudioOutputChannelCountUpdateEventForPlayReport"},
    };
    // clang-format on

--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -18,6 +18,7 @@
 #include "core/hle/service/bcat/backend/boxcat.h"
 #include "core/settings.h"

+namespace Service::BCAT {
 namespace {

 // Prevents conflicts with windows macro called CreateFile
@@ -30,10 +31,6 @@ bool VfsDeleteFileWrap(FileSys::VirtualDir dir, std::string_view name) {
    return dir->DeleteFile(name);
 }

-} // Anonymous namespace
-
-namespace Service::BCAT {
-
 constexpr ResultCode ERROR_GENERAL_BCAT_FAILURE{ErrorModule::BCAT, 1};

 constexpr char BOXCAT_HOSTNAME[] = "api.yuzu-emu.org";
@@ -90,8 +87,6 @@ constexpr u32 PORT = 443;
 constexpr u32 TIMEOUT_SECONDS = 30;
 [[maybe_unused]] constexpr u64 VFS_COPY_BLOCK_SIZE = 1ULL << 24; // 4MB

-namespace {
-
 std::string GetBINFilePath(u64 title_id) {
    return fmt::format("{}bcat/{:016X}/launchparam.bin",
                       FileUtil::GetUserPath(FileUtil::UserPath::CacheDir), title_id);
--- a/src/core/hle/service/bcat/module.cpp
+++ b/src/core/hle/service/bcat/module.cpp
@@ -141,6 +141,7 @@ public:
            {20301, nullptr, "RequestSuspendDeliveryTask"},
            {20400, nullptr, "RegisterSystemApplicationDeliveryTask"},
            {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
+            {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
            {30100, &IBcatService::SetPassphrase, "SetPassphrase"},
            {30200, nullptr, "RegisterBackgroundDeliveryTask"},
            {30201, nullptr, "UnregisterBackgroundDeliveryTask"},
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -4,6 +4,7 @@

 #include "core/crypto/key_manager.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/service/es/es.h"
 #include "core/hle/service/service.h"

 namespace Service::ES {
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -316,8 +316,8 @@ public:
            {8, &IFileSystem::OpenFile, "OpenFile"},
            {9, &IFileSystem::OpenDirectory, "OpenDirectory"},
            {10, &IFileSystem::Commit, "Commit"},
-            {11, nullptr, "GetFreeSpaceSize"},
-            {12, nullptr, "GetTotalSpaceSize"},
+            {11, &IFileSystem::GetFreeSpaceSize, "GetFreeSpaceSize"},
+            {12, &IFileSystem::GetTotalSpaceSize, "GetTotalSpaceSize"},
            {13, &IFileSystem::CleanDirectoryRecursively, "CleanDirectoryRecursively"},
            {14, nullptr, "GetFileTimeStampRaw"},
            {15, nullptr, "QueryEntry"},
@@ -697,12 +697,14 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter)
        {68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"},
        {69, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataAttribute"},
        {70, nullptr, "WriteSaveDataFileSystemExtraDataBySaveDataAttribute"},
+        {71, nullptr, "ReadSaveDataFileSystemExtraDataWithMaskBySaveDataAttribute"},
        {80, nullptr, "OpenSaveDataMetaFile"},
        {81, nullptr, "OpenSaveDataTransferManager"},
        {82, nullptr, "OpenSaveDataTransferManagerVersion2"},
        {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"},
        {84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"},
        {85, nullptr, "OpenSaveDataTransferManagerForSaveDataRepair"},
+        {86, nullptr, "OpenSaveDataMover"},
        {100, nullptr, "OpenImageDirectoryFileSystem"},
        {110, nullptr, "OpenContentStorageFileSystem"},
        {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
@@ -762,9 +764,11 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter)
        {1011, &FSP_SRV::GetAccessLogVersionInfo, "GetAccessLogVersionInfo"},
        {1012, nullptr, "GetFsStackUsage"},
        {1013, nullptr, "UnsetSaveDataRootPath"},
+        {1014, nullptr, "OutputMultiProgramTagAccessLog"},
        {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
        {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
        {1200, nullptr, "OpenMultiCommitManager"},
+        {1300, nullptr, "OpenBisWiper"},
    };
    // clang-format on
    RegisterHandlers(functions);
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -96,6 +96,7 @@ public:
            {30830, nullptr, "ClearPlayLog"},
            {30900, nullptr, "SendFriendInvitation"},
            {30910, nullptr, "ReadFriendInvitation"},
+            {30911, nullptr, "ReadAllFriendInvitations"},
            {49900, nullptr, "DeleteNetworkServiceAccountCache"},
        };
        // clang-format on
--- a/src/core/hle/service/glue/errors.h
+++ b/src/core/hle/service/glue/errors.h
@@ -8,9 +8,9 @@

 namespace Service::Glue {

-constexpr ResultCode ERR_INVALID_RESOURCE{ErrorModule::ARP, 0x1E};
-constexpr ResultCode ERR_INVALID_PROCESS_ID{ErrorModule::ARP, 0x1F};
-constexpr ResultCode ERR_INVALID_ACCESS{ErrorModule::ARP, 0x2A};
-constexpr ResultCode ERR_NOT_REGISTERED{ErrorModule::ARP, 0x66};
+constexpr ResultCode ERR_INVALID_RESOURCE{ErrorModule::ARP, 30};
+constexpr ResultCode ERR_INVALID_PROCESS_ID{ErrorModule::ARP, 31};
+constexpr ResultCode ERR_INVALID_ACCESS{ErrorModule::ARP, 42};
+constexpr ResultCode ERR_NOT_REGISTERED{ErrorModule::ARP, 102};

 } // namespace Service::Glue
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -233,7 +233,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
        {302, nullptr, "StopConsoleSixAxisSensor"},
        {303, nullptr, "ActivateSevenSixAxisSensor"},
        {304, nullptr, "StartSevenSixAxisSensor"},
-        {305, nullptr, "StopSevenSixAxisSensor"},
+        {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
        {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
        {307, nullptr, "FinalizeSevenSixAxisSensor"},
        {308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
@@ -282,6 +282,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
        {1001, nullptr, "GetNpadCommunicationMode"},
        {1002, nullptr, "SetTouchScreenConfiguration"},
        {1003, nullptr, "IsFirmwareUpdateNeededForNotification"},
+        {2000, nullptr, "ActivateDigitizer"},
    };
    // clang-format on

@@ -852,6 +853,17 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
    rb.Push(RESULT_SUCCESS);
 }

+void Hid::StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto applet_resource_user_id{rp.Pop<u64>()};
+
+    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
+                applet_resource_user_id);
+
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+}
+
 void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_HID, "(STUBBED) called");

@@ -870,6 +882,7 @@ public:
            {10, nullptr, "DeactivateTouchScreen"},
            {11, nullptr, "SetTouchScreenAutoPilotState"},
            {12, nullptr, "UnsetTouchScreenAutoPilotState"},
+            {13, nullptr, "GetTouchScreenConfiguration"},
            {20, nullptr, "DeactivateMouse"},
            {21, nullptr, "SetMouseAutoPilotState"},
            {22, nullptr, "UnsetMouseAutoPilotState"},
@@ -879,7 +892,9 @@ public:
            {50, nullptr, "DeactivateXpad"},
            {51, nullptr, "SetXpadAutoPilotState"},
            {52, nullptr, "UnsetXpadAutoPilotState"},
-            {60, nullptr, "DeactivateJoyXpad"},
+            {60, nullptr, "ClearNpadSystemCommonPolicy"},
+            {61, nullptr, "DeactivateNpad"},
+            {62, nullptr, "ForceDisconnectNpad"},
            {91, nullptr, "DeactivateGesture"},
            {110, nullptr, "DeactivateHomeButton"},
            {111, nullptr, "SetHomeButtonAutoPilotState"},
@@ -899,6 +914,15 @@ public:
            {141, nullptr, "GetConsoleSixAxisSensorSamplingFrequency"},
            {142, nullptr, "DeactivateSevenSixAxisSensor"},
            {143, nullptr, "GetConsoleSixAxisSensorCountStates"},
+            {144, nullptr, "GetAccelerometerFsr"},
+            {145, nullptr, "SetAccelerometerFsr"},
+            {146, nullptr, "GetAccelerometerOdr"},
+            {147, nullptr, "SetAccelerometerOdr"},
+            {148, nullptr, "GetGyroscopeFsr"},
+            {149, nullptr, "SetGyroscopeFsr"},
+            {150, nullptr, "GetGyroscopeOdr"},
+            {151, nullptr, "SetGyroscopeOdr"},
+            {152, nullptr, "GetWhoAmI"},
            {201, nullptr, "ActivateFirmwareUpdate"},
            {202, nullptr, "DeactivateFirmwareUpdate"},
            {203, nullptr, "StartFirmwareUpdate"},
@@ -927,6 +951,17 @@ public:
            {233, nullptr, "ClearPairingInfo"},
            {234, nullptr, "GetUniquePadDeviceTypeSetInternal"},
            {235, nullptr, "EnableAnalogStickPower"},
+            {236, nullptr, "RequestKuinaUartClockCal"},
+            {237, nullptr, "GetKuinaUartClockCal"},
+            {238, nullptr, "SetKuinaUartClockTrim"},
+            {239, nullptr, "KuinaLoopbackTest"},
+            {240, nullptr, "RequestBatteryVoltage"},
+            {241, nullptr, "GetBatteryVoltage"},
+            {242, nullptr, "GetUniquePadPowerInfo"},
+            {243, nullptr, "RebootUniquePad"},
+            {244, nullptr, "RequestKuinaFirmwareVersion"},
+            {245, nullptr, "GetKuinaFirmwareVersion"},
+            {246, nullptr, "GetVidPid"},
            {301, nullptr, "GetAbstractedPadHandles"},
            {302, nullptr, "GetAbstractedPadState"},
            {303, nullptr, "GetAbstractedPadsState"},
@@ -945,6 +980,17 @@ public:
            {350, nullptr, "AddRegisteredDevice"},
            {400, nullptr, "DisableExternalMcuOnNxDevice"},
            {401, nullptr, "DisableRailDeviceFiltering"},
+            {402, nullptr, "EnableWiredPairing"},
+            {403, nullptr, "EnableShipmentModeAutoClear"},
+            {500, nullptr, "SetFactoryInt"},
+            {501, nullptr, "IsFactoryBootEnabled"},
+            {550, nullptr, "SetAnalogStickModelDataTemporarily"},
+            {551, nullptr, "GetAnalogStickModelData"},
+            {552, nullptr, "ResetAnalogStickModelData"},
+            {600, nullptr, "ConvertPadState"},
+            {2000, nullptr, "DeactivateDigitizer"},
+            {2001, nullptr, "SetDigitizerAutoPilotState"},
+            {2002, nullptr, "UnsetDigitizerAutoPilotState"},
        };
        // clang-format on

--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -128,6 +128,7 @@ private:
    void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
    void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
    void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
+    void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
    void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);

    std::shared_ptr<IAppletResource> applet_resource;
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -116,6 +116,7 @@ public:
            {1, nullptr, "GetProgramInfo"},
            {2, nullptr, "RegisterTitle"},
            {3, nullptr, "UnregisterTitle"},
+            {4, nullptr, "SetEnabledProgramVerification"},
        };
        // clang-format on

--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -122,6 +122,7 @@ public:
            {11, nullptr, "ActivateContentMetaDatabase"},
            {12, nullptr, "InactivateContentMetaDatabase"},
            {13, nullptr, "InvalidateRightsIdCache"},
+            {14, nullptr, "GetMemoryReport"},
        };
        // clang-format on

--- a/src/core/hle/service/npns/npns.cpp
+++ b/src/core/hle/service/npns/npns.cpp
@@ -48,6 +48,8 @@ public:
            {151, nullptr, "GetStateWithHandover"},
            {152, nullptr, "GetStateChangeEventWithHandover"},
            {153, nullptr, "GetDropEventWithHandover"},
+            {161, nullptr, "GetRequestChangeStateCancelEvent"},
+            {162, nullptr, "RequestChangeStateForceTimedWithCancelEvent"},
            {201, nullptr, "RequestChangeStateForceTimed"},
            {202, nullptr, "RequestChangeStateForceAsync"},
        };
--- a/src/core/hle/service/ns/ns.cpp
+++ b/src/core/hle/service/ns/ns.cpp
@@ -110,6 +110,10 @@ IApplicationManagerInterface::IApplicationManagerInterface()
        {100, nullptr, "ResetToFactorySettings"},
        {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
        {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
+        {103, nullptr, "ResetToFactorySettingsWithPlatformRegion"},
+        {104, nullptr, "ResetToFactorySettingsWithPlatformRegionAuthentication"},
+        {105, nullptr, "RequestResetToFactorySettingsSecurely"},
+        {106, nullptr, "RequestResetToFactorySettingsWithPlatformRegionAuthenticationSecurely"},
        {200, nullptr, "CalculateUserSaveDataStatistics"},
        {201, nullptr, "DeleteUserSaveDataAll"},
        {210, nullptr, "DeleteUserSystemSaveData"},
@@ -191,6 +195,9 @@ IApplicationManagerInterface::IApplicationManagerInterface()
        {1307, nullptr, "TryDeleteRunningApplicationContentEntities"},
        {1308, nullptr, "DeleteApplicationCompletelyForDebug"},
        {1309, nullptr, "CleanupUnavailableAddOnContents"},
+        {1310, nullptr, "RequestMoveApplicationEntity"},
+        {1311, nullptr, "EstimateSizeToMove"},
+        {1312, nullptr, "HasMovableEntity"},
        {1400, nullptr, "PrepareShutdown"},
        {1500, nullptr, "FormatSdCard"},
        {1501, nullptr, "NeedsSystemUpdateToFormatSdCard"},
@@ -241,7 +248,7 @@ IApplicationManagerInterface::IApplicationManagerInterface()
        {2153, nullptr, "DeactivateRightsEnvironment"},
        {2154, nullptr, "ForceActivateRightsContextForExit"},
        {2155, nullptr, "UpdateRightsEnvironmentStatus"},
-        {2156, nullptr, "CreateRightsEnvironmentForPreomia"},
+        {2156, nullptr, "CreateRightsEnvironmentForMicroApplication"},
        {2160, nullptr, "AddTargetApplicationToRightsEnvironment"},
        {2161, nullptr, "SetUsersToRightsEnvironment"},
        {2170, nullptr, "GetRightsEnvironmentStatus"},
@@ -258,6 +265,7 @@ IApplicationManagerInterface::IApplicationManagerInterface()
        {2350, nullptr, "PerformAutoUpdateByApplicationId"},
        {2351, nullptr, "RequestNoDownloadRightsErrorResolution"},
        {2352, nullptr, "RequestResolveNoDownloadRightsError"},
+        {2353, nullptr, "GetApplicationDownloadTaskInfo"},
        {2400, nullptr, "GetPromotionInfo"},
        {2401, nullptr, "CountPromotionInfo"},
        {2402, nullptr, "ListPromotionInfo"},
@@ -266,9 +274,12 @@ IApplicationManagerInterface::IApplicationManagerInterface()
        {2500, nullptr, "ConfirmAvailableTime"},
        {2510, nullptr, "CreateApplicationResource"},
        {2511, nullptr, "GetApplicationResource"},
-        {2513, nullptr, "LaunchPreomia"},
+        {2513, nullptr, "LaunchMicroApplication"},
        {2514, nullptr, "ClearTaskOfAsyncTaskManager"},
+        {2515, nullptr, "CleanupAllPlaceHolderAndFragmentsIfNoTask"},
+        {2516, nullptr, "EnsureApplicationCertificate"},
        {2800, nullptr, "GetApplicationIdOfPreomia"},
+        {9999, nullptr, "GetApplicationCertificate"},
    };
    // clang-format on

@@ -360,10 +371,15 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage(
    // Convert to application language, get priority list
    const auto application_language = ConvertToApplicationLanguage(language_code);
    if (application_language == std::nullopt) {
+        LOG_ERROR(Service_NS, "Could not convert application language! language_code={}",
+                  language_code);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }
    const auto priority_list = GetApplicationLanguagePriorityList(*application_language);
    if (!priority_list) {
+        LOG_ERROR(Service_NS,
+                  "Could not find application language priorities! application_language={}",
+                  *application_language);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }

@@ -375,6 +391,8 @@ ResultVal<u8> IApplicationManagerInterface::GetApplicationDesiredLanguage(
        }
    }

+    LOG_ERROR(Service_NS, "Could not find a valid language! supported_languages={:08X}",
+              supported_languages);
    return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
 }

@@ -399,6 +417,7 @@ ResultVal<u64> IApplicationManagerInterface::ConvertApplicationLanguageToLanguag
    const auto language_code =
        ConvertToLanguageCode(static_cast<ApplicationLanguage>(application_language));
    if (language_code == std::nullopt) {
+        LOG_ERROR(Service_NS, "Language not found! application_language={}", application_language);
        return ERR_APPLICATION_LANGUAGE_NOT_FOUND;
    }

@@ -505,6 +524,10 @@ IFactoryResetInterface::IFactoryResetInterface::IFactoryResetInterface()
            {100, nullptr, "ResetToFactorySettings"},
            {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
            {102, nullptr, "ResetToFactorySettingsForRefurbishment"},
+            {103, nullptr, "ResetToFactorySettingsWithPlatformRegion"},
+            {104, nullptr, "ResetToFactorySettingsWithPlatformRegionAuthentication"},
+            {105, nullptr, "RequestResetToFactorySettingsSecurely"},
+            {106, nullptr, "RequestResetToFactorySettingsWithPlatformRegionAuthenticationSecurely"},
        };
    // clang-format on

@@ -553,6 +576,9 @@ public:
            {10, nullptr, "TerminateApplication2"},
            {11, nullptr, "GetRunningApplicationProcessId"},
            {12, nullptr, "SetCurrentApplicationRightsEnvironmentCanBeActive"},
+            {13, nullptr, "CreateApplicationResourceForDevelop"},
+            {14, nullptr, "IsPreomiaForDevelop"},
+            {15, nullptr, "GetApplicationProgramIdFromHost"},
        };
        // clang-format on

--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -164,6 +164,7 @@ PL_U::PL_U(Core::System& system)
        {6, nullptr, "GetSharedFontInOrderOfPriorityForSystem"},
        {100, nullptr, "RequestApplicationFunctionAuthorization"},
        {101, nullptr, "RequestApplicationFunctionAuthorizationForSystem"},
+        {102, nullptr, "RequestApplicationFunctionAuthorizationByApplicationId"},
        {1000, nullptr, "LoadNgWordDataForPlatformRegionChina"},
        {1001, nullptr, "GetNgWordDataSizeForPlatformRegionChina"},
    };
--- a/src/core/hle/service/pctl/module.cpp
+++ b/src/core/hle/service/pctl/module.cpp
@@ -31,6 +31,8 @@ public:
            {1014, nullptr, "ConfirmPlayableApplicationVideoOld"},
            {1015, nullptr, "ConfirmPlayableApplicationVideo"},
            {1016, nullptr, "ConfirmShowNewsPermission"},
+            {1017, nullptr, "EndFreeCommunication"},
+            {1018, nullptr, "IsFreeCommunicationAvailable"},
            {1031, nullptr, "IsRestrictionEnabled"},
            {1032, nullptr, "GetSafetyLevel"},
            {1033, nullptr, "SetSafetyLevel"},
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -21,8 +21,10 @@ public:
        static const FunctionInfo functions[] = {
            {10100, &PlayReport::SaveReport<Core::Reporter::PlayReportType::Old>, "SaveReportOld"},
            {10101, &PlayReport::SaveReportWithUser<Core::Reporter::PlayReportType::Old>, "SaveReportWithUserOld"},
-            {10102, &PlayReport::SaveReport<Core::Reporter::PlayReportType::New>, "SaveReport"},
-            {10103, &PlayReport::SaveReportWithUser<Core::Reporter::PlayReportType::New>, "SaveReportWithUser"},
+            {10102, &PlayReport::SaveReport<Core::Reporter::PlayReportType::Old2>, "SaveReportOld2"},
+            {10103, &PlayReport::SaveReportWithUser<Core::Reporter::PlayReportType::Old2>, "SaveReportWithUserOld2"},
+            {10104, nullptr, "SaveReport"},
+            {10105, nullptr, "SaveReportWithUser"},
            {10200, nullptr, "RequestImmediateTransmission"},
            {10300, nullptr, "GetTransmissionStatus"},
            {10400, nullptr, "GetSystemSessionId"},
@@ -35,8 +37,10 @@ public:
            {30400, nullptr, "GetStatistics"},
            {30401, nullptr, "GetThroughputHistory"},
            {30500, nullptr, "GetLastUploadError"},
+            {30600, nullptr, "GetApplicationUploadSummary"},
            {40100, nullptr, "IsUserAgreementCheckEnabled"},
            {40101, nullptr, "SetUserAgreementCheckEnabled"},
+            {50100, nullptr, "ReadAllApplicationReportFiles"},
            {90100, nullptr, "ReadAllReportFiles"},
        };
        // clang-format on
@@ -51,7 +55,7 @@ private:
        const auto process_id = rp.PopRaw<u64>();

        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
-        if (Type == Core::Reporter::PlayReportType::New) {
+        if constexpr (Type == Core::Reporter::PlayReportType::Old2) {
            data.emplace_back(ctx.ReadBuffer(1));
        }

@@ -71,7 +75,7 @@ private:
        const auto user_id = rp.PopRaw<u128>();
        const auto process_id = rp.PopRaw<u64>();
        std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
-        if (Type == Core::Reporter::PlayReportType::New) {
+        if constexpr (Type == Core::Reporter::PlayReportType::Old2) {
            data.emplace_back(ctx.ReadBuffer(1));
        }

--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -67,6 +67,7 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
    const auto index = rp.Pop<u32>();

    if (index >= available_language_codes.size()) {
+        LOG_ERROR(Service_SET, "Invalid language code index! index={}", index);
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_INVALID_LANGUAGE);
        return;
--- a/src/core/hle/service/set/set_cal.cpp
+++ b/src/core/hle/service/set/set_cal.cpp
@@ -50,6 +50,8 @@ SET_CAL::SET_CAL() : ServiceFramework("set:cal") {
        {39, nullptr, "GetConsoleSixAxisSensorModuleType"},
        {40, nullptr, "GetConsoleSixAxisSensorHorizontalOffset"},
        {41, nullptr, "GetBatteryVersion"},
+        {42, nullptr, "GetDeviceId"},
+        {43, nullptr, "GetConsoleSixAxisSensorMountType"},
    };
    // clang-format on

--- a/src/core/hle/service/set/set_sys.cpp
+++ b/src/core/hle/service/set/set_sys.cpp
@@ -288,6 +288,18 @@ SET_SYS::SET_SYS() : ServiceFramework("set:sys") {
        {186, nullptr, "GetMemoryUsageRateFlag"},
        {187, nullptr, "GetTouchScreenMode"},
        {188, nullptr, "SetTouchScreenMode"},
+        {189, nullptr, "GetButtonConfigSettingsFull"},
+        {190, nullptr, "SetButtonConfigSettingsFull"},
+        {191, nullptr, "GetButtonConfigSettingsEmbedded"},
+        {192, nullptr, "SetButtonConfigSettingsEmbedded"},
+        {193, nullptr, "GetButtonConfigSettingsLeft"},
+        {194, nullptr, "SetButtonConfigSettingsLeft"},
+        {195, nullptr, "GetButtonConfigSettingsRight"},
+        {196, nullptr, "SetButtonConfigSettingsRight"},
+        {197, nullptr, "GetButtonConfigRegisteredSettingsEmbedded"},
+        {198, nullptr, "SetButtonConfigRegisteredSettingsEmbedded"},
+        {199, nullptr, "GetButtonConfigRegisteredSettings"},
+        {200, nullptr, "SetButtonConfigRegisteredSettings"},
    };
    // clang-format on

--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -28,9 +28,11 @@ void ServiceManager::InvokeControlRequest(Kernel::HLERequestContext& context) {

 static ResultCode ValidateServiceName(const std::string& name) {
    if (name.size() <= 0 || name.size() > 8) {
+        LOG_ERROR(Service_SM, "Invalid service name! service={}", name);
        return ERR_INVALID_NAME;
    }
    if (name.find('\0') != std::string::npos) {
+        LOG_ERROR(Service_SM, "A non null terminated service was passed");
        return ERR_INVALID_NAME;
    }
    return RESULT_SUCCESS;
@@ -51,8 +53,10 @@ ResultVal<std::shared_ptr<Kernel::ServerPort>> ServiceManager::RegisterService(

    CASCADE_CODE(ValidateServiceName(name));

-    if (registered_services.find(name) != registered_services.end())
+    if (registered_services.find(name) != registered_services.end()) {
+        LOG_ERROR(Service_SM, "Service is already registered! service={}", name);
        return ERR_ALREADY_REGISTERED;
+    }

    auto& kernel = Core::System::GetInstance().Kernel();
    auto [server_port, client_port] =
@@ -66,9 +70,10 @@ ResultCode ServiceManager::UnregisterService(const std::string& name) {
    CASCADE_CODE(ValidateServiceName(name));

    const auto iter = registered_services.find(name);
-    if (iter == registered_services.end())
+    if (iter == registered_services.end()) {
+        LOG_ERROR(Service_SM, "Server is not registered! service={}", name);
        return ERR_SERVICE_NOT_REGISTERED;
-
+    }
    registered_services.erase(iter);
    return RESULT_SUCCESS;
 }
@@ -79,6 +84,7 @@ ResultVal<std::shared_ptr<Kernel::ClientPort>> ServiceManager::GetServicePort(
    CASCADE_CODE(ValidateServiceName(name));
    auto it = registered_services.find(name);
    if (it == registered_services.end()) {
+        LOG_ERROR(Service_SM, "Server is not registered! service={}", name);
        return ERR_SERVICE_NOT_REGISTERED;
    }

--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -148,6 +148,7 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
        {30, nullptr, "SendMMsg"},
        {31, nullptr, "EventFd"},
        {32, nullptr, "RegisterResourceStatisticsName"},
+        {33, nullptr, "Initialize2"},
    };
    // clang-format on

--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -20,8 +20,8 @@ namespace Service::Time {

 class ISystemClock final : public ServiceFramework<ISystemClock> {
 public:
-    ISystemClock(Clock::SystemClockCore& clock_core)
-        : ServiceFramework("ISystemClock"), clock_core{clock_core} {
+    explicit ISystemClock(Clock::SystemClockCore& clock_core, Core::System& system)
+        : ServiceFramework("ISystemClock"), clock_core{clock_core}, system{system} {
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"},
@@ -46,9 +46,8 @@ private:
        }

        s64 posix_time{};
-        if (const ResultCode result{
-                clock_core.GetCurrentTime(Core::System::GetInstance(), posix_time)};
-            result != RESULT_SUCCESS) {
+        if (const ResultCode result{clock_core.GetCurrentTime(system, posix_time)};
+            result.IsError()) {
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(result);
            return;
@@ -69,9 +68,8 @@ private:
        }

        Clock::SystemClockContext system_clock_context{};
-        if (const ResultCode result{
-                clock_core.GetClockContext(Core::System::GetInstance(), system_clock_context)};
-            result != RESULT_SUCCESS) {
+        if (const ResultCode result{clock_core.GetClockContext(system, system_clock_context)};
+            result.IsError()) {
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(result);
            return;
@@ -83,12 +81,13 @@ private:
    }

    Clock::SystemClockCore& clock_core;
+    Core::System& system;
 };

 class ISteadyClock final : public ServiceFramework<ISteadyClock> {
 public:
-    ISteadyClock(Clock::SteadyClockCore& clock_core)
-        : ServiceFramework("ISteadyClock"), clock_core{clock_core} {
+    explicit ISteadyClock(Clock::SteadyClockCore& clock_core, Core::System& system)
+        : ServiceFramework("ISteadyClock"), clock_core{clock_core}, system{system} {
        static const FunctionInfo functions[] = {
            {0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"},
        };
@@ -105,14 +104,14 @@ private:
            return;
        }

-        const Clock::SteadyClockTimePoint time_point{
-            clock_core.GetCurrentTimePoint(Core::System::GetInstance())};
+        const Clock::SteadyClockTimePoint time_point{clock_core.GetCurrentTimePoint(system)};
        IPC::ResponseBuilder rb{ctx, (sizeof(Clock::SteadyClockTimePoint) / 4) + 2};
        rb.Push(RESULT_SUCCESS);
        rb.PushRaw(time_point);
    }

    Clock::SteadyClockCore& clock_core;
+    Core::System& system;
 };

 ResultCode Module::Interface::GetClockSnapshotFromSystemClockContextInternal(
@@ -134,7 +133,7 @@ ResultCode Module::Interface::GetClockSnapshotFromSystemClockContextInternal(
    }

    const auto current_time_point{
-        time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(Core::System::GetInstance())};
+        time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)};
    if (const ResultCode result{Clock::ClockSnapshot::GetCurrentTime(
            clock_snapshot.user_time, current_time_point, clock_snapshot.user_context)};
        result != RESULT_SUCCESS) {
@@ -176,21 +175,24 @@ void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ct
    LOG_DEBUG(Service_Time, "called");
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardUserSystemClockCore());
+    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardUserSystemClockCore(),
+                                      system);
 }

 void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Time, "called");
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardNetworkSystemClockCore());
+    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardNetworkSystemClockCore(),
+                                      system);
 }

 void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Time, "called");
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ISteadyClock>(module->GetTimeManager().GetStandardSteadyClockCore());
+    rb.PushIpcInterface<ISteadyClock>(module->GetTimeManager().GetStandardSteadyClockCore(),
+                                      system);
 }

 void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) {
@@ -204,7 +206,8 @@ void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& c
    LOG_DEBUG(Service_Time, "called");
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardLocalSystemClockCore());
+    rb.PushIpcInterface<ISystemClock>(module->GetTimeManager().GetStandardLocalSystemClockCore(),
+                                      system);
 }

 void Module::Interface::IsStandardNetworkSystemClockAccuracySufficient(
@@ -228,8 +231,7 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe

    IPC::RequestParser rp{ctx};
    const auto context{rp.PopRaw<Clock::SystemClockContext>()};
-    const auto current_time_point{
-        steady_clock_core.GetCurrentTimePoint(Core::System::GetInstance())};
+    const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};

    if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
        const auto ticks{Clock::TimeSpanType::FromTicks(
@@ -255,8 +257,8 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
    Clock::SystemClockContext user_context{};
    if (const ResultCode result{
            module->GetTimeManager().GetStandardUserSystemClockCore().GetClockContext(
-                Core::System::GetInstance(), user_context)};
-        result != RESULT_SUCCESS) {
+                system, user_context)};
+        result.IsError()) {
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(result);
        return;
@@ -264,8 +266,8 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
    Clock::SystemClockContext network_context{};
    if (const ResultCode result{
            module->GetTimeManager().GetStandardNetworkSystemClockCore().GetClockContext(
-                Core::System::GetInstance(), network_context)};
-        result != RESULT_SUCCESS) {
+                system, network_context)};
+        result.IsError()) {
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(result);
        return;
@@ -274,7 +276,7 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
    Clock::ClockSnapshot clock_snapshot{};
    if (const ResultCode result{GetClockSnapshotFromSystemClockContextInternal(
            &ctx.GetThread(), user_context, network_context, type, clock_snapshot)};
-        result != RESULT_SUCCESS) {
+        result.IsError()) {
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(result);
        return;
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -267,7 +267,7 @@ protected:

 private:
    struct Data {
-        u32_le unk_0;
+        u32_le unk_0{};
    };

    Data data{};
@@ -614,6 +614,14 @@ private:
            ctx.WriteBuffer(response.Serialize());
            break;
        }
+        case TransactionId::SetBufferCount: {
+            LOG_WARNING(Service_VI, "(STUBBED) called, transaction=SetBufferCount");
+            [[maybe_unused]] const auto buffer = ctx.ReadBuffer();
+
+            IGBPEmptyResponseParcel response{};
+            ctx.WriteBuffer(response.Serialize());
+            break;
+        }
        default:
            ASSERT_MSG(false, "Unimplemented");
        }
@@ -859,6 +867,7 @@ private:

        const auto layer_id = nv_flinger->CreateLayer(display);
        if (!layer_id) {
+            LOG_ERROR(Service_VI, "Layer not found! display=0x{:016X}", display);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -975,6 +984,7 @@ private:

        const auto display_id = nv_flinger->OpenDisplay(name);
        if (!display_id) {
+            LOG_ERROR(Service_VI, "Display not found! display_name={}", name);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1074,6 +1084,7 @@ private:

        const auto display_id = nv_flinger->OpenDisplay(display_name);
        if (!display_id) {
+            LOG_ERROR(Service_VI, "Layer not found! layer_id={}", layer_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1081,6 +1092,7 @@ private:

        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
        if (!buffer_queue_id) {
+            LOG_ERROR(Service_VI, "Buffer queue id not found! display_id={}", *display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1116,6 +1128,7 @@ private:

        const auto layer_id = nv_flinger->CreateLayer(display_id);
        if (!layer_id) {
+            LOG_ERROR(Service_VI, "Layer not found! layer_id={}", *layer_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1123,6 +1136,7 @@ private:

        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
        if (!buffer_queue_id) {
+            LOG_ERROR(Service_VI, "Buffer queue id not found! display_id={}", display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1153,6 +1167,7 @@ private:

        const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
        if (!vsync_event) {
+            LOG_ERROR(Service_VI, "Vsync event was not found for display_id={}", display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
@@ -1193,6 +1208,7 @@ private:
        case NintendoScaleMode::PreserveAspectRatio:
            return MakeResult(ConvertedScaleMode::PreserveAspectRatio);
        default:
+            LOG_ERROR(Service_VI, "Invalid scaling mode specified, mode={}", mode);
            return ERR_OPERATION_FAILED;
        }
    }
@@ -1249,6 +1265,7 @@ void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
    const auto policy = rp.PopEnum<Policy>();

    if (!IsValidServiceAccess(permission, policy)) {
+        LOG_ERROR(Service_VI, "Permission denied for policy {}", static_cast<u32>(policy));
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_PERMISSION_DENIED);
        return;
--- a/src/core/reporter.h
+++ b/src/core/reporter.h
@@ -56,6 +56,7 @@ public:

    enum class PlayReportType {
        Old,
+        Old2,
        New,
        System,
    };
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -92,7 +92,7 @@ void LogSettings() {
    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
    LogSetting("Renderer_UseAsynchronousGpuEmulation",
               Settings::values.use_asynchronous_gpu_emulation);
    LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
@@ -109,4 +109,12 @@ void LogSettings() {
    LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
 }

+bool IsGPULevelExtreme() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme;
+}
+
+bool IsGPULevelHigh() {
+    return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
+}
+
 } // namespace Settings
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -376,6 +376,12 @@ enum class RendererBackend {
    Vulkan = 1,
 };

+enum class GPUAccuracy : u32 {
+    Normal = 0,
+    High = 1,
+    Extreme = 2,
+};
+
 struct Values {
    // System
    bool use_docked_mode;
@@ -436,10 +442,11 @@ struct Values {
    bool use_frame_limit;
    u16 frame_limit;
    bool use_disk_shader_cache;
-    bool use_accurate_gpu_emulation;
+    GPUAccuracy gpu_accuracy;
    bool use_asynchronous_gpu_emulation;
    bool use_vsync;
    bool force_30fps_mode;
+    bool use_fast_gpu_time;

    float bg_red;
    float bg_green;
@@ -480,6 +487,9 @@ struct Values {
    std::map<u64, std::vector<std::string>> disabled_addons;
 } extern values;

+bool IsGPULevelExtreme();
+bool IsGPULevelHigh();
+
 void Apply();
 void LogSettings();
 } // namespace Settings
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
    return "Unknown";
 }

+static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
+    switch (backend) {
+    case Settings::GPUAccuracy::Normal:
+        return "Normal";
+    case Settings::GPUAccuracy::High:
+        return "High";
+    case Settings::GPUAccuracy::Extreme:
+        return "Extreme";
+    }
+    return "Unknown";
+}
+
 u64 GetTelemetryId() {
    u64 telemetry_id{};
    const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
    AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
    AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
    AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
-    AddField(field_type, "Renderer_UseAccurateGpuEmulation",
-             Settings::values.use_accurate_gpu_emulation);
+    AddField(field_type, "Renderer_GPUAccuracyLevel",
+             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
             Settings::values.use_asynchronous_gpu_emulation);
    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -14,13 +14,14 @@
 #include "core/core.h"
 #include "core/core_timing.h"

+namespace {
 // Numbers are chosen randomly to make sure the correct one is given.
-static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
+constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals

-static std::bitset<CB_IDS.size()> callbacks_ran_flags;
-static u64 expected_callback = 0;
-static s64 lateness = 0;
+std::bitset<CB_IDS.size()> callbacks_ran_flags;
+u64 expected_callback = 0;
+s64 lateness = 0;

 template <unsigned int IDX>
 void CallbackTemplate(u64 userdata, s64 cycles_late) {
@@ -31,7 +32,7 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
    REQUIRE(lateness == cycles_late);
 }

-static u64 callbacks_done = 0;
+u64 callbacks_done = 0;

 void EmptyCallback(u64 userdata, s64 cycles_late) {
    ++callbacks_done;
@@ -48,8 +49,8 @@ struct ScopeInit final {
    Core::Timing::CoreTiming core_timing;
 };

-static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
-                            int expected_lateness = 0, int cpu_downcount = 0) {
+void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
+                     int expected_lateness = 0, int cpu_downcount = 0) {
    callbacks_ran_flags = 0;
    expected_callback = CB_IDS[idx];
    lateness = expected_lateness;
@@ -62,6 +63,7 @@ static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32

    REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
 }
+} // Anonymous namespace

 TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
    ScopeInit guard;
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(video_core STATIC
    engines/shader_bytecode.h
    engines/shader_header.h
    engines/shader_type.h
+    fence_manager.h
    gpu.cpp
    gpu.h
    gpu_asynch.cpp
@@ -51,6 +52,8 @@ add_library(video_core STATIC
    renderer_opengl/gl_buffer_cache.h
    renderer_opengl/gl_device.cpp
    renderer_opengl/gl_device.h
+    renderer_opengl/gl_fence_manager.cpp
+    renderer_opengl/gl_fence_manager.h
    renderer_opengl/gl_framebuffer_cache.cpp
    renderer_opengl/gl_framebuffer_cache.h
    renderer_opengl/gl_rasterizer.cpp
@@ -121,6 +124,8 @@ add_library(video_core STATIC
    shader/decode.cpp
    shader/expr.cpp
    shader/expr.h
+    shader/memory_util.cpp
+    shader/memory_util.h
    shader/node_helper.cpp
    shader/node_helper.h
    shader/node.h
@@ -176,6 +181,8 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_descriptor_pool.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_fence_manager.cpp
+        renderer_vulkan/vk_fence_manager.h
        renderer_vulkan/vk_graphics_pipeline.cpp
        renderer_vulkan/vk_graphics_pipeline.h
        renderer_vulkan/vk_image.cpp
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <list>
 #include <memory>
 #include <mutex>
 #include <unordered_map>
@@ -18,8 +19,10 @@

 #include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -79,6 +82,9 @@ public:
        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
        if (is_written) {
            map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+                MarkForAsyncFlush(map);
+            }
            if (!map->IsWritten()) {
                map->MarkAsWritten(true);
                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -137,11 +143,22 @@ public:
        });
        for (auto& object : objects) {
            if (object->IsModified() && object->IsRegistered()) {
+                mutex.unlock();
                FlushMap(object);
+                mutex.lock();
            }
        }
    }

+    bool MustFlushRegion(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
+            return map->IsModified() && map->IsRegistered();
+        });
+    }
+
    /// Mark the specified region as being invalidated
    void InvalidateRegion(VAddr addr, u64 size) {
        std::lock_guard lock{mutex};
@@ -154,6 +171,77 @@ public:
        }
    }

+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : GetMapsInRange(addr, size)) {
+            if (object->IsMemoryMarked() && object->IsRegistered()) {
+                UnmarkMemory(object);
+                object->SetSyncPending(true);
+                marked_for_unregister.emplace_back(object);
+            }
+        }
+    }
+
+    void SyncGuestHost() {
+        std::lock_guard lock{mutex};
+
+        for (const auto& object : marked_for_unregister) {
+            if (object->IsRegistered()) {
+                object->SetSyncPending(false);
+                Unregister(object);
+            }
+        }
+        marked_for_unregister.clear();
+    }
+
+    void CommitAsyncFlushes() {
+        if (uncommitted_flushes) {
+            auto commit_list = std::make_shared<std::list<MapInterval>>();
+            for (auto& map : *uncommitted_flushes) {
+                if (map->IsRegistered() && map->IsModified()) {
+                    // TODO(Blinkhawk): Implement backend asynchronous flushing
+                    // AsyncFlushMap(map)
+                    commit_list->push_back(map);
+                }
+            }
+            if (!commit_list->empty()) {
+                committed_flushes.push_back(commit_list);
+            } else {
+                committed_flushes.emplace_back();
+            }
+        } else {
+            committed_flushes.emplace_back();
+        }
+        uncommitted_flushes.reset();
+    }
+
+    bool ShouldWaitAsyncFlushes() const {
+        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
+    }
+
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
+    }
+
+    void PopAsyncFlushes() {
+        if (committed_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = committed_flushes.front();
+        if (!flush_list) {
+            committed_flushes.pop_front();
+            return;
+        }
+        for (MapInterval& map : *flush_list) {
+            if (map->IsRegistered()) {
+                // TODO(Blinkhawk): Replace this for reading the asynchronous flush
+                FlushMap(map);
+            }
+        }
+        committed_flushes.pop_front();
+    }
+
    virtual BufferType GetEmptyBuffer(std::size_t size) = 0;

 protected:
@@ -196,17 +284,30 @@ protected:
        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
        mapped_addresses.insert({interval, new_map});
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        new_map->SetMemoryMarked(true);
        if (inherit_written) {
            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
            new_map->MarkAsWritten(true);
        }
    }

-    /// Unregisters an object from the cache
-    void Unregister(MapInterval& map) {
+    void UnmarkMemory(const MapInterval& map) {
+        if (!map->IsMemoryMarked()) {
+            return;
+        }
        const std::size_t size = map->GetEnd() - map->GetStart();
        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
+        map->SetMemoryMarked(false);
+    }
+
+    /// Unregisters an object from the cache
+    void Unregister(const MapInterval& map) {
+        UnmarkMemory(map);
        map->MarkAsRegistered(false);
+        if (map->IsSyncPending()) {
+            marked_for_unregister.remove(map);
+            map->SetSyncPending(false);
+        }
        if (map->IsWritten()) {
            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
        }
@@ -264,6 +365,9 @@ private:
        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
        if (modified_inheritance) {
            new_map->MarkAsModified(true, GetModifiedTicks());
+            if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
+                MarkForAsyncFlush(new_map);
+            }
        }
        Register(new_map, write_inheritance);
        return new_map;
@@ -450,6 +554,13 @@ private:
        return false;
    }

+    void MarkForAsyncFlush(MapInterval& map) {
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
+        }
+        uncommitted_flushes->insert(map);
+    }
+
    VideoCore::RasterizerInterface& rasterizer;
    Core::System& system;

@@ -479,6 +590,10 @@ private:
    u64 modified_ticks = 0;

    std::vector<u8> staging_buffer;
+    std::list<MapInterval> marked_for_unregister;
+
+    std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;

    std::recursive_mutex mutex;
 };
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -46,6 +46,22 @@ public:
        return is_registered;
    }

+    void SetMemoryMarked(bool is_memory_marked_) {
+        is_memory_marked = is_memory_marked_;
+    }
+
+    bool IsMemoryMarked() const {
+        return is_memory_marked;
+    }
+
+    void SetSyncPending(bool is_sync_pending_) {
+        is_sync_pending = is_sync_pending_;
+    }
+
+    bool IsSyncPending() const {
+        return is_sync_pending;
+    }
+
    VAddr GetStart() const {
        return start;
    }
@@ -83,6 +99,8 @@ private:
    bool is_written{};
    bool is_modified{};
    bool is_registered{};
+    bool is_memory_marked{};
+    bool is_sync_pending{};
    u64 ticks{};
 };

--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
 void DmaPusher::DispatchCalls() {
    MICROPROFILE_SCOPE(DispatchCalls);

+    gpu.SyncGuestHost();
    // On entering GPU code, assume all memory may be touched by the ARM core.
    gpu.Maxwell3D().OnMemoryWrite();

@@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() {
        }
    }
    gpu.FlushCommands();
+    gpu.SyncGuestHost();
+    gpu.OnCommandListEnd();
 }

 bool DmaPusher::Step() {
@@ -68,16 +71,22 @@ bool DmaPusher::Step() {
    gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
                                        command_list_header.size * sizeof(u32));

-    for (const CommandHeader& command_header : command_headers) {
+    for (std::size_t index = 0; index < command_headers.size();) {
+        const CommandHeader& command_header = command_headers[index];

-        // now, see if we're in the middle of a command
-        if (dma_state.length_pending) {
-            // Second word of long non-inc methods command - method count
-            dma_state.length_pending = 0;
-            dma_state.method_count = command_header.method_count_;
-        } else if (dma_state.method_count) {
+        if (dma_state.method_count) {
            // Data word of methods command
-            CallMethod(command_header.argument);
+            if (dma_state.non_incrementing) {
+                const u32 max_write = static_cast<u32>(
+                    std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) -
+                    index);
+                CallMultiMethod(&command_header.argument, max_write);
+                dma_state.method_count -= max_write;
+                index += max_write;
+                continue;
+            } else {
+                CallMethod(command_header.argument);
+            }

            if (!dma_state.non_incrementing) {
                dma_state.method++;
@@ -117,6 +126,7 @@ bool DmaPusher::Step() {
                break;
            }
        }
+        index++;
    }

    if (!non_main) {
@@ -137,4 +147,9 @@ void DmaPusher::CallMethod(u32 argument) const {
    gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
 }

+void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
+    gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+                        dma_state.method_count);
+}
+
 } // namespace Tegra
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,7 @@ private:
    void SetState(const CommandHeader& command_header);

    void CallMethod(u32 argument) const;
+    void CallMultiMethod(const u32* base_start, u32 num_methods) const;

    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,7 +28,13 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

-std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
+void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
+    for (std::size_t i = 0; i < amount; i++) {
+        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+    }
+}
+
+static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
    const u32 line_a = src_2 - src_1;
    const u32 line_b = dst_2 - dst_1;
    const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -39,6 +39,9 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
+
    enum class Origin : u32 {
        Center = 0,
        Corner = 1,
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -51,6 +51,13 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

+void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                    u32 methods_pending) {
+    for (std::size_t i = 0; i < amount; i++) {
+        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+    }
+}
+
 Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
    const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
    ASSERT(cbuf_mask[regs.tex_cb_index]);
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -202,6 +202,9 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
+
    Texture::FullTextureInfo GetTexture(std::size_t offset) const;

    /// Given a texture handle, returns the TSC and TIC entries.
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,4 +41,11 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

+void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                   u32 methods_pending) {
+    for (std::size_t i = 0; i < amount; i++) {
+        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+    }
+}
+
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -40,6 +40,9 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
+
    struct Regs {
        static constexpr size_t NUM_REGS = 0x7F;

--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -280,6 +280,58 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    }
 }

+void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                u32 methods_pending) {
+    // Methods after 0xE00 are special, they're actually triggers for some microcode that was
+    // uploaded to the GPU during initialization.
+    if (method >= MacroRegistersStart) {
+        // We're trying to execute a macro
+        if (executing_macro == 0) {
+            // A macro call must begin by writing the macro method's register, not its argument.
+            ASSERT_MSG((method % 2) == 0,
+                       "Can't start macro execution by writing to the ARGS register");
+            executing_macro = method;
+        }
+
+        for (std::size_t i = 0; i < amount; i++) {
+            macro_params.push_back(base_start[i]);
+        }
+
+        // Call the macro when there are no more parameters in the command buffer
+        if (amount == methods_pending) {
+            CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
+            macro_params.clear();
+        }
+        return;
+    }
+    switch (method) {
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
+    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
+        ProcessCBMultiData(method, base_start, amount);
+        break;
+    }
+    default: {
+        for (std::size_t i = 0; i < amount; i++) {
+            CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+        }
+    }
+    }
+}
+
 void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
    if (mme_draw.current_mode == MMEDrawMode::Undefined) {
        if (mme_draw.gl_begin_consume) {
@@ -404,7 +456,11 @@ void Maxwell3D::ProcessQueryGet() {

    switch (regs.query.query_get.operation) {
    case Regs::QueryOperation::Release:
-        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+        if (regs.query.query_get.fence == 1) {
+            rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+        } else {
+            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
+        }
        break;
    case Regs::QueryOperation::Acquire:
        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -483,7 +539,7 @@ void Maxwell3D::ProcessSyncPoint() {
    const u32 increment = regs.sync_info.increment.Value();
    [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
    if (increment) {
-        system.GPU().IncrementSyncPoint(sync_point);
+        rasterizer.SignalSyncPoint(sync_point);
    }
 }

@@ -566,6 +622,28 @@ void Maxwell3D::StartCBData(u32 method) {
    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
 }

+void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
+    if (cb_data_state.current != method) {
+        if (cb_data_state.current != null_cb_data) {
+            FinishCBData();
+        }
+        constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+        cb_data_state.start_pos = regs.const_buffer.cb_pos;
+        cb_data_state.id = method - first_cb_data;
+        cb_data_state.current = method;
+        cb_data_state.counter = 0;
+    }
+    const std::size_t id = cb_data_state.id;
+    const std::size_t size = amount;
+    std::size_t i = 0;
+    for (; i < size; i++) {
+        cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
+        cb_data_state.counter++;
+    }
+    // Increment the current buffer position.
+    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
+}
+
 void Maxwell3D::FinishCBData() {
    // Write the input value to the current const buffer at the current position.
    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1259,7 +1259,8 @@ public:

                    GPUVAddr LimitAddress() const {
                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
-                                                     limit_low);
+                                                     limit_low) +
+                               1;
                    }
                } vertex_array_limit[NumVertexArrays];

@@ -1358,6 +1359,9 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
+
    /// Write the value to the register identified by method.
    void CallMethodFromMME(const GPU::MethodCall& method_call);

@@ -1511,6 +1515,7 @@ private:
    /// Handles a write to the CB_DATA[i] register.
    void StartCBData(u32 method);
    void ProcessCBData(u32 value);
+    void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
    void FinishCBData();

    /// Handles a write to the CB_BIND register.
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -36,6 +36,13 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
 #undef MAXWELLDMA_REG_INDEX
 }

+void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
+                                 u32 methods_pending) {
+    for (std::size_t i = 0; i < amount; i++) {
+        CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)});
+    }
+}
+
 void MaxwellDMA::HandleCopy() {
    LOG_TRACE(HW_GPU, "Requested a DMA copy");

@@ -104,8 +111,13 @@ void MaxwellDMA::HandleCopy() {
            write_buffer.resize(dst_size);
        }

-        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        if (Settings::IsGPULevelExtreme()) {
+            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+        } else {
+            memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
+            memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+        }

        Texture::UnswizzleSubrect(
            regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
@@ -136,7 +148,7 @@ void MaxwellDMA::HandleCopy() {
            write_buffer.resize(dst_size);
        }

-        if (Settings::values.use_accurate_gpu_emulation) {
+        if (Settings::IsGPULevelExtreme()) {
            memory_manager.ReadBlock(source, read_buffer.data(), src_size);
            memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
        } else {
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -35,6 +35,9 @@ public:
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

+    /// Write multiple values to the register identified by method.
+    void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending);
+
    struct Regs {
        static constexpr std::size_t NUM_REGS = 0x1D6;

--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -655,6 +655,7 @@ union Instruction {
    }

    constexpr Instruction(u64 value) : value{value} {}
+    constexpr Instruction(const Instruction& instr) : value(instr.value) {}

    BitField<0, 8, Register> gpr0;
    BitField<8, 8, Register> gpr8;
@@ -817,11 +818,9 @@ union Instruction {
        BitField<32, 1, u64> saturate;
        BitField<49, 2, HalfMerge> merge;

-        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, HalfType> type_a;

-        BitField<31, 1, u64> negate_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, HalfType> type_b;

--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -0,0 +1,170 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <queue>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "core/settings.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+class FenceBase {
+public:
+    FenceBase(u32 payload, bool is_stubbed)
+        : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
+
+    FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
+        : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
+
+    GPUVAddr GetAddress() const {
+        return address;
+    }
+
+    u32 GetPayload() const {
+        return payload;
+    }
+
+    bool IsSemaphore() const {
+        return is_semaphore;
+    }
+
+private:
+    GPUVAddr address;
+    u32 payload;
+    bool is_semaphore;
+
+protected:
+    bool is_stubbed;
+};
+
+template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
+class FenceManager {
+public:
+    void SignalSemaphore(GPUVAddr addr, u32 value) {
+        TryReleasePendingFences();
+        const bool should_flush = ShouldFlush();
+        CommitAsyncFlushes();
+        TFence new_fence = CreateFence(addr, value, !should_flush);
+        fences.push(new_fence);
+        QueueFence(new_fence);
+        if (should_flush) {
+            rasterizer.FlushCommands();
+        }
+        rasterizer.SyncGuestHost();
+    }
+
+    void SignalSyncPoint(u32 value) {
+        TryReleasePendingFences();
+        const bool should_flush = ShouldFlush();
+        CommitAsyncFlushes();
+        TFence new_fence = CreateFence(value, !should_flush);
+        fences.push(new_fence);
+        QueueFence(new_fence);
+        if (should_flush) {
+            rasterizer.FlushCommands();
+        }
+        rasterizer.SyncGuestHost();
+    }
+
+    void WaitPendingFences() {
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
+        while (!fences.empty()) {
+            TFence& current_fence = fences.front();
+            if (ShouldWait()) {
+                WaitFence(current_fence);
+            }
+            PopAsyncFlushes();
+            if (current_fence->IsSemaphore()) {
+                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            } else {
+                gpu.IncrementSyncPoint(current_fence->GetPayload());
+            }
+            fences.pop();
+        }
+    }
+
+protected:
+    FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
+                 TQueryCache& query_cache)
+        : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
+          buffer_cache{buffer_cache}, query_cache{query_cache} {}
+
+    virtual ~FenceManager() {}
+
+    /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
+    /// true
+    virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
+    /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
+    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+    /// Queues a fence into the backend if the fence isn't stubbed.
+    virtual void QueueFence(TFence& fence) = 0;
+    /// Notifies that the backend fence has been signaled/reached in host GPU.
+    virtual bool IsFenceSignaled(TFence& fence) const = 0;
+    /// Waits until a fence has been signalled by the host GPU.
+    virtual void WaitFence(TFence& fence) = 0;
+
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
+    TTextureCache& texture_cache;
+    TTBufferCache& buffer_cache;
+    TQueryCache& query_cache;
+
+private:
+    void TryReleasePendingFences() {
+        auto& gpu{system.GPU()};
+        auto& memory_manager{gpu.MemoryManager()};
+        while (!fences.empty()) {
+            TFence& current_fence = fences.front();
+            if (ShouldWait() && !IsFenceSignaled(current_fence)) {
+                return;
+            }
+            PopAsyncFlushes();
+            if (current_fence->IsSemaphore()) {
+                memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
+            } else {
+                gpu.IncrementSyncPoint(current_fence->GetPayload());
+            }
+            fences.pop();
+        }
+    }
+
+    bool ShouldWait() const {
+        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
+               query_cache.ShouldWaitAsyncFlushes();
+    }
+
+    bool ShouldFlush() const {
+        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
+               query_cache.HasUncommittedFlushes();
+    }
+
+    void PopAsyncFlushes() {
+        texture_cache.PopAsyncFlushes();
+        buffer_cache.PopAsyncFlushes();
+        query_cache.PopAsyncFlushes();
+    }
+
+    void CommitAsyncFlushes() {
+        texture_cache.CommitAsyncFlushes();
+        buffer_cache.CommitAsyncFlushes();
+        query_cache.CommitAsyncFlushes();
+    }
+
+    std::queue<TFence> fences;
+};
+
+} // namespace VideoCommon
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -9,6 +9,7 @@
 #include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/kepler_memory.h"
@@ -125,6 +126,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
    return true;
 }

+u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
+    std::unique_lock lck{flush_request_mutex};
+    const u64 fence = ++last_flush_fence;
+    flush_requests.emplace_back(fence, addr, size);
+    return fence;
+}
+
+void GPU::TickWork() {
+    std::unique_lock lck{flush_request_mutex};
+    while (!flush_requests.empty()) {
+        auto& request = flush_requests.front();
+        const u64 fence = request.fence;
+        const VAddr addr = request.addr;
+        const std::size_t size = request.size;
+        flush_requests.pop_front();
+        flush_request_mutex.unlock();
+        renderer->Rasterizer().FlushRegion(addr, size);
+        current_flush_fence.store(fence);
+        flush_request_mutex.lock();
+    }
+}
+
 u64 GPU::GetTicks() const {
    // This values were reversed engineered by fincs from NVN
    // The gpu clock is reported in units of 385/625 nanoseconds
@@ -132,7 +155,10 @@ u64 GPU::GetTicks() const {
    constexpr u64 gpu_ticks_den = 625;

    const u64 cpu_ticks = system.CoreTiming().GetTicks();
-    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    if (Settings::values.use_fast_gpu_time) {
+        nanoseconds /= 256;
+    }
    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
    const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
@@ -142,6 +168,13 @@ void GPU::FlushCommands() {
    renderer->Rasterizer().FlushCommands();
 }

+void GPU::SyncGuestHost() {
+    renderer->Rasterizer().SyncGuestHost();
+}
+
+void GPU::OnCommandListEnd() {
+    renderer->Rasterizer().ReleaseFences();
+}
 // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
 // So the values you see in docs might be multiplied by 4.
@@ -180,16 +213,32 @@ void GPU::CallMethod(const MethodCall& method_call) {

    ASSERT(method_call.subchannel < bound_engines.size());

-    if (ExecuteMethodOnEngine(method_call)) {
+    if (ExecuteMethodOnEngine(method_call.method)) {
        CallEngineMethod(method_call);
    } else {
        CallPullerMethod(method_call);
    }
 }

-bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
-    const auto method = static_cast<BufferMethods>(method_call.method);
-    return method >= BufferMethods::NonPullerMethods;
+void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                          u32 methods_pending) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+    ASSERT(subchannel < bound_engines.size());
+
+    if (ExecuteMethodOnEngine(method)) {
+        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+    } else {
+        for (std::size_t i = 0; i < amount; i++) {
+            CallPullerMethod(
+                {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)});
+        }
+    }
+}
+
+bool GPU::ExecuteMethodOnEngine(u32 method) {
+    const auto buffer_method = static_cast<BufferMethods>(method);
+    return buffer_method >= BufferMethods::NonPullerMethods;
 }

 void GPU::CallPullerMethod(const MethodCall& method_call) {
@@ -269,6 +318,31 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
    }
 }

+void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                                u32 methods_pending) {
+    const EngineID engine = bound_engines[subchannel];
+
+    switch (engine) {
+    case EngineID::FERMI_TWOD_A:
+        fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_B:
+        maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
+}
+
 void GPU::ProcessBindMethod(const MethodCall& method_call) {
    // Bind the current subchannel to the desired engine id.
    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,7 +155,27 @@ public:
    /// Calls a GPU method.
    void CallMethod(const MethodCall& method_call);

+    /// Calls a GPU multivalue method.
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending);
+
+    /// Flush all current written commands into the host GPU for execution.
    void FlushCommands();
+    /// Synchronizes CPU writes with Host GPU memory.
+    void SyncGuestHost();
+    /// Signal the ending of command list.
+    virtual void OnCommandListEnd();
+
+    /// Request a host GPU memory flush from the CPU.
+    u64 RequestFlush(VAddr addr, std::size_t size);
+
+    /// Obtains current flush request fence id.
+    u64 CurrentFlushRequestFence() const {
+        return current_flush_fence.load(std::memory_order_relaxed);
+    }
+
+    /// Tick pending requests within the GPU.
+    void TickWork();

    /// Returns a reference to the Maxwell3D GPU engine.
    Engines::Maxwell3D& Maxwell3D();
@@ -293,8 +313,12 @@ private:
    /// Calls a GPU engine method.
    void CallEngineMethod(const MethodCall& method_call);

+    /// Calls a GPU engine multivalue method.
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending);
+
    /// Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+    bool ExecuteMethodOnEngine(u32 method);

 protected:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
@@ -325,6 +349,19 @@ private:

    std::condition_variable sync_cv;

+    struct FlushRequest {
+        FlushRequest(u64 fence, VAddr addr, std::size_t size)
+            : fence{fence}, addr{addr}, size{size} {}
+        u64 fence;
+        VAddr addr;
+        std::size_t size;
+    };
+
+    std::list<FlushRequest> flush_requests;
+    std::atomic<u64> current_flush_fence{};
+    u64 last_flush_fence{};
+    std::mutex flush_request_mutex;
+
    const bool is_async;
 };

--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
    gpu_thread.WaitIdle();
 }

+void GPUAsynch::OnCommandListEnd() {
+    gpu_thread.OnCommandListEnd();
+}
+
 } // namespace VideoCommon
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    void WaitIdle() const override;

+    void OnCommandListEnd() override;
+
 protected:
    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;

--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -6,6 +6,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -14,8 +15,9 @@
 namespace VideoCommon::GPUThread {

 /// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
+                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
    MicroProfileOnThreadCreate("GpuThread");

    // Wait for first GPU command before acquiring the window context
@@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
            dma_pusher.DispatchCalls();
        } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
+        } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+            renderer.Rasterizer().ReleaseFences();
+        } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+            system.GPU().TickWork();
        } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
            renderer.Rasterizer().FlushRegion(data->addr, data->size);
        } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+            renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
            return;
        } else {
@@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
                                Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
-                         std::ref(state)};
+    thread = std::thread{RunThread,         std::ref(system),     std::ref(renderer),
+                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
 }

 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 }

 void ThreadManager::FlushRegion(VAddr addr, u64 size) {
-    PushCommand(FlushRegionCommand(addr, size));
+    if (!Settings::IsGPULevelHigh()) {
+        PushCommand(FlushRegionCommand(addr, size));
+        return;
+    }
+    if (!Settings::IsGPULevelExtreme()) {
+        return;
+    }
+    if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
+        auto& gpu = system.GPU();
+        u64 fence = gpu.RequestFlush(addr, size);
+        PushCommand(GPUTickCommand());
+        while (fence > gpu.CurrentFlushRequestFence()) {
+        }
+    }
 }

 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
 }

 void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
-    InvalidateRegion(addr, size);
+    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
 }

 void ThreadManager::WaitIdle() const {
@@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
    }
 }

+void ThreadManager::OnCommandListEnd() {
+    PushCommand(OnCommandListEndCommand());
+}
+
 u64 ThreadManager::PushCommand(CommandData&& command_data) {
    const u64 fence{++state.last_fence};
    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
    u64 size;
 };

+/// Command called within the gpu, to schedule actions after a command list end
+struct OnCommandListEndCommand final {};
+
+/// Command to make the gpu look into pending requests
+struct GPUTickCommand final {};
+
 using CommandData =
    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
+                 GPUTickCommand>;

 struct CommandDataContainer {
    CommandDataContainer() = default;
@@ -122,6 +129,8 @@ public:
    // Wait until the gpu thread is idle.
    void WaitIdle() const;

+    void OnCommandListEnd();
+
 private:
    /// Pushes a command to be executed by the GPU thread
    u64 PushCommand(CommandData&& command_data);
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -51,11 +51,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};

    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
-    ASSERT(system.CurrentProcess()
-               ->PageTable()
-               .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared,
-                                   Kernel::Memory::MemoryAttribute::DeviceShared)
-               .IsSuccess());
+    ASSERT(
+        system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());

    return gpu_addr;
 }
@@ -66,11 +63,8 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
    const u64 aligned_size{Common::AlignUp(size, page_size)};

    MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr);
-    ASSERT(system.CurrentProcess()
-               ->PageTable()
-               .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared,
-                                   Kernel::Memory::MemoryAttribute::DeviceShared)
-               .IsSuccess());
+    ASSERT(
+        system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess());
    return gpu_addr;
 }

@@ -87,9 +81,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
    UnmapRange(gpu_addr, aligned_size);
    ASSERT(system.CurrentProcess()
               ->PageTable()
-               .SetMemoryAttribute(cpu_addr.value(), size,
-                                   Kernel::Memory::MemoryAttribute::DeviceShared,
-                                   Kernel::Memory::MemoryAttribute::None)
+               .UnlockForDeviceAddressSpace(cpu_addr.value(), size)
               .IsSuccess());

    return gpu_addr;
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -12,10 +12,12 @@
 #include <mutex>
 #include <optional>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>

 #include "common/assert.h"
 #include "core/core.h"
+#include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
@@ -130,6 +132,9 @@ public:
        }

        query->BindCounter(Stream(type).Current(), timestamp);
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            AsyncFlushQuery(cpu_addr);
+        }
    }

    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -170,6 +175,37 @@ public:
        return streams[static_cast<std::size_t>(type)];
    }

+    void CommitAsyncFlushes() {
+        committed_flushes.push_back(uncommitted_flushes);
+        uncommitted_flushes.reset();
+    }
+
+    bool HasUncommittedFlushes() const {
+        return uncommitted_flushes != nullptr;
+    }
+
+    bool ShouldWaitAsyncFlushes() const {
+        if (committed_flushes.empty()) {
+            return false;
+        }
+        return committed_flushes.front() != nullptr;
+    }
+
+    void PopAsyncFlushes() {
+        if (committed_flushes.empty()) {
+            return;
+        }
+        auto& flush_list = committed_flushes.front();
+        if (!flush_list) {
+            committed_flushes.pop_front();
+            return;
+        }
+        for (VAddr query_address : *flush_list) {
+            FlushAndRemoveRegion(query_address, 4);
+        }
+        committed_flushes.pop_front();
+    }
+
 protected:
    std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;

@@ -224,6 +260,13 @@ private:
        return found != std::end(contents) ? &*found : nullptr;
    }

+    void AsyncFlushQuery(VAddr addr) {
+        if (!uncommitted_flushes) {
+            uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
+        }
+        uncommitted_flushes->insert(addr);
+    }
+
    static constexpr std::uintptr_t PAGE_SIZE = 4096;
    static constexpr unsigned PAGE_SHIFT = 12;

@@ -235,6 +278,9 @@ private:
    std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;

    std::array<CounterStream, VideoCore::NumQueryTypes> streams;
+
+    std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
 };

 template <class QueryCache, class HostCounter>
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,15 +49,33 @@ public:
    /// Records a GPU query and caches it
    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;

+    /// Signal a GPU based semaphore as a fence
+    virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
+
+    /// Signal a GPU based syncpoint as a fence
+    virtual void SignalSyncPoint(u32 value) = 0;
+
+    /// Release all pending fences.
+    virtual void ReleaseFences() = 0;
+
    /// Notify rasterizer that all caches should be flushed to Switch memory
    virtual void FlushAll() = 0;

    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    virtual void FlushRegion(VAddr addr, u64 size) = 0;

+    /// Check if the the specified memory area requires flushing to CPU Memory.
+    virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
+
    /// Notify rasterizer that any caches of the specified region should be invalidated
    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;

+    /// Notify rasterizer that any caches of the specified region are desync with guest
+    virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
+
+    /// Sync memory between guest and host.
+    virtual void SyncGuestHost() = 0;
+
    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
    /// and invalidated
    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 }

 void OGLBufferCache::WriteBarrier() {
-    glMemoryBarrier(GL_ALL_BARRIER_BITS);
+    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
 }

 GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
 void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                                       u8* data) {
    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
                            static_cast<GLsizeiptr>(size), data);
 }
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -0,0 +1,72 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+
+#include "video_core/renderer_opengl/gl_fence_manager.h"
+
+namespace OpenGL {
+
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+
+GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+
+GLInnerFence::~GLInnerFence() = default;
+
+void GLInnerFence::Queue() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(sync_object.handle == 0);
+    sync_object.Create();
+}
+
+bool GLInnerFence::IsSignaled() const {
+    if (is_stubbed) {
+        return true;
+    }
+    ASSERT(sync_object.handle != 0);
+    GLsizei length;
+    GLint sync_status;
+    glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+    return sync_status == GL_SIGNALED;
+}
+
+void GLInnerFence::Wait() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(sync_object.handle != 0);
+    glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
+}
+
+FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
+                                       VideoCore::RasterizerInterface& rasterizer,
+                                       TextureCacheOpenGL& texture_cache,
+                                       OGLBufferCache& buffer_cache, QueryCache& query_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+
+Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
+    return std::make_shared<GLInnerFence>(value, is_stubbed);
+}
+
+Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+    return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
+}
+
+void FenceManagerOpenGL::QueueFence(Fence& fence) {
+    fence->Queue();
+}
+
+bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
+    return fence->IsSignaled();
+}
+
+void FenceManagerOpenGL::WaitFence(Fence& fence) {
+    fence->Wait();
+}
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -0,0 +1,53 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/fence_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_query_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace OpenGL {
+
+class GLInnerFence : public VideoCommon::FenceBase {
+public:
+    GLInnerFence(u32 payload, bool is_stubbed);
+    GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
+    ~GLInnerFence();
+
+    void Queue();
+
+    bool IsSignaled() const;
+
+    void Wait();
+
+private:
+    OGLSync sync_object;
+};
+
+using Fence = std::shared_ptr<GLInnerFence>;
+using GenericFenceManager =
+    VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
+
+class FenceManagerOpenGL final : public GenericFenceManager {
+public:
+    FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                       TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+                       QueryCache& query_cache);
+
+protected:
+    Fence CreateFence(u32 value, bool is_stubbed) override;
+    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    void QueueFence(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) const override;
+    void WaitFence(Fence& fence) override;
+};
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
                                   ScreenInfo& info, GLShader::ProgramManager& program_manager,
                                   StateTracker& state_tracker)
    : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
-      shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
-      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
-      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
+      shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
+      buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
+      fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
+      screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
    CheckExtensions();
 }

@@ -185,8 +186,12 @@ void RasterizerOpenGL::SetupVertexBuffer() {
        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

-        ASSERT(end > start);
-        const u64 size = end - start + 1;
+        ASSERT(end >= start);
+        const u64 size = end - start;
+        if (size == 0) {
+            glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
+            continue;
+        }
        const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
        glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
                           vertex_array.stride);
@@ -310,8 +315,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
        const GPUVAddr start = regs.vertex_array[index].StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

-        ASSERT(end > start);
-        size += end - start + 1;
+        size += end - start;
+        ASSERT(end >= start);
    }

    return size;
@@ -599,6 +604,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    EndTransformFeedback();

    ++num_queued_commands;
+
+    system.GPU().TickWork();
 }

 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -649,6 +656,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
    query_cache.FlushRegion(addr, size);
 }

+bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    if (addr == 0 || size == 0) {
@@ -660,8 +674,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    query_cache.InvalidateRegion(addr, size);
 }

+void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (addr == 0 || size == 0) {
+        return;
+    }
+    texture_cache.OnCPUWrite(addr, size);
+    shader_cache.InvalidateRegion(addr, size);
+    buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerOpenGL::SyncGuestHost() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    texture_cache.SyncGuestHost();
+    buffer_cache.SyncGuestHost();
+}
+
+void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        auto& memory_manager{gpu.MemoryManager()};
+        memory_manager.Write<u32>(addr, value);
+        return;
+    }
+    fence_manager.SignalSemaphore(addr, value);
+}
+
+void RasterizerOpenGL::SignalSyncPoint(u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.IncrementSyncPoint(value);
+        return;
+    }
+    fence_manager.SignalSyncPoint(value);
+}
+
+void RasterizerOpenGL::ReleaseFences() {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        return;
+    }
+    fence_manager.WaitPendingFences();
+}
+
 void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
-    if (Settings::values.use_accurate_gpu_emulation) {
+    if (Settings::IsGPULevelExtreme()) {
        FlushRegion(addr, size);
    }
    InvalidateRegion(addr, size);
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_fence_manager.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -66,7 +67,13 @@ public:
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
+    void OnCPUWrite(VAddr addr, u64 size) override;
+    void SyncGuestHost() override;
+    void SignalSemaphore(GPUVAddr addr, u32 value) override;
+    void SignalSyncPoint(u32 value) override;
+    void ReleaseFences() override;
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    void FlushCommands() override;
    void TickFrame() override;
@@ -222,6 +229,8 @@ private:
    SamplerCacheOpenGL sampler_cache;
    FramebufferCacheOpenGL framebuffer_cache;
    QueryCache query_cache;
+    OGLBufferCache buffer_cache;
+    FenceManagerOpenGL fence_manager;

    Core::System& system;
    ScreenInfo& screen_info;
@@ -229,7 +238,6 @@ private:
    StateTracker& state_tracker;

    static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
-    OGLBufferCache buffer_cache;

    GLint vertex_binding = 0;

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,8 +10,6 @@
 #include <thread>
 #include <unordered_set>

-#include <boost/functional/hash.hpp>
-
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -28,76 +26,26 @@
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"

 namespace OpenGL {

 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::GetShaderAddress;
+using VideoCommon::Shader::GetShaderCode;
+using VideoCommon::Shader::GetUniqueIdentifier;
+using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
+using VideoCommon::Shader::STAGE_MAIN_OFFSET;

 namespace {

-constexpr u32 STAGE_MAIN_OFFSET = 10;
-constexpr u32 KERNEL_MAIN_OFFSET = 0;
-
 constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};

-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
-    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & mask) == self_jumping_branch) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        offset++;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
-                          const u8* host_ptr) {
-    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
-        std::fill(code.begin(), code.end(), 0);
-        return code;
-    });
-    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
-    code.resize(CalculateProgramSize(code));
-    return code;
-}
-
 /// Gets the shader type from a Maxwell program type
 constexpr GLenum GetGLShaderType(ShaderType shader_type) {
    switch (shader_type) {
@@ -114,17 +62,6 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
    }
 }

-/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b = {}) {
-    u64 unique_identifier = boost::hash_value(code);
-    if (is_a) {
-        // VertexA programs include two programs
-        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
-    }
-    return unique_identifier;
-}
-
 constexpr const char* GetShaderTypeName(ShaderType shader_type) {
    switch (shader_type) {
    case ShaderType::Vertex:
@@ -448,7 +385,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {

    // Look up shader in the cache based on address
    const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
-    Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
+    Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
    if (shader) {
        return last_shaders[static_cast<std::size_t>(program)] = shader;
    }
@@ -456,11 +393,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    const auto host_ptr{memory_manager.GetPointer(address)};

    // No shader found - create a new one
-    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
+    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
    ProgramCode code_b;
    if (program == Maxwell::ShaderProgram::VertexA) {
        const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
-        code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
+        const u8* host_ptr_b = memory_manager.GetPointer(address_b);
+        code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
    }

    const auto unique_identifier = GetUniqueIdentifier(
@@ -477,7 +415,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
        const std::size_t size_in_bytes = code.size() * sizeof(u64);
        shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
    }
-    Register(shader);
+
+    if (cpu_addr) {
+        Register(shader);
+    } else {
+        null_shader = shader;
+    }

    return last_shaders[static_cast<std::size_t>(program)] = shader;
 }
@@ -486,14 +429,14 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
    auto& memory_manager{system.GPU().MemoryManager()};
    const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};

-    auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+    auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
    if (kernel) {
        return kernel;
    }

    const auto host_ptr{memory_manager.GetPointer(code_addr)};
    // No kernel found, create a new one
-    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
+    auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
    const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};

    const ShaderParameters params{system,    disk_cache, device,
@@ -507,7 +450,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
        kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
    }

-    Register(kernel);
+    if (cpu_addr) {
+        Register(kernel);
+    } else {
+        null_kernel = kernel;
+    }
    return kernel;
 }

--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -125,6 +125,9 @@ private:
    ShaderDiskCacheOpenGL disk_cache;
    std::unordered_map<u64, PrecompiledShader> runtime_cache;

+    Shader null_shader{};
+    Shader null_kernel{};
+
    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 };

--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
    enable.Assign(1);
 }

+void FixedPipelineState::Fill(const Maxwell& regs) {
+    rasterizer.Fill(regs);
+    depth_stencil.Fill(regs);
+    color_blending.Fill(regs);
+}
+
 std::size_t FixedPipelineState::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
@@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
    return std::memcmp(this, &rhs, sizeof *this) == 0;
 }

-FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
-    FixedPipelineState fixed_state;
-    fixed_state.rasterizer.Fill(regs);
-    fixed_state.depth_stencil.Fill(regs);
-    fixed_state.color_blending.Fill(regs);
-    fixed_state.padding = {};
-    return fixed_state;
-}
-
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
    // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
    // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -17,7 +17,7 @@ namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-struct alignas(32) FixedPipelineState {
+struct FixedPipelineState {
    static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
    static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;

@@ -129,7 +129,7 @@ struct alignas(32) FixedPipelineState {
            auto& binding = bindings[index];
            binding.raw = 0;
            binding.enabled.Assign(enabled ? 1 : 0);
-            binding.stride.Assign(stride);
+            binding.stride.Assign(static_cast<u16>(stride));
            binding_divisors[index] = divisor;
        }

@@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
    Rasterizer rasterizer;
    DepthStencil depth_stencil;
    ColorBlending color_blending;
-    std::array<u8, 20> padding;
+
+    void Fill(const Maxwell& regs);

    std::size_t Hash() const noexcept;

@@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
 static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
 static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
-static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
-
-FixedPipelineState GetFixedPipelineState(const Maxwell& regs);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -82,11 +82,6 @@ public:
        return present_family;
    }

-    /// Returns true if the device is integrated with the host CPU.
-    bool IsIntegrated() const {
-        return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
-    }
-
    /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
    u32 GetApiVersion() const {
        return properties.apiVersion;
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -0,0 +1,101 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <thread>
+
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
+
+InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
+                       u32 payload, bool is_stubbed)
+    : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
+
+InnerFence::~InnerFence() = default;
+
+void InnerFence::Queue() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(!event);
+
+    event = device.GetLogical().CreateEvent();
+    ticks = scheduler.Ticks();
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
+        cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+    });
+}
+
+bool InnerFence::IsSignaled() const {
+    if (is_stubbed) {
+        return true;
+    }
+    ASSERT(event);
+    return IsEventSignalled();
+}
+
+void InnerFence::Wait() {
+    if (is_stubbed) {
+        return;
+    }
+    ASSERT(event);
+
+    if (ticks >= scheduler.Ticks()) {
+        scheduler.Flush();
+    }
+    while (!IsEventSignalled()) {
+        std::this_thread::yield();
+    }
+}
+
+bool InnerFence::IsEventSignalled() const {
+    switch (const VkResult result = event.GetStatus()) {
+    case VK_EVENT_SET:
+        return true;
+    case VK_EVENT_RESET:
+        return false;
+    default:
+        throw vk::Exception(result);
+    }
+}
+
+VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                               const VKDevice& device, VKScheduler& scheduler,
+                               VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
+                               VKQueryCache& query_cache)
+    : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
+      device{device}, scheduler{scheduler} {}
+
+Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
+    return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
+}
+
+Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+    return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
+}
+
+void VKFenceManager::QueueFence(Fence& fence) {
+    fence->Queue();
+}
+
+bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
+    return fence->IsSignaled();
+}
+
+void VKFenceManager::WaitFence(Fence& fence) {
+    fence->Wait();
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -0,0 +1,74 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "video_core/fence_manager.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Vulkan {
+
+class VKBufferCache;
+class VKDevice;
+class VKQueryCache;
+class VKScheduler;
+class VKTextureCache;
+
+class InnerFence : public VideoCommon::FenceBase {
+public:
+    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
+                        bool is_stubbed);
+    explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
+                        u32 payload, bool is_stubbed);
+    ~InnerFence();
+
+    void Queue();
+
+    bool IsSignaled() const;
+
+    void Wait();
+
+private:
+    bool IsEventSignalled() const;
+
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    vk::Event event;
+    u64 ticks = 0;
+};
+using Fence = std::shared_ptr<InnerFence>;
+
+using GenericFenceManager =
+    VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
+
+class VKFenceManager final : public GenericFenceManager {
+public:
+    explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                            const VKDevice& device, VKScheduler& scheduler,
+                            VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
+                            VKQueryCache& query_cache);
+
+protected:
+    Fence CreateFence(u32 value, bool is_stubbed) override;
+    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    void QueueFence(Fence& fence) override;
+    bool IsFenceSignaled(Fence& fence) const override;
+    void WaitFence(Fence& fence) override;
+
+private:
+    const VKDevice& device;
+    VKScheduler& scheduler;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -288,7 +288,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
    depth_stencil_ci.maxDepthBounds = 0.0f;

    std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
-    const std::size_t num_attachments = renderpass_params.color_attachments.size();
+    const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
    for (std::size_t index = 0; index < num_attachments; ++index) {
        static constexpr std::array COMPONENT_TABLE = {
            VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -118,8 +118,7 @@ private:
 };

 VKMemoryManager::VKMemoryManager(const VKDevice& device)
-    : device{device}, properties{device.GetPhysical().GetMemoryProperties()},
-      is_memory_unified{GetMemoryUnified(properties)} {}
+    : device{device}, properties{device.GetPhysical().GetMemoryProperties()} {}

 VKMemoryManager::~VKMemoryManager() = default;

@@ -209,16 +208,6 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi
    return {};
 }

-bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) {
-    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
-        if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) {
-            // Memory is considered unified when heaps are device local only.
-            return false;
-        }
-    }
-    return true;
-}
-
 VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
                                       const vk::DeviceMemory& memory, u64 begin, u64 end)
    : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {}
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -40,11 +40,6 @@ public:
    /// Commits memory required by the image and binds it.
    VKMemoryCommit Commit(const vk::Image& image, bool host_visible);

-    /// Returns true if the memory allocations are done always in host visible and coherent memory.
-    bool IsMemoryUnified() const {
-        return is_memory_unified;
-    }
-
 private:
    /// Allocates a chunk of memory.
    bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
@@ -53,12 +48,8 @@ private:
    VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
                                  VkMemoryPropertyFlags wanted_properties);

-    /// Returns true if the device uses an unified memory model.
-    static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties);
-
-    const VKDevice& device;                            ///< Device handler.
-    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
-    const bool is_memory_unified;                      ///< True if memory model is unified.
+    const VKDevice& device;                                       ///< Device handler.
+    const VkPhysicalDeviceMemoryProperties properties;            ///< Physical device properties.
    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };

--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,12 +27,18 @@
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/memory_util.h"

 namespace Vulkan {

 MICROPROFILE_DECLARE(Vulkan_PipelineCache);

 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::GetShaderAddress;
+using VideoCommon::Shader::GetShaderCode;
+using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
+using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::STAGE_MAIN_OFFSET;

 namespace {

@@ -45,60 +51,6 @@ constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
 constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
    VideoCommon::Shader::CompileDepth::FullDecompile};

-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
-    const std::size_t start_offset = is_compute ? 0 : 10;
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
-    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & mask) == self_jumping_branch) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        ++offset;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
-                          const u8* host_ptr, bool is_compute) {
-    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
-        std::fill(program_code.begin(), program_code.end(), 0);
-        return program_code;
-    });
-    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
-                                   program_code.size() * sizeof(u64));
-    program_code.resize(CalculateProgramSize(program_code, is_compute));
-    return program_code;
-}
-
 constexpr std::size_t GetStageFromProgram(std::size_t program) {
    return program == 0 ? 0 : program - 1;
 }
@@ -161,6 +113,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,

 } // Anonymous namespace

+std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
+std::size_t ComputePipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
                           u32 main_offset)
@@ -207,18 +177,22 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
        const GPUVAddr program_addr{GetShaderAddress(system, program)};
        const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
        ASSERT(cpu_addr);
-        auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+        auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
        if (!shader) {
            const auto host_ptr{memory_manager.GetPointer(program_addr)};

            // No shader found - create a new one
-            constexpr u32 stage_offset = 10;
+            constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
-            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);

            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                    std::move(code), stage_offset);
-            Register(shader);
+            if (cpu_addr) {
+                Register(shader);
+            } else {
+                null_shader = shader;
+            }
        }
        shaders[index] = std::move(shader);
    }
@@ -261,17 +235,20 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
    const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
    ASSERT(cpu_addr);

-    auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
+    auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
    if (!shader) {
        // No shader found - create a new one
        const auto host_ptr = memory_manager.GetPointer(program_addr);

-        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
-        constexpr u32 kernel_main_offset = 0;
+        ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                program_addr, *cpu_addr, std::move(code),
-                                                kernel_main_offset);
-        Register(shader);
+                                                KERNEL_MAIN_OFFSET);
+        if (cpu_addr) {
+            Register(shader);
+        } else {
+            null_kernel = shader;
+        }
    }

    Specialization specialization;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -7,7 +7,6 @@
 #include <array>
 #include <cstddef>
 #include <memory>
-#include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
@@ -25,6 +24,7 @@
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/surface.h"
@@ -47,46 +47,40 @@ class CachedShader;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-using ProgramCode = std::vector<u64>;
-
 struct GraphicsPipelineCacheKey {
    FixedPipelineState fixed_state;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    RenderPassParams renderpass_params;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    u64 padding; // This is necessary for unique object representations

-    std::size_t Hash() const noexcept {
-        std::size_t hash = fixed_state.Hash();
-        for (const auto& shader : shaders) {
-            boost::hash_combine(hash, shader);
-        }
-        boost::hash_combine(hash, renderpass_params.Hash());
-        return hash;
-    }
+    std::size_t Hash() const noexcept;

-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return std::tie(fixed_state, shaders, renderpass_params) ==
-               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
    }
 };
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);

 struct ComputePipelineCacheKey {
-    GPUVAddr shader{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
+    GPUVAddr shader;
+    u32 shared_memory_size;
+    std::array<u32, 3> workgroup_size;

-    std::size_t Hash() const noexcept {
-        return static_cast<std::size_t>(shader) ^
-               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
-               static_cast<std::size_t>(workgroup_size[0]) ^
-               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
-               (static_cast<std::size_t>(workgroup_size[2]) << 24);
-    }
+    std::size_t Hash() const noexcept;

-    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
-        return std::tie(shader, shared_memory_size, workgroup_size) ==
-               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
    }
 };
+static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);

 } // namespace Vulkan

@@ -113,7 +107,8 @@ namespace Vulkan {
 class CachedShader final : public RasterizerCacheObject {
 public:
    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
+                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
+                          u32 main_offset);
    ~CachedShader();

    GPUVAddr GetGpuAddr() const {
@@ -145,7 +140,7 @@ private:
                                                                 Tegra::Engines::ShaderType stage);

    GPUVAddr gpu_addr{};
-    ProgramCode program_code;
+    VideoCommon::Shader::ProgramCode program_code;
    VideoCommon::Shader::Registry registry;
    VideoCommon::Shader::ShaderIR shader_ir;
    ShaderEntries entries;
@@ -182,6 +177,9 @@ private:
    VKUpdateDescriptorQueue& update_descriptor_queue;
    VKRenderPassCache& renderpass_cache;

+    Shader null_shader{};
+    Shader null_kernel{};
+
    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;

    GraphicsPipelineCacheKey last_graphics_key;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -17,6 +17,7 @@
 #include "common/microprofile.h"
 #include "core/core.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
      pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
                     renderpass_cache),
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
-      sampler_cache(device), query_cache(system, *this, device, scheduler) {
+      sampler_cache(device),
+      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
+      query_cache(system, *this, device, scheduler) {
    scheduler.SetQueryCache(query_cache);
 }

@@ -313,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    query_cache.UpdateCounters();

    const auto& gpu = system.GPU().Maxwell3D();
-    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
+    GraphicsPipelineCacheKey key;
+    key.fixed_state.Fill(gpu.regs);

    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));

@@ -331,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {

    buffer_cache.Unmap();

-    const auto texceptions = UpdateAttachments();
+    const Texceptions texceptions = UpdateAttachments();
    SetupImageTransitions(texceptions, color_attachments, zeta_attachment);

    key.renderpass_params = GetRenderPassParams(texceptions);
+    key.padding = 0;

    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
@@ -360,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    });

    EndTransformFeedback();
+
+    system.GPU().TickWork();
 }

 void RasterizerVulkan::Clear() {
@@ -448,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    query_cache.UpdateCounters();

    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    const ComputePipelineCacheKey key{
-        code_addr,
-        launch_desc.shared_alloc,
-        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+    ComputePipelineCacheKey key;
+    key.shader = code_addr;
+    key.shared_memory_size = launch_desc.shared_alloc;
+    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
+                          launch_desc.block_dim_z};
+
    auto& pipeline = pipeline_cache.GetComputePipeline(key);

    // Compute dispatches can't be executed inside a renderpass
@@ -504,6 +513,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
    query_cache.FlushRegion(addr, size);
 }

+bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelHigh()) {
+        return buffer_cache.MustFlushRegion(addr, size);
+    }
+    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
+}
+
 void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
    if (addr == 0 || size == 0) {
        return;
@@ -514,6 +530,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
    query_cache.InvalidateRegion(addr, size);
 }

+void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+    if (addr == 0 || size == 0) {
+        return;
+    }
+    texture_cache.OnCPUWrite(addr, size);
+    pipeline_cache.InvalidateRegion(addr, size);
+    buffer_cache.OnCPUWrite(addr, size);
+    query_cache.InvalidateRegion(addr, size);
+}
+
+void RasterizerVulkan::SyncGuestHost() {
+    texture_cache.SyncGuestHost();
+    buffer_cache.SyncGuestHost();
+}
+
+void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.MemoryManager().Write<u32>(addr, value);
+        return;
+    }
+    fence_manager.SignalSemaphore(addr, value);
+}
+
+void RasterizerVulkan::SignalSyncPoint(u32 value) {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        gpu.IncrementSyncPoint(value);
+        return;
+    }
+    fence_manager.SignalSyncPoint(value);
+}
+
+void RasterizerVulkan::ReleaseFences() {
+    auto& gpu{system.GPU()};
+    if (!gpu.IsAsync()) {
+        return;
+    }
+    fence_manager.WaitPendingFences();
+}
+
 void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    FlushRegion(addr, size);
    InvalidateRegion(addr, size);
@@ -635,7 +692,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
    FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
                            std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};

-    const auto try_push = [&](const View& view) {
+    const auto try_push = [&key](const View& view) {
        if (!view) {
            return false;
        }
@@ -646,7 +703,9 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
        return true;
    };

-    for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
+    for (std::size_t index = 0; index < num_attachments; ++index) {
        if (try_push(color_attachments[index])) {
            texture_cache.MarkColorBufferInUse(index);
        }
@@ -824,8 +883,12 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
        const GPUVAddr start{vertex_array.StartAddress()};
        const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};

-        ASSERT(end > start);
-        const std::size_t size{end - start + 1};
+        ASSERT(end >= start);
+        const std::size_t size{end - start};
+        if (size == 0) {
+            buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
+            continue;
+        }
        const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
        buffer_bindings.AddVertexBinding(buffer, offset);
    }
@@ -980,8 +1043,7 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
                                        const Tegra::Engines::ConstBufferInfo& buffer) {
    if (!buffer.enabled) {
        // Set values to zero to unbind buffers
-        update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
-                                          sizeof(float));
+        update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
        return;
    }

@@ -1004,7 +1066,9 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
    if (size == 0) {
        // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
        // because Vulkan doesn't like empty buffers.
-        constexpr std::size_t dummy_size = 4;
+        // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
+        // default buffer.
+        static constexpr std::size_t dummy_size = 4;
        const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
        update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
        return;
@@ -1169,7 +1233,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
        const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
        DEBUG_ASSERT(end >= start);

-        size += (end - start + 1) * regs.vertex_array[index].enable;
+        size += (end - start) * regs.vertex_array[index].enable;
    }
    return size;
 }
@@ -1192,28 +1256,54 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
 }

 RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
-    using namespace VideoCore::Surface;
-
    const auto& regs = system.GPU().Maxwell3D().regs;
-    RenderPassParams renderpass_params;
+    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);

-    for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
+    RenderPassParams params;
+    params.color_formats = {};
+    std::size_t color_texceptions = 0;
+
+    std::size_t index = 0;
+    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
        const auto& rendertarget = regs.rt[rt];
        if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
            continue;
        }
-        renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
-            static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
-            texceptions[rt]});
+        params.color_formats[index] = static_cast<u8>(rendertarget.format);
+        color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
+        ++index;
+    }
+    params.num_color_attachments = static_cast<u8>(index);
+    params.texceptions = static_cast<u8>(color_texceptions);
+
+    params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
+    params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
+    return params;
+}
+
+VkBuffer RasterizerVulkan::DefaultBuffer() {
+    if (default_buffer) {
+        return *default_buffer;
    }

-    renderpass_params.has_zeta = regs.zeta_enable;
-    if (renderpass_params.has_zeta) {
-        renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
-        renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
-    }
+    VkBufferCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.size = DEFAULT_BUFFER_SIZE;
+    ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    ci.queueFamilyIndexCount = 0;
+    ci.pQueueFamilyIndices = nullptr;
+    default_buffer = device.GetLogical().CreateBuffer(ci);
+    default_buffer_commit = memory_manager.Commit(default_buffer, false);

-    return renderpass_params;
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {
+        cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0);
+    });
+    return *default_buffer;
 }

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,6 +21,7 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/renderer_vulkan/vk_fence_manager.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -118,7 +119,13 @@ public:
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
+    bool MustFlushRegion(VAddr addr, u64 size) override;
    void InvalidateRegion(VAddr addr, u64 size) override;
+    void OnCPUWrite(VAddr addr, u64 size) override;
+    void SyncGuestHost() override;
+    void SignalSemaphore(GPUVAddr addr, u32 value) override;
+    void SignalSyncPoint(u32 value) override;
+    void ReleaseFences() override;
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    void FlushCommands() override;
    void TickFrame() override;
@@ -148,6 +155,7 @@ private:
    using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;

    static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
+    static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);

    void FlushWork();

@@ -240,6 +248,8 @@ private:

    RenderPassParams GetRenderPassParams(Texceptions texceptions) const;

+    VkBuffer DefaultBuffer();
+
    Core::System& system;
    Core::Frontend::EmuWindow& render_window;
    VKScreenInfo& screen_info;
@@ -261,8 +271,12 @@ private:
    VKPipelineCache pipeline_cache;
    VKBufferCache buffer_cache;
    VKSamplerCache sampler_cache;
+    VKFenceManager fence_manager;
    VKQueryCache query_cache;

+    vk::Buffer default_buffer;
+    VKMemoryCommit default_buffer_commit;
+
    std::array<View, Maxwell::NumRenderTargets> color_attachments;
    View zeta_attachment;

--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -2,9 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <cstring>
 #include <memory>
 #include <vector>

+#include "common/cityhash.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_device.h"
@@ -13,6 +15,15 @@

 namespace Vulkan {

+std::size_t RenderPassParams::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
 VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}

 VKRenderPassCache::~VKRenderPassCache() = default;
@@ -27,20 +38,22 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
 }

 vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
+    using namespace VideoCore::Surface;
    std::vector<VkAttachmentDescription> descriptors;
    std::vector<VkAttachmentReference> color_references;

-    for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
-        const auto attachment = params.color_attachments[rt];
-        const auto format =
-            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
+    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
+    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
+        const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
+        const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
+        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<u32>(attachment.pixel_format));
+                   static_cast<int>(pixel_format));

-        // TODO(Rodrigo): Add eMayAlias when it's needed.
-        const auto color_layout = attachment.is_texception
-                                      ? VK_IMAGE_LAYOUT_GENERAL
-                                      : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+        // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
+        const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
+                                               ? VK_IMAGE_LAYOUT_GENERAL
+                                               : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        VkAttachmentDescription& descriptor = descriptors.emplace_back();
        descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
        descriptor.format = format.format;
@@ -58,15 +71,17 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
    }

    VkAttachmentReference zeta_attachment_ref;
-    if (params.has_zeta) {
-        const auto format =
-            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
+    const bool has_zeta = params.zeta_format != 0;
+    if (has_zeta) {
+        const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
+        const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
+        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
        ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<u32>(params.zeta_pixel_format));
+                   static_cast<int>(pixel_format));

-        const auto zeta_layout = params.zeta_texception
-                                     ? VK_IMAGE_LAYOUT_GENERAL
-                                     : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+        const VkImageLayout zeta_layout = params.zeta_texception != 0
+                                              ? VK_IMAGE_LAYOUT_GENERAL
+                                              : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
        VkAttachmentDescription& descriptor = descriptors.emplace_back();
        descriptor.flags = 0;
        descriptor.format = format.format;
@@ -78,7 +93,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
        descriptor.initialLayout = zeta_layout;
        descriptor.finalLayout = zeta_layout;

-        zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size());
+        zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
        zeta_attachment_ref.layout = zeta_layout;
    }

@@ -90,7 +105,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
    subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
    subpass_description.pColorAttachments = color_references.data();
    subpass_description.pResolveAttachments = nullptr;
-    subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr;
+    subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
    subpass_description.preserveAttachmentCount = 0;
    subpass_description.pPreserveAttachments = nullptr;

@@ -101,7 +116,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param
        stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    }

-    if (params.has_zeta) {
+    if (has_zeta) {
        access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -4,8 +4,7 @@

 #pragma once

-#include <memory>
-#include <tuple>
+#include <type_traits>
 #include <unordered_map>

 #include <boost/container/static_vector.hpp>
@@ -19,51 +18,25 @@ namespace Vulkan {

 class VKDevice;

-// TODO(Rodrigo): Optimize this structure for faster hashing
-
 struct RenderPassParams {
-    struct ColorAttachment {
-        u32 index = 0;
-        VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
-        bool is_texception = false;
+    std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
+    u8 num_color_attachments;
+    u8 texceptions;

-        std::size_t Hash() const noexcept {
-            return static_cast<std::size_t>(pixel_format) |
-                   static_cast<std::size_t>(is_texception) << 6 |
-                   static_cast<std::size_t>(index) << 7;
-        }
+    u8 zeta_format;
+    u8 zeta_texception;

-        bool operator==(const ColorAttachment& rhs) const noexcept {
-            return std::tie(index, pixel_format, is_texception) ==
-                   std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
-        }
-    };
+    std::size_t Hash() const noexcept;

-    boost::container::static_vector<ColorAttachment,
-                                    Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
-        color_attachments{};
-    // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
-    VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
-    bool has_zeta = false;
-    bool zeta_texception = false;
+    bool operator==(const RenderPassParams& rhs) const noexcept;

-    std::size_t Hash() const noexcept {
-        std::size_t hash = 0;
-        for (const auto& rt : color_attachments) {
-            boost::hash_combine(hash, rt.Hash());
-        }
-        boost::hash_combine(hash, zeta_pixel_format);
-        boost::hash_combine(hash, has_zeta);
-        boost::hash_combine(hash, zeta_texception);
-        return hash;
-    }
-
-    bool operator==(const RenderPassParams& rhs) const {
-        return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
-               std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
-                        rhs.zeta_texception);
+    bool operator!=(const RenderPassParams& rhs) const noexcept {
+        return !operator==(rhs);
    }
 };
+static_assert(std::has_unique_object_representations_v<RenderPassParams>);
+static_assert(std::is_trivially_copyable_v<RenderPassParams>);
+static_assert(std::is_trivially_constructible_v<RenderPassParams>);

 } // namespace Vulkan

--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -39,8 +39,7 @@ VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator

 VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
                                         VKScheduler& scheduler)
-    : device{device}, memory_manager{memory_manager}, scheduler{scheduler},
-      is_device_integrated{device.IsIntegrated()} {}
+    : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}

 VKStagingBufferPool::~VKStagingBufferPool() = default;

@@ -56,9 +55,7 @@ void VKStagingBufferPool::TickFrame() {
    current_delete_level = (current_delete_level + 1) % NumLevels;

    ReleaseCache(true);
-    if (!is_device_integrated) {
-        ReleaseCache(false);
-    }
+    ReleaseCache(false);
 }

 VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
@@ -81,7 +78,7 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
    ci.size = 1ULL << log2;
    ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-               VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+               VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    ci.queueFamilyIndexCount = 0;
    ci.pQueueFamilyIndices = nullptr;
@@ -95,7 +92,7 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
 }

 VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
-    return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
+    return host_visible ? host_staging_buffers : device_staging_buffers;
 }

 void VKStagingBufferPool::ReleaseCache(bool host_visible) {
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -71,7 +71,6 @@ private:
    const VKDevice& device;
    VKMemoryManager& memory_manager;
    VKScheduler& scheduler;
-    const bool is_device_integrated;

    StagingBuffersCache host_staging_buffers;
    StagingBuffersCache device_staging_buffers;
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <algorithm>
 #include <exception>
 #include <memory>
 #include <optional>
@@ -16,6 +17,23 @@ namespace Vulkan::vk {

 namespace {

+void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
+    std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) {
+        // This will call Vulkan more than needed, but these calls are cheap.
+        const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties();
+        const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties();
+
+        // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest.
+        const bool preferred =
+            (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
+             rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) ||
+            (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) ||
+            (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) ||
+            (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086);
+        return !preferred;
+    });
+}
+
 template <typename T>
 bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name,
          VkInstance instance = nullptr) noexcept {
@@ -63,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
    X(vkCmdSetBlendConstants);
    X(vkCmdSetDepthBias);
    X(vkCmdSetDepthBounds);
+    X(vkCmdSetEvent);
    X(vkCmdSetScissor);
    X(vkCmdSetStencilCompareMask);
    X(vkCmdSetStencilReference);
@@ -75,6 +94,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
    X(vkCreateDescriptorPool);
    X(vkCreateDescriptorSetLayout);
    X(vkCreateDescriptorUpdateTemplateKHR);
+    X(vkCreateEvent);
    X(vkCreateFence);
    X(vkCreateFramebuffer);
    X(vkCreateGraphicsPipelines);
@@ -93,6 +113,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
    X(vkDestroyDescriptorPool);
    X(vkDestroyDescriptorSetLayout);
    X(vkDestroyDescriptorUpdateTemplateKHR);
+    X(vkDestroyEvent);
    X(vkDestroyFence);
    X(vkDestroyFramebuffer);
    X(vkDestroyImage);
@@ -112,6 +133,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
    X(vkFreeMemory);
    X(vkGetBufferMemoryRequirements);
    X(vkGetDeviceQueue);
+    X(vkGetEventStatus);
    X(vkGetFenceStatus);
    X(vkGetImageMemoryRequirements);
    X(vkGetQueryPoolResults);
@@ -269,6 +291,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
    dld.vkFreeMemory(device, handle, nullptr);
 }

+void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
+    dld.vkDestroyEvent(device, handle, nullptr);
+}
+
 void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
    dld.vkDestroyFence(device, handle, nullptr);
 }
@@ -381,7 +407,8 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices(
    if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) {
        return std::nullopt;
    }
-    return physical_devices;
+    SortPhysicalDevices(physical_devices, *dld);
+    return std::make_optional(std::move(physical_devices));
 }

 DebugCallback Instance::TryCreateDebugCallback(
@@ -599,6 +626,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
    return ShaderModule(object, handle, *dld);
 }

+Event Device::CreateEvent() const {
+    VkEventCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    VkEvent object;
+    Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
+    return Event(object, handle, *dld);
+}
+
 SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
    VkSwapchainKHR object;
    Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
    PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
    PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
    PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
+    PFN_vkCmdSetEvent vkCmdSetEvent;
    PFN_vkCmdSetScissor vkCmdSetScissor;
    PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
    PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
    PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
    PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
+    PFN_vkCreateEvent vkCreateEvent;
    PFN_vkCreateFence vkCreateFence;
    PFN_vkCreateFramebuffer vkCreateFramebuffer;
    PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
    PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
    PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
    PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
+    PFN_vkDestroyEvent vkDestroyEvent;
    PFN_vkDestroyFence vkDestroyFence;
    PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
    PFN_vkDestroyImage vkDestroyImage;
@@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
    PFN_vkFreeMemory vkFreeMemory;
    PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
    PFN_vkGetDeviceQueue vkGetDeviceQueue;
+    PFN_vkGetEventStatus vkGetEventStatus;
    PFN_vkGetFenceStatus vkGetFenceStatus;
    PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
    PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
@@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
+void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@@ -648,6 +653,15 @@ public:
    std::vector<VkImage> GetImages() const;
 };

+class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
+    using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
+
+public:
+    VkResult GetStatus() const noexcept {
+        return dld->vkGetEventStatus(owner, handle);
+    }
+};
+
 class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
    using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;

@@ -695,6 +709,8 @@ public:

    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;

+    Event CreateEvent() const;
+
    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;

    DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@@ -938,6 +954,10 @@ public:
        dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
    }

+    void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
+        dld->vkCmdSetEvent(handle, event, stage_flags);
+    }
+
    void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
                                         const VkDeviceSize* offsets,
                                         const VkDeviceSize* sizes) const noexcept {
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
David Marcec	78e5f162e2	fs-srv: GetFreeSpaceSize & GetTotalSpaceSize Closes #3533 Turns out the functions were already implemented but just never added	2020-04-30 23:59:57 +10:00
David	3e9cafbee5	Merge pull request #3826 from MerryMage/update-dynarmic externals: Update dynarmic to e7166e8b	2020-04-29 23:30:00 +10:00
MerryMage	b7a69501cd	externals: Update dynarmic to e7166e8b	2020-04-29 14:25:53 +01:00
bunnei	8e64fb3225	Merge pull request #3771 from benru/dump-romfs-with-updates Dump RomFS command to include Updates	2020-04-28 21:54:06 -04:00
Mat M	c3c3e07263	Merge pull request #3818 from ogniK5377/err-log Don't fail silently for vi, sm, set and ns services	2020-04-28 21:41:13 -04:00
Mat M	5cb531b6cf	Merge pull request #3783 from lioncash/pointer physical_core: Make use of std::make_unique instead of std::make_shared in ctor	2020-04-28 21:38:02 -04:00
David Marcec	b4dbf1b9c7	Don't fail silently for vi, sm, set and ns services	2020-04-29 11:15:21 +10:00
bunnei	72b73d22ab	Merge pull request #3784 from ReinUsesLisp/shader-memory-util shader/memory_util: Deduplicate code	2020-04-28 12:05:50 -04:00
Mat M	961427037c	Merge pull request #3814 from ogniK5377/getinfo-err kernel: Bad GetInfo ids should not be marked as stubs	2020-04-28 11:25:34 -04:00
David Marcec	2261cf24af	kernel: Bad GetInfo ids should not be marked as stubs As we currently match hardware and don't return a successful result, these should be marked as errors instead of warnings and as stubs.	2020-04-29 01:17:59 +10:00
Mat M	5e19691e41	Merge pull request #3813 from ogniK5377/err-hex-2-dec style: Change AMs & Glues error codes to be dec instead of hex	2020-04-28 10:51:44 -04:00
David Marcec	0f6064e5c9	style: Change AMs & Glues error codes to be dec instead of hex Consistency for the rest of the error codes in the codebase	2020-04-29 00:49:49 +10:00
bunnei	4dca2298f9	Merge pull request #3785 from ogniK5377/set-buffer-count-unit vi: Don't let uninitialized data pass as a response for SetBufferCount	2020-04-27 17:10:28 -04:00
Fernando Sahmkow	1517cba8ca	Merge pull request #3766 from ReinUsesLisp/renderpass-cache-key vk_renderpass_cache: Pack renderpass cache key and unify keys	2020-04-27 16:05:14 -04:00
Fernando Sahmkow	a65e9ad552	Merge pull request #3756 from ReinUsesLisp/integrated-devices vk_memory_manager: Remove unified memory model flag	2020-04-27 16:04:22 -04:00
Mat M	e8e04a4b80	Merge pull request #3797 from slashiee/hid-stub services: hid: Stub StopSevenSixAxisSensor.	2020-04-27 15:37:08 -04:00
bunnei	6c7d8073be	Merge pull request #3742 from FernandoS27/command-list Optimize GPU Command Lists and Introduce Fast GPU Time Option	2020-04-27 00:18:46 -04:00
bunnei	378aed07e9	Merge pull request #3795 from vitor-k/fix-folder Fix "Port citra-emu/citra#4956: "Fixes to game list sorting" #3611"	2020-04-26 13:55:26 -04:00
bunnei	11e1629d89	Merge pull request #3744 from lioncash/table2 service: Update function tables	2020-04-26 04:15:47 -04:00
Rodrigo Locatti	7e38dd580f	Merge pull request #3753 from ReinUsesLisp/ac-vulkan {gl,vk}_rasterizer: Add lazy default buffer maker and use it for empty buffers	2020-04-26 01:55:43 -03:00
ReinUsesLisp	ddd82ef42b	shader/memory_util: Deduplicate code Deduplicate code shared between vk_pipeline_cache and gl_shader_cache as well as shader decoder code. While we are at it, fix a bug in gl_shader_cache where compute shaders had an start offset of a stage shader.	2020-04-26 01:38:51 -03:00
bunnei	9bd70c52e5	Merge pull request #3791 from Kewlan/hotkey-config-plus configuration: Add Restore Default and Clear options to hotkeys	2020-04-26 00:33:08 -04:00
bunnei	ccda5ffa58	Merge pull request #3761 from Kewlan/stick-modifier-slider configure_input_player: Use slider to edit modifier scale	2020-04-25 22:55:41 -04:00
M&M	c1ffaa8b29	services: hid: Stub StopSevenSixAxisSensor. - Used by The Legend of Zelda: Breath of the Wild v1.6.0	2020-04-25 15:38:56 -07:00
Vitor Kiguchi	dffcff9fec	Fix the mistake in the port and update the comment for clarity	2020-04-25 15:01:20 -03:00
bunnei	c5bf693882	Merge pull request #3721 from ReinUsesLisp/sort-devices vulkan/wrapper: Sort physical devices	2020-04-25 03:27:40 -04:00
bunnei	4e37825dab	Merge pull request #3734 from ReinUsesLisp/half-float-mods decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits	2020-04-25 00:41:43 -04:00
bunnei	d1e7cf3bdc	Merge pull request #3780 from lioncash/process svc: Re-add MapProcessCodeMemory/UnmapProcessCodeMemory	2020-04-24 23:22:26 -04:00
ReinUsesLisp	527a1574c3	vk_rasterizer: Pack texceptions and color formats on invalid formats Sometimes for unknown reasons NVN games can bind a render target format of 0. This may be a yuzu bug. With the commits before this the formats were specified without being "packed", assuming all formats and texceptions will be written like in the color_attachments vector. To address this issue, iterate all render targets and pack them as they are valid. This way they will match color_attachments. - Fixes validation errors and graphical issues on Breath of the Wild.	2020-04-24 22:21:29 -03:00
Kewlan	a19c6317ef	Add Restore Defaults and Clear options to hotkeys	2020-04-24 23:50:26 +02:00
bunnei	7c8acb0025	Merge pull request #3749 from ReinUsesLisp/lea-imm shader/arithmetic_integer: Fix LEA_IMM encoding	2020-04-24 14:30:13 -04:00
Zach Hilman	6ec965ef91	Merge pull request #3786 from degasus/fix_warnings Fix -Werror=conversion and -Wdeprecated-copy issues	2020-04-24 08:54:45 -04:00
Markus Wick	e717a1df20	Fix -Wdeprecated-copy warning.	2020-04-24 09:33:04 +02:00
Markus Wick	c499c22cf7	Fix -Werror=conversion error.	2020-04-24 09:33:04 +02:00
David Marcec	03a6f3b0f4	vi: Don't let uninitialized data pass as a response for SetBufferCount Currently SetBufferCount doesn't write to the out buffer which then contains uninitialized data. This leads to non-zero data which leads to responding with different error codes	2020-04-24 17:24:58 +10:00
bunnei	8f548266cd	Merge pull request #3760 from Morph1984/trailing-filedir-separator frontend/filesystem: Add a trailing separator to the string path	2020-04-24 01:44:32 -04:00
Lioncash	cc84b48ce5	physical_core: Make use of std::make_unique instead of std::make_shared in ctor We can also allow unicorn to be constructed in 32-bit mode or 64-bit mode to satisfy the need for both interpreter instances. Allows this code to compile successfully of non x86-64 architectures.	2020-04-24 00:20:58 -04:00
Rodrigo Locatti	f24c67877b	Merge pull request #3777 from lioncash/warn page_table: Remove unused captures	2020-04-23 21:47:54 -03:00
Rodrigo Locatti	db3dcb2f64	Merge pull request #3778 from lioncash/unused-var svc: Remove unused variable	2020-04-23 21:47:24 -03:00
Rodrigo Locatti	8aa92491d5	Merge pull request #3781 from lioncash/docs shared_memory: Amend doxygen reference	2020-04-23 21:46:35 -03:00
Lioncash	ce7c02735e	shared_memory: Amend doxygen reference Amends the parameter to match the documentation reference. Resolves a -Wdocumentation warning with clang.	2020-04-23 18:42:14 -04:00
Lioncash	4730347f8e	svc: Re-add MapProcessCodeMemory/UnmapProcessCodeMemory These were lost in the re-implementation of the virtual memory manager.	2020-04-23 18:12:04 -04:00
Lioncash	bed4865981	svc: Remove unused variable Since the VMM refactor, this is no longer used or needed.	2020-04-23 17:53:26 -04:00
Lioncash	f77b5dfe81	page_table: Remove unused captures Any time the lambda function is called, the permission being used in the capture would be passed in as an argument to the lambda, so the capture is unnecessary.	2020-04-23 17:33:08 -04:00
ReinUsesLisp	dbaebd8582	decode/arithmetic_half: Fix HADD2 and HMUL2 absolute and negation bits The encoding for negation and absolute value was wrong. Extracting is now done manually. Similar instructions having different encodings is the rule, not the exception. To keep sanity and readability I preferred to extract the desired bit manually. This is implemented against nxas: `8dbc389957/table.h (L68)` That is itself tested against nvdisasm (Nvidia's official disassembler).	2020-04-23 18:29:38 -03:00
ReinUsesLisp	3e35101895	vk_rasterizer: Fix framebuffer creation validation errors Framebuffer creation was ignoring the number of color attachments.	2020-04-23 17:34:16 -03:00
ReinUsesLisp	8c37cd1af6	vk_pipeline_cache: Unify pipeline cache keys into a single operation This allows us to call Common::CityHash and std::memcmp only once for GraphicsPipelineCacheKey. While we are at it, do the same for compute.	2020-04-23 17:34:16 -03:00
ReinUsesLisp	f665c92114	vk_renderpass_cache: Pack renderpass cache key to 12 bytes	2020-04-23 17:34:16 -03:00
Rodrigo Locatti	26f2820ae3	Merge pull request #3768 from H27CK/cmd-title-fmt Fix format error in performance statistics	2020-04-23 16:14:33 -03:00
bunnei	ff0c49e1ce	kernel: memory: Improve implementation of device shared memory. (#3707 ) * kernel: memory: Improve implementation of device shared memory. * fixup! kernel: memory: Improve implementation of device shared memory. * fixup! kernel: memory: Improve implementation of device shared memory.	2020-04-23 11:37:12 -04:00
Fernando Sahmkow	5c9feaebb6	Clang Format.	2020-04-23 08:52:58 -04:00
Fernando Sahmkow	b8aef40c56	GPU: Add Fast GPU Time Option.	2020-04-23 08:52:57 -04:00
Fernando Sahmkow	18a88d19dc	Maxwell3D: Process Macros on MultiMethod.	2020-04-23 08:52:56 -04:00
Fernando Sahmkow	3fedcc2f6e	DMAPusher: Propagate multimethod writes into the engines.	2020-04-23 08:52:55 -04:00
Ben Russell	bcd0444bb9	Update src/yuzu/main.cpp with missing const Co-Authored-By: Mat M. <mathew1800@gmail.com>	2020-04-23 13:10:06 +01:00
Ben Russell	dd43d725c6	Dump RomFS command to include Updates Patch the RomFS with the selected updates before dumping. Previously the resulting RomFS only contained data from the original title. To dump the RomFS without updates the user can disable the update under Properties before choosing Dump RomFS.	2020-04-23 13:06:18 +01:00
bunnei	eb26e9e711	Merge pull request #3730 from lioncash/time service/time: Remove reliance on the global system accessor	2020-04-23 02:41:38 -04:00
bunnei	2409fedacf	Merge pull request #3697 from lioncash/declarations CMakeLists: Enable -Wmissing-declarations on Linux builds	2020-04-23 02:18:52 -04:00
Kewlan	8d917e14f8	Edit modifier_scale with the deadzone slider	2020-04-23 06:32:39 +02:00
bunnei	bf2ddb8fd5	Merge pull request #3677 from FernandoS27/better-sync Introduce Predictive Flushing and Improve ASYNC GPU	2020-04-22 22:09:38 -04:00
Fernando Sahmkow	c043ac4f13	GL_Fence_Manager: use GL_TIMEOUT_IGNORED instead of a loop,	2020-04-22 20:34:32 -04:00
Fernando Sahmkow	4e37f1b113	Address Feedback.	2020-04-22 11:36:27 -04:00
Fernando Sahmkow	39e5b72948	Async GPU: Correct flushing behavior to be similar to old async GPU behavior.	2020-04-22 11:36:26 -04:00
Fernando Sahmkow	1b3be8a8f8	MaxwellDMA: Correct copying on accuracy level.	2020-04-22 11:36:25 -04:00
Fernando Sahmkow	644588fd88	ShaderCache/PipelineCache: Cache null shaders.	2020-04-22 11:36:25 -04:00
Fernando Sahmkow	f616dc0b59	Address Feedback.	2020-04-22 11:36:24 -04:00
Fernando Sahmkow	ec2f3e48e1	Fix GCC error.	2020-04-22 11:36:23 -04:00
Fernando Sahmkow	7f44f22451	Correct Linux Compile Error.	2020-04-22 11:36:22 -04:00
Fernando Sahmkow	d2d4a6cbcf	Clang format.	2020-04-22 11:36:22 -04:00
Fernando Sahmkow	b3e5f177ba	QueryCache: Only do async flushes on async gpu.	2020-04-22 11:36:21 -04:00
Fernando Sahmkow	f4ab223ef0	Async GPU: Only do reactive flushing on Extreme Level.	2020-04-22 11:36:20 -04:00
ReinUsesLisp	b752faf2d3	vk_fence_manager: Initial implementation	2020-04-22 11:36:19 -04:00
Fernando Sahmkow	0649f05900	QueryCache: Implement Async Flushes.	2020-04-22 11:36:18 -04:00
Fernando Sahmkow	131b342130	OpenGL: Guarantee writes to Buffers.	2020-04-22 11:36:18 -04:00
Fernando Sahmkow	1fb516cd97	GPU: Implement Flush Requests for Async mode.	2020-04-22 11:36:17 -04:00
Fernando Sahmkow	b7bc3c2549	FenceManager: Manage syncpoints and rename fences to semaphores.	2020-04-22 11:36:16 -04:00
Fernando Sahmkow	96bb961a64	BufferCache: Refactor async managing.	2020-04-22 11:36:15 -04:00
Fernando Sahmkow	b10db7e4a5	FenceManager: Implement async buffer cache flushes on High settings	2020-04-22 11:36:15 -04:00
Fernando Sahmkow	4adfc9bb08	Rasterizer: Document SignalFence & ReleaseFences and setup skeletons on Vulkan.	2020-04-22 11:36:14 -04:00
Fernando Sahmkow	a081a7c855	GPU: Fix rebase errors.	2020-04-22 11:36:13 -04:00
Fernando Sahmkow	e84eb64e51	Rasterizer: Disable fence managing in synchronous gpu.	2020-04-22 11:36:12 -04:00
Fernando Sahmkow	165ae823f5	ThreadManager: Sync async reads on accurate gpu.	2020-04-22 11:36:12 -04:00
Fernando Sahmkow	57fdbd9b89	FenceManager: Implement should wait.	2020-04-22 11:36:11 -04:00
Fernando Sahmkow	1f345ebe3a	GPU: Implement a Fence Manager.	2020-04-22 11:36:10 -04:00
Fernando Sahmkow	487379c593	OpenGL: Implement Fencing backend.	2020-04-22 11:36:10 -04:00
Fernando Sahmkow	ed7e965712	TextureCache: Flush linear textures after finishing rendering.	2020-04-22 11:36:09 -04:00
Fernando Sahmkow	339d0d9d6c	GPU: Delay Fences.	2020-04-22 11:36:08 -04:00
Fernando Sahmkow	8b1eb44b3e	BufferCache: Implement OnCPUWrite and SyncGuestHost	2020-04-22 11:36:07 -04:00
Fernando Sahmkow	da8f17715d	GPU: Refactor synchronization on Async GPU	2020-04-22 11:36:06 -04:00
Fernando Sahmkow	a60a22d9c2	Texture Cache: Implement OnCPUWrite and SyncGuestHost	2020-04-22 11:36:05 -04:00
Fernando Sahmkow	084ceb925a	UI: Replasce accurate GPU option for GPU Accuracy Level	2020-04-22 11:36:04 -04:00
Morph	91f1ffd283	Add a trailing separator to the string path Fixes #3643	2020-04-22 07:33:14 -04:00
ReinUsesLisp	6f47bd9641	vk_memory_manager: Remove unified memory model flag All drivers (even Intel) seem to have a device local memory type that is not host visible. Remove this flag so all devices follow the same path. This fixes a crash when trying to map to host device local memory on integrated devices.	2020-04-21 22:06:38 -03:00
ReinUsesLisp	488ed8bd02	vk_rasterizer: Add lazy default buffer maker and use it for empty buffers Introduce a default buffer getter that lazily constructs an empty buffer. This is intended to match OpenGL's buffer 0. Use this for disabled vertex and uniform buffers. While we are at it, include vertex buffer usages for staging buffers to silence validation errors.	2020-04-21 19:55:52 -03:00
ReinUsesLisp	0bbae63300	gl_rasterizer: Fix buffers without size On NVN buffers can be enabled but have no size. According to deko3d and the behavior we see in Animal Crossing: New Horizons these buffers get the special address of 0x1000 and limit themselves to 0xfff. Implement buffers without a size by binding a null buffer to OpenGL without a side. `1d1930beea/source/maxwell/gpu_3d_vbo.cpp (L62-L63)`	2020-04-21 19:55:44 -03:00
ReinUsesLisp	8734ccb0cb	shader/arithmetic_integer: Fix LEA_IMM encoding The operand order in LEA_IMM was flipped compared to nvdisasm. Fix that using nxas as reference: `8dbc389957/table.h (L122)`	2020-04-20 21:54:59 -03:00
Lioncash	99eaa2e6f2	service: Update function tables Keeps the service function tables up to date. Updated based off information on SwitchBrew.	2020-04-20 15:53:49 -04:00
Lioncash	bfee33cce3	service/time: Remove reliance on the global system accessor Eliminates usages of the global system accessor and instead passes the existing system instance into the interfaces.	2020-04-19 16:31:28 -04:00
ReinUsesLisp	c81bf06d03	vulkan/wrapper: Sort physical devices Sort discrete GPUs over the rest, Nvidia over AMD, AMD over Intel, Intel over the rest. This gives us a somewhat consistent order when Optimus is removed (renderdoc does this when it's attached). This can break the configuration of users with an Intel GPU that manually remove Optimus on yuzu. That said, it's a very unlikely to happen.	2020-04-18 21:31:15 -03:00
Lioncash	dcbb39cdae	CMakeLists: Make missing declarations a compile-time error Ensures that our code always has its linkage explicit.	2020-04-16 23:43:41 -04:00
Lioncash	e2d8be1ca2	General: Resolve warnings related to missing declarations	2020-04-16 23:43:34 -04:00
Lioncash	fc5df84581	CMakeLists: Enable -Wmissing-declarations on Linux builds Allows catching cases where internal linkage isn't specified for helper functions when they should be marked as such.	2020-04-16 22:07:16 -04:00